// way more specific example --
// assumes all files dropped into the uploader have a youtube-id somewhere in the filename,
// locates the youtube-ids and passes them to an API which returns a list of IDs which should be uploaded
//
// also tries to find the youtube-id in the embedded metadata
//
// assumes copyparty is behind nginx as /ytq is a standalone service which must be rproxied in place
// entrypoint registered on up2k's gotallfiles chain;
// in file-search mode there is nothing to filter, so the batch is handed
// straight to the next hook -- otherwise the async analysis takes over
// (fire-and-forget: the async function re-enters the chain itself)
function up2k_namefilter(good_files, nil_files, bad_files, hooks) {
    if (up2k.uc.fsearch)
        return hooks[0](good_files, nil_files, bad_files, hooks.slice(1));

    a_up2k_namefilter(good_files, nil_files, bad_files, hooks).then(() => { });
}
// ebi('op_up2k').appendChild(mknod('input','unick'));
// naive byte-string search: returns the first offset of ptn inside buf
// (both Uint8Array-like), or -1 when not found; uses indexOf on the
// first byte to skip ahead cheaply, then verifies the rest by hand
function bstrpos(buf, ptn) {
    var len = buf.byteLength,
        first = ptn[0],
        pos = 0;

    while (true) {
        pos = buf.indexOf(first, pos);
        if (pos < 0 || pos >= len)
            return -1;

        // count how many pattern bytes match at this candidate position
        var n = 1;
        while (n < ptn.length && buf[pos + n] === ptn[n])
            n++;

        if (n === ptn.length)
            return pos;

        pos++;
    }
}
// the main async pass: collect youtube-IDs from every dropped filename and
// from embedded metadata inside video files, POST the full set of IDs to
// /ytq, then let process_id_list() pick which files to actually upload
// (renaming them into per-ID subfolders); finally re-enters the hook-chain
// through hooks[0]
async function a_up2k_namefilter(good_files, nil_files, bad_files, hooks) {
    var t0 = Date.now(),
        yt_ids = new Set(),
        // latin1 maps every byte to one char, so the ascii ID survives
        // being decoded out of arbitrary binary metadata
        textdec = new TextDecoder('latin1'),
        md_ptn = new TextEncoder().encode('youtube.com/watch?v='),
        file_ids = [], // all IDs found for each good_files
        md_only = [], // `${id} ${fn}` where ID was only found in metadata
        mofs = 0,  // biggest metadata-hit offset seen so far (stats only)
        mnchk = 0, // number of files scanned for metadata so far
        mfile = '', // name of the file that produced mofs
        // per-browser tag (first-visit timestamp) used to namespace the
        // generated upload subfolders; persisted so later sessions match
        myid = localStorage.getItem('ytid_t0');

    if (!myid)
        localStorage.setItem('ytid_t0', myid = Date.now());

    for (var a = 0; a < good_files.length; a++) {
        var [fobj, name] = good_files[a],
            cname = name, // will clobber
            sz = fobj.size,
            ids = [],
            fn_ids = [], // IDs found in the filename
            md_ids = [], // IDs found in embedded metadata
            id_ok = false,
            m;

        // all IDs found in this file
        file_ids.push(ids);

        // look for ID in filename; reduce the
        // metadata-scan intensity if the id looks safe
        m = /[\[(-]([\w-]{11})[\])]?\.(?:mp4|webm|mkv|flv|opus|ogg|mp3|m4a|aac)$/i.exec(name);
        id_ok = !!m;

        while (true) {
            // fuzzy catch-all;
            // some ytdl fork did %(title)-%(id).%(ext) ...
            m = /(?:^|[^\w])([\w-]{11})(?:$|[^\w-])/.exec(cname);
            if (!m)
                break;

            // remove the hit so the next iteration finds the next candidate
            cname = cname.replace(m[1], '');
            yt_ids.add(m[1]);
            fn_ids.unshift(m[1]);
        }

        // look for IDs in video metadata,
        if (/\.(mp4|webm|mkv|flv|opus|ogg|mp3|m4a|aac)$/i.exec(name)) {
            toast.show('inf r', 0, `analyzing file ${a + 1} / ${good_files.length} :\n${name}\n\nhave analysed ${++mnchk} files in ${(Date.now() - t0) / 1000} seconds, ${humantime((good_files.length - (a + 1)) * (((Date.now() - t0) / 1000) / mnchk))} remaining,\n\nbiggest offset so far is ${mofs}, in this file:\n\n${mfile}`);

            // check first and last 128 MiB;
            // pWxOroN5WCo.mkv @ 6edb98 (6.92M)
            // Nf-nN1wF5Xo.mp4 @ 4a98034 (74.6M)
            var chunksz = 1024 * 1024 * 2, // byte
                aspan = id_ok ? 128 : 512; // MiB

            // round the scan-span down to whole chunks, capped at half the file
            aspan = parseInt(Math.min(sz / 2, aspan * 1024 * 1024) / chunksz) * chunksz;
            if (!aspan)
                // tiny file; scan all of it in one go
                aspan = Math.min(sz, chunksz);

            // side 0 = head of file, side 1 = tail
            for (var side = 0; side < 2; side++) {
                var ofs = side ? Math.max(0, sz - aspan) : 0,
                    nchunks = aspan / chunksz;

                for (var chunk = 0; chunk < nchunks; chunk++) {
                    // +16 bytes of overlap between chunks;
                    // NOTE(review): the url-pattern + 11-char ID is 31 bytes,
                    // so an ID straddling a chunk boundary could still be
                    // missed -- confirm whether this is acceptable
                    var bchunk = await fobj.slice(ofs, ofs + chunksz + 16).arrayBuffer(),
                        uchunk = new Uint8Array(bchunk, 0, bchunk.byteLength),
                        bofs = bstrpos(uchunk, md_ptn),
                        // distance-like metric, only used for the
                        // "biggest offset" stat in the progress toast
                        absofs = Math.min(ofs + bofs, (sz - ofs) + bofs),
                        txt = bofs < 0 ? '' : textdec.decode(uchunk.subarray(bofs)),
                        m;

                    //console.log(`side ${ side }, chunk ${ chunk }, ofs ${ ofs }, bchunk ${ bchunk.byteLength }, txt ${ txt.length }`);
                    while (true) {
                        // mkv/webm have [a-z] immediately after url
                        m = /(youtube\.com\/watch\?v=[\w-]{11})/.exec(txt);
                        if (!m)
                            break;

                        // consume the hit, keep just the 11-char ID
                        txt = txt.replace(m[1], '');
                        m = m[1].slice(-11);
                        console.log(`found ${m} @${bofs}, ${name}`);
                        yt_ids.add(m);
                        if (!has(fn_ids, m) && !has(md_ids, m)) {
                            md_ids.push(m);
                            md_only.push(`${m} ${name}`);
                        }
                        else
                            // id appears several times; make it preferred
                            md_ids.unshift(m);

                        // bail after next iteration
                        // (side = 9 terminates the outer side-loop too)
                        chunk = nchunks - 1;
                        side = 9;

                        if (mofs < absofs) {
                            mofs = absofs;
                            mfile = name;
                        }
                    }

                    ofs += chunksz;
                    if (ofs >= sz)
                        break;
                }
            }
        }

        // metadata-IDs first (preferred), then any filename-IDs not already seen
        for (var yi of md_ids)
            ids.push(yi);

        for (var yi of fn_ids)
            if (!has(ids, yi))
                ids.push(yi);
    }

    if (md_only.length)
        console.log('recovered the following youtube-IDs by inspecting metadata:\n\n' + md_only.join('\n'));
    else if (yt_ids.size)
        console.log('did not discover any additional youtube-IDs by inspecting metadata; all the IDs also existed in the filenames');
    else
        console.log('failed to find any youtube-IDs at all, sorry');

    // debug/dry-run branch (intentionally disabled): show scan stats
    // and upload nothing
    if (false) {
        var msg = `finished analysing ${mnchk} files in ${(Date.now() - t0) / 1000} seconds,\n\nbiggest offset was ${mofs} in this file:\n\n${mfile}`,
            mfun = function () { toast.ok(0, msg); };

        mfun();
        setTimeout(mfun, 200);

        return hooks[0]([], [], [], hooks.slice(1));
    }

    // if the optional nickname field exists, report it to the server
    // as a message-post (best-effort; response is ignored)
    var el = ebi('unick'), unick = el ? el.value : '';
    if (unick) {
        console.log(`sending uploader nickname [${unick}]`);
        fetch(document.location, {
            method: 'POST',
            headers: { 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8' },
            body: 'msg=' + encodeURIComponent(unick)
        });
    }

    toast.inf(5, `running query for ${yt_ids.size} youtube-IDs...`);

    // ask the /ytq sidecar which of the discovered IDs it wants;
    // request body is one ID per line, response is the wanted subset
    var xhr = new XHR();
    xhr.open('POST', '/ytq', true);
    xhr.setRequestHeader('Content-Type', 'text/plain');
    xhr.onload = xhr.onerror = function () {
        if (this.status != 200)
            return toast.err(0, `sorry, database query failed ;_;\n\nplease let us know so we can look at it, thx!!\n\nerror ${this.status}: ${(this.response && this.response.err) || this.responseText}`);

        process_id_list(this.responseText);
    };
    xhr.send(Array.from(yt_ids).join('\n'));

    // given the server's newline-separated list of wanted IDs, select the
    // matching files (plus their sidecar files), rename them into
    // v<first-char>/<id>-<myid>/ subfolders, and resume the hook-chain
    function process_id_list(txt) {
        var wanted_ids = new Set(txt.trim().split('\n')),
            name_id = {}, // path-without-ext -> its wanted ID
            wanted_names = new Set(), // basenames with a wanted ID -- not including relpath
            wanted_names_scoped = {}, // basenames with a wanted ID -> list of dirs to search under
            wanted_files = new Set(); // filedrops

        // pass 1: find the video files whose IDs the server asked for
        for (var a = 0; a < good_files.length; a++) {
            var name = good_files[a][1];
            for (var b = 0; b < file_ids[a].length; b++)
                if (wanted_ids.has(file_ids[a][b])) {
                    // let the next stage handle this to prevent dupes
                    //wanted_files.add(good_files[a]);

                    var m = /(.*)\.(mp4|webm|mkv|flv|opus|ogg|mp3|m4a|aac)$/i.exec(name);
                    if (!m)
                        continue;

                    // vsplit: split relpath into [dir, basename]
                    // (assumed; helper defined elsewhere in copyparty)
                    var [rd, fn] = vsplit(m[1]);
                    if (fn in wanted_names_scoped)
                        wanted_names_scoped[fn].push(rd);
                    else
                        wanted_names_scoped[fn] = [rd];

                    wanted_names.add(fn);
                    name_id[m[1]] = file_ids[a][b];

                    break;
                }
        }

        // add all files with the same basename as each explicitly wanted file
        // (infojson/chatlog/etc when ID was discovered from metadata)
        for (var a = 0; a < good_files.length; a++) {
            var [rd, name] = vsplit(good_files[a][1]);
            // strip up to 3 extensions (e.g. ".info.json") looking
            // for a wanted basename
            for (var b = 0; b < 3; b++) {
                name = name.replace(/\.[^\.]+$/, '');
                if (!wanted_names.has(name))
                    continue;

                // only accept the match if this file sits under one of the
                // dirs where that basename's video was found
                var vid_fp = false;
                for (var c of wanted_names_scoped[name])
                    if (rd.startsWith(c))
                        vid_fp = c + name;

                if (!vid_fp)
                    continue;

                // build the destination folder from the ID + browser tag
                var subdir = name_id[vid_fp];
                subdir = `v${subdir.slice(0, 1)}/${subdir}-${myid}`;
                var newpath = subdir + '/' + good_files[a][1].split(/\//g).pop();

                // check if this file is a dupe
                for (var c of good_files)
                    if (c[1] == newpath)
                        newpath = null;

                if (!newpath)
                    break;

                // rename in place and mark for upload
                good_files[a][1] = newpath;
                wanted_files.add(good_files[a]);
                break;
            }
        }

        // continue the chain with only the wanted files
        function upload_filtered() {
            if (!wanted_files.size)
                return modal.alert('Good news -- turns out we already have all those.\n\nBut thank you for checking in!');

            hooks[0](Array.from(wanted_files), nil_files, bad_files, hooks.slice(1));
        }

        // user override: continue the chain with everything that was dropped
        function upload_all() {
            hooks[0](good_files, nil_files, bad_files, hooks.slice(1));
        }

        var n_skip = good_files.length - wanted_files.size,
            msg = `you added ${good_files.length} files; ${good_files.length == n_skip ? 'all' : n_skip} of them were skipped --\neither because we already have them,\nor because there is no youtube-ID in your filenames.\n\n<code>OK</code> / <code>Enter</code> = continue uploading just the ${wanted_files.size} files we definitely need\n\n<code>Cancel</code> / <code>ESC</code> = override the filter; upload ALL the files you added`;

        if (!n_skip)
            upload_filtered();
        else
            modal.confirm(msg, upload_filtered, upload_all);
    };
}
// register the filter at the front of up2k's gotallfiles handlers
// so it runs before the default upload processing
up2k_hooks.push(function () {
    up2k.gotallfiles.unshift(up2k_namefilter);
});
// persist/restore nickname field if present;
// polled on a timer since the field may be created after page-load
setInterval(function () {
    var field = ebi('unick');

    // leave the field alone while it is absent or being edited
    if (!field || document.activeElement == field)
        return;

    field.oninput = function () {
        localStorage.setItem('unick', field.value);
    };
    field.value = localStorage.getItem('unick') || '';
}, 1000);