#!/usr/bin/env python3
from __future__ import print_function, unicode_literals

"""
up2k.py: upload to copyparty
2022-09-05, v0.19, ed <irc.rizon.net>, MIT-Licensed
https://github.com/9001/copyparty/blob/hovudstraum/bin/up2k.py

- dependencies: requests
- supports python 2.6, 2.7, and 3.3 through 3.11

- almost zero error-handling
- but if something breaks just try again and it'll autoresume
"""
import os
import sys
import stat
import math
import time
2021-10-01 02:10:03 +02:00
import atexit
import signal
2021-09-27 23:28:34 +02:00
import base64
import hashlib
import platform
import threading
2021-10-01 22:31:24 +02:00
import datetime
2021-09-27 23:28:34 +02:00
2022-08-10 23:12:01 +02:00
# friendly install-hints for the two imports that may be missing;
# catch ImportError specifically so genuine bugs (e.g. a SyntaxError
# inside a half-installed package) are not misreported as "missing"
try:
    import argparse
except ImportError:
    m = "\nERROR: need 'argparse'; download it here:\nhttps://github.com/ThomasWaldmann/argparse/raw/master/argparse.py\n"
    print(m)
    raise

try:
    import requests
except ImportError:
    if sys.version_info > (2, 7):
        # modern python; pip can fetch it
        m = "\nERROR: need 'requests'; please run this command:\n {0} -m pip install --user requests\n"
    else:
        # ancient python; point at the exact wheels instead
        m = "requests/2.18.4 urllib3/1.23 chardet/3.0.4 certifi/2020.4.5.1 idna/2.7"
        m = ["https://pypi.org/project/" + x + "/#files" for x in m.split()]
        m = "\nERROR: need these:\n" + "\n".join(m) + "\n"

    print(m.format(sys.executable))
    sys.exit(1)
# from copyparty/__init__.py
# py2/py3 compatibility shims; note that `bytes` and `unicode` are
# deliberately rebound so the rest of the file can use them uniformly
PY2 = sys.version_info[0] == 2
if PY2:
    from Queue import Queue
    from urllib import unquote
    from urllib import quote

    sys.dont_write_bytecode = True
    bytes = str  # py2 str is the bytestring type
else:
    from queue import Queue
    from urllib.parse import unquote_to_bytes as unquote
    from urllib.parse import quote_from_bytes as quote

    unicode = str  # py3 str is the text type

# windows terminals (pre-VT) need special handling for escapes/flushing
VT100 = platform.system() != "Windows"


# one shared session so connections get reused across requests
req_ses = requests.Session()
class File(object):
    """an up2k upload task; represents a single file"""

    def __init__(self, top, rel, size, lmod):
        self.top = top  # type: bytes  # root folder the upload is anchored at
        self.rel = rel.replace(b"\\", b"/")  # type: bytes  # path below top, /-separated
        self.size = size  # type: int
        self.lmod = lmod  # type: float

        self.abs = os.path.join(top, rel)  # type: bytes
        self.name = self.rel.split(b"/")[-1].decode("utf-8", "replace")  # type: str

        # filled in by get_hashlist:
        self.cids = []  # type: list[tuple[str, int, int]]  # [ hash, ofs, sz ]
        self.kchunks = {}  # type: dict[str, tuple[int, int]]  # hash: [ ofs, sz ]

        # filled in by handshake:
        self.ucids = []  # type: list[str]  # chunks the server still wants
        self.wark = None  # type: str
        self.url = None  # type: str

        # upload progress counters:
        self.up_b = 0  # type: int
        self.up_c = 0  # type: int

        # t = "size({}) lmod({}) top({}) rel({}) abs({}) name({})\n"
        # eprint(t.format(self.size, self.lmod, self.top, self.rel, self.abs, self.name))
class FileSlice(object):
    """file-like object providing a fixed window into a file"""

    def __init__(self, file, cid):
        # type: (File, str) -> None

        self.car, self.len = file.kchunks[cid]  # chunk start offset + length
        self.cdr = self.car + self.len  # end of the window
        self.ofs = 0  # type: int  # cursor, relative to car
        self.f = open(file.abs, "rb", 512 * 1024)
        self.f.seek(self.car)

        # forward the rest of the file-like api straight to the real fd;
        # https://stackoverflow.com/questions/4359495/what-is-exactly-a-file-like-object-in-python
        # IOBase, RawIOBase, BufferedIOBase
        passthru = "close closed __enter__ __exit__ __iter__ isatty __next__ readable seekable writable"
        try:
            for attr in passthru.split():
                setattr(self, attr, getattr(self.f, attr))
        except:
            pass  # py27 probably

    def tell(self):
        return self.ofs

    def seek(self, ofs, wh=0):
        if wh == 1:
            ofs += self.ofs
        elif wh == 2:
            ofs += self.len  # provided ofs is negative

        # clamp into the window [0, len-1]
        self.ofs = max(0, min(ofs, self.len - 1))
        self.f.seek(self.car + self.ofs)

    def read(self, sz):
        sz = min(sz, self.len - self.ofs)
        ret = self.f.read(sz)
        self.ofs += len(ret)
        return ret
class MTHash(object):
    """multithreaded up2k chunk-hasher; spawns one worker thread per core"""

    def __init__(self, cores):
        self.f = None  # file currently being hashed
        self.sz = 0  # total size of that file
        self.csz = 0  # up2k chunksize for that file
        self.omutex = threading.Lock()  # allows one hash() caller at a time
        self.imutex = threading.Lock()  # keeps each seek+read pair atomic
        self.work_q = Queue()
        self.done_q = Queue()
        self.thrs = []
        for _ in range(cores):
            t = threading.Thread(target=self.worker)
            t.daemon = True
            t.start()
            self.thrs.append(t)

    def hash(self, f, fsz, chunksz, pcb=None, pcb_opaque=None):
        """hash all chunks of f in parallel; returns list of [digest, ofs, sz]"""
        with self.omutex:
            self.f = f
            self.sz = fsz
            self.csz = chunksz

            chunks = {}
            nchunks = int(math.ceil(fsz / chunksz))
            for nch in range(nchunks):
                self.work_q.put(nch)

            ex = ""
            for nch in range(nchunks):
                qe = self.done_q.get()
                try:
                    nch, dig, ofs, csz = qe
                    chunks[nch] = [dig, ofs, csz]
                except:
                    # workers put a plain error-string on failure; keep the first
                    ex = ex or qe

                if pcb:
                    pcb(pcb_opaque, chunksz * nch)

            if ex:
                raise Exception(ex)

            # results arrive in completion-order; emit in chunk-order
            ret = []
            for n in range(nchunks):
                ret.append(chunks[n])

            self.f = None
            self.csz = 0
            self.sz = 0
            return ret

    def worker(self):
        while True:
            ofs = self.work_q.get()
            try:
                v = self.hash_at(ofs)
            except Exception as ex:
                v = str(ex)  # stringly-typed error marker, see hash()

            self.done_q.put(v)

    def hash_at(self, nch):
        """hash chunk number `nch`; returns (nch, b64-digest, offset, size)"""
        f = self.f
        ofs = ofs0 = nch * self.csz
        hashobj = hashlib.sha512()
        chunk_sz = chunk_rem = min(self.csz, self.sz - ofs)
        while chunk_rem > 0:
            with self.imutex:
                f.seek(ofs)
                buf = f.read(min(chunk_rem, 1024 * 1024 * 12))

            if not buf:
                raise Exception("EOF at " + str(ofs))

            hashobj.update(buf)
            chunk_rem -= len(buf)
            ofs += len(buf)

        # up2k convention: 33 bytes of the sha512, urlsafe-base64
        digest = hashobj.digest()[:33]
        digest = base64.urlsafe_b64encode(digest).decode("utf-8")
        return nch, digest, ofs0, chunk_sz
# keep a handle on the real print before we maybe rebind it below
_print = print


def eprint(*a, **ka):
    """print to stderr, no trailing newline, flushed immediately"""
    ka["file"] = sys.stderr
    ka["end"] = ""
    if not PY2:
        ka["flush"] = True

    _print(*a, **ka)
    if PY2 or not VT100:
        # no flush-kwarg on py2, and windows consoles buffer anyways
        sys.stderr.flush()


def flushing_print(*a, **ka):
    """print-replacement which flushes stdout after every call"""
    _print(*a, **ka)
    if "flush" not in ka:
        sys.stdout.flush()


if not VT100:
    # old windows terminals: force-flush so progress output shows up
    print = flushing_print
def termsize():
    """best-effort terminal dimensions as (width, height)"""

    def ioctl_GWINSZ(fd):
        # ask the tty directly; (rows, cols) or None
        try:
            import fcntl, termios, struct

            return struct.unpack("hh", fcntl.ioctl(fd, termios.TIOCGWINSZ, "1234"))
        except:
            return None

    cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)

    if not cr:
        # stdio may be redirected; try the controlling terminal
        try:
            fd = os.open(os.ctermid(), os.O_RDONLY)
            cr = ioctl_GWINSZ(fd)
            os.close(fd)
        except:
            pass

    if not cr:
        # fall back to the environment, then to a vt100 default
        try:
            cr = (os.environ["LINES"], os.environ["COLUMNS"])
        except:
            cr = (25, 80)

    # cr is (rows, cols); return (cols, rows)
    return int(cr[1]), int(cr[0])
class CTermsize(object):
    """tracks the terminal size, re-measuring on SIGWINCH where available"""

    def __init__(self):
        self.ev = False  # resize-event pending
        self.margin = None  # bottom lines reserved by scroll_region, if any
        self.g = None  # first row of the reserved area (1-based), if any
        self.w, self.h = termsize()

        try:
            signal.signal(signal.SIGWINCH, self.ev_sig)
        except:
            # no SIGWINCH on this platform (windows); keep the initial size
            return

        thr = threading.Thread(target=self.worker)
        thr.daemon = True
        thr.start()

    def worker(self):
        # debounce: poll the event flag, re-measure at most twice a second
        while True:
            time.sleep(0.5)
            if not self.ev:
                continue

            self.ev = False
            self.w, self.h = termsize()
            if self.margin is not None:
                self.scroll_region(self.margin)

    def ev_sig(self, *a, **ka):
        # signal handler; just set a flag, worker does the actual work
        self.ev = True

    def scroll_region(self, margin):
        """reserve `margin` lines at the bottom of the terminal (None to release)"""
        self.margin = margin
        if margin is None:
            self.g = None
            eprint("\033[s\033[r\033[u")
        else:
            self.g = 1 + self.h - margin
            t = "{0}\033[{1}A".format("\n" * margin, margin)
            eprint("{0}\033[s\033[1;{1}r\033[u".format(t, self.g - 1))


# module-level singleton; instantiating registers the signal handler
ss = CTermsize()
def _scd ( err , top ) :
2021-09-27 23:28:34 +02:00
""" non-recursive listing of directory contents, along with stat() info """
2021-11-16 21:53:00 +01:00
with os . scandir ( top ) as dh :
for fh in dh :
abspath = os . path . join ( top , fh . name )
try :
yield [ abspath , fh . stat ( ) ]
2022-08-13 00:28:08 +02:00
except Exception as ex :
err . append ( ( abspath , str ( ex ) ) )
2021-11-16 21:53:00 +01:00
def _lsd ( err , top ) :
""" non-recursive listing of directory contents, along with stat() info """
for name in os . listdir ( top ) :
abspath = os . path . join ( top , name )
try :
2021-09-27 23:28:34 +02:00
yield [ abspath , os . stat ( abspath ) ]
2022-08-13 00:28:08 +02:00
except Exception as ex :
err . append ( ( abspath , str ( ex ) ) )
2021-09-27 23:28:34 +02:00
2022-09-05 18:24:18 +02:00
# prefer scandir where available: caches stat() results from the dirent walk
if hasattr(os, "scandir") and sys.version_info > (3, 6):
    statdir = _scd
else:
    statdir = _lsd
def walkdir(err, top, seen):
    """recursive statdir"""
    atop = os.path.abspath(os.path.realpath(top))
    if atop in seen:
        err.append((top, "recursive-symlink"))
        return

    # fresh list per branch; siblings must not see each other's paths
    seen = seen + [atop]
    for ap, inf in sorted(statdir(err, top)):
        if not stat.S_ISDIR(inf.st_mode):
            yield ap, inf
        else:
            try:
                for item in walkdir(err, ap, seen):
                    yield item
            except Exception as ex:
                err.append((ap, str(ex)))
def walkdirs(err, tops):
    """recursive statdir for a list of tops, yields [top, relpath, stat]"""
    sep = "{0}".format(os.sep).encode("ascii")
    for top in tops:
        # rsync-style: a trailing separator means "upload the contents",
        # otherwise the folder itself becomes the top-level remote dir
        if top[-1:] == sep:
            stop = top.rstrip(sep)
        else:
            stop = os.path.dirname(top)

        if os.path.isdir(top):
            for ap, inf in walkdir(err, top, []):
                # relpath = abspath minus the chosen anchor
                yield stop, ap[len(stop):].lstrip(sep), inf
        else:
            # single file; split into (dir, name)
            d, n = top.rsplit(sep, 1)
            yield d, n, os.stat(top)
# mostly from copyparty/util.py
def quotep(btxt):
    """url-encode a bytestring, keeping slashes; spaces become + (up2k convention)"""
    ret = quote(btxt, safe=b"/")
    if not PY2:
        # py3 quote_from_bytes returns str; callers want bytes
        ret = ret.encode("ascii")

    return ret.replace(b" ", b"+")
# from copyparty/util.py
def humansize(sz, terse=False):
    """picks a sensible unit for the given extent"""
    for unit in ("B", "KiB", "MiB", "GiB", "TiB"):
        if sz < 1024:
            break

        sz /= 1024.0

    # keep at most 4 chars of the number, dropping a trailing dot
    ret = "{0} {1}".format(str(sz)[:4].rstrip("."), unit)
    if terse:
        ret = ret.replace("iB", "").replace(" ", "")

    return ret
# from copyparty/up2k.py
def up2k_chunksize(filesize):
    """gives the correct chunksize for up2k hashing"""
    csz = 1024 * 1024
    step = 512 * 1024
    while True:
        # grow the chunksize until the file fits in 256 chunks
        # (or until the 32 MiB ceiling is hit)
        for mul in (1, 2):
            if math.ceil(filesize * 1.0 / csz) <= 256 or csz >= 32 * 1024 * 1024:
                return csz

            csz += step
            step *= mul
# mostly from copyparty/up2k.py
def get_hashlist(file, pcb, mth):
    # type: (File, any, any) -> None
    """
    generates the up2k hashlist from file contents, inserts it into `file`;
    pcb is an optional progress callback (file, bytes-done),
    mth an optional MTHash for parallel hashing of larger files
    """
    chunk_sz = up2k_chunksize(file.size)
    file_rem = file.size
    file_ofs = 0
    ret = []
    with open(file.abs, "rb", 512 * 1024) as f:
        if mth and file.size >= 1024 * 512:
            # big enough to be worth multithreading
            ret = mth.hash(f, file.size, chunk_sz, pcb, file)
            file_rem = 0  # skip the single-threaded loop below

        while file_rem > 0:
            # same as `hash_at` except for `imutex` / bufsz
            hashobj = hashlib.sha512()
            # NOTE: chunk_sz shrinks on the final (partial) chunk
            chunk_sz = chunk_rem = min(chunk_sz, file_rem)
            while chunk_rem > 0:
                buf = f.read(min(chunk_rem, 64 * 1024))
                if not buf:
                    raise Exception("EOF at " + str(f.tell()))

                hashobj.update(buf)
                chunk_rem -= len(buf)

            # up2k convention: 33 bytes of the sha512, urlsafe-base64
            digest = hashobj.digest()[:33]
            digest = base64.urlsafe_b64encode(digest).decode("utf-8")

            ret.append([digest, file_ofs, chunk_sz])
            file_ofs += chunk_sz
            file_rem -= chunk_sz

            if pcb:
                pcb(file, file_ofs)

    file.cids = ret
    file.kchunks = {}
    for k, v1, v2 in ret:
        file.kchunks[k] = [v1, v2]
def handshake(req_ses, url, file, pw, search):
    # type: (requests.Session, str, File, any, bool) -> list[str]
    """
    performs a handshake with the server; reply is:
      if search, a list of search results
      otherwise, a list of chunks to upload
    """

    req = {
        "hash": [x[0] for x in file.cids],
        "name": file.name,
        "lmod": file.lmod,
        "size": file.size,
    }
    if search:
        req["srch"] = 1

    headers = {"Content-Type": "text/plain"}  # wtf ed
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    if file.url:
        # server-assigned upload-url from a previous handshake
        url = file.url
    elif b"/" in file.rel:
        # first handshake; aim at the file's subfolder
        url += quotep(file.rel.rsplit(b"/", 1)[0]).decode("utf-8", "replace")

    # retry forever; server may be down for maintenance
    while True:
        try:
            r = req_ses.post(url, headers=headers, json=req)
            break
        except Exception as ex:
            em = str(ex).split("SSLError(")[-1]
            eprint("handshake failed, retrying: {0}\n  {1}\n\n".format(file.name, em))
            time.sleep(1)

    try:
        r = r.json()
    except:
        # not json; surface the server's error page instead
        raise Exception(r.text)

    if search:
        # second retval mirrors `sprs` below but is meaningless for searches
        return r["hits"], False

    # rebuild the url from the scheme+host plus the server-provided purl
    try:
        pre, url = url.split("://")
        pre += "://"
    except:
        pre = ""

    file.url = pre + url.split("/")[0] + r["purl"]
    file.name = r["name"]
    file.wark = r["wark"]

    # chunks the server wants, and whether its fs supports sparse files
    return r["hash"], r["sprs"]
def upload(req_ses, file, cid, pw):
    # type: (requests.Session, File, str, any) -> None
    """upload one specific chunk, `cid` (a chunk-hash)"""

    headers = {
        "X-Up2k-Hash": cid,
        "X-Up2k-Wark": file.wark,
        "Content-Type": "application/octet-stream",
    }
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    # FileSlice streams just this chunk's window of the file
    f = FileSlice(file, cid)
    try:
        r = req_ses.post(file.url, headers=headers, data=f)
        if not r:
            raise Exception(repr(r))

        # consume the body so the connection can be reused
        _ = r.content
    finally:
        f.f.close()
class Daemon(threading.Thread):
    """a Thread which never blocks interpreter shutdown"""

    def __init__(self, *a, **ka):
        super(Daemon, self).__init__(*a, **ka)
        self.daemon = True
class Ctl(object):
    """
    this will be the coordinator which runs everything in parallel
    (hashing, handshakes, uploads) but right now it's p dumb
    """

    def __init__(self, ar):
        self.ar = ar
        # resolve symlinks but keep a trailing os.sep (rsync-style semantics)
        ar.files = [
            os.path.abspath(os.path.realpath(x.encode("utf-8")))
            + (x[-1:] if x[-1:] == os.sep else "").encode("utf-8")
            for x in ar.files
        ]
        ar.url = ar.url.rstrip("/") + "/"
        if "://" not in ar.url:
            ar.url = "http://" + ar.url

        eprint("\nscanning {0} locations\n".format(len(ar.files)))

        # dry pass over the filesystem to count work for the progress display
        nfiles = 0
        nbytes = 0
        err = []
        for _, _, inf in walkdirs(err, ar.files):
            nfiles += 1
            nbytes += inf.st_size

        if err:
            eprint("\n# failed to access {0} paths:\n".format(len(err)))
            for ap, msg in err:
                if ar.v:
                    eprint("{0}\n `-{1}\n\n".format(ap.decode("utf-8", "replace"), msg))
                else:
                    eprint(ap.decode("utf-8", "replace") + "\n")

            eprint("^ failed to access those {0} paths ^\n\n".format(len(err)))
            if not ar.v:
                eprint("hint: set -v for detailed error messages\n")

            if not ar.ok:
                eprint("hint: aborting because --ok is not set\n")
                return

        eprint("found {0} files, {1}\n\n".format(nfiles, humansize(nbytes)))
        self.nfiles = nfiles
        self.nbytes = nbytes

        if ar.td:
            # user explicitly disabled cert checks; mute urllib3's nagging
            requests.packages.urllib3.disable_warnings()
            req_ses.verify = False
        if ar.te:
            req_ses.verify = ar.te

        # second (real) walk; the first one was only for counting
        self.filegen = walkdirs([], ar.files)
        if ar.safe:
            self._safe()
        else:
            # shared state for the hasher/handshaker/uploader threads
            self.hash_f = 0
            self.hash_c = 0
            self.hash_b = 0
            self.up_f = 0
            self.up_c = 0
            self.up_b = 0
            self.up_br = 0  # bytes since t0_up; feeds the speed estimate
            self.hasher_busy = 1
            self.handshaker_busy = 0
            self.uploader_busy = 0
            self.serialized = False  # set when server lacks sparse-file support

            self.t0 = time.time()
            self.t0_up = None
            self.spd = None  # smoothed upload speed

            self.mutex = threading.Lock()
            self.q_handshake = Queue()  # type: Queue[File]
            self.q_recheck = Queue()  # type: Queue[File]  # partial upload exists [...]
            self.q_upload = Queue()  # type: Queue[tuple[File, str]]

            self.st_hash = [None, "(idle, starting...)"]  # type: tuple[File, int]
            self.st_up = [None, "(idle, starting...)"]  # type: tuple[File, int]
            self.mth = MTHash(ar.J) if ar.J > 1 else None

            self._fancy()

    def _safe(self):
        """minimal basic slow boring fallback codepath"""
        search = self.ar.s
        for nf, (top, rel, inf) in enumerate(self.filegen):
            file = File(top, rel, inf.st_size, inf.st_mtime)
            upath = file.abs.decode("utf-8", "replace")

            print("{0} {1}\n  hash...".format(self.nfiles - nf, upath))
            get_hashlist(file, None, None)

            # NOTE(review): handshaker() builds this with url[:8]; [:12] looks
            # inconsistent -- verify against upstream
            burl = self.ar.url[:12] + self.ar.url[8:].split("/")[0] + "/"
            while True:
                print("  hs...")
                hs, _ = handshake(req_ses, self.ar.url, file, self.ar.a, search)
                if search:
                    if hs:
                        for hit in hs:
                            print("found: {0}{1}".format(burl, hit["rp"]))
                    else:
                        print("NOT found")
                    break

                file.ucids = hs
                if not hs:
                    # server has all chunks; done with this file
                    break

                print("{0} {1}".format(self.nfiles - nf, upath))
                ncs = len(hs)
                for nc, cid in enumerate(hs):
                    print("  {0} up {1}".format(ncs - nc, cid))
                    upload(req_ses, file, cid, self.ar.a)

            print("  ok!")

    def _fancy(self):
        """parallel codepath: hasher + j handshakers + j uploaders + status ui"""
        if VT100:
            atexit.register(self.cleanup_vt100)
            ss.scroll_region(3)

        Daemon(target=self.hasher).start()
        for _ in range(self.ar.j):
            Daemon(target=self.handshaker).start()
            Daemon(target=self.uploader).start()

        # main thread becomes the ui loop; exit after ~0.2s of full idle
        idles = 0
        while idles < 3:
            time.sleep(0.07)
            with self.mutex:
                if (
                    self.q_handshake.empty()
                    and self.q_upload.empty()
                    and not self.hasher_busy
                    and not self.handshaker_busy
                    and not self.uploader_busy
                ):
                    idles += 1
                else:
                    idles = 0

            if VT100:
                maxlen = ss.w - len(str(self.nfiles)) - 14
                txt = "\033[s\033[{0}H".format(ss.g)
                for y, k, st, f in [
                    [0, "hash", self.st_hash, self.hash_f],
                    [1, "send", self.st_up, self.up_f],
                ]:
                    txt += "\033[{0}H{1}:".format(ss.g + y, k)
                    file, arg = st
                    if not file:
                        txt += " {0}\033[K".format(arg)
                    else:
                        if y:
                            p = 100 * file.up_b / file.size
                        else:
                            p = 100 * arg / file.size

                        name = file.abs.decode("utf-8", "replace")[-maxlen:]
                        if "/" in name:
                            # color the folder part of the path
                            name = "\033[36m{0}\033[0m/{1}".format(*name.rsplit("/", 1))

                        t = "{0:6.1f}% {1} {2}\033[K"
                        txt += t.format(p, self.nfiles - f, name)

                txt += "\033[{0}H ".format(ss.g + 2)
            else:
                txt = " "

            if not self.up_br:
                # no uploads yet; estimate from hashing speed instead
                spd = self.hash_b / (time.time() - self.t0)
                eta = (self.nbytes - self.hash_b) / (spd + 1)
            else:
                # exponentially-smoothed upload speed
                spd = self.up_br / (time.time() - self.t0_up)
                spd = self.spd = (self.spd or spd) * 0.9 + spd * 0.1
                eta = (self.nbytes - self.up_b) / (spd + 1)

            spd = humansize(spd)
            eta = str(datetime.timedelta(seconds=int(eta)))
            sleft = humansize(self.nbytes - self.up_b)
            nleft = self.nfiles - self.up_f
            tail = "\033[K\033[u" if VT100 else "\r"

            t = "{0} eta @ {1}/s, {2}, {3}# left".format(eta, spd, sleft, nleft)
            # also mirror the status into the terminal title (\033]0;...)
            eprint(txt + "\033]0;{0}\033\\\r{0}{1}".format(t, tail))

    def cleanup_vt100(self):
        ss.scroll_region(None)
        eprint("\033[J\033]0;\033\\")

    def cb_hasher(self, file, ofs):
        # progress callback from get_hashlist
        self.st_hash = [file, ofs]

    def hasher(self):
        """hashes files and feeds them into q_handshake (single thread)"""
        prd = None  # previous remote dir; for -z listing cache
        ls = {}
        for top, rel, inf in self.filegen:
            if self.ar.z:
                # -z: skip files that exist remotely with ~same size+mtime
                rd = os.path.dirname(rel)
                if prd != rd:
                    prd = rd
                    headers = {}
                    if self.ar.a:
                        headers["Cookie"] = "=".join(["cppwd", self.ar.a])

                    ls = {}
                    try:
                        print("      ls ~{0}".format(rd.decode("utf-8", "replace")))
                        r = req_ses.get(
                            self.ar.url.encode("utf-8") + quotep(rd) + b"?ls",
                            headers=headers,
                        )
                        for f in r.json()["files"]:
                            rfn = f["href"].split("?")[0].encode("utf-8", "replace")
                            ls[unquote(rfn)] = f
                    except:
                        # listing failed; folder probably doesn't exist yet
                        print("   mkdir ~{0}".format(rd.decode("utf-8", "replace")))

                rf = ls.get(os.path.basename(rel), None)
                if rf and rf["sz"] == inf.st_size and abs(rf["ts"] - inf.st_mtime) <= 1:
                    self.nfiles -= 1
                    self.nbytes -= inf.st_size
                    continue

            file = File(top, rel, inf.st_size, inf.st_mtime)
            while True:
                # backpressure: don't hash too far ahead of the uploads
                with self.mutex:
                    if (
                        self.hash_b - self.up_b < 1024 * 1024 * 128
                        and self.hash_c - self.up_c < 64
                        and (
                            not self.ar.nh
                            or (
                                self.q_upload.empty()
                                and self.q_handshake.empty()
                                and not self.uploader_busy
                            )
                        )
                    ):
                        break

                time.sleep(0.05)

            get_hashlist(file, self.cb_hasher, self.mth)
            with self.mutex:
                self.hash_f += 1
                self.hash_c += len(file.cids)
                self.hash_b += file.size

            self.q_handshake.put(file)

        self.hasher_busy = 0
        self.st_hash = [None, "(finished)"]

    def handshaker(self):
        """takes files off q_handshake, splits them into chunk upload tasks"""
        search = self.ar.s
        q = self.q_handshake
        burl = self.ar.url[:8] + self.ar.url[8:].split("/")[0] + "/"
        while True:
            file = q.get()
            if not file:
                # None = drain signal; switch to the recheck queue, then stop
                if q == self.q_handshake:
                    q = self.q_recheck
                    q.put(None)
                    continue

                self.q_upload.put(None)
                break

            with self.mutex:
                self.handshaker_busy += 1

            upath = file.abs.decode("utf-8", "replace")

            try:
                hs, sprs = handshake(req_ses, self.ar.url, file, self.ar.a, search)
            except Exception as ex:
                if q == self.q_handshake and "<pre>partial upload exists" in str(ex):
                    # someone else is mid-upload; retry via the recheck queue
                    # NOTE(review): `sprs` is unbound on this path and is read
                    # below when hs==[] -- verify against upstream
                    self.q_recheck.put(file)
                    hs = []
                else:
                    raise

            if search:
                if hs:
                    for hit in hs:
                        t = "found: {0}\n  {1}{2}\n"
                        print(t.format(upath, burl, hit["rp"]), end="")
                else:
                    print("NOT found: {0}\n".format(upath), end="")

                with self.mutex:
                    self.up_f += 1
                    self.up_c += len(file.cids)
                    self.up_b += file.size
                    self.handshaker_busy -= 1

                continue

            with self.mutex:
                if not sprs and not self.serialized:
                    # no sparse files serverside; keep only one uploader alive
                    t = "server filesystem does not support sparse files; serializing uploads\n"
                    eprint(t)
                    self.serialized = True
                    for _ in range(self.ar.j - 1):
                        self.q_upload.put(None)

                if not hs:
                    # all chunks done
                    self.up_f += 1
                    self.up_c += len(file.cids) - file.up_c
                    self.up_b += file.size - file.up_b

                if hs and file.up_c:
                    # some chunks failed; roll the counters back
                    self.up_c -= len(hs)
                    file.up_c -= len(hs)
                    for cid in hs:
                        sz = file.kchunks[cid][1]
                        self.up_b -= sz
                        file.up_b -= sz

                file.ucids = hs
                self.handshaker_busy -= 1

            if not hs:
                kw = "uploaded" if file.up_b else "   found"
                print("{0} {1}".format(kw, upath))

            for cid in hs:
                self.q_upload.put([file, cid])

    def uploader(self):
        """uploads chunks off q_upload; re-handshakes a file when it's drained"""
        while True:
            task = self.q_upload.get()
            if not task:
                self.st_up = [None, "(finished)"]
                break

            with self.mutex:
                self.uploader_busy += 1
                self.t0_up = self.t0_up or time.time()

            file, cid = task
            try:
                upload(req_ses, file, cid, self.ar.a)
            except:
                eprint("upload failed, retrying: {0} #{1}\n".format(file.name, cid[:8]))
                pass  # handshake will fix it

            with self.mutex:
                sz = file.kchunks[cid][1]
                file.ucids = [x for x in file.ucids if x != cid]
                if not file.ucids:
                    # last chunk of this file; handshake again to verify
                    self.q_handshake.put(file)

                self.st_up = [file, cid]
                file.up_b += sz
                self.up_b += sz
                self.up_br += sz
                file.up_c += 1
                self.up_c += 1
                self.uploader_busy -= 1
class APF(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    """argparse formatter: show defaults, keep the epilog's raw formatting"""
    pass
def main():
    # pre-load strptime's locale tables from the main thread; python#7980
    time.strptime("19970815", "%Y%m%d")
    if not VT100:
        os.system("rem")  # enables colors

    cores = (os.cpu_count() if hasattr(os, "cpu_count") else 0) or 2
    hcores = min(cores, 3)  # 4% faster than 4+ on py3.9 @ r5-4500U

    # fmt: off
    ap = app = argparse.ArgumentParser(formatter_class=APF, epilog="""
NOTE:
source file/folder selection uses rsync syntax, meaning that:
  "foo" uploads the entire folder to URL/foo/
  "foo/" uploads the CONTENTS of the folder into URL/
""")

    ap.add_argument("url", type=unicode, help="server url, including destination folder")
    ap.add_argument("files", type=unicode, nargs="+", help="files and/or folders to process")
    ap.add_argument("-v", action="store_true", help="verbose")
    ap.add_argument("-a", metavar="PASSWORD", help="password")
    ap.add_argument("-s", action="store_true", help="file-search (disables upload)")
    ap.add_argument("--ok", action="store_true", help="continue even if some local files are inaccessible")
    ap = app.add_argument_group("performance tweaks")
    ap.add_argument("-j", type=int, metavar="THREADS", default=4, help="parallel connections")
    ap.add_argument("-J", type=int, metavar="THREADS", default=hcores, help="num cpu-cores to use for hashing; set 0 or 1 for single-core hashing")
    ap.add_argument("-nh", action="store_true", help="disable hashing while uploading")
    ap.add_argument("--safe", action="store_true", help="use simple fallback approach")
    ap.add_argument("-z", action="store_true", help="ZOOMIN' (skip uploading files if they exist at the destination with the ~same last-modified timestamp, so same as yolo / turbo with date-chk but even faster)")
    ap = app.add_argument_group("tls")
    ap.add_argument("-te", metavar="PEM_FILE", help="certificate to expect/verify")
    ap.add_argument("-td", action="store_true", help="disable certificate check")
    # fmt: on

    Ctl(app.parse_args())


if __name__ == "__main__":
    main()