#!/usr/bin/env python3
from __future__ import print_function, unicode_literals

S_VERSION = "2.10"
S_BUILD_DT = "2025-02-19"

"""
u2c.py: upload to copyparty
2021, ed <irc.rizon.net>, MIT-Licensed
https://github.com/9001/copyparty/blob/hovudstraum/bin/u2c.py

- dependencies: no
- supports python 2.6, 2.7, and 3.3 through 3.12
- if something breaks just try again and it'll autoresume
"""

import atexit
import base64
import binascii
import datetime
import hashlib
import json
import math
import os
import platform
import re
import signal
import socket
import stat
import sys
import threading
import time


EXE = bool(getattr(sys, "frozen", False))


try:
    import argparse
except:
    m = "\nERROR: need 'argparse'; download it here:\nhttps://github.com/ThomasWaldmann/argparse/raw/master/argparse.py\n"
    print(m)
    raise


PY2 = sys.version_info < (3,)
PY27 = sys.version_info > (2, 7) and PY2
PY37 = sys.version_info > (3, 7)

if PY2:
    import httplib as http_client
    from Queue import Queue
    from urllib import quote, unquote
    from urlparse import urlsplit, urlunsplit

    sys.dont_write_bytecode = True
    bytes = str
else:
    from urllib.parse import quote_from_bytes as quote
    from urllib.parse import unquote_to_bytes as unquote
    from urllib.parse import urlsplit, urlunsplit

    import http.client as http_client
    from queue import Queue

    unicode = str


WTF8 = "replace" if PY2 else "surrogateescape"

VT100 = platform.system() != "Windows"


try:
    UTC = datetime.timezone.utc
except:
    TD_ZERO = datetime.timedelta(0)

    class _UTC(datetime.tzinfo):
        def utcoffset(self, dt):
            return TD_ZERO

        def tzname(self, dt):
            return "UTC"

        def dst(self, dt):
            return TD_ZERO

    UTC = _UTC()
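

# urlsafe-b64 via binascii + bytes.translate where available (with a
# quick self-test); falls back to the stdlib urlsafe_b64encode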
try:
    _b64etl = bytes.maketrans(b"+/", b"-_")

    def ub64enc(bs):
        x = binascii.b2a_base64(bs, newline=False)
        return x.translate(_b64etl)

    ub64enc(b"a")
except:
    ub64enc = base64.urlsafe_b64encode


class BadAuth(Exception):
    pass


class Daemon(threading.Thread):
    def __init__(self, target, name=None, a=None):
        threading.Thread.__init__(self, name=name)
        self.a = a or ()
        self.fun = target
        self.daemon = True
        self.start()

    def run(self):
        try:
            signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT, signal.SIGTERM])
        except:
            pass

        self.fun(*self.a)
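

# handshake queue which lets retries (files with nonzero nhs) skip
# ahead of files that have not attempted a handshake yet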
class HSQueue(Queue):
    def _init(self, maxsize):
        from collections import deque

        self.q = deque()

    def _qsize(self):
        return len(self.q)

    def _put(self, item):
        if item and item.nhs:
            self.q.appendleft(item)
        else:
            self.q.append(item)

    def _get(self):
        return self.q.popleft()


class HCli(object):
    def __init__(self, ar):
        self.ar = ar
        url = urlsplit(ar.url)
        tls = url.scheme.lower() == "https"
        try:
            addr, port = url.netloc.split(":")
        except:
            addr = url.netloc
            port = 443 if tls else 80

        self.addr = addr
        self.port = int(port)
        self.tls = tls
        self.verify = ar.te or not ar.td
        self.conns = []
        self.hconns = []
        if tls:
            import ssl

            if not self.verify:
                self.ctx = ssl._create_unverified_context()
            elif self.verify is True:
                self.ctx = None
            else:
                self.ctx = ssl.SSLContext(ssl.PROTOCOL_TLS)
                self.ctx.load_verify_locations(self.verify)

        self.base_hdrs = {
            "Accept": "*/*",
            "Connection": "keep-alive",
            "Host": url.netloc,
            "Origin": self.ar.burl,
            "User-Agent": "u2c/%s" % (S_VERSION,),
        }

    def _connect(self, timeout):
        args = {}
        if PY37:
            args["blocksize"] = 1048576

        if not self.tls:
            C = http_client.HTTPConnection
        else:
            C = http_client.HTTPSConnection
            if self.ctx:
                args = {"context": self.ctx}

        return C(self.addr, self.port, timeout=timeout, **args)

    def req(self, meth, vpath, hdrs, body=None, ctype=None):
        now = time.time()

        hdrs.update(self.base_hdrs)
        if self.ar.a:
            hdrs["PW"] = self.ar.a

        if ctype:
            hdrs["Content-Type"] = ctype

        if meth == "POST" and CLEN not in hdrs:
            hdrs[CLEN] = (
                0 if not body else body.len if hasattr(body, "len") else len(body)
            )
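
        # connections are pooled as (last-used, conn) tuples and
        # discarded once idle for more than --cxp seconds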
        # large timeout for handshakes (safededup)
        conns = self.hconns if ctype == MJ else self.conns
        while conns and self.ar.cxp < now - conns[0][0]:
            conns.pop(0)[1].close()

        c = conns.pop()[1] if conns else self._connect(999 if ctype == MJ else 128)
        try:
            c.request(meth, vpath, body, hdrs)
            if PY27:
                rsp = c.getresponse(buffering=True)
            else:
                rsp = c.getresponse()

            data = rsp.read()
            conns.append((time.time(), c))
            return rsp.status, data.decode("utf-8")
        except http_client.BadStatusLine:
            if self.ar.cxp > 4:
                t = "\nWARNING: --cxp probably too high; reducing from %d to 4"
                print(t % (self.ar.cxp,))
                self.ar.cxp = 4

            c.close()
            raise
        except:
            c.close()
            raise


MJ = "application/json"
MO = "application/octet-stream"

CLEN = "Content-Length"

web = None  # type: HCli

links = []  # type: list[str]
linkmtx = threading.Lock()
linkfile = None


class File(object):
    """an up2k upload task; represents a single file"""

    def __init__(self, top, rel, size, lmod):
        self.top = top  # type: bytes
        self.rel = rel.replace(b"\\", b"/")  # type: bytes
        self.size = size  # type: int
        self.lmod = lmod  # type: float

        self.abs = os.path.join(top, rel)  # type: bytes
        self.name = self.rel.split(b"/")[-1].decode("utf-8", WTF8)  # type: str

        # set by get_hashlist
        self.cids = []  # type: list[tuple[str, int, int]]  # [ hash, ofs, sz ]
        self.kchunks = {}  # type: dict[str, tuple[int, int]]  # hash: [ ofs, sz ]
        self.t_hash = 0.0  # type: float

        # set by handshake
        self.recheck = False  # duplicate; redo handshake after all files done
        self.ucids = []  # type: list[str]  # chunks which need to be uploaded
        self.wark = ""  # type: str
        self.url = ""  # type: str
        self.nhs = 0  # type: int

        # set by upload
        self.t0_up = 0.0  # type: float
        self.t1_up = 0.0  # type: float
        self.nojoin = 0  # type: int
        self.up_b = 0  # type: int
        self.up_c = 0  # type: int
        self.cd = 0  # type: int


class FileSlice(object):
    """file-like object providing a fixed window into a file"""

    def __init__(self, file, cids):
        # type: (File, list[str]) -> None
        self.file = file
        self.cids = cids

        self.car, tlen = file.kchunks[cids[0]]
        for cid in cids[1:]:
            ofs, clen = file.kchunks[cid]
            if ofs != self.car + tlen:
                raise Exception(9)

            tlen += clen

        self.len = self.tlen = tlen
        self.cdr = self.car + self.len
        self.ofs = 0  # type: int

        self.f = None
        self.seek = self._seek0
        self.read = self._read0
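
    # subchunk() narrows this slice to its nth window of at most maxsz
    # bytes; callers step nth upwards until -1 (nothing to split) or -2
    # (past the end) is returned, and unsub() restores the full window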
    def subchunk(self, maxsz, nth):
        if self.tlen <= maxsz:
            return -1

        if not nth:
            self.car0 = self.car
            self.cdr0 = self.cdr

        self.car = self.car0 + maxsz * nth
        if self.car >= self.cdr0:
            return -2

        self.cdr = self.car + min(self.cdr0 - self.car, maxsz)
        self.len = self.cdr - self.car
        self.seek(0)
        return nth

    def unsub(self):
        self.car = self.car0
        self.cdr = self.cdr0
        self.len = self.tlen

    def _open(self):
        self.seek = self._seek
        self.read = self._read

        self.f = open(self.file.abs, "rb", 512 * 1024)
        self.f.seek(self.car)

        # https://stackoverflow.com/questions/4359495/what-is-exactly-a-file-like-object-in-python
        # IOBase, RawIOBase, BufferedIOBase
        funs = "close closed __enter__ __exit__ __iter__ isatty __next__ readable seekable writable"
        try:
            for fun in funs.split():
                setattr(self, fun, getattr(self.f, fun))
        except:
            pass  # py27 probably

    def close(self, *a, **ka):
        return  # until _open

    def tell(self):
        return self.ofs

    def _seek(self, ofs, wh=0):
        assert self.f  # !rm

        if wh == 1:
            ofs = self.ofs + ofs
        elif wh == 2:
            ofs = self.len + ofs  # provided ofs is negative

        if ofs < 0:
            ofs = 0
        elif ofs >= self.len:
            ofs = self.len - 1

        self.ofs = ofs
        self.f.seek(self.car + ofs)

    def _read(self, sz):
        assert self.f  # !rm

        sz = min(sz, self.len - self.ofs)
        ret = self.f.read(sz)
        self.ofs += len(ret)
        return ret

    def _seek0(self, ofs, wh=0):
        self._open()
        return self.seek(ofs, wh)

    def _read0(self, sz):
        self._open()
        return self.read(sz)
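

# multithreaded hasher: worker threads pull chunk indices off work_q,
# read the file under imutex (so seeks interleave safely), and push
# (nch, digest, ofs, chunk_sz) tuples onto done_q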
class MTHash(object):
    def __init__(self, cores):
        self.f = None
        self.sz = 0
        self.csz = 0
        self.omutex = threading.Lock()
        self.imutex = threading.Lock()
        self.work_q = Queue()
        self.done_q = Queue()
        self.thrs = []
        for _ in range(cores):
            self.thrs.append(Daemon(self.worker))

    def hash(self, f, fsz, chunksz, pcb=None, pcb_opaque=None):
        with self.omutex:
            self.f = f
            self.sz = fsz
            self.csz = chunksz

            chunks = {}
            nchunks = int(math.ceil(fsz / chunksz))
            for nch in range(nchunks):
                self.work_q.put(nch)

            ex = ""
            for nch in range(nchunks):
                qe = self.done_q.get()
                try:
                    nch, dig, ofs, csz = qe
                    chunks[nch] = [dig, ofs, csz]
                except:
                    ex = ex or qe

                if pcb:
                    pcb(pcb_opaque, chunksz * nch)

            if ex:
                raise Exception(ex)

            ret = []
            for n in range(nchunks):
                ret.append(chunks[n])

            self.f = None
            self.csz = 0
            self.sz = 0
            return ret

    def worker(self):
        while True:
            ofs = self.work_q.get()
            try:
                v = self.hash_at(ofs)
            except Exception as ex:
                v = str(ex)

            self.done_q.put(v)

    def hash_at(self, nch):
        f = self.f
        assert f

        ofs = ofs0 = nch * self.csz
        hashobj = hashlib.sha512()
        chunk_sz = chunk_rem = min(self.csz, self.sz - ofs)
        while chunk_rem > 0:
            with self.imutex:
                f.seek(ofs)
                buf = f.read(min(chunk_rem, 1024 * 1024 * 12))

            if not buf:
                raise Exception("EOF at " + str(ofs))

            hashobj.update(buf)
            chunk_rem -= len(buf)
            ofs += len(buf)

        digest = ub64enc(hashobj.digest()[:33]).decode("utf-8")
        return nch, digest, ofs0, chunk_sz


_print = print


def safe_print(*a, **ka):
    ka["end"] = ""
    zs = " ".join([unicode(x) for x in a])
    _print(zs + "\n", **ka)


def eprint(*a, **ka):
    ka["file"] = sys.stderr
    ka["end"] = ""
    if not PY2:
        ka["flush"] = True

    _print(*a, **ka)
    if PY2 or not VT100:
        sys.stderr.flush()


def flushing_print(*a, **ka):
    try:
        safe_print(*a, **ka)
    except:
        v = " ".join(str(x) for x in a)
        v = v.encode("ascii", "replace").decode("ascii")
        safe_print(v, **ka)

    if "flush" not in ka:
        sys.stdout.flush()


print = safe_print if VT100 else flushing_print


def termsize():
    env = os.environ

    def ioctl_GWINSZ(fd):
        try:
            import fcntl
            import struct
            import termios

            r = struct.unpack(b"hh", fcntl.ioctl(fd, termios.TIOCGWINSZ, b"AAAA"))
            return r[::-1]
        except:
            return None

    cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
    if not cr:
        try:
            fd = os.open(os.ctermid(), os.O_RDONLY)
            cr = ioctl_GWINSZ(fd)
            os.close(fd)
        except:
            pass

    try:
        return cr or (int(env["COLUMNS"]), int(env["LINES"]))
    except:
        return 80, 25


class CTermsize(object):
    def __init__(self):
        self.ev = False
        self.margin = None
        self.g = None
        self.w, self.h = termsize()
        try:
            signal.signal(signal.SIGWINCH, self.ev_sig)
        except:
            return

        Daemon(self.worker)

    def worker(self):
        while True:
            time.sleep(0.5)
            if not self.ev:
                continue

            self.ev = False
            self.w, self.h = termsize()
            if self.margin is not None:
                self.scroll_region(self.margin)

    def ev_sig(self, *a, **ka):
        self.ev = True
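
    # \033[s / \033[u save and restore the cursor; \033[1;Nr sets the
    # scroll region so the bottom `margin` lines become a sticky status
    # area, and \033[r (margin=None) resets it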
    def scroll_region(self, margin):
        self.margin = margin
        if margin is None:
            self.g = None
            eprint("\033[s\033[r\033[u")
        else:
            self.g = 1 + self.h - margin
            t = "%s\033[%dA" % ("\n" * margin, margin)
            eprint("%s\033[s\033[1;%dr\033[u" % (t, self.g - 1))


ss = CTermsize()


def undns(url):
    usp = urlsplit(url)
    hn = usp.hostname
    gai = None
    eprint("resolving host [%s] ..." % (hn,))
    try:
        gai = socket.getaddrinfo(hn, None)
        hn = gai[0][4][0]
    except KeyboardInterrupt:
        raise
    except:
        t = "\n\033[31mfailed to resolve upload destination host;\033[0m\ngai=%r\n"
        eprint(t % (gai,))
        raise

    if usp.port:
        hn = "%s:%s" % (hn, usp.port)
    if usp.username or usp.password:
        hn = "%s:%s@%s" % (usp.username, usp.password, hn)

    usp = usp._replace(netloc=hn)
    url = urlunsplit(usp)
    eprint("%s\n" % (url,))
    return url


def _scd(err, top):
    """non-recursive listing of directory contents, along with stat() info"""
    with os.scandir(top) as dh:
        for fh in dh:
            abspath = os.path.join(top, fh.name)
            try:
                yield [abspath, fh.stat()]
            except Exception as ex:
                err.append((abspath, str(ex)))


def _lsd(err, top):
    """non-recursive listing of directory contents, along with stat() info"""
    for name in os.listdir(top):
        abspath = os.path.join(top, name)
        try:
            yield [abspath, os.stat(abspath)]
        except Exception as ex:
            err.append((abspath, str(ex)))


if hasattr(os, "scandir") and sys.version_info > (3, 6):
    statdir = _scd
else:
    statdir = _lsd


def walkdir(err, top, excl, seen):
    """recursive statdir"""
    atop = os.path.abspath(os.path.realpath(top))
    if atop in seen:
        err.append((top, "recursive-symlink"))
        return

    seen = seen[:] + [atop]
    for ap, inf in sorted(statdir(err, top)):
        if excl.match(ap):
            continue
        if stat.S_ISDIR(inf.st_mode):
            yield ap, inf
            try:
                for x in walkdir(err, ap, excl, seen):
                    yield x
            except Exception as ex:
                err.append((ap, str(ex)))
        elif stat.S_ISREG(inf.st_mode):
            yield ap, inf
        else:
            err.append((ap, "irregular filetype 0%o" % (inf.st_mode,)))


def walkdirs(err, tops, excl):
    """recursive statdir for a list of tops, yields [top, relpath, stat]"""
    sep = "{0}".format(os.sep).encode("ascii")
    if not VT100:
        excl = excl.replace("/", r"\\")
        za = []
        for td in tops:
            try:
                ap = os.path.abspath(os.path.realpath(td))
                if td[-1:] in (b"\\", b"/"):
                    ap += sep
            except:
                # maybe cpython #88013 (ok)
                ap = td

            za.append(ap)

        za = [x if x.startswith(b"\\\\") else b"\\\\?\\" + x for x in za]
        za = [x.replace(b"/", b"\\") for x in za]
        tops = za

    ptn = re.compile(excl.encode("utf-8") or b"\n", re.I)

    for top in tops:
        isdir = os.path.isdir(top)
        if top[-1:] == sep:
            stop = top.rstrip(sep)
            yield stop, b"", os.stat(stop)
        else:
            stop, dn = os.path.split(top)
            if isdir:
                yield stop, dn, os.stat(stop)

        if isdir:
            for ap, inf in walkdir(err, top, ptn, []):
                yield stop, ap[len(stop):].lstrip(sep), inf
        else:
            d, n = top.rsplit(sep, 1)
            yield d, n, os.stat(top)


# mostly from copyparty/util.py
def quotep(btxt):
    # type: (bytes) -> bytes
    quot1 = quote(btxt, safe=b"/")
    if not PY2:
        quot1 = quot1.encode("ascii")

    return quot1.replace(b" ", b"%20")  # type: ignore


# from copyparty/util.py
def humansize(sz, terse=False):
    """picks a sensible unit for the given extent"""
    for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
        if sz < 1024:
            break

        sz /= 1024.0

    ret = " ".join([str(sz)[:4].rstrip("."), unit])
    if not terse:
        return ret

    return ret.replace("iB", "").replace(" ", "")


# from copyparty/up2k.py
def up2k_chunksize(filesize):
    """gives the correct chunksize for up2k hashing"""
    chunksize = 1024 * 1024
    stepsize = 512 * 1024
    while True:
        for mul in [1, 2]:
            nchunks = math.ceil(filesize * 1.0 / chunksize)
            if nchunks <= 256 or (chunksize >= 32 * 1024 * 1024 and nchunks <= 4096):
                return chunksize

            chunksize += stepsize
            stepsize *= mul
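

# e.g. a 1 GiB file lands on 4 MiB chunks (exactly 256 of them);
# once chunks reach 32 MiB, up to 4096 chunks are allowed instead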


# mostly from copyparty/up2k.py
def get_hashlist(file, pcb, mth):
    # type: (File, Any, Any) -> None
    """generates the up2k hashlist from file contents, inserts it into `file`"""
    chunk_sz = up2k_chunksize(file.size)
    file_rem = file.size
    file_ofs = 0
    ret = []
    with open(file.abs, "rb", 512 * 1024) as f:
        t0 = time.time()

        if mth and file.size >= 1024 * 512:
            ret = mth.hash(f, file.size, chunk_sz, pcb, file)
            file_rem = 0

        while file_rem > 0:
            # same as `hash_at` except for `imutex` / bufsz
            hashobj = hashlib.sha512()
            chunk_sz = chunk_rem = min(chunk_sz, file_rem)
            while chunk_rem > 0:
                buf = f.read(min(chunk_rem, 64 * 1024))
                if not buf:
                    raise Exception("EOF at " + str(f.tell()))

                hashobj.update(buf)
                chunk_rem -= len(buf)

            digest = ub64enc(hashobj.digest()[:33]).decode("utf-8")

            ret.append([digest, file_ofs, chunk_sz])
            file_ofs += chunk_sz
            file_rem -= chunk_sz

            if pcb:
                pcb(file, file_ofs)

    file.t_hash = time.time() - t0
    file.cids = ret
    file.kchunks = {}
    for k, v1, v2 in ret:
        if k not in file.kchunks:
            file.kchunks[k] = [v1, v2]


def printlink(ar, purl, name, fk):
    if not name:
        url = purl  # srch
    else:
        name = quotep(name.encode("utf-8", WTF8)).decode("utf-8")
        if fk:
            url = "%s%s?k=%s" % (purl, name, fk)
        else:
            url = "%s%s" % (purl, name)

    url = "%s/%s" % (ar.burl, url.lstrip("/"))
    with linkmtx:
        if ar.u:
            links.append(url)
        if ar.ud:
            print(url)
        if linkfile:
            zs = "%s\n" % (url,)
            zb = zs.encode("utf-8", "replace")
            linkfile.write(zb)


def handshake(ar, file, search):
    # type: (argparse.Namespace, File, bool) -> tuple[list[str], bool]
    """
    performs a handshake with the server; reply is:
      if search, a list of search results
      otherwise, a list of chunks to upload
    """
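
    # the request body is the json built below; the reply includes
    # "purl" (upload url), "name", "wark", "sprs" (filesystem supports
    # sparse files) and "hash": the chunks the server still wants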

    req = {
        "hash": [x[0] for x in file.cids],
        "name": file.name,
        "lmod": file.lmod,
        "size": file.size,
    }
    if search:
        req["srch"] = 1
    else:
        if ar.touch:
            req["umod"] = True
        if ar.owo:
            req["replace"] = "mt"
        elif ar.ow:
            req["replace"] = True

    file.recheck = False
    if file.url:
        url = file.url
    else:
        if b"/" in file.rel:
            url = quotep(file.rel.rsplit(b"/", 1)[0]).decode("utf-8")
        else:
            url = ""

        url = ar.vtop + url

    while True:
        sc = 600
        txt = ""
        t0 = time.time()
        try:
            zs = json.dumps(req, separators=(",\n", ": "))
            sc, txt = web.req("POST", url, {}, zs.encode("utf-8"), MJ)
            if sc < 400:
                break

            raise Exception("http %d: %s" % (sc, txt))

        except Exception as ex:
            em = str(ex).split("SSLError(")[-1].split("\nURL: ")[0].strip()

            if (
                sc == 422
                or "<pre>partial upload exists at a different" in txt
                or "<pre>source file busy; please try again" in txt
            ):
                file.recheck = True
                return [], False
            elif sc == 409 or "<pre>upload rejected, file already exists" in txt:
                return [], False
            elif sc == 403:
                print("\nERROR: login required, or wrong password:\n%s" % (txt,))
                raise BadAuth()

            t = "handshake failed, retrying: %s\nt0=%.3f t1=%.3f td=%.3f\n%s\n\n"
            now = time.time()
            eprint(t % (file.name, t0, now, now - t0, em))
            time.sleep(ar.cd)

    try:
        r = json.loads(txt)
    except:
        raise Exception(txt)

    if search:
        if ar.uon and r["hits"]:
            printlink(ar, r["hits"][0]["rp"], "", "")

        return r["hits"], False

    file.url = quotep(r["purl"].encode("utf-8", WTF8)).decode("utf-8")
    file.name = r["name"]
    file.wark = r["wark"]

    if ar.uon and not r["hash"]:
        printlink(ar, file.url, r["name"], r.get("fk"))

    return r["hash"], r["sprs"]


def upload(fsl, stats, maxsz):
    # type: (FileSlice, str, int) -> None
    """upload a range of file data, defined by one or more `cid` (chunk-hash)"""

    # chunk stitching: rather than sending each chunk as a separate
    # HTTP request, sibling chunks are fused into larger POSTs; the
    # x-up2k-hash header then takes a comma-separated list of chunk
    # hashes which must form one consecutive range of file data
    ctxt = fsl.cids[0]
    if len(fsl.cids) > 1:
        n = 192 // len(fsl.cids)
        n = 9 if n > 9 else 2 if n < 2 else n
        zsl = [zs[:n] for zs in fsl.cids[1:]]
        ctxt += ",%d,%s" % (n, "".join(zsl))
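
    # e.g. a 3-chunk slice becomes "<cid0>,9,<cid1[:9]><cid2[:9]>";
    # the truncation width n, then the sibling prefixes concatenated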

    headers = {
        "X-Up2k-Hash": ctxt,
        "X-Up2k-Wark": fsl.file.wark,
    }

    if stats:
        headers["X-Up2k-Stat"] = stats

    nsub = 0
    try:
        while nsub != -1:
            nsub = fsl.subchunk(maxsz, nsub)
            if nsub == -2:
                return
            if nsub >= 0:
                headers["X-Up2k-Subc"] = str(maxsz * nsub)
                headers.pop(CLEN, None)
                nsub += 1

            sc, txt = web.req("POST", fsl.file.url, headers, fsl, MO)

            if sc == 400:
                if (
                    "already being written" in txt
                    or "already got that" in txt
                    or "only sibling chunks" in txt
                ):
                    fsl.file.nojoin = 1

            if sc >= 400:
                raise Exception("http %s: %s" % (sc, txt))
    finally:
        if fsl.f:
            fsl.f.close()
        if nsub != -1:
            fsl.unsub()


class Ctl(object):
    """
    the coordinator which runs everything in parallel
    (hashing, handshakes, uploads)
    """

    def _scan(self):
        ar = self.ar
        eprint("\nscanning %d locations\n" % (len(ar.files),))
        nfiles = 0
        nbytes = 0
        err = []
        for _, _, inf in walkdirs(err, ar.files, ar.x):
            if stat.S_ISDIR(inf.st_mode):
                continue

            nfiles += 1
            nbytes += inf.st_size

        if err:
            eprint("\n# failed to access %d paths:\n" % (len(err),))
            for ap, msg in err:
                if ar.v:
                    eprint("%s\n`-%s\n\n" % (ap.decode("utf-8", "replace"), msg))
                else:
                    eprint(ap.decode("utf-8", "replace") + "\n")

            eprint("^ failed to access those %d paths ^\n\n" % (len(err),))

            if not ar.v:
                eprint("hint: set -v for detailed error messages\n")

            if not ar.ok:
                eprint("hint: aborting because --ok is not set\n")
                return

        eprint("found %d files, %s\n\n" % (nfiles, humansize(nbytes)))
        return nfiles, nbytes

    def __init__(self, ar, stats=None):
        self.ok = False
        self.panik = 0
        self.errs = 0
        self.ar = ar
        self.stats = stats or self._scan()
        if not self.stats:
            return

        self.nfiles, self.nbytes = self.stats
        self.filegen = walkdirs([], ar.files, ar.x)
        self.recheck = []  # type: list[File]

        if ar.safe:
            self._safe()
        else:
            self.at_hash = 0.0
            self.at_up = 0.0
            self.at_upr = 0.0
            self.hash_f = 0
            self.hash_c = 0
            self.hash_b = 0
            self.up_f = 0
            self.up_c = 0
            self.up_b = 0  # num bytes handled
            self.up_br = 0  # num bytes actually transferred
            self.uploader_busy = 0
            self.serialized = False

            self.t0 = time.time()
            self.t0_up = None
            self.spd = None
            self.eta = "99:99:99"

            self.mutex = threading.Lock()
            self.exit_cond = threading.Condition()
            self.uploader_alive = ar.j
            self.handshaker_alive = ar.j
            self.q_handshake = HSQueue()  # type: Queue[File]
            self.q_upload = Queue()  # type: Queue[FileSlice]

            self.st_hash = [None, "(idle, starting...)"]  # type: tuple[File, int]
            self.st_up = [None, "(idle, starting...)"]  # type: tuple[File, int]

            self.mth = MTHash(ar.J) if ar.J > 1 else None

            self._fancy()

        self.ok = not self.errs

    def _safe(self):
        """minimal basic slow boring fallback codepath"""
        search = self.ar.s
        nf = 0
        for top, rel, inf in self.filegen:
            if stat.S_ISDIR(inf.st_mode) or not rel:
                continue

            nf += 1
            file = File(top, rel, inf.st_size, inf.st_mtime)
            upath = file.abs.decode("utf-8", "replace")

            print("%d %s\nhash..." % (self.nfiles - nf, upath))
            get_hashlist(file, None, None)

            while True:
                print("hs...")
                try:
                    hs, _ = handshake(self.ar, file, search)
                except BadAuth:
                    sys.exit(1)

                if search:
                    if hs:
                        for hit in hs:
                            print("found: %s/%s" % (self.ar.burl, hit["rp"]))
                    else:
                        print("NOT found")

                    break

                file.ucids = hs
                if not hs:
                    break

                print("%d %s" % (self.nfiles - nf, upath))
                ncs = len(hs)
                for nc, cid in enumerate(hs):
                    print("%d up %s" % (ncs - nc, cid))
                    stats = "%d/0/0/%d" % (nf, self.nfiles - nf)
                    fslice = FileSlice(file, [cid])
                    upload(fslice, stats, self.ar.szm)

            print("ok!")

            if file.recheck:
                self.recheck.append(file)

        if not self.recheck:
            return

        eprint("finalizing %d duplicate files\n" % (len(self.recheck),))
        for file in self.recheck:
            handshake(self.ar, file, False)

    def _fancy(self):
        atexit.register(self.cleanup_vt100)
        if VT100 and not self.ar.ns:
            ss.scroll_region(3)

        Daemon(self.hasher)
        for _ in range(self.ar.j):
            Daemon(self.handshaker)
            Daemon(self.uploader)

        last_sp = -1
        while True:
            with self.exit_cond:
                self.exit_cond.wait(0.07)

            if self.panik:
                sys.exit(1)
            with self.mutex:
                if not self.handshaker_alive and not self.uploader_alive:
                    break

                st_hash = self.st_hash[:]
                st_up = self.st_up[:]

            if VT100 and not self.ar.ns:
                maxlen = ss.w - len(str(self.nfiles)) - 14
                txt = "\033[s\033[%dH" % (ss.g,)
                for y, k, st, f in [
                    [0, "hash", st_hash, self.hash_f],
                    [1, "send", st_up, self.up_f],
                ]:
                    txt += "\033[%dH%s:" % (ss.g + y, k)
                    file, arg = st
                    if not file:
                        txt += " %s\033[K" % (arg,)
                    else:
                        if y:
                            p = 100 * file.up_b / file.size
                        else:
                            p = 100 * arg / file.size

                        name = file.abs.decode("utf-8", "replace")[-maxlen:]
                        if "/" in name:
                            name = "\033[36m%s\033[0m/%s" % tuple(name.rsplit("/", 1))

                        txt += " %6.1f%% %d %s\033[K" % (p, self.nfiles - f, name)

                txt += "\033[%dH" % (ss.g + 2,)
            else:
                txt = ""

            if not VT100:  # OSC9;4 (taskbar-progress)
                sp = int(self.up_b * 100 / self.nbytes) or 1
                if last_sp != sp:
                    last_sp = sp
                    txt += "\033]9;4;1;%d\033\\" % (sp,)

            if not self.up_br:
                spd = self.hash_b / ((time.time() - self.t0) or 1)
                eta = (self.nbytes - self.hash_b) / (spd or 1)
            else:
                spd = self.up_br / ((time.time() - self.t0_up) or 1)
                spd = self.spd = (self.spd or spd) * 0.9 + spd * 0.1
                eta = (self.nbytes - self.up_b) / (spd or 1)

            spd = humansize(spd)
            self.eta = str(datetime.timedelta(seconds=int(eta)))
            if eta > 2591999:
                self.eta = self.eta.split(",")[0]  # truncate HH:MM:SS

            sleft = humansize(self.nbytes - self.up_b)
            nleft = self.nfiles - self.up_f
            tail = "\033[K\033[u" if VT100 and not self.ar.ns else "\r"

            t = "%s eta @ %s/s, %s, %d# left" % (self.eta, spd, sleft, nleft)
            if not self.hash_b:
                t = "now hashing..."

            eprint(txt + "\033]0;{0}\033\\\r{0}{1}".format(t, tail))

        if self.ar.wlist:
            self.at_hash = time.time() - self.t0

        if self.hash_b and self.at_hash:
            spd = humansize(self.hash_b / self.at_hash)
            eprint("\nhasher: %.2f sec, %s/s\n" % (self.at_hash, spd))

        if self.up_br and self.at_up:
            spd = humansize(self.up_br / self.at_up)
            eprint("upload: %.2f sec, %s/s\n" % (self.at_up, spd))

        if not self.recheck:
            return

        eprint("finalizing %d duplicate files\n" % (len(self.recheck),))
        for file in self.recheck:
            handshake(self.ar, file, False)

    def cleanup_vt100(self):
        if VT100:
            ss.scroll_region(None)
        else:
            eprint("\033]9;4;0\033\\")

        eprint("\033[J\033]0;\033\\")

    def cb_hasher(self, file, ofs):
        self.st_hash = [file, ofs]

    def hasher(self):
        ptn = re.compile(self.ar.x.encode("utf-8"), re.I) if self.ar.x else None
        sep = "{0}".format(os.sep).encode("ascii")
        prd = None
        ls = {}
        for top, rel, inf in self.filegen:
            isdir = stat.S_ISDIR(inf.st_mode)
            if self.ar.z or self.ar.drd:
                rd = rel if isdir else os.path.dirname(rel)
                srd = rd.decode("utf-8", "replace").replace("\\", "/").rstrip("/")
                if srd:
                    srd += "/"

                if prd != rd:
                    prd = rd
                    ls = {}
                    try:
                        print("ls ~{0}".format(srd))
                        zt = (
                            self.ar.vtop,
                            quotep(rd.replace(b"\\", b"/")).decode("utf-8"),
                        )
                        sc, txt = web.req("GET", "%s%s?ls<&dots" % zt, {})
                        if sc >= 400:
                            raise Exception("http %s" % (sc,))

                        j = json.loads(txt)
                        for f in j["dirs"] + j["files"]:
                            rfn = f["href"].split("?")[0].rstrip("/")
                            ls[unquote(rfn.encode("utf-8", WTF8))] = f
                    except Exception as ex:
                        print("mkdir ~{0} ({1})".format(srd, ex))

                    if self.ar.drd:
                        dp = os.path.join(top, rd)
                        try:
                            lnodes = set(os.listdir(dp))
                        except:
                            lnodes = list(ls)  # fs eio; don't delete

                        if ptn:
                            zs = dp.replace(sep, b"/").rstrip(b"/") + b"/"
                            zls = [zs + x for x in lnodes]
                            zls = [x for x in zls if not ptn.match(x)]
                            lnodes = [x.split(b"/")[-1] for x in zls]

                        bnames = [x for x in ls if x not in lnodes and x != b".hist"]
                        vpath = self.ar.url.split("://")[-1].split("/", 1)[-1]
                        names = [x.decode("utf-8", WTF8) for x in bnames]
                        locs = [vpath + srd + x for x in names]
                        while locs:
                            req = locs
                            while req:
                                print("DELETING ~%s#%s" % (srd, len(req)))
                                body = json.dumps(req).encode("utf-8")
                                sc, txt = web.req(
                                    "POST", self.ar.url + "?delete", {}, body, MJ
                                )
                                if sc == 413 and "json 2big" in txt:
                                    print("(delete request too big; slicing...)")
                                    req = req[: len(req) // 2]
                                    continue
                                elif sc >= 400:
                                    t = "delete request failed: %s %s"
                                    raise Exception(t % (sc, txt))

                                break

                            locs = locs[len(req):]

            if isdir:
                continue

            if self.ar.z:
                rf = ls.get(os.path.basename(rel), None)
                if rf and rf["sz"] == inf.st_size and abs(rf["ts"] - inf.st_mtime) <= 2:
                    self.nfiles -= 1
                    self.nbytes -= inf.st_size
                    continue

            file = File(top, rel, inf.st_size, inf.st_mtime)
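            # backpressure: stay at most ~1 GiB / 512 chunks ahead of
            # the uploaders (and, when ar.nh is set, don't hash at all
            # while anything is still handshaking or uploading)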
            while True:
                with self.mutex:
                    if (
                        self.hash_f - self.up_f == 1
                        or (
                            self.hash_b - self.up_b < 1024 * 1024 * 1024
                            and self.hash_c - self.up_c < 512
                        )
                    ) and (
                        not self.ar.nh
                        or (
                            self.q_upload.empty()
                            and self.q_handshake.empty()
                            and not self.uploader_busy
                        )
                    ):
                        break

                time.sleep(0.05)

            get_hashlist(file, self.cb_hasher, self.mth)
            with self.mutex:
                self.hash_f += 1
                self.hash_c += len(file.cids)
                self.hash_b += file.size
                if self.ar.wlist:
                    self.up_f = self.hash_f
                    self.up_c = self.hash_c
                    self.up_b = self.hash_b

            if self.ar.wlist:
                vp = file.rel.decode("utf-8")
                if self.ar.chs:
                    zsl = [
                        "%s %d %d" % (zsii[0], n, zsii[1])
                        for n, zsii in enumerate(file.cids)
                    ]
                    print("chs: %s\n%s" % (vp, "\n".join(zsl)))

                zsl = [self.ar.wsalt, str(file.size)] + [x[0] for x in file.cids]
                zb = hashlib.sha512("\n".join(zsl).encode("utf-8")).digest()[:33]
                wark = ub64enc(zb).decode("utf-8")
                if self.ar.jw:
                    print("%s %s" % (wark, vp))
                else:
                    zd = datetime.datetime.fromtimestamp(file.lmod, UTC)
                    dt = "%04d-%02d-%02d %02d:%02d:%02d" % (
                        zd.year,
                        zd.month,
                        zd.day,
                        zd.hour,
                        zd.minute,
                        zd.second,
                    )
                    print("%s %12d %s %s" % (dt, file.size, wark, vp))

                continue

            self.q_handshake.put(file)

        self.st_hash = [None, "(finished)"]
        self._check_if_done()

    def _check_if_done(self):
        with self.mutex:
            if self.nfiles - self.up_f:
                return

        for _ in range(self.ar.j):
            self.q_handshake.put(None)
def handshaker ( self ) :
search = self . ar . s
while True :
2022-11-29 22:09:32 +00:00
file = self . q_handshake . get ( )
2021-10-01 00:33:45 +02:00
if not file :
2024-09-06 00:31:25 +00:00
with self . mutex :
self . handshaker_alive - = 1
2021-10-01 00:33:45 +02:00
self . q_upload . put ( None )
2024-09-06 00:31:25 +00:00
return
2021-10-01 00:33:45 +02:00
2024-10-13 22:07:07 +00:00
chunksz = up2k_chunksize ( file . size )
2021-10-01 00:33:45 +02:00
upath = file . abs . decode ( " utf-8 " , " replace " )
2023-03-08 22:27:13 +00:00
if not VT100 :
2023-04-26 18:46:42 +00:00
upath = upath . lstrip ( " \\ ? " )
2023-03-08 22:27:13 +00:00
2023-11-11 14:30:46 +00:00
file . nhs + = 1
if file . nhs > 32 :
print ( " ERROR: giving up on file %s " % ( upath ) )
self . errs + = 1
continue
2023-11-11 14:46:00 +00:00
while time . time ( ) < file . cd :
time . sleep ( 0.1 )
try :
hs , sprs = handshake ( self . ar , file , search )
except BadAuth :
self . panik = 1
break
if search :
if hs :
for hit in hs :
print ( " found: %s \n %s / %s " % ( upath , self . ar . burl , hit [ " rp " ] ) )
else :
print ( " NOT found: {0} " . format ( upath ) )
with self . mutex :
self . up_f + = 1
self . up_c + = len ( file . cids )
self . up_b + = file . size
self . _check_if_done ( )
continue
if file . recheck :
self . recheck . append ( file )
with self . mutex :
if hs and not sprs and not self . serialized :
t = " server filesystem does not support sparse files; serializing uploads \n "
eprint ( t )
self . serialized = True
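# stopping all but one upload-thread serializes the rest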
for _ in range ( self . ar . j - 1 ) :
self . q_upload . put ( None )
if not hs :
# all chunks done
self . up_f + = 1
self . up_c + = len ( file . cids ) - file . up_c
self . up_b + = file . size - file . up_b
if not file . recheck :
self . up_done ( file )
if hs and file . up_c :
# some chunks failed
self . up_c - = len ( hs )
file . up_c - = len ( hs )
for cid in hs :
sz = file . kchunks [ cid ] [ 1 ]
self . up_br - = sz
self . up_b - = sz
file . up_b - = sz
if hs and not file . up_b :
# first hs of this file; is this an upload resume?
file . up_b = chunksz * max ( 0 , len ( file . kchunks ) - len ( hs ) )
file . ucids = hs
if not hs :
self . at_hash + = file . t_hash
if self . ar . spd :
if VT100 :
c1 = " \033 [36m "
c2 = " \033 [0m "
else :
c1 = c2 = " "
spd_h = humansize ( file . size / file . t_hash , True )
if file . up_c :
t_up = file . t1_up - file . t0_up
spd_u = humansize ( file . size / t_up , True )
t = " uploaded %s %s (h: %.2f s, %s /s, up: %.2f s, %s /s) %s "
print ( t % ( upath , c1 , file . t_hash , spd_h , t_up , spd_u , c2 ) )
else :
t = " found %s %s ( %.2f s, %s /s) %s "
print ( t % ( upath , c1 , file . t_hash , spd_h , c2 ) )
else :
kw = " uploaded " if file . up_c else " found "
print ( " {0} {1} " . format ( kw , upath ) )
self . _check_if_done ( )
continue
njoin = self . ar . sz / / chunksz
# chunk stitching makes long-distance uploads twice as fast:
#  rather than sending each file chunk as a separate HTTP request,
#  sibling chunks are fused together into larger HTTP POSTs, which
#  gives unreasonably huge speed boosts on some routes
#  ( 2.6x from Norway to US-East, 1.6x from US-West to Finland )
#  the x-up2k-hash request header takes a comma-separated list of
#  chunk hashes, which must all be sibling chunks, resulting in one
#  large consecutive range of file data as the post body
#  the global-option --u2sz, default 1,64,96, sets the target
#  request size as 64 MiB, allowing the settings ui to specify any
#  value between 1 and 96 MiB (cloudflare's max value)
#  this does not cause any issues for resumable uploads; thanks to
#  the streaming HTTP POST parser, each chunk is verified and
#  written to disk as it arrives, so only the untransmitted chunks
#  must be resent after a connection drop -- of course assuming
#  there are no misconfigured WAFs or caching-proxies in the way
#  ( uploading each chunk in its own POST was inefficient in many
#  real-world scenarios, mainly due to TCP window-scaling behaving
#  erratically in some IXPs / along some routes; a particular link
#  from Norway to Virginia,US was unusably slow for the first 4 MiB,
#  only reaching optimal speeds after 100 MiB, then immediately
#  reset the scale once the request was sent; connection reuse did
#  not help, and the basic-uploader was somehow faster than up2k
#  with 6 parallel uploads -- only time i've seen this )
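# try to grow each slice to njoin sibling chunks; file.nojoin
# (or, presumably, a FileSlice over non-adjacent chunks) raises,
# keeping the largest slice built so far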
cs = hs [ : ]
while cs :
fsl = FileSlice ( file , cs [ : 1 ] )
try :
if file . nojoin :
raise Exception ( )
for n in range ( 2 , min ( len ( cs ) , njoin + 1 ) ) :
fsl = FileSlice ( file , cs [ : n ] )
except :
pass
cs = cs [ len ( fsl . cids ) : ]
self . q_upload . put ( fsl )
def uploader ( self ) :
while True :
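# a None instead of a FileSlice is the shutdown signal; the last
# uploader to exit wakes the main thread through exit_cond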
fsl = self . q_upload . get ( )
if not fsl :
done = False
with self . mutex :
self . uploader_alive - = 1
if not self . uploader_alive :
done = not self . handshaker_alive
self . st_up = [ None , " (finished) " ]
if done :
with self . exit_cond :
self . exit_cond . notify_all ( )
return
file = fsl . file
cids = fsl . cids
with self . mutex :
if not self . uploader_busy :
self . at_upr = time . time ( )
self . uploader_busy + = 1
if not file . t0_up :
file . t0_up = time . time ( )
if not self . t0_up :
self . t0_up = file . t0_up
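# status: files done / rechecks / busy / left, MiB total / left, eta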
stats = " %d / %d / %d / %d %d / %d %s " % (
self . up_f ,
len ( self . recheck ) ,
self . uploader_busy ,
self . nfiles - self . up_f ,
self . nbytes / / ( 1024 * 1024 ) ,
( self . nbytes - self . up_b ) / / ( 1024 * 1024 ) ,
self . eta ,
)
try :
upload ( fsl , stats , self . ar . szm )
except Exception as ex :
t = " upload failed, retrying: %s # %s + %d ( %s ) \n "
eprint ( t % ( file . name , cids [ 0 ] [ : 8 ] , len ( cids ) - 1 , ex ) )
file . cd = time . time ( ) + self . ar . cd
# handshake will fix it
with self . mutex :
sz = fsl . len
file . ucids = [ x for x in file . ucids if x not in cids ]
if not file . ucids :
file . t1_up = time . time ( )
self . q_handshake . put ( file )
self . st_up = [ file , cids [ 0 ] ]
file . up_b + = sz
self . up_b + = sz
self . up_br + = sz
file . up_c + = 1
self . up_c + = 1
self . uploader_busy - = 1
if not self . uploader_busy :
self . at_up + = time . time ( ) - self . at_upr
def up_done ( self , file ) :
if self . ar . dl :
os . unlink ( file . abs )
class APF ( argparse . ArgumentDefaultsHelpFormatter , argparse . RawDescriptionHelpFormatter ) :
pass
def main ( ) :
global web , linkfile
time . strptime ( " 19970815 " , " %Y%m%d " ) # python#7980
" " . encode ( " idna " ) # python#29288
if not VT100 :
os . system ( " rem " ) # enables colors
cores = ( os . cpu_count ( ) if hasattr ( os , " cpu_count " ) else 0 ) or 2
hcores = min ( cores , 3 ) # 4% faster than 4+ on py3.9 @ r5-4500U
ver = " {0} , v {1} " . format ( S_BUILD_DT , S_VERSION )
if " --version " in sys . argv :
print ( ver )
return
sys . argv = [ x for x in sys . argv if x != " --ws " ]
# fmt: off
ap = app = argparse . ArgumentParser ( formatter_class = APF , description = " copyparty up2k uploader / filesearch tool " + ver , epilog = """
NOTE :
source file / folder selection uses rsync syntax , meaning that :
" foo " uploads the entire folder to URL / foo /
" foo/ " uploads the CONTENTS of the folder into URL /
""" )
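# example invocations (hypothetical server and paths):
#   u2c.py -a hunter2 http://192.168.1.2:3923/inc/ foo
#   u2c.py -s http://192.168.1.2:3923/music/ song.flac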
ap . add_argument ( " url " , type = unicode , help = " server url, including destination folder " )
ap . add_argument ( " files " , type = unicode , nargs = " + " , help = " files and/or folders to process " )
ap . add_argument ( " -v " , action = " store_true " , help = " verbose " )
ap . add_argument ( " -a " , metavar = " PASSWD " , help = " password or $filepath " )
ap . add_argument ( " -s " , action = " store_true " , help = " file-search (disables upload) " )
ap . add_argument ( " -x " , type = unicode , metavar = " REGEX " , action = " append " , help = " skip file if filesystem-abspath matches REGEX (option can be repeated), example: ' .*/ \\ .hist/.* ' " )
ap . add_argument ( " --ok " , action = " store_true " , help = " continue even if some local files are inaccessible " )
ap . add_argument ( " --touch " , action = " store_true " , help = " if last-modified timestamps differ, push local to server (need write+delete perms) " )
ap . add_argument ( " --ow " , action = " store_true " , help = " overwrite existing files instead of autorenaming " )
ap . add_argument ( " --owo " , action = " store_true " , help = " overwrite existing files if server-file is older " )
ap . add_argument ( " --spd " , action = " store_true " , help = " print speeds for each file " )
ap . add_argument ( " --version " , action = " store_true " , help = " show version and exit " )
ap = app . add_argument_group ( " print links " )
ap . add_argument ( " -u " , action = " store_true " , help = " print list of download-links after all uploads finished " )
ap . add_argument ( " -ud " , action = " store_true " , help = " print download-link after each upload finishes " )
ap . add_argument ( " -uf " , type = unicode , metavar = " PATH " , help = " print list of download-links to file " )
ap = app . add_argument_group ( " compatibility " )
ap . add_argument ( " --cls " , action = " store_true " , help = " clear screen before start " )
ap . add_argument ( " --rh " , type = int , metavar = " TRIES " , default = 0 , help = " resolve server hostname before upload (good for buggy networks, but TLS certs will break) " )
ap = app . add_argument_group ( " folder sync " )
ap . add_argument ( " --dl " , action = " store_true " , help = " delete local files after uploading " )
ap . add_argument ( " --dr " , action = " store_true " , help = " delete remote files which don ' t exist locally (implies --ow) " )
ap . add_argument ( " --drd " , action = " store_true " , help = " delete remote files during upload instead of afterwards; reduces peak disk space usage, but will reupload instead of detecting renames " )
ap = app . add_argument_group ( " file-ID calculator; enable with url ' - ' to list warks (file identifiers) instead of upload/search " )
ap . add_argument ( " --wsalt " , type = unicode , metavar = " S " , default = " hunter2 " , help = " salt to use when creating warks; must match server config " )
ap . add_argument ( " --chs " , action = " store_true " , help = " verbose (print the hash/offset of each chunk in each file) " )
ap . add_argument ( " --jw " , action = " store_true " , help = " just identifier+filepath, not mtime/size too " )
ap = app . add_argument_group ( " performance tweaks " )
ap . add_argument ( " -j " , type = int , metavar = " CONNS " , default = 2 , help = " parallel connections " )
ap . add_argument ( " -J " , type = int , metavar = " CORES " , default = hcores , help = " num cpu-cores to use for hashing; set 0 or 1 for single-core hashing " )
ap . add_argument ( " --sz " , type = int , metavar = " MiB " , default = 64 , help = " try to make each POST this big " )
ap . add_argument ( " --szm " , type = int , metavar = " MiB " , default = 96 , help = " max size of each POST (default is cloudflare max) " )
ap . add_argument ( " -nh " , action = " store_true " , help = " disable hashing while uploading " )
ap . add_argument ( " -ns " , action = " store_true " , help = " no status panel (for slow consoles and macos) " )
ap . add_argument ( " --cxp " , type = float , metavar = " SEC " , default = 57 , help = " assume http connections expired after SEConds " )
ap . add_argument ( " --cd " , type = float , metavar = " SEC " , default = 5 , help = " delay before reattempting a failed handshake/upload " )
ap . add_argument ( " --safe " , action = " store_true " , help = " use simple fallback approach " )
ap . add_argument ( " -z " , action = " store_true " , help = " ZOOMIN ' (skip uploading files if they exist at the destination with the ~same last-modified timestamp, so same as yolo / turbo with date-chk but even faster) " )
ap = app . add_argument_group ( " tls " )
ap . add_argument ( " -te " , metavar = " PATH " , help = " path to ca.pem or cert.pem to expect/verify " )
ap . add_argument ( " -td " , action = " store_true " , help = " disable certificate check " )
# fmt: on
try :
ar = app . parse_args ( )
finally :
if EXE and not sys . argv [ 1 : ] :
eprint ( " *** hit enter to exit *** " )
try :
input ( )
except :
pass
# msys2 doesn't uncygpath absolute paths with whitespace
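# ( for example " /c/Users/ed " becomes " c:\Users\ed " )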
if not VT100 :
zsl = [ ]
for fn in ar . files :
if re . search ( " ^/[a-z]/ " , fn ) :
fn = r " %s : \ %s " % ( fn [ 1 : 2 ] , fn [ 3 : ] )
zsl . append ( fn . replace ( " / " , " \\ " ) )
ar . files = zsl
fok = [ ]
fng = [ ]
for fn in ar . files :
if os . path . exists ( fn ) :
fok . append ( fn )
elif VT100 :
fng . append ( fn )
else :
# windows leaves glob-expansion to the invoked process... okayyy let's get to work
from glob import glob
fns = glob ( fn )
if fns :
fok . extend ( fns )
else :
fng . append ( fn )
if fng :
t = " some files/folders were not found: \n %s "
raise Exception ( t % ( " \n " . join ( fng ) , ) )
ar . files = fok
if ar . drd :
ar . dr = True
if ar . dr :
ar . ow = True
ar . sz * = 1024 * 1024
ar . szm * = 1024 * 1024
ar . x = " | " . join ( ar . x or [ ] )
setattr ( ar , " wlist " , ar . url == " - " )
setattr ( ar , " uon " , ar . u or ar . ud or ar . uf )
if ar . uf :
linkfile = open ( ar . uf , " wb " )
errs = [ ]
for k in " dl dr drd wlist " . split ( ) :
if ar . safe and getattr ( ar , k ) :
errs . append ( k )
if errs :
raise Exception ( " --safe is incompatible with " + str ( errs ) )
ar . files = [
os . path . abspath ( os . path . realpath ( x . encode ( " utf-8 " ) ) )
+ ( x [ - 1 : ] if x [ - 1 : ] in ( " \\ " , " / " ) else " " ) . encode ( " utf-8 " )
for x in ar . files
]
# urlsplit needs scheme;
zs = ar . url . rstrip ( " / " ) + " / "
if " :// " not in zs :
zs = " http:// " + zs
ar . url = zs
url = urlsplit ( zs )
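# burl is scheme+host only; vtop is the destination path on the server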
ar . burl = " %s :// %s " % ( url . scheme , url . netloc )
ar . vtop = url . path
if " https:// " in ar . url . lower ( ) :
try :
import ssl
import zipfile
except :
t = " ERROR: https is not available for some reason; please use http "
print ( " \n \n %s \n \n " % ( t , ) )
raise
if ar . a and ar . a . startswith ( " $ " ) :
fn = ar . a [ 1 : ]
print ( " reading password from file [ {0} ] " . format ( fn ) )
with open ( fn , " rb " ) as f :
ar . a = f . read ( ) . decode ( " utf-8 " ) . strip ( )
for n in range ( ar . rh ) :
try :
ar . url = undns ( ar . url )
break
except KeyboardInterrupt :
raise
except :
if n > ar . rh - 2 :
raise
if ar . cls :
eprint ( " \033 [H \033 [2J \033 [3J " , end = " " )
web = HCli ( ar )
ctl = Ctl ( ar )
if ar . dr and not ar . drd and ctl . ok :
print ( " \n pass 2/2: delete " )
ar . drd = True
ar . z = True
ctl = Ctl ( ar , ctl . stats )
if links :
print ( )
print ( " \n " . join ( links ) )
if linkfile :
linkfile . close ( )
if ctl . errs :
print ( " WARNING: %d errors " % ( ctl . errs ) )
sys . exit ( 0 if ctl . ok else 1 )
if __name__ == " __main__ " :
main ( )