#!/usr/bin/env python3
from __future__ import print_function, unicode_literals
"""
up2k.py: upload to copyparty
2022-09-05, v0.19, ed <irc.rizon.net>, MIT-Licensed
https://github.com/9001/copyparty/blob/hovudstraum/bin/up2k.py
- dependencies: requests
- supports python 2.6, 2.7, and 3.3 through 3.11
- almost zero error-handling
- but if something breaks just try again and it'll autoresume
"""
import os
import sys
import stat
import math
import time
import atexit
import signal
import base64
import hashlib
import platform
import threading
import datetime
try:
import argparse
except ImportError:
m = "\n ERROR: need 'argparse'; download it here:\n https://github.com/ThomasWaldmann/argparse/raw/master/argparse.py\n"
print(m)
raise
try:
import requests
except ImportError:
if sys.version_info > (2, 7):
m = "\nERROR: need 'requests'; please run this command:\n {0} -m pip install --user requests\n"
else:
m = "requests/2.18.4 urllib3/1.23 chardet/3.0.4 certifi/2020.4.5.1 idna/2.7"
m = [" https://pypi.org/project/" + x + "/#files" for x in m.split()]
m = "\n ERROR: need these:\n" + "\n".join(m) + "\n"
print(m.format(sys.executable))
sys.exit(1)
# from copyparty/__init__.py
PY2 = sys.version_info[0] == 2
if PY2:
from Queue import Queue
from urllib import unquote
from urllib import quote
sys.dont_write_bytecode = True
bytes = str
else:
from queue import Queue
from urllib.parse import unquote_to_bytes as unquote
from urllib.parse import quote_from_bytes as quote
unicode = str
VT100 = platform.system() != "Windows"
req_ses = requests.Session()
class File(object):
"""an up2k upload task; represents a single file"""
def __init__(self, top, rel, size, lmod):
self.top = top # type: bytes
self.rel = rel.replace(b"\\", b"/") # type: bytes
self.size = size # type: int
self.lmod = lmod # type: float
self.abs = os.path.join(top, rel) # type: bytes
self.name = self.rel.split(b"/")[-1].decode("utf-8", "replace") # type: str
# set by get_hashlist
self.cids = [] # type: list[tuple[str, int, int]] # [ hash, ofs, sz ]
self.kchunks = {} # type: dict[str, tuple[int, int]] # hash: [ ofs, sz ]
# set by handshake
self.ucids = [] # type: list[str] # chunks which need to be uploaded
self.wark = None # type: str
self.url = None # type: str
# set by upload
self.up_b = 0 # type: int
self.up_c = 0 # type: int
# t = "size({}) lmod({}) top({}) rel({}) abs({}) name({})\n"
# eprint(t.format(self.size, self.lmod, self.top, self.rel, self.abs, self.name))
class FileSlice(object):
"""file-like object providing a fixed window into a file"""
def __init__(self, file, cid):
# type: (File, str) -> None
self.car, self.len = file.kchunks[cid]
self.cdr = self.car + self.len
self.ofs = 0 # type: int
self.f = open(file.abs, "rb", 512 * 1024)
self.f.seek(self.car)
# https://stackoverflow.com/questions/4359495/what-is-exactly-a-file-like-object-in-python
# IOBase, RawIOBase, BufferedIOBase
funs = "close closed __enter__ __exit__ __iter__ isatty __next__ readable seekable writable"
try:
for fun in funs.split():
setattr(self, fun, getattr(self.f, fun))
except:
pass # py27 probably
def tell(self):
return self.ofs
def seek(self, ofs, wh=0):
if wh == 1:
ofs = self.ofs + ofs
elif wh == 2:
ofs = self.len + ofs # provided ofs is negative
if ofs < 0:
ofs = 0
elif ofs >= self.len:
ofs = self.len - 1
self.ofs = ofs
self.f.seek(self.car + ofs)
def read(self, sz):
sz = min(sz, self.len - self.ofs)
ret = self.f.read(sz)
self.ofs += len(ret)
return ret
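# usage sketch: stream a single chunk of a hashed File as a request body;
# cid must be one of the chunk-hashes in file.kchunks (set by get_hashlist)
#   fs = FileSlice(file, cid)
#   try:
#       buf = fs.read(512 * 1024)  # reads clamp to the chunk boundary
#   finally:
#       fs.f.close()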
class MTHash(object):
def __init__(self, cores):
self.f = None
self.sz = 0
self.csz = 0
self.omutex = threading.Lock()
self.imutex = threading.Lock()
self.work_q = Queue()
self.done_q = Queue()
self.thrs = []
for _ in range(cores):
t = threading.Thread(target=self.worker)
t.daemon = True
t.start()
self.thrs.append(t)
def hash(self, f, fsz, chunksz, pcb=None, pcb_opaque=None):
with self.omutex:
self.f = f
self.sz = fsz
self.csz = chunksz
chunks = {}
        nchunks = int(math.ceil(fsz * 1.0 / chunksz))  # *1.0: true division on py2, same as up2k_chunksize
for nch in range(nchunks):
self.work_q.put(nch)
ex = ""
for nch in range(nchunks):
qe = self.done_q.get()
try:
nch, dig, ofs, csz = qe
chunks[nch] = [dig, ofs, csz]
except:
ex = ex or qe
if pcb:
pcb(pcb_opaque, chunksz * nch)
if ex:
raise Exception(ex)
ret = []
for n in range(nchunks):
ret.append(chunks[n])
self.f = None
self.csz = 0
self.sz = 0
return ret
def worker(self):
while True:
ofs = self.work_q.get()
try:
v = self.hash_at(ofs)
except Exception as ex:
v = str(ex)
self.done_q.put(v)
def hash_at(self, nch):
f = self.f
ofs = ofs0 = nch * self.csz
hashobj = hashlib.sha512()
chunk_sz = chunk_rem = min(self.csz, self.sz - ofs)
while chunk_rem > 0:
with self.imutex:
f.seek(ofs)
buf = f.read(min(chunk_rem, 1024 * 1024 * 12))
if not buf:
raise Exception("EOF at " + str(ofs))
hashobj.update(buf)
chunk_rem -= len(buf)
ofs += len(buf)
digest = hashobj.digest()[:33]
digest = base64.urlsafe_b64encode(digest).decode("utf-8")
return nch, digest, ofs0, chunk_sz
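# usage sketch (f/fsz are an open binary filehandle and its size):
#   mth = MTHash(3)
#   chunks = mth.hash(f, fsz, up2k_chunksize(fsz))
#   # -> [[b64-digest, offset, size], ...] ordered by offset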
_print = print
def eprint(*a, **ka):
ka["file"] = sys.stderr
ka["end"] = ""
if not PY2:
ka["flush"] = True
_print(*a, **ka)
if PY2 or not VT100:
sys.stderr.flush()
def flushing_print(*a, **ka):
_print(*a, **ka)
if "flush" not in ka:
sys.stdout.flush()
if not VT100:
print = flushing_print
def termsize():
env = os.environ
def ioctl_GWINSZ(fd):
try:
import fcntl, termios, struct
cr = struct.unpack("hh", fcntl.ioctl(fd, termios.TIOCGWINSZ, "1234"))
except:
return
return cr
cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
if not cr:
try:
fd = os.open(os.ctermid(), os.O_RDONLY)
cr = ioctl_GWINSZ(fd)
os.close(fd)
except:
pass
if not cr:
try:
cr = (env["LINES"], env["COLUMNS"])
except:
cr = (25, 80)
return int(cr[1]), int(cr[0])
class CTermsize(object):
def __init__(self):
self.ev = False
self.margin = None
self.g = None
self.w, self.h = termsize()
try:
signal.signal(signal.SIGWINCH, self.ev_sig)
except:
return
thr = threading.Thread(target=self.worker)
thr.daemon = True
thr.start()
def worker(self):
while True:
time.sleep(0.5)
if not self.ev:
continue
self.ev = False
self.w, self.h = termsize()
if self.margin is not None:
self.scroll_region(self.margin)
def ev_sig(self, *a, **ka):
self.ev = True
def scroll_region(self, margin):
self.margin = margin
if margin is None:
self.g = None
eprint("\033[s\033[r\033[u")
else:
self.g = 1 + self.h - margin
t = "{0}\033[{1}A".format("\n" * margin, margin)
eprint("{0}\033[s\033[1;{1}r\033[u".format(t, self.g - 1))
ss = CTermsize()
def _scd(err, top):
"""non-recursive listing of directory contents, along with stat() info"""
with os.scandir(top) as dh:
for fh in dh:
abspath = os.path.join(top, fh.name)
try:
yield [abspath, fh.stat()]
except Exception as ex:
err.append((abspath, str(ex)))
def _lsd(err, top):
"""non-recursive listing of directory contents, along with stat() info"""
for name in os.listdir(top):
abspath = os.path.join(top, name)
try:
yield [abspath, os.stat(abspath)]
except Exception as ex:
err.append((abspath, str(ex)))
if hasattr(os, "scandir") and sys.version_info > (3, 6):
statdir = _scd
else:
statdir = _lsd
def walkdir(err, top, seen):
"""recursive statdir"""
atop = os.path.abspath(os.path.realpath(top))
if atop in seen:
err.append((top, "recursive-symlink"))
return
seen = seen[:] + [atop]
for ap, inf in sorted(statdir(err, top)):
if stat.S_ISDIR(inf.st_mode):
try:
for x in walkdir(err, ap, seen):
yield x
except Exception as ex:
err.append((ap, str(ex)))
else:
yield ap, inf
def walkdirs(err, tops):
"""recursive statdir for a list of tops, yields [top, relpath, stat]"""
sep = "{0}".format(os.sep).encode("ascii")
for top in tops:
if top[-1:] == sep:
stop = top.rstrip(sep)
else:
stop = os.path.dirname(top)
if os.path.isdir(top):
for ap, inf in walkdir(err, top, []):
yield stop, ap[len(stop) :].lstrip(sep), inf
else:
d, n = top.rsplit(sep, 1)
yield d, n, os.stat(top)
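# rsync-like selection, e.g. given a folder /a/b containing c.txt:
#   walkdirs(err, [b"/a/b"])  yields (b"/a", b"b/c.txt", ...)  -> URL/b/c.txt
#   walkdirs(err, [b"/a/b/"]) yields (b"/a/b", b"c.txt", ...)  -> URL/c.txt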
# mostly from copyparty/util.py
def quotep(btxt):
quot1 = quote(btxt, safe=b"/")
if not PY2:
quot1 = quot1.encode("ascii")
return quot1.replace(b" ", b"+")
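# e.g. quotep(b"foo bar/ba+z") -> b"foo%20bar/ba%2Bz"
# ("/" stays verbatim; the final replace only matters if a space survives quoting)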
# from copyparty/util.py
def humansize(sz, terse=False):
"""picks a sensible unit for the given extent"""
for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
if sz < 1024:
break
sz /= 1024.0
ret = " ".join([str(sz)[:4].rstrip("."), unit])
if not terse:
return ret
return ret.replace("iB", "").replace(" ", "")
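# e.g. humansize(1234567) -> "1.17 MiB"; humansize(1234567, True) -> "1.17M"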
# from copyparty/up2k.py
def up2k_chunksize(filesize):
"""gives The correct chunksize for up2k hashing"""
chunksize = 1024 * 1024
stepsize = 512 * 1024
while True:
for mul in [1, 2]:
nchunks = math.ceil(filesize * 1.0 / chunksize)
if nchunks <= 256 or chunksize >= 32 * 1024 * 1024:
return chunksize
chunksize += stepsize
stepsize *= mul
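# e.g. files up to 256 MiB hash in 1 MiB chunks, a 1 GiB file gets
# 4 MiB chunks (256 of them), and chunksize is capped at 32 MiB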
# mostly from copyparty/up2k.py
def get_hashlist(file, pcb, mth):
# type: (File, any, any) -> None
"""generates the up2k hashlist from file contents, inserts it into `file`"""
chunk_sz = up2k_chunksize(file.size)
file_rem = file.size
file_ofs = 0
ret = []
with open(file.abs, "rb", 512 * 1024) as f:
if mth and file.size >= 1024 * 512:
ret = mth.hash(f, file.size, chunk_sz, pcb, file)
file_rem = 0
while file_rem > 0:
# same as `hash_at` except for `imutex` / bufsz
hashobj = hashlib.sha512()
chunk_sz = chunk_rem = min(chunk_sz, file_rem)
while chunk_rem > 0:
buf = f.read(min(chunk_rem, 64 * 1024))
if not buf:
raise Exception("EOF at " + str(f.tell()))
hashobj.update(buf)
chunk_rem -= len(buf)
digest = hashobj.digest()[:33]
digest = base64.urlsafe_b64encode(digest).decode("utf-8")
ret.append([digest, file_ofs, chunk_sz])
file_ofs += chunk_sz
file_rem -= chunk_sz
if pcb:
pcb(file, file_ofs)
file.cids = ret
file.kchunks = {}
for k, v1, v2 in ret:
file.kchunks[k] = [v1, v2]
def handshake(req_ses, url, file, pw, search):
# type: (requests.Session, str, File, any, bool) -> list[str]
"""
performs a handshake with the server; reply is:
if search, a list of search results
otherwise, a list of chunks to upload
"""
req = {
"hash": [x[0] for x in file.cids],
"name": file.name,
"lmod": file.lmod,
"size": file.size,
}
if search:
req["srch"] = 1
headers = {"Content-Type": "text/plain"} # wtf ed
if pw:
headers["Cookie"] = "=".join(["cppwd", pw])
if file.url:
url = file.url
elif b"/" in file.rel:
url += quotep(file.rel.rsplit(b"/", 1)[0]).decode("utf-8", "replace")
while True:
try:
r = req_ses.post(url, headers=headers, json=req)
break
except Exception as ex:
em = str(ex).split("SSLError(")[-1]
eprint("handshake failed, retrying: {0}\n {1}\n\n".format(file.name, em))
time.sleep(1)
try:
r = r.json()
except:
raise Exception(r.text)
if search:
return r["hits"], False
try:
pre, url = url.split("://")
pre += "://"
except:
pre = ""
file.url = pre + url.split("/")[0] + r["purl"]
file.name = r["name"]
file.wark = r["wark"]
return r["hash"], r["sprs"]
def upload(req_ses, file, cid, pw):
# type: (requests.Session, File, str, any) -> None
"""upload one specific chunk, `cid` (a chunk-hash)"""
headers = {
"X-Up2k-Hash": cid,
"X-Up2k-Wark": file.wark,
"Content-Type": "application/octet-stream",
}
if pw:
headers["Cookie"] = "=".join(["cppwd", pw])
f = FileSlice(file, cid)
try:
r = req_ses.post(file.url, headers=headers, data=f)
if not r:
raise Exception(repr(r))
_ = r.content
finally:
f.f.close()
class Daemon(threading.Thread):
def __init__(self, *a, **ka):
threading.Thread.__init__(self, *a, **ka)
self.daemon = True
class Ctl(object):
"""
this will be the coordinator which runs everything in parallel
    (hashing, handshakes, uploads) but right now it's pretty dumb
"""
def __init__(self, ar):
self.ar = ar
ar.files = [
os.path.abspath(os.path.realpath(x.encode("utf-8")))
+ (x[-1:] if x[-1:] == os.sep else "").encode("utf-8")
for x in ar.files
]
ar.url = ar.url.rstrip("/") + "/"
if "://" not in ar.url:
ar.url = "http://" + ar.url
eprint("\nscanning {0} locations\n".format(len(ar.files)))
nfiles = 0
nbytes = 0
err = []
for _, _, inf in walkdirs(err, ar.files):
nfiles += 1
nbytes += inf.st_size
if err:
eprint("\n# failed to access {0} paths:\n".format(len(err)))
for ap, msg in err:
if ar.v:
eprint("{0}\n `-{1}\n\n".format(ap.decode("utf-8", "replace"), msg))
else:
eprint(ap.decode("utf-8", "replace") + "\n")
eprint("^ failed to access those {0} paths ^\n\n".format(len(err)))
if not ar.v:
eprint("hint: set -v for detailed error messages\n")
if not ar.ok:
eprint("hint: aborting because --ok is not set\n")
return
eprint("found {0} files, {1}\n\n".format(nfiles, humansize(nbytes)))
self.nfiles = nfiles
self.nbytes = nbytes
if ar.td:
requests.packages.urllib3.disable_warnings()
req_ses.verify = False
if ar.te:
req_ses.verify = ar.te
self.filegen = walkdirs([], ar.files)
if ar.safe:
self._safe()
else:
self.hash_f = 0
self.hash_c = 0
self.hash_b = 0
self.up_f = 0
self.up_c = 0
self.up_b = 0
self.up_br = 0
self.hasher_busy = 1
self.handshaker_busy = 0
self.uploader_busy = 0
self.serialized = False
self.t0 = time.time()
self.t0_up = None
self.spd = None
self.mutex = threading.Lock()
self.q_handshake = Queue() # type: Queue[File]
self.q_recheck = Queue() # type: Queue[File] # partial upload exists [...]
self.q_upload = Queue() # type: Queue[tuple[File, str]]
self.st_hash = [None, "(idle, starting...)"] # type: tuple[File, int]
self.st_up = [None, "(idle, starting...)"] # type: tuple[File, int]
self.mth = MTHash(ar.J) if ar.J > 1 else None
self._fancy()
def _safe(self):
"""minimal basic slow boring fallback codepath"""
search = self.ar.s
for nf, (top, rel, inf) in enumerate(self.filegen):
file = File(top, rel, inf.st_size, inf.st_mtime)
upath = file.abs.decode("utf-8", "replace")
print("{0} {1}\n hash...".format(self.nfiles - nf, upath))
get_hashlist(file, None, None)
            burl = self.ar.url[:8] + self.ar.url[8:].split("/")[0] + "/"
while True:
print(" hs...")
hs, _ = handshake(req_ses, self.ar.url, file, self.ar.a, search)
if search:
if hs:
for hit in hs:
print(" found: {0}{1}".format(burl, hit["rp"]))
else:
print(" NOT found")
break
file.ucids = hs
if not hs:
break
print("{0} {1}".format(self.nfiles - nf, upath))
ncs = len(hs)
for nc, cid in enumerate(hs):
print(" {0} up {1}".format(ncs - nc, cid))
upload(req_ses, file, cid, self.ar.a)
print(" ok!")
def _fancy(self):
if VT100:
atexit.register(self.cleanup_vt100)
ss.scroll_region(3)
Daemon(target=self.hasher).start()
for _ in range(self.ar.j):
Daemon(target=self.handshaker).start()
Daemon(target=self.uploader).start()
idles = 0
while idles < 3:
time.sleep(0.07)
with self.mutex:
if (
self.q_handshake.empty()
and self.q_upload.empty()
and not self.hasher_busy
and not self.handshaker_busy
and not self.uploader_busy
):
idles += 1
else:
idles = 0
if VT100:
maxlen = ss.w - len(str(self.nfiles)) - 14
txt = "\033[s\033[{0}H".format(ss.g)
for y, k, st, f in [
[0, "hash", self.st_hash, self.hash_f],
[1, "send", self.st_up, self.up_f],
]:
txt += "\033[{0}H{1}:".format(ss.g + y, k)
file, arg = st
if not file:
txt += " {0}\033[K".format(arg)
else:
if y:
p = 100 * file.up_b / file.size
else:
p = 100 * arg / file.size
name = file.abs.decode("utf-8", "replace")[-maxlen:]
if "/" in name:
name = "\033[36m{0}\033[0m/{1}".format(*name.rsplit("/", 1))
t = "{0:6.1f}% {1} {2}\033[K"
txt += t.format(p, self.nfiles - f, name)
txt += "\033[{0}H ".format(ss.g + 2)
else:
txt = " "
if not self.up_br:
spd = self.hash_b / (time.time() - self.t0)
eta = (self.nbytes - self.hash_b) / (spd + 1)
else:
spd = self.up_br / (time.time() - self.t0_up)
                spd = self.spd = (self.spd or spd) * 0.9 + spd * 0.1  # EMA to steady the speed readout
eta = (self.nbytes - self.up_b) / (spd + 1)
spd = humansize(spd)
eta = str(datetime.timedelta(seconds=int(eta)))
sleft = humansize(self.nbytes - self.up_b)
nleft = self.nfiles - self.up_f
tail = "\033[K\033[u" if VT100 else "\r"
t = "{0} eta @ {1}/s, {2}, {3}# left".format(eta, spd, sleft, nleft)
eprint(txt + "\033]0;{0}\033\\\r{0}{1}".format(t, tail))
def cleanup_vt100(self):
ss.scroll_region(None)
eprint("\033[J\033]0;\033\\")
def cb_hasher(self, file, ofs):
self.st_hash = [file, ofs]
def hasher(self):
prd = None
ls = {}
for top, rel, inf in self.filegen:
if self.ar.z:
rd = os.path.dirname(rel)
if prd != rd:
prd = rd
headers = {}
if self.ar.a:
headers["Cookie"] = "=".join(["cppwd", self.ar.a])
ls = {}
try:
print(" ls ~{0}".format(rd.decode("utf-8", "replace")))
r = req_ses.get(
self.ar.url.encode("utf-8") + quotep(rd) + b"?ls",
headers=headers,
)
for f in r.json()["files"]:
rfn = f["href"].split("?")[0].encode("utf-8", "replace")
ls[unquote(rfn)] = f
except:
print(" mkdir ~{0}".format(rd.decode("utf-8", "replace")))
rf = ls.get(os.path.basename(rel), None)
if rf and rf["sz"] == inf.st_size and abs(rf["ts"] - inf.st_mtime) <= 1:
self.nfiles -= 1
self.nbytes -= inf.st_size
continue
file = File(top, rel, inf.st_size, inf.st_mtime)
while True:
with self.mutex:
if (
self.hash_b - self.up_b < 1024 * 1024 * 128
and self.hash_c - self.up_c < 64
and (
not self.ar.nh
or (
self.q_upload.empty()
and self.q_handshake.empty()
and not self.uploader_busy
)
)
):
break
time.sleep(0.05)
get_hashlist(file, self.cb_hasher, self.mth)
with self.mutex:
self.hash_f += 1
self.hash_c += len(file.cids)
self.hash_b += file.size
self.q_handshake.put(file)
self.hasher_busy = 0
self.st_hash = [None, "(finished)"]
def handshaker(self):
search = self.ar.s
q = self.q_handshake
burl = self.ar.url[:8] + self.ar.url[8:].split("/")[0] + "/"
while True:
file = q.get()
if not file:
if q == self.q_handshake:
q = self.q_recheck
q.put(None)
continue
self.q_upload.put(None)
break
with self.mutex:
self.handshaker_busy += 1
upath = file.abs.decode("utf-8", "replace")
try:
hs, sprs = handshake(req_ses, self.ar.url, file, self.ar.a, search)
except Exception as ex:
if q == self.q_handshake and "<pre>partial upload exists" in str(ex):
self.q_recheck.put(file)
hs = []
else:
raise
if search:
if hs:
for hit in hs:
t = "found: {0}\n {1}{2}\n"
print(t.format(upath, burl, hit["rp"]), end="")
else:
print("NOT found: {0}\n".format(upath), end="")
with self.mutex:
self.up_f += 1
self.up_c += len(file.cids)
self.up_b += file.size
self.handshaker_busy -= 1
continue
with self.mutex:
if not sprs and not self.serialized:
t = "server filesystem does not support sparse files; serializing uploads\n"
eprint(t)
self.serialized = True
for _ in range(self.ar.j - 1):
self.q_upload.put(None)
if not hs:
# all chunks done
self.up_f += 1
self.up_c += len(file.cids) - file.up_c
self.up_b += file.size - file.up_b
if hs and file.up_c:
# some chunks failed
self.up_c -= len(hs)
file.up_c -= len(hs)
for cid in hs:
sz = file.kchunks[cid][1]
self.up_b -= sz
file.up_b -= sz
file.ucids = hs
self.handshaker_busy -= 1
if not hs:
kw = "uploaded" if file.up_b else " found"
print("{0} {1}".format(kw, upath))
for cid in hs:
self.q_upload.put([file, cid])
def uploader(self):
while True:
task = self.q_upload.get()
if not task:
self.st_up = [None, "(finished)"]
break
with self.mutex:
self.uploader_busy += 1
self.t0_up = self.t0_up or time.time()
file, cid = task
try:
upload(req_ses, file, cid, self.ar.a)
except:
eprint("upload failed, retrying: {0} #{1}\n".format(file.name, cid[:8]))
pass # handshake will fix it
with self.mutex:
sz = file.kchunks[cid][1]
file.ucids = [x for x in file.ucids if x != cid]
if not file.ucids:
self.q_handshake.put(file)
self.st_up = [file, cid]
file.up_b += sz
self.up_b += sz
self.up_br += sz
file.up_c += 1
self.up_c += 1
self.uploader_busy -= 1
class APF(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
pass
def main():
time.strptime("19970815", "%Y%m%d") # python#7980
if not VT100:
os.system("rem") # enables colors
cores = (os.cpu_count() if hasattr(os, "cpu_count") else 0) or 2
hcores = min(cores, 3) # 4% faster than 4+ on py3.9 @ r5-4500U
# fmt: off
ap = app = argparse.ArgumentParser(formatter_class=APF, epilog="""
NOTE:
source file/folder selection uses rsync syntax, meaning that:
"foo" uploads the entire folder to URL/foo/
"foo/" uploads the CONTENTS of the folder into URL/
""")
ap.add_argument("url", type=unicode, help="server url, including destination folder")
ap.add_argument("files", type=unicode, nargs="+", help="files and/or folders to process")
ap.add_argument("-v", action="store_true", help="verbose")
ap.add_argument("-a", metavar="PASSWORD", help="password")
ap.add_argument("-s", action="store_true", help="file-search (disables upload)")
ap.add_argument("--ok", action="store_true", help="continue even if some local files are inaccessible")
ap = app.add_argument_group("performance tweaks")
ap.add_argument("-j", type=int, metavar="THREADS", default=4, help="parallel connections")
ap.add_argument("-J", type=int, metavar="THREADS", default=hcores, help="num cpu-cores to use for hashing; set 0 or 1 for single-core hashing")
ap.add_argument("-nh", action="store_true", help="disable hashing while uploading")
ap.add_argument("--safe", action="store_true", help="use simple fallback approach")
ap.add_argument("-z", action="store_true", help="ZOOMIN' (skip uploading files if they exist at the destination with the ~same last-modified timestamp, so same as yolo / turbo with date-chk but even faster)")
ap = app.add_argument_group("tls")
ap.add_argument("-te", metavar="PEM_FILE", help="certificate to expect/verify")
ap.add_argument("-td", action="store_true", help="disable certificate check")
# fmt: on
Ctl(app.parse_args())
if __name__ == "__main__":
main()