Files
copyparty/bin/partyfuse.py

1227 lines
37 KiB
Python
Raw Normal View History

2019-09-19 00:53:48 +00:00
#!/usr/bin/env python3
"""partyfuse: remote copyparty as a local filesystem"""
__author__ = "ed <copyparty@ocv.me>"
__copyright__ = 2019
__license__ = "MIT"
__url__ = "https://github.com/9001/copyparty/"

S_VERSION = "2.0"
S_BUILD_DT = "2024-10-01"

"""
mount a copyparty server (local or remote) as a filesystem

speeds:
  1 GiB/s reading large files
  27'000 files/sec: copy small files
  700 folders/sec: copy small folders

usage:
  python partyfuse.py http://192.168.1.69:3923/ ./music

dependencies:
  python3 -m pip install --user fusepy  # or grab it from the connect page
  + on Linux: sudo apk add fuse
  + on Macos: https://osxfuse.github.io/
  + on Windows: https://github.com/billziss-gh/winfsp/releases/latest

note:
  you probably want to run this on windows clients:
  https://github.com/9001/copyparty/blob/hovudstraum/contrib/explorer-nothumbs-nofoldertypes.reg

get server cert:
  awk '/-BEGIN CERTIFICATE-/ {a=1} a; /-END CERTIFICATE-/{exit}' <(openssl s_client -connect 127.0.0.1:3923 </dev/null 2>/dev/null) >cert.pem
"""
import argparse
import calendar
import codecs
import errno
import json
import os
import platform
import re
import stat
import struct
import sys
import threading
import time
import traceback
import urllib.parse
from datetime import datetime, timezone
from urllib.parse import quote_from_bytes as quote
from urllib.parse import unquote_to_bytes as unquote

import builtins
import http.client


WINDOWS = sys.platform == "win32"
MACOS = platform.system() == "Darwin"
UTC = timezone.utc

# !rm.yes>
# month-name to month-number lookup for the nginx html parser
MON3S = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"
MON3 = {b: a for a, b in enumerate(MON3S.split(), 1)}
# !rm.no>
2023-11-30 17:33:07 +00:00
def print(*args, **kwargs):
    """drop-in print() that survives terminals unable to encode the text"""
    try:
        builtins.print(*list(args), **kwargs)
    except:
        fallback = " ".join(str(a) for a in args)
        builtins.print(termsafe(fallback), **kwargs)
2019-09-19 21:43:14 +00:00
2021-07-12 00:15:13 +02:00
# announce which interpreter we ended up running under
print(
    "%s v%s @ %s"
    % (
        platform.python_implementation(),
        ".".join(str(v) for v in sys.version_info),
        sys.executable,
    )
)
2021-06-08 20:19:51 +02:00
def nullfun(*a):
    """discard any arguments; placeholder logger until main() wires up logging"""
    pass


# both log levels start out muted; main() rebinds them
info = dbg = nullfun
is_dbg = False
2023-11-30 17:33:07 +00:00
2020-08-16 23:04:10 +00:00
try:
    from fuse import FUSE, FuseOSError, Operations
except:
    # print a platform-appropriate hint before re-raising
    if WINDOWS:
        libfuse = "install https://github.com/billziss-gh/winfsp/releases/latest"
    elif MACOS:
        libfuse = "install https://osxfuse.github.io/"
    else:
        libfuse = "apt install libfuse3-3\n modprobe fuse"

    m = """\033[33m
  could not import fuse; these may help:
    {} -m pip install --user fusepy
    {}
\033[0m"""
    print(m.format(sys.executable, libfuse))
    raise
2019-09-19 00:53:48 +00:00
2020-08-04 22:50:45 +00:00
def termsafe(txt):
    """best-effort re-encoding of txt so stdout can always render it"""
    codec = sys.stdout.encoding
    try:
        safe = txt.encode(codec, "backslashreplace")
    except:
        safe = txt.encode(codec, "replace")

    return safe.decode(codec)
2020-08-04 22:50:45 +00:00
def threadless_log(fmt, *a):
    """plain logger: message plus newline, no timestamp or thread id"""
    msg = fmt % a if a else fmt
    print(msg + "\n", end="")
2019-09-19 21:43:14 +00:00
# memoized colorful renderings of thread ids
riced_tids = {}


def rice_tid():
    """return a colorized hex rendering of the current thread id"""
    tid = threading.current_thread().ident
    try:
        return riced_tids[tid]
    except:
        tail = struct.unpack(b"B" * 5, struct.pack(b">Q", tid)[-5:])
        zs = "".join("\033[1;37;48;5;%dm%02x" % (x, x) for x in tail)
        riced_tids[tid] = zs + "\033[0m"
        return riced_tids[tid]
2019-09-19 21:43:14 +00:00
def fancy_log(fmt, *a):
    """log with a subsecond timestamp and riced thread-id prefix"""
    text = fmt if not a else fmt % a
    ts = time.time() % 900
    print("%10.6f %s %s\n" % (ts, rice_tid(), text), end="")
2020-10-25 08:07:17 +01:00
def register_wtf8():
def wtf8_enc(text):
return str(text).encode("utf-8", "surrogateescape"), len(text)
def wtf8_dec(binary):
return bytes(binary).decode("utf-8", "surrogateescape"), len(binary)
def wtf8_search(encoding_name):
return codecs.CodecInfo(wtf8_enc, wtf8_dec, name="wtf-8")
codecs.register(wtf8_search)
# translation tables between characters that windows cannot have in
# filenames and their private-use-area stand-ins; filled in by main()
bad_good = {}
good_bad = {}


def enwin(txt):
    """replace win-illegal characters with their private-use stand-ins"""
    return "".join(bad_good.get(c, c) for c in txt)


def dewin(txt):
    """inverse of enwin; restore the original characters"""
    return "".join(good_bad.get(c, c) for c in txt)
2020-08-09 04:09:42 +00:00
class RecentLog(object):
    """log sink which queues messages and prints them in batches from a
    worker thread; much faster than direct printing on slow terminals"""

    def __init__(self, ar):
        self.ar = ar
        self.mtx = threading.Lock()  # guards self.q
        self.f = open(ar.logf, "wb") if ar.logf else None  # optional logfile
        self.q = []  # pending messages

        thr = threading.Thread(target=self.printer)
        thr.daemon = True
        thr.start()

    def put(self, fmt, *a):
        """format and enqueue one message; also writes to the logfile if set"""
        msg = fmt % a if a else fmt
        msg = "%10.6f %s %s\n" % (time.time() % 900, rice_tid(), msg)
        if self.f:
            zd = datetime.now(UTC)
            fmsg = "%d-%04d-%06d.%06d %s" % (
                zd.year,
                zd.month * 100 + zd.day,
                (zd.hour * 100 + zd.minute) * 100 + zd.second,
                zd.microsecond,
                msg,
            )
            self.f.write(fmsg.encode("utf-8"))

        with self.mtx:
            self.q.append(msg)
            # drop old messages if the queue grows too large
            if len(self.q) > 200:
                self.q = self.q[-50:]

    def printer(self):
        """worker loop: flush the queue to stdout every 50 ms"""
        while True:
            time.sleep(0.05)
            with self.mtx:
                q = self.q
                if not q:
                    continue
                self.q = []

            print("".join(q), end="")
2022-11-26 20:01:20 +00:00
# [windows/cmd/cpy3] python dev\copyparty\bin\partyfuse.py q: http://192.168.1.159:1234/
# [windows/cmd/msys2] C:\msys64\mingw64\bin\python3 dev\copyparty\bin\partyfuse.py q: http://192.168.1.159:1234/
# [windows/mty/msys2] /mingw64/bin/python3 /c/Users/ed/dev/copyparty/bin/partyfuse.py q: http://192.168.1.159:1234/
2020-08-09 04:09:42 +00:00
#
# [windows] find /q/music/albums/Phant*24bit -printf '%s %p\n' | sort -n | tail -n 8 | sed -r 's/^[0-9]+ //' | while IFS= read -r x; do dd if="$x" of=/dev/null bs=4k count=8192 & done
# [alpine] ll t; for x in t/2020_0724_16{2,3}*; do dd if="$x" of=/dev/null bs=4k count=10240 & done
#
# 72.4983 windows mintty msys2 fancy_log
# 219.5781 windows cmd msys2 fancy_log
# nope.avi windows cmd cpy3 fancy_log
# 9.8817 windows mintty msys2 RecentLog 200 50 0.1
# 10.2241 windows cmd cpy3 RecentLog 200 50 0.1
# 9.8494 windows cmd msys2 RecentLog 200 50 0.1
# 7.8061 windows mintty msys2 fancy_log <info-only>
# 7.9961 windows mintty msys2 RecentLog <info-only>
# 4.2603 alpine xfce4 cpy3 RecentLog
# 4.1538 alpine xfce4 cpy3 fancy_log
# 3.1742 alpine urxvt cpy3 fancy_log
2019-09-19 21:43:14 +00:00
2019-09-19 19:49:25 +00:00
def get_tid():
    """numeric id of the calling thread"""
    return threading.current_thread().ident
2020-08-04 22:50:45 +00:00
def html_dec(txt):
    """decode the handful of html entities seen in directory listings;
    &amp; is translated last so already-escaped entities stay escaped"""
    for ent, ch in (
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", '"'),
        ("&#13;", "\r"),
        ("&#10;", "\n"),
        ("&amp;", "&"),
    ):
        txt = txt.replace(ent, ch)

    return txt
2020-08-04 22:50:45 +00:00
2019-09-19 00:53:48 +00:00
class CacheNode(object):
    """one cache entry: an identifying tag, the payload, creation time"""

    def __init__(self, tag, data):
        self.tag = tag  # dircache: path str; filecache: [path, offset]
        self.data = data
        self.ts = time.time()  # creation time, checked for expiry
class Gateway(object):
    """http client facing one copyparty (or generic web) server;
    keeps one persistent http connection per calling thread"""

    def __init__(self, ar):
        zs = ar.base_url
        if "://" not in zs:
            zs = "http://" + zs

        self.base_url = zs
        self.password = ar.a

        ui = urllib.parse.urlparse(zs)
        self.web_root = ui.path.strip("/")
        self.SRS = "/%s/" % (self.web_root,) if self.web_root else "/"

        try:
            self.web_host, self.web_port = ui.netloc.split(":")
            self.web_port = int(self.web_port)
        except:
            # no explicit port in the url; derive it from the scheme
            self.web_host = ui.netloc
            if ui.scheme == "http":
                self.web_port = 80
            elif ui.scheme == "https":
                self.web_port = 443
            else:
                raise Exception("bad url?")

        self.ssl_context = None
        self.use_tls = ui.scheme.lower() == "https"
        if self.use_tls:
            import ssl

            if ar.td:
                # -td: disable certificate verification entirely
                self.ssl_context = ssl._create_unverified_context()
            elif ar.te:
                # -te: trust exactly the provided pem
                self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
                self.ssl_context.load_verify_locations(ar.te)

        self.conns = {}  # tid -> http connection
        self.fsuf = "?raw"  # url suffix for file reads
        self.dsuf = "?ls&lt&dots"  # url suffix for dir listings
        # !rm.yes>
        if not ar.html:
            self.parse_html = None
        elif ar.html == "cpp":
            self.parse_html = self.parse_cpp
            self.dsuf = "?lt&dots"
            self.re_row = re.compile(
                r'^<tr><td>(-|DIR|<a [^<]+</a>)</td><td><a[^>]* href="([^"]+)"[^>]*>([^<]+)</a></td><td>([^<]+)</td><td>.*</td><td>([^<]+)</td></tr>$'
            )
        elif ar.html == "nginx":
            self.parse_html = self.parse_nginx
            self.fsuf = ""
            self.dsuf = ""
            self.re_row = re.compile(
                r'^<a href="([^"]+)">([^<]+)</a> *([0-9]{2})-([A-Z][a-z]{2})-([0-9]{4}) ([0-9]{2}:[0-9]{2}) *(-|[0-9]+)\r?$'
            )
        elif ar.html == "iis":
            self.parse_html = self.parse_iis
            self.fsuf = ""
            self.dsuf = ""
            self.re_2nl = re.compile(br"<br>|</pre>")
            self.re_row = re.compile(
                r'^ *([0-9]{1,2})/([0-9]{1,2})/([0-9]{4}) {1,2}([0-9]{1,2}:[0-9]{2}) ([AP]M) +(&lt;dir&gt;|[0-9]+) <A HREF="([^"]+)">([^<>]+)</A>$'
            )
        else:
            raise Exception("unknown HTML dialect: [%s]" % (ar.html,))
        # !rm.no>
2019-09-19 00:53:48 +00:00
def quotep(self, path):
2020-10-25 08:07:17 +01:00
path = path.encode("wtf-8")
2019-09-19 19:49:25 +00:00
return quote(path, safe="/")
2019-09-19 00:53:48 +00:00
def getconn(self, tid=None):
2019-09-19 19:49:25 +00:00
tid = tid or get_tid()
2019-09-19 00:53:48 +00:00
try:
return self.conns[tid]
except:
2020-05-15 00:00:49 +02:00
info("new conn [{}] [{}]".format(self.web_host, self.web_port))
2019-09-19 00:53:48 +00:00
args = {}
if not self.use_tls:
C = http.client.HTTPConnection
else:
C = http.client.HTTPSConnection
if self.ssl_context:
args = {"context": self.ssl_context}
conn = C(self.web_host, self.web_port, timeout=260, **args)
2019-09-19 00:53:48 +00:00
self.conns[tid] = conn
return conn
def closeconn(self, tid=None):
2019-09-19 19:49:25 +00:00
tid = tid or get_tid()
2019-09-19 00:53:48 +00:00
try:
self.conns[tid].close()
del self.conns[tid]
except:
pass
2021-07-12 00:15:13 +02:00
def sendreq(self, meth, path, headers, **kwargs):
2019-09-19 19:49:25 +00:00
tid = get_tid()
if self.password:
headers["Cookie"] = "=".join(["cppwd", self.password])
2019-09-19 00:53:48 +00:00
try:
c = self.getconn(tid)
2021-07-12 00:15:13 +02:00
c.request(meth, path, headers=headers, **kwargs)
2019-09-19 19:49:25 +00:00
return c.getresponse()
except Exception as ex:
info("HTTP %r", ex)
self.closeconn(tid)
try:
2019-09-19 00:53:48 +00:00
c = self.getconn(tid)
2021-07-12 00:15:13 +02:00
c.request(meth, path, headers=headers, **kwargs)
2019-09-19 19:49:25 +00:00
return c.getresponse()
except:
info("http connection failed:\n" + traceback.format_exc())
if self.use_tls and not self.ssl_context:
import ssl
cert = ssl.get_server_certificate((self.web_host, self.web_port))
info("server certificate probably not trusted:\n" + cert)
raise
2019-09-19 00:53:48 +00:00
def listdir(self, path):
2020-10-25 08:07:17 +01:00
if bad_good:
path = dewin(path)
zs = "%s%s/" if path else "%s%s"
web_path = self.quotep(zs % (self.SRS, path)) + self.dsuf
2021-07-12 00:15:13 +02:00
r = self.sendreq("GET", web_path, {})
2019-09-19 00:53:48 +00:00
if r.status != 200:
self.closeconn()
info("http error %s reading dir %r", r.status, web_path)
err = errno.ENOENT if r.status == 404 else errno.EIO
raise FuseOSError(err)
2021-02-21 02:07:34 +00:00
ctype = r.getheader("Content-Type", "")
if ctype == "application/json":
parser = self.parse_jls
2024-10-01 18:35:36 +00:00
# !rm.yes>
2021-02-21 02:07:34 +00:00
elif ctype.startswith("text/html"):
parser = self.parse_html
2024-10-01 18:35:36 +00:00
# !rm.no>
2021-02-21 02:07:34 +00:00
else:
info("listdir on file (%s): %r", ctype, path)
raise FuseOSError(errno.ENOENT)
2019-09-19 00:53:48 +00:00
2020-08-06 19:31:52 +00:00
try:
2021-02-21 02:07:34 +00:00
return parser(r)
2020-08-06 19:31:52 +00:00
except:
info("parser: %r\n%s", path, traceback.format_exc())
raise FuseOSError(errno.EIO)
2019-09-19 00:53:48 +00:00
2019-09-19 21:43:14 +00:00
def download_file_range(self, path, ofs1, ofs2):
2020-10-25 08:07:17 +01:00
if bad_good:
path = dewin(path)
web_path = self.quotep("%s%s" % (self.SRS, path)) + self.fsuf
hdr_range = "bytes=%d-%d" % (ofs1, ofs2 - 1)
t = "DL %4.0fK\033[36m%9d-%-9d\033[0m%r"
info(t, (ofs2 - ofs1) / 1024.0, ofs1, ofs2 - 1, path)
2019-09-19 00:53:48 +00:00
2021-07-12 00:15:13 +02:00
r = self.sendreq("GET", web_path, {"Range": hdr_range})
2019-09-19 00:53:48 +00:00
if r.status != http.client.PARTIAL_CONTENT:
t = "http error %d reading file %r range %s in %s"
info(t, r.status, web_path, hdr_range, rice_tid())
2019-09-19 00:53:48 +00:00
self.closeconn()
raise FuseOSError(errno.EIO)
2019-09-19 00:53:48 +00:00
return r.read()
2024-10-01 16:55:00 +00:00
def parse_jls(self, sck):
2021-02-21 02:07:34 +00:00
rsp = b""
while True:
2024-10-01 16:55:00 +00:00
buf = sck.read(1024 * 32)
2021-02-21 02:07:34 +00:00
if not buf:
break
rsp += buf
rsp = json.loads(rsp.decode("utf-8"))
ret = {}
2021-09-26 19:35:13 +02:00
for statfun, nodes in [
[self.stat_dir, rsp["dirs"]],
[self.stat_file, rsp["files"]],
]:
2021-02-21 02:07:34 +00:00
for n in nodes:
2021-09-26 19:35:13 +02:00
fname = unquote(n["href"].split("?")[0]).rstrip(b"/").decode("wtf-8")
2021-02-21 02:07:34 +00:00
if bad_good:
fname = enwin(fname)
ret[fname] = statfun(n["ts"], n["sz"])
2021-02-21 02:07:34 +00:00
return ret
2024-10-01 18:35:36 +00:00
# !rm.yes>
####################################################################
####################################################################
def parse_cpp(self, sck):
# https://a.ocv.me/pub/
ret = {}
2024-10-01 16:55:00 +00:00
rem = b""
ptn = self.re_row
2019-09-19 00:53:48 +00:00
while True:
buf = sck.read(1024 * 32)
2019-09-19 00:53:48 +00:00
if not buf:
break
2024-10-01 16:55:00 +00:00
buf = rem + buf
rem = b""
idx = buf.rfind(b"\n")
if idx >= 0:
rem = buf[idx + 1 :]
buf = buf[:idx]
2019-09-19 00:53:48 +00:00
lines = buf.decode("utf-8").split("\n")
for line in lines:
m = ptn.match(line)
if not m:
continue
2020-10-25 08:07:17 +01:00
ftype, furl, fname, fsize, fdate = m.groups()
fname = furl.rstrip("/").split("/")[-1]
fname = unquote(fname)
fname = fname.decode("wtf-8")
if bad_good:
fname = enwin(fname)
2020-08-06 19:31:52 +00:00
sz = 1
ts = 60 * 60 * 24 * 2
try:
sz = int(fsize)
2022-05-15 13:51:44 +02:00
ts = calendar.timegm(time.strptime(fdate, "%Y-%m-%d %H:%M:%S"))
2020-08-06 19:31:52 +00:00
except:
info("bad HTML or OS %r %r\n%r", fdate, fsize, line)
2020-08-06 19:31:52 +00:00
# python cannot strptime(1959-01-01) on windows
if ftype != "DIR" and "zip=crc" not in ftype:
ret[fname] = self.stat_file(ts, sz)
2019-09-19 00:53:48 +00:00
else:
ret[fname] = self.stat_dir(ts, sz)
2019-09-19 00:53:48 +00:00
return ret
def parse_nginx(self, sck):
# https://ocv.me/stuff/ "06-Feb-2015 15:43"
ret = {}
rem = b""
re_row = self.re_row
while True:
buf = sck.read(1024 * 32)
if not buf:
break
buf = rem + buf
rem = b""
idx = buf.rfind(b"\n")
if idx >= 0:
rem = buf[idx + 1 :]
buf = buf[:idx]
fdate = ""
lines = buf.decode("utf-8").split("\n")
for line in lines:
m = re_row.match(line)
if not m:
continue
furl, fname, day, smon, year, hm, fsize = m.groups()
fname = furl.rstrip("/").split("/")[-1]
fname = unquote(fname)
fname = fname.decode("wtf-8")
if bad_good:
fname = enwin(fname)
sz = 1
ts = 60 * 60 * 24 * 2
try:
fdate = "%s-%02d-%s %s" % (year, MON3[smon], day, hm)
ts = calendar.timegm(time.strptime(fdate, "%Y-%m-%d %H:%M"))
sz = -1 if fsize == "-" else int(fsize)
except:
info("bad HTML or OS %r %r\n%r", fdate, fsize, line)
if sz == -1:
ret[fname] = self.stat_dir(ts, 4096)
else:
ret[fname] = self.stat_file(ts, sz)
return ret
def parse_iis(self, sck):
# https://nedlasting.miljodirektoratet.no/miljodata/ " 9/28/2024 5:24 AM"
# https://grandcanyon.usgs.gov/photos/Foodbase/CISP/ " 6/29/2012 3:12 PM"
ret = {}
rem = b""
re_row = self.re_row
re_2nl = self.re_2nl
while True:
buf = sck.read(1024 * 32)
if not buf:
break
buf = rem + buf
rem = b""
buf = re_2nl.sub(b"\n", buf)
idx = buf.rfind(b"\n")
if idx >= 0:
rem = buf[idx + 1 :]
buf = buf[:idx]
lines = buf.decode("utf-8").split("\n")
for line in lines:
m = re_row.match(line)
if not m:
continue
mon, day, year, hm, xm, fsize, furl, fname = m.groups()
fname = furl.rstrip("/").split("/")[-1]
fname = unquote(fname)
fname = fname.decode("wtf-8")
if bad_good:
fname = enwin(fname)
sz = 1
ts = 60 * 60 * 24 * 2
fdate = "%s-%s-%s %s %s" % (year, mon, day, hm, xm)
try:
ts = calendar.timegm(time.strptime(fdate, "%Y-%m-%d %H:%M %p"))
sz = -1 if fsize == "&lt;dir&gt;" else int(fsize)
except:
info("bad HTML or OS %r %r\n%r", fdate, fsize, line)
if sz == -1:
ret[fname] = self.stat_dir(ts, 4096)
else:
ret[fname] = self.stat_file(ts, sz)
return ret
####################################################################
####################################################################
2024-10-01 18:35:36 +00:00
# !rm.no>
2019-09-19 00:53:48 +00:00
def stat_dir(self, ts, sz):
2019-09-19 00:53:48 +00:00
return {
2020-08-04 22:50:45 +00:00
"st_mode": stat.S_IFDIR | 0o555,
2019-09-19 00:53:48 +00:00
"st_uid": 1000,
"st_gid": 1000,
"st_size": sz,
"st_atime": ts,
"st_mtime": ts,
"st_ctime": ts,
"st_blocks": int((sz + 511) / 512),
}
def stat_file(self, ts, sz):
return {
2020-08-04 22:50:45 +00:00
"st_mode": stat.S_IFREG | 0o444,
2019-09-19 00:53:48 +00:00
"st_uid": 1000,
"st_gid": 1000,
"st_size": sz,
"st_atime": ts,
"st_mtime": ts,
"st_ctime": ts,
"st_blocks": int((sz + 511) / 512),
}
class CPPF(Operations):
    """the fuse filesystem: a read-only view of a remote copyparty"""

    def __init__(self, ar):
        self.gw = Gateway(ar)
        self.junk_fh_ctr = 3  # fake filehandle counter (win32 compat)
        self.t_dircache = ar.cds  # dircache expiry, seconds
        self.n_dircache = ar.cdn  # dircache max entries
        self.n_filecache = ar.cf  # filecache max blocks

        self.dircache = []
        self.dircache_mtx = threading.Lock()

        self.filecache = []
        self.filecache_mtx = threading.Lock()

        info("up")
2019-09-19 00:53:48 +00:00
2020-08-07 21:55:40 +00:00
def _describe(self):
msg = []
with self.filecache_mtx:
for n, cn in enumerate(self.filecache):
cache_path, cache1 = cn.tag
cache2 = cache1 + len(cn.data)
t = "\n{:<2} {:>7} {:>10}:{:<9} {}".format(
2020-10-25 08:07:17 +01:00
n,
len(cn.data),
cache1,
cache2,
cache_path.replace("\r", "\\r").replace("\n", "\\n"),
)
msg.append(t)
return "".join(msg)
2020-08-07 21:55:40 +00:00
2019-09-19 00:53:48 +00:00
def clean_dircache(self):
"""not threadsafe"""
now = time.time()
cutoff = 0
for cn in self.dircache:
if now - cn.ts <= self.t_dircache:
2019-09-19 00:53:48 +00:00
break
cutoff += 1
2019-09-19 00:53:48 +00:00
if cutoff > 0:
self.dircache = self.dircache[cutoff:]
elif len(self.dircache) > self.n_dircache:
self.dircache.pop(0)
2019-09-19 00:53:48 +00:00
def get_cached_dir(self, dirpath):
2020-08-06 18:22:25 +00:00
with self.dircache_mtx:
2019-09-19 00:53:48 +00:00
for cn in self.dircache:
2019-09-19 21:43:14 +00:00
if cn.tag == dirpath:
if time.time() - cn.ts <= self.t_dircache:
return cn
break
2019-09-19 00:53:48 +00:00
return None
2024-10-01 18:35:36 +00:00
# !rm.yes>
2019-09-19 21:43:14 +00:00
"""
,-------------------------------, g1>=c1, g2<=c2
|cache1 cache2| buf[g1-c1:(g1-c1)+(g2-g1)]
`-------------------------------'
,---------------,
|get1 get2|
`---------------'
__________________________________________________________________________
,-------------------------------, g2<=c2, (g2>=c1)
|cache1 cache2| cdr=buf[:g2-c1]
`-------------------------------' dl car; g1-512K:c1
,---------------,
|get1 get2|
`---------------'
__________________________________________________________________________
,-------------------------------, g1>=c1, (g1<=c2)
|cache1 cache2| car=buf[c2-g1:]
`-------------------------------' dl cdr; c2:c2+1M
,---------------,
|get1 get2|
`---------------'
"""
2024-10-01 18:35:36 +00:00
# !rm.no>
2019-09-19 21:43:14 +00:00
def get_cached_file(self, path, get1, get2, file_sz):
car = None
cdr = None
ncn = -1
if is_dbg:
dbg("cache request %d:%d |%d|%s", get1, get2, file_sz, self._describe())
2020-08-06 18:22:25 +00:00
with self.filecache_mtx:
2019-09-19 21:43:14 +00:00
for cn in self.filecache:
ncn += 1
cache_path, cache1 = cn.tag
if cache_path != path:
continue
cache2 = cache1 + len(cn.data)
if get2 <= cache1 or get1 >= cache2:
2020-08-09 00:51:48 +00:00
# request does not overlap with cached area at all
continue
if get1 < cache1 and get2 > cache2:
# cached area does overlap, but must specifically contain
# either the first or last byte in the requested range
2019-09-19 21:43:14 +00:00
continue
if get1 >= cache1 and get2 <= cache2:
# keep cache entry alive by moving it to the end
self.filecache = (
self.filecache[:ncn] + self.filecache[ncn + 1 :] + [cn]
)
buf_ofs = get1 - cache1
buf_end = buf_ofs + (get2 - get1)
dbg(
"found all (#%d %d:%d |%d|) [%d:%d] = %d",
ncn,
cache1,
cache2,
len(cn.data),
buf_ofs,
buf_end,
buf_end - buf_ofs,
2019-09-19 21:43:14 +00:00
)
return cn.data[buf_ofs:buf_end]
2020-08-07 23:55:48 +00:00
if get2 <= cache2:
2019-09-19 21:43:14 +00:00
x = cn.data[: get2 - cache1]
if not cdr or len(cdr) < len(x):
dbg(
"found cdr (#%d %d:%d |%d|) [:%d-%d] = [:%d] = %d",
ncn,
cache1,
cache2,
len(cn.data),
get2,
cache1,
get2 - cache1,
len(x),
2019-09-19 21:43:14 +00:00
)
cdr = x
continue
2020-08-07 23:55:48 +00:00
if get1 >= cache1:
x = cn.data[-(max(0, cache2 - get1)) :]
2019-09-19 21:43:14 +00:00
if not car or len(car) < len(x):
dbg(
"found car (#%d %d:%d |%d|) [-(%d-%d):] = [-%d:] = %d",
ncn,
cache1,
cache2,
len(cn.data),
cache2,
get1,
cache2 - get1,
len(x),
2019-09-19 21:43:14 +00:00
)
car = x
continue
msg = "cache fallthrough\n%d %d %d\n%d %d %d\n%d %d --\n%s" % (
2020-08-07 21:55:40 +00:00
get1,
get2,
get2 - get1,
cache1,
cache2,
cache2 - cache1,
get1 - cache1,
get2 - cache2,
self._describe(),
2020-08-07 21:55:40 +00:00
)
info(msg)
raise FuseOSError(errno.EIO)
2019-09-19 21:43:14 +00:00
2020-08-07 23:55:48 +00:00
if car and cdr and len(car) + len(cdr) == get2 - get1:
2019-09-19 21:43:14 +00:00
dbg("<cache> have both")
2020-08-07 23:55:48 +00:00
return car + cdr
2019-09-19 21:43:14 +00:00
2020-08-07 23:55:48 +00:00
elif cdr and (not car or len(car) < len(cdr)):
2019-09-19 21:43:14 +00:00
h_end = get1 + (get2 - get1) - len(cdr)
h_ofs = min(get1, h_end - 0x80000) # 512k
2019-09-19 21:43:14 +00:00
if h_ofs < 0:
h_ofs = 0
buf_ofs = get1 - h_ofs
2019-09-19 21:43:14 +00:00
if dbg:
t = "<cache> cdr %d, car %d:%d |%d| [%d:]"
dbg(t, len(cdr), h_ofs, h_end, h_end - h_ofs, buf_ofs)
2019-09-19 21:43:14 +00:00
2020-05-15 00:00:49 +02:00
buf = self.gw.download_file_range(path, h_ofs, h_end)
if len(buf) == h_end - h_ofs:
ret = buf[buf_ofs:] + cdr
else:
ret = buf[get1 - h_ofs :]
t = "remote truncated %d:%d to |%d|, will return |%d|"
info(t, h_ofs, h_end, len(buf), len(ret))
2019-09-19 21:43:14 +00:00
elif car:
h_ofs = get1 + len(car)
if get2 < 0x100000:
# already cached from 0 to 64k, now do ~64k plus 1 MiB
h_end = max(get2, h_ofs + 0x100000) # 1m
else:
# after 1 MiB, bump window to 8 MiB
h_end = max(get2, h_ofs + 0x800000) # 8m
2019-09-19 21:43:14 +00:00
if h_end > file_sz:
h_end = file_sz
buf_ofs = (get2 - get1) - len(car)
t = "<cache> car %d, cdr %d:%d |%d| [:%d]"
dbg(t, len(car), h_ofs, h_end, h_end - h_ofs, buf_ofs)
2019-09-19 21:43:14 +00:00
2020-05-15 00:00:49 +02:00
buf = self.gw.download_file_range(path, h_ofs, h_end)
2019-09-19 21:43:14 +00:00
ret = car + buf[:buf_ofs]
else:
if get2 - get1 < 0x500000: # 5m
2021-01-06 17:22:07 +01:00
# unless the request is for the last n bytes of the file,
# grow the start to cache some stuff around the range
if get2 < file_sz - 1:
h_ofs = get1 - 0x40000 # 256k
2021-01-06 17:22:07 +01:00
else:
h_ofs = get1 - 0x10000 # 64k
2021-01-06 17:22:07 +01:00
# likewise grow the end unless start is 0
if get1 >= 0x100000:
h_end = get2 + 0x400000 # 4m
elif get1 > 0:
h_end = get2 + 0x100000 # 1m
2021-01-06 17:22:07 +01:00
else:
h_end = get2 + 0x10000 # 64k
else:
# big enough, doesn't need pads
h_ofs = get1
h_end = get2
2019-09-19 21:43:14 +00:00
if h_ofs < 0:
h_ofs = 0
if h_end > file_sz:
h_end = file_sz
buf_ofs = get1 - h_ofs
buf_end = buf_ofs + get2 - get1
t = "<cache> %d:%d |%d| [%d:%d]"
dbg(t, h_ofs, h_end, h_end - h_ofs, buf_ofs, buf_end)
2019-09-19 21:43:14 +00:00
2020-05-15 00:00:49 +02:00
buf = self.gw.download_file_range(path, h_ofs, h_end)
2019-09-19 21:43:14 +00:00
ret = buf[buf_ofs:buf_end]
cn = CacheNode([path, h_ofs], buf)
2020-08-06 18:22:25 +00:00
with self.filecache_mtx:
2020-08-07 21:55:40 +00:00
if len(self.filecache) >= self.n_filecache:
2019-09-19 21:43:14 +00:00
self.filecache = self.filecache[1:] + [cn]
else:
self.filecache.append(cn)
return ret
2020-08-07 21:55:40 +00:00
def _readdir(self, path, fh=None):
2019-09-19 00:53:48 +00:00
path = path.strip("/")
dbg("readdir %r [%s]", path, fh)
2019-09-19 00:53:48 +00:00
ret = self.gw.listdir(path)
2020-08-07 21:55:40 +00:00
if not self.n_dircache:
return ret
2019-09-19 00:53:48 +00:00
2020-08-06 18:22:25 +00:00
with self.dircache_mtx:
2019-09-19 00:53:48 +00:00
cn = CacheNode(path, ret)
self.dircache.append(cn)
self.clean_dircache()
2021-01-06 22:54:54 +01:00
# import pprint; pprint.pprint(ret)
2019-09-19 00:53:48 +00:00
return ret
2020-08-07 21:55:40 +00:00
def readdir(self, path, fh=None):
return [".", ".."] + list(self._readdir(path, fh))
2020-08-07 21:55:40 +00:00
2019-09-19 00:53:48 +00:00
def read(self, path, length, offset, fh=None):
2020-08-09 04:09:42 +00:00
req_max = 1024 * 1024 * 8
cache_max = 1024 * 1024 * 2
if length > req_max:
# windows actually doing 240 MiB read calls, sausage
info("truncate |%d| to %dMiB", length, req_max >> 20)
2020-08-09 04:09:42 +00:00
length = req_max
2019-09-19 00:53:48 +00:00
2020-08-09 04:09:42 +00:00
path = path.strip("/")
2019-09-19 21:43:14 +00:00
ofs2 = offset + length
2019-09-19 00:53:48 +00:00
file_sz = self.getattr(path)["st_size"]
dbg("read %r |%d| %d:%d max %d", path, length, offset, ofs2, file_sz)
2020-05-15 00:00:49 +02:00
if ofs2 > file_sz:
ofs2 = file_sz
dbg("truncate to |%d| :%d", ofs2 - offset, ofs2)
2020-05-15 00:00:49 +02:00
if file_sz == 0 or offset >= ofs2:
return b""
2019-09-19 00:53:48 +00:00
if self.n_filecache and length <= cache_max:
ret = self.get_cached_file(path, offset, ofs2, file_sz)
else:
ret = self.gw.download_file_range(path, offset, ofs2)
2020-08-07 21:55:40 +00:00
return ret
2024-10-02 21:59:53 +00:00
# !rm.yes>
fn = "cppf-{}-{}-{}".format(time.time(), offset, length)
if False:
with open(fn, "wb", len(ret)) as f:
f.write(ret)
elif self.n_filecache:
ret2 = self.gw.download_file_range(path, offset, ofs2)
if ret != ret2:
info(fn)
for v in [ret, ret2]:
try:
info(len(v))
except:
info("uhh " + repr(v))
with open(fn + ".bad", "wb") as f:
f.write(ret)
with open(fn + ".good", "wb") as f:
f.write(ret2)
raise Exception("cache bork")
return ret
2024-10-02 21:59:53 +00:00
# !rm.no>
2019-09-19 00:53:48 +00:00
def getattr(self, path, fh=None):
dbg("getattr %r", path)
2020-10-25 08:07:17 +01:00
if WINDOWS:
path = enwin(path) # windows occasionally decodes f0xx to xx
2020-08-04 22:50:45 +00:00
2019-09-19 00:53:48 +00:00
path = path.strip("/")
if not path:
ret = self.gw.stat_dir(time.time(), 4096)
dbg("/=%r", ret)
return ret
2019-09-19 00:53:48 +00:00
try:
dirpath, fname = path.rsplit("/", 1)
except:
dirpath = ""
fname = path
cn = self.get_cached_dir(dirpath)
if cn:
dents = cn.data
else:
2020-08-09 00:51:48 +00:00
dbg("cache miss")
2020-08-07 21:55:40 +00:00
dents = self._readdir(dirpath)
2019-09-19 00:53:48 +00:00
try:
ret = dents[fname]
dbg("s=%r", ret)
return ret
except:
pass
2019-09-19 00:53:48 +00:00
2021-01-06 22:54:54 +01:00
fun = info
2021-02-21 02:07:34 +00:00
if MACOS and path.split("/")[-1].startswith("._"):
2021-01-06 22:54:54 +01:00
fun = dbg
2021-02-21 02:07:34 +00:00
fun("=ENOENT %r", path)
2019-09-19 00:53:48 +00:00
raise FuseOSError(errno.ENOENT)
access = None
flush = None
getxattr = None
listxattr = None
open = None
opendir = None
release = None
releasedir = None
statfs = None
2020-08-07 20:00:30 +00:00
2024-10-01 18:35:36 +00:00
# !rm.yes>
2020-08-04 22:50:45 +00:00
if False:
# incorrect semantics but good for debugging stuff like samba and msys2
def access(self, path, mode):
dbg("@@ access [{}] [{}]".format(path, mode))
2020-08-04 22:50:45 +00:00
return 1 if self.getattr(path) else 0
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def flush(self, path, fh):
dbg("@@ flush [{}] [{}]".format(path, fh))
2020-08-04 22:50:45 +00:00
return True
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def getxattr(self, *args):
dbg("@@ getxattr [{}]".format("] [".join(str(x) for x in args)))
2020-08-04 22:50:45 +00:00
return False
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def listxattr(self, *args):
dbg("@@ listxattr [{}]".format("] [".join(str(x) for x in args)))
2020-08-04 22:50:45 +00:00
return False
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def open(self, path, flags):
dbg("@@ open [{}] [{}]".format(path, flags))
2020-08-04 22:50:45 +00:00
return 42
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def opendir(self, fh):
dbg("@@ opendir [{}]".format(fh))
2020-08-04 22:50:45 +00:00
return 69
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def release(self, ino, fi):
dbg("@@ release [{}] [{}]".format(ino, fi))
2020-08-04 22:50:45 +00:00
return True
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def releasedir(self, ino, fi):
dbg("@@ releasedir [{}] [{}]".format(ino, fi))
2020-08-04 22:50:45 +00:00
return True
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def statfs(self, path):
dbg("@@ statfs [{}]".format(path))
2020-08-04 22:50:45 +00:00
return {}
2024-10-02 21:59:53 +00:00
2024-10-01 18:35:36 +00:00
# !rm.no>
2020-08-04 22:50:45 +00:00
2020-08-07 20:00:30 +00:00
if sys.platform == "win32":
2020-08-04 22:50:45 +00:00
# quick compat for /mingw64/bin/python3 (msys2)
2020-08-04 23:46:57 +00:00
def _open(self, path):
2020-08-04 22:50:45 +00:00
try:
x = self.getattr(path)
if x["st_mode"] <= 0:
raise Exception()
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
self.junk_fh_ctr += 1
if self.junk_fh_ctr > 32000: # TODO untested
2020-08-04 23:46:57 +00:00
self.junk_fh_ctr = 4
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
return self.junk_fh_ctr
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
except Exception as ex:
info("open ERR %r", ex)
2020-08-04 22:50:45 +00:00
raise FuseOSError(errno.ENOENT)
2020-08-07 20:00:30 +00:00
2020-08-04 23:46:57 +00:00
def open(self, path, flags):
dbg("open %r [%s]", path, flags)
2020-08-04 23:46:57 +00:00
return self._open(path)
2020-08-07 20:00:30 +00:00
2020-08-04 23:46:57 +00:00
def opendir(self, path):
dbg("opendir %r", path)
2020-08-04 23:46:57 +00:00
return self._open(path)
2020-08-07 20:00:30 +00:00
2020-08-04 22:50:45 +00:00
def flush(self, path, fh):
dbg("flush %r [%s]", path, fh)
2020-08-04 22:50:45 +00:00
def release(self, ino, fi):
dbg("release %r [%s]", ino, fi)
2020-08-04 23:46:57 +00:00
def releasedir(self, ino, fi):
dbg("releasedir %r [%s]", ino, fi)
2020-08-07 20:00:30 +00:00
2020-08-04 23:46:57 +00:00
def access(self, path, mode):
dbg("access %r [%s]", path, mode)
2020-08-04 23:46:57 +00:00
try:
x = self.getattr(path)
if x["st_mode"] <= 0:
raise Exception()
except:
raise FuseOSError(errno.ENOENT)
2019-09-19 00:53:48 +00:00
class TheArgparseFormatter(
    argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter
):
    # combines raw-text help (keeps manual linebreaks in the epilog)
    # with showing argument defaults in --help
    pass
2019-09-19 00:53:48 +00:00
def main():
    """parse arguments, configure logging, then mount the fs (blocking)"""
    global info, dbg, is_dbg

    time.strptime("19970815", "%Y%m%d")  # python#7980
    ver = "{0}, v{1}".format(S_BUILD_DT, S_VERSION)
    if "--version" in sys.argv:
        print("partyfuse", ver)
        return

    # filecache helps for reads that are ~64k or smaller;
    # windows likes to use 4k and 64k so cache is important,
    # linux generally does 128k so the cache is still nice,
    # value is numChunks (1~8M each) to keep in the cache
    nf = 12

    # dircache is always a boost,
    # only want to disable it for tests etc,
    cdn = 24  # max num dirs; keep larger than max dir depth; 0=disable
    cds = 1  # numsec until an entry goes stale

    where = "local directory"
    if WINDOWS:
        where += " or DRIVE:"

    ex_pre = "\n " + os.path.basename(__file__) + " "
    examples = ["http://192.168.1.69:3923/music/ ./music"]
    if WINDOWS:
        examples.append("http://192.168.1.69:3923/music/ M:")

    ap = argparse.ArgumentParser(
        formatter_class=TheArgparseFormatter,
        description="mount a copyparty server as a local filesystem -- " + ver,
        epilog="example:" + ex_pre + ex_pre.join(examples),
    )
    # fmt: off
    ap.add_argument("base_url", type=str, help="remote copyparty URL to mount")
    ap.add_argument("local_path", type=str, help=where + " to mount it on")
    ap.add_argument("-a", metavar="PASSWORD", help="password or $filepath")
    # !rm.yes>
    ap.add_argument("--html", metavar="TYPE", default="", help="which HTML parser to use; cpp, nginx, iis")
    # !rm.no>

    ap2 = ap.add_argument_group("https/TLS")
    ap2.add_argument("-te", metavar="PEMFILE", help="certificate to expect/verify")
    ap2.add_argument("-td", action="store_true", help="disable certificate check")

    ap2 = ap.add_argument_group("cache/perf")
    ap2.add_argument("-cdn", metavar="DIRS", type=float, default=cdn, help="directory-cache, max num dirs; 0=disable")
    ap2.add_argument("-cds", metavar="SECS", type=float, default=cds, help="directory-cache, expiration time")
    ap2.add_argument("-cf", metavar="BLOCKS", type=int, default=nf, help="file cache; each block is <= 1 MiB")

    ap2 = ap.add_argument_group("logging")
    ap2.add_argument("-q", action="store_true", help="quiet")
    ap2.add_argument("-d", action="store_true", help="debug/verbose")
    ap2.add_argument("--slowterm", action="store_true", help="only most recent msgs; good for windows")
    ap2.add_argument("--logf", metavar="FILE", type=str, default="", help="log to FILE; enables --slowterm")

    ap2 = ap.add_argument_group("fuse")
    ap2.add_argument("--oth", action="store_true", help="tell FUSE to '-o allow_other'")
    ap2.add_argument("--nonempty", action="store_true", help="tell FUSE to '-o nonempty'")

    ar = ap.parse_args()
    # fmt: on

    if ar.logf:
        ar.slowterm = True

    # windows terminals are slow (cmd.exe, mintty)
    # otoh fancy_log beats RecentLog on linux
    logger = RecentLog(ar).put if ar.slowterm else fancy_log
    if ar.d:
        info = logger
        dbg = logger
        is_dbg = True
    elif not ar.q:
        info = logger

    if ar.a and ar.a.startswith("$"):
        fn = ar.a[1:]
        info("reading password from file %r", fn)
        with open(fn, "rb") as f:
            ar.a = f.read().decode("utf-8").strip()

    if WINDOWS:
        # NOTE(review): presumably warms up the console for ansi output -- confirm
        os.system("rem")

        for ch in '<>:"\\|?*':
            # microsoft maps illegal characters to f0xx
            # (e000 to f8ff is basic-plane private-use)
            bad_good[ch] = chr(ord(ch) + 0xF000)

        for n in range(0, 0x100):
            # map surrogateescape to another private-use area
            bad_good[chr(n + 0xDC00)] = chr(n + 0xF100)

        for k, v in bad_good.items():
            good_bad[v] = k

    register_wtf8()

    args = {"foreground": True, "nothreads": True}
    if ar.oth:
        args["allow_other"] = True

    if ar.nonempty:
        args["nonempty"] = True

    FUSE(CPPF(ar), ar.local_path, encoding="wtf-8", **args)
2019-09-19 00:53:48 +00:00
if __name__ == "__main__":
main()