from __future__ import annotations import abc import ast import collections import copy import fnmatch import gzip import io import logging import pickle import re import typing import weakref from defusedxml import minidom from hippolyzer.lib.base import serialization as se, llsd from hippolyzer.lib.base.message.llsd_msg_serializer import LLSDMessageSerializer from hippolyzer.lib.base.message.message import Message from hippolyzer.lib.base.datatypes import TaggedUnion, UUID, TupleCoord from hippolyzer.lib.base.helpers import bytes_escape from hippolyzer.lib.base.message.message_formatting import HumanMessageSerializer from hippolyzer.lib.base.message.msgtypes import PacketFlags from hippolyzer.lib.base.message.template_dict import DEFAULT_TEMPLATE_DICT from hippolyzer.lib.base.network.transport import Direction from hippolyzer.lib.proxy.message_filter import MetaFieldSpecifier, compile_filter, BaseFilterNode, MessageFilterNode, \ EnumFieldSpecifier, MatchResult from hippolyzer.lib.proxy.http_flow import HippoHTTPFlow from hippolyzer.lib.proxy.caps import CapType, SerializedCapData if typing.TYPE_CHECKING: from hippolyzer.lib.proxy.region import ProxiedRegion from hippolyzer.lib.proxy.sessions import Session LOG = logging.getLogger(__name__) class BaseMessageLogger: paused: bool def log_lludp_message(self, session: Session, region: ProxiedRegion, message: Message): if self.paused: return False return self.add_log_entry(LLUDPMessageLogEntry(message, region, session)) def log_http_response(self, flow: HippoHTTPFlow): if self.paused: return False # These are huge, let's not log them for now. if flow.cap_data and flow.cap_data.asset_server_cap: return False return self.add_log_entry(HTTPMessageLogEntry(flow)) def log_eq_event(self, session: Session, region: ProxiedRegion, event: dict): if self.paused: return False return self.add_log_entry(EQMessageLogEntry(event, region, session)) @abc.abstractmethod def add_log_entry(self, entry: AbstractMessageLogEntry): pass class FilteringMessageLogger(BaseMessageLogger): def __init__(self, maxlen=2000): BaseMessageLogger.__init__(self) self._raw_entries = collections.deque(maxlen=maxlen) self._filtered_entries: typing.List[AbstractMessageLogEntry] = [] self.paused = False self.filter: BaseFilterNode = compile_filter("") def __iter__(self) -> typing.Iterator[AbstractMessageLogEntry]: return iter(self._filtered_entries) def set_filter(self, filter_str: str): self.filter = compile_filter(filter_str) self._begin_reset() # Keep any entries that've aged out of the raw entries list that # match the new filter self._filtered_entries = [ m for m in self._filtered_entries if m not in self._raw_entries and self.filter.match(m) ] self._filtered_entries.extend((m for m in self._raw_entries if self.filter.match(m))) self._end_reset() def set_paused(self, paused: bool): self.paused = paused # Hooks that Qt models will want to implement def _begin_insert(self, insert_idx: int): pass def _end_insert(self): pass def _begin_reset(self): pass def _end_reset(self): pass def add_log_entry(self, entry: AbstractMessageLogEntry): try: # Paused, throw it away. if self.paused: return False self._raw_entries.append(entry) if self.filter.match(entry): next_idx = len(self._filtered_entries) self._begin_insert(next_idx) self._filtered_entries.append(entry) self._end_insert() return True except Exception: LOG.exception("Failed to filter queued message") return False def clear(self): self._begin_reset() self._filtered_entries.clear() self._raw_entries.clear() self._end_reset() class WrappingMessageLogger(BaseMessageLogger): def __init__(self): self.loggers: typing.List[BaseMessageLogger] = [] @property def paused(self): return all(x.paused for x in self.loggers) def add_log_entry(self, entry: AbstractMessageLogEntry): logged = False for logger in self.loggers: if logger.add_log_entry(entry): logged = True # At least one logger ended up keeping the message around, so let's # cache the summary before we freeze the message. if logged: entry.cache_summary() entry.freeze() class AbstractMessageLogEntry(abc.ABC): region: typing.Optional[ProxiedRegion] session: typing.Optional[Session] name: str type: str __slots__ = ["_region", "_session", "_region_name", "_agent_id", "_summary", "meta"] def __init__(self, region: typing.Optional[ProxiedRegion], session: typing.Optional[Session]): if region and not isinstance(region, weakref.ReferenceType): region = weakref.ref(region) if session and not isinstance(session, weakref.ReferenceType): session = weakref.ref(session) self._region: typing.Optional[weakref.ReferenceType] = region self._session: typing.Optional[weakref.ReferenceType] = session self._region_name = None self._agent_id = None self._summary = None if self.region: self._region_name = self.region.name if self.session: self._agent_id = self.session.agent_id agent_obj = None if self.region is not None: agent_obj = self.region.objects.lookup_fullid(self.agent_id) self.meta = { "RegionName": self.region_name, "AgentID": self.agent_id, "SessionID": self.session.id if self.session else None, "AgentLocal": agent_obj.LocalID if agent_obj is not None else None, "Method": self.method, "Type": self.type, "SelectedLocal": self._current_selected_local(), "SelectedFull": self._current_selected_full(), } def to_dict(self) -> dict: meta = self.meta.copy() def _dehydrate_meta_uuid(key: str): if meta[key]: meta[key] = str(meta[key]) _dehydrate_meta_uuid("AgentID") _dehydrate_meta_uuid("SelectedFull") _dehydrate_meta_uuid("SessionID") return { "type": self.type, "region_name": self.region_name, "agent_id": str(self.agent_id) if self.agent_id is not None else None, "summary": self.summary, "meta": meta, } @classmethod @abc.abstractmethod def from_dict(cls, val: dict): pass def apply_dict(self, val: dict) -> None: self._region_name = val['region_name'] self._agent_id = UUID(val['agent_id']) if val['agent_id'] else None self._summary = val['summary'] meta = val['meta'].copy() def _hydrate_meta_uuid(key: str): if meta[key]: meta[key] = UUID(meta[key]) _hydrate_meta_uuid("AgentID") _hydrate_meta_uuid("SelectedFull") _hydrate_meta_uuid("SessionID") self.meta.update(meta) def freeze(self): pass def cache_summary(self): self._summary = self.summary def _current_selected_local(self): if self.session: return self.session.selected.object_local return None def _current_selected_full(self): selected_local = self._current_selected_local() if selected_local is None or self.region is None: return None obj = self.region.objects.lookup_localid(selected_local) return obj and obj.FullID def _get_meta(self, name: str) -> typing.Any: # Slight difference in semantics. Filters are meant to return the same # thing no matter when they're run, so SelectedLocal and friends resolve # to the selected items _at the time the message was logged_. To handle # the case where we want to match on the selected object at the time the # filter is evaluated, we resolve these here. if name == "CurrentSelectedLocal": return self._current_selected_local() elif name == "CurrentSelectedFull": return self._current_selected_full() return self.meta.get(name) @property def region(self) -> typing.Optional[ProxiedRegion]: if self._region: return self._region() return None @property def session(self) -> typing.Optional[Session]: if self._session: return self._session() return None @property def region_name(self) -> str: region = self.region if region: self._region_name = region.name return self._region_name # Region may die after a message is logged, need to keep this around. if self._region_name: return self._region_name return "" @property def agent_id(self) -> typing.Optional[UUID]: if self._agent_id: return self._agent_id session = self.session if session: self._agent_id = session.agent_id return self._agent_id return None @property def host(self) -> str: region_name = self.region_name if not region_name: return "" session_str = "" agent_id = self.agent_id if agent_id: session_str = f" ({agent_id})" return region_name + session_str def request(self, beautify=False, replacements=None): return None def response(self, beautify=False): return None def _packet_root_matches(self, pattern): if fnmatch.fnmatchcase(self.name, pattern): return True if fnmatch.fnmatchcase(self.type, pattern): return True return False def _val_matches(self, operator, val, expected): if isinstance(expected, MetaFieldSpecifier): if len(expected) != 1: raise ValueError(f"Can only support single-level Meta specifiers, not {expected!r}") expected = self._get_meta(str(expected[0])) if not isinstance(expected, (int, float, bytes, str, type(None), tuple)): if callable(expected): expected = expected() else: expected = str(expected) elif isinstance(expected, EnumFieldSpecifier): # Local import so we get a fresh copy of the templates module from hippolyzer.lib.proxy import templates enum_cls = getattr(templates, expected.enum_name) expected = enum_cls[expected.field_name] elif expected is not None: # Unbox the expected value expected = expected.value if not isinstance(val, (int, float, bytes, str, type(None), tuple, TupleCoord)): val = str(val) if not operator: return bool(val) elif operator == "==": return val == expected elif operator == "!=": return val != expected elif operator == "^=": if val is None: return False return val.startswith(expected) elif operator == "$=": if val is None: return False return val.endswith(expected) elif operator == "~=": if val is None: return False return expected in val elif operator == "<": return val < expected elif operator == "<=": return val <= expected elif operator == ">": return val > expected elif operator == ">=": return val >= expected elif operator == "&": return val & expected else: raise ValueError(f"Unexpected operator {operator!r}") def _base_matches(self, matcher: "MessageFilterNode") -> typing.Optional[bool]: if len(matcher.selector) == 1: # Comparison operators would make no sense here if matcher.value or matcher.operator: return False return self._packet_root_matches(matcher.selector[0]) if matcher.selector[0] == "Meta": if len(matcher.selector) == 2: return self._val_matches(matcher.operator, self._get_meta(matcher.selector[1]), matcher.value) elif len(matcher.selector) == 3: meta_dict = self._get_meta(matcher.selector[1]) if not meta_dict or not hasattr(meta_dict, 'get'): return False return self._val_matches(matcher.operator, meta_dict.get(matcher.selector[2]), matcher.value) return None def matches(self, matcher: "MessageFilterNode", short_circuit=True) -> "MatchResult": return MatchResult(self._base_matches(matcher) or False, []) @property def seq(self): return "" @property def method(self): return "" @property def summary(self): return "" @staticmethod def _format_llsd(parsed): xmlified = llsd.format_pretty_xml(parsed) # dedent by 1 for easier visual scanning xmlified = re.sub(rb" ", b"", xmlified) return xmlified.decode("utf8", errors="replace") @staticmethod def _format_xml(content): beautified = minidom.parseString(content).toprettyxml(indent=" ") # kill blank lines. will break cdata sections. meh. beautified = re.sub(r'\n\s*\n', '\n', beautified, flags=re.MULTILINE) return re.sub(r'<(\w+)>\s*', r'<\1>', beautified, flags=re.MULTILINE) class HTTPMessageLogEntry(AbstractMessageLogEntry): __slots__ = ["flow"] def __init__(self, flow: HippoHTTPFlow): self.flow: HippoHTTPFlow = flow cap_data = self.flow.cap_data region = cap_data and cap_data.region session = cap_data and cap_data.session super().__init__(region, session) # This was a request the proxy made through itself self.meta["Synthetic"] = flow.request_injected @property def type(self): return "HTTP" @property def name(self): cap_data = self.flow.cap_data name = cap_data and cap_data.cap_name if name: return name return self.flow.request.url @property def method(self): return self.flow.request.method def _format_http_message(self, want_request, beautify): message = self.flow.request if want_request else self.flow.response method = self.flow.request.method buf = io.StringIO() cap_data = self.flow.cap_data cap_name = cap_data and cap_data.cap_name base_url = cap_name and cap_data.base_url temporary_cap = cap_data and cap_data.type == CapType.TEMPORARY beautify_url = (beautify and base_url and cap_name and not temporary_cap and self.session and want_request) if want_request: buf.write(message.method) buf.write(" ") if beautify_url: buf.write(f"[[{cap_name}]]{message.url[len(base_url):]}") else: buf.write(message.url) buf.write(" ") buf.write(message.http_version) else: buf.write(message.http_version) buf.write(" ") buf.write(str(message.status_code)) buf.write(" ") buf.write(message.reason) buf.write("\r\n") if beautify_url: buf.write("# ") buf.write(message.url) buf.write("\r\n") headers = copy.deepcopy(message.headers) for key in tuple(headers.keys()): if key.lower().startswith("x-hippo-"): LOG.warning(f"Internal header {key!r} leaked out?") # If this header actually came from somewhere untrusted, we can't # include it. It may change the meaning of the message when replayed. headers[f"X-Untrusted-{key}"] = headers[key] headers.pop(key) beautified = None if beautify and message.content: try: serializer = se.HTTP_SERIALIZERS.get(cap_name) if serializer: if want_request: beautified = serializer.deserialize_req_body(method, message.content) else: beautified = serializer.deserialize_resp_body(method, message.content) if beautified is se.UNSERIALIZABLE: beautified = None else: beautified = self._format_llsd(beautified) headers["X-Hippo-Beautify"] = "1" if not beautified: content_type = self._guess_content_type(message) if content_type.startswith("application/llsd"): try: beautified = self._format_llsd(llsd.parse(message.content)) except llsd.LLSDParseError: # Sometimes LL sends plain XML with a Content-Type of application/llsd+xml. # Try to detect that case and work around it if content_type == "application/llsd+xml" and message.content.startswith(b'<'): beautified = self._format_xml(message.content) else: raise elif any(content_type.startswith(x) for x in ("application/xml", "text/xml")): beautified = self._format_xml(message.content) except: LOG.exception("Failed to beautify message") message_body = beautified or message.content if isinstance(message_body, bytes): try: decoded = message.text # Valid in many codecs, but unprintable. if "\x00" in decoded: raise ValueError("Embedded null") message_body = decoded except (UnicodeError, ValueError): # non-printable characters, return the escaped version. headers["X-Hippo-Escaped-Body"] = "1" message_body = bytes_escape(message_body).decode("utf8") buf.write(bytes(headers).decode("utf8", errors="replace")) buf.write("\r\n") buf.write(message_body or "") return buf.getvalue() def request(self, beautify=False, replacements=None): return self._format_http_message(want_request=True, beautify=beautify) def response(self, beautify=False): return self._format_http_message(want_request=False, beautify=beautify) @property def summary(self): if self._summary is not None: return self._summary msg = self.flow.response self._summary = f"{msg.status_code}: " if not msg.content: return self._summary if len(msg.content) > 1000000: self._summary += "[too large...]" return self._summary content_type = self._guess_content_type(msg) if content_type.startswith("application/llsd"): notation = llsd.format_notation(llsd.parse(msg.content)) self._summary += notation.decode("utf8")[:500] return self._summary def _guess_content_type(self, message): # SL's login service lies and says that its XML-RPC response is LLSD+XML. # It is not, and it blows up the parser. It's been broken ever since the # login rewrite and a fix is likely not forthcoming. I'm sick of seeing # the traceback, so just hack around it. if self.name == "LoginRequest": return "application/xml" content_type = message.headers.get("Content-Type", "") if not message.content or content_type.startswith("application/llsd"): return content_type # Sometimes gets sent with `text/plain` or `text/html`. Cool. if message.content.startswith(rb''): return "application/llsd+xml" if message.content.startswith(rb''): return "application/llsd+xml" if message.content.startswith(rb' typing.Any: lower_name = name.lower() if lower_name == "url": return self.flow.request.url elif lower_name == "reqheaders": return self.flow.request.headers elif lower_name == "respheaders": return self.flow.response.headers elif lower_name == "host": return self.flow.request.host.lower() elif lower_name == "status": return self.flow.response.status_code return super()._get_meta(name) def to_dict(self): val = super().to_dict() val['flow'] = self.flow.get_state() cap_data = val['flow'].get('metadata', {}).get('cap_data_ser') if cap_data is not None: # Have to convert this from a namedtuple to a dict to make # it importable cap_dict = cap_data._asdict() # noqa val['flow']['metadata']['cap_data_ser'] = cap_dict return val @classmethod def from_dict(cls, val: dict): cap_data = val['flow'].get('metadata', {}).get('cap_data_ser') if cap_data: val['flow']['metadata']['cap_data_ser'] = SerializedCapData(**cap_data) ev = cls(HippoHTTPFlow.from_state(val['flow'], None)) ev.apply_dict(val) return ev class EQMessageLogEntry(AbstractMessageLogEntry): __slots__ = ["event"] def __init__(self, event, region, session): super().__init__(region, session) self.event = event @property def type(self): return "EQ" def request(self, beautify=False, replacements=None): # TODO: This is a bit of a hack! Templated messages can be sent over the EQ, so let's # display them as template messages if that's what they are. if self.event['message'] in DEFAULT_TEMPLATE_DICT.message_templates: msg = LLSDMessageSerializer().deserialize(self.event) msg.synthetic = True msg.send_flags = PacketFlags.EQ msg.direction = Direction.IN # Annoyingly, templated messages sent over the EQ can have extra fields not specified # in the template, and this is often the case. ParcelProperties has fields that aren't # in the template. Luckily, we don't really care about extra fields, we just may not # be able to automatically decode U32 and friends without the hint from the template # that that is what they are. return HumanMessageSerializer.to_human_string(msg, replacements, beautify) return f'EQ {self.event["message"]}\n\n{self._format_llsd(self.event["body"])}' @property def name(self): return self.event["message"] @property def summary(self): if self._summary is not None: return self._summary self._summary = "" self._summary = llsd.format_notation(self.event["body"]).decode("utf8")[:500] return self._summary def to_dict(self) -> dict: val = super().to_dict() val['event'] = llsd.format_notation(self.event) return val @classmethod def from_dict(cls, val: dict): ev = cls(llsd.parse_notation(val['event']), None, None) ev.apply_dict(val) return ev class LLUDPMessageLogEntry(AbstractMessageLogEntry): __slots__ = ["_message", "_name", "_direction", "_frozen_message", "_seq", "_deserializer"] def __init__(self, message: Message, region, session): self._message: Message = message self._deserializer = None self._name = message.name self._direction = message.direction self._frozen_message: typing.Optional[bytes] = None self._seq = message.packet_id super().__init__(region, session) _MESSAGE_META_ATTRS = { "Synthetic", "Dropped", "Extra", "Resent", "Zerocoded", "Acks", "Reliable", } def _get_meta(self, name: str): # These may change between when the message is logged and when we # actually filter on it, since logging happens before addons. msg = self.message if name in self._MESSAGE_META_ATTRS: return getattr(msg, name.lower(), None) msg_meta = getattr(msg, "meta", None) if msg_meta is not None: if name in msg_meta: return msg_meta[name] return super()._get_meta(name) @property def message(self) -> Message: if self._message: return self._message elif self._frozen_message: message = pickle.loads(self._frozen_message) message.deserializer = self._deserializer return message else: raise ValueError("Didn't have a fresh or frozen message somehow") def freeze(self): message = self.message message.invalidate_caches() # These are expensive to keep around. pickle them and un-pickle on # an as-needed basis. self._deserializer = self.message.deserializer message.deserializer = None try: self._frozen_message = pickle.dumps(self._message, protocol=pickle.HIGHEST_PROTOCOL) finally: message.deserializer = self._deserializer self._message = None @property def type(self): return "LLUDP" @property def name(self): if self._message: self._name = self._message.name return self._name @property def method(self): if self._message: self._direction = self._message.direction return self._direction.name if self._direction is not None else "" def request(self, beautify=False, replacements=None): return HumanMessageSerializer.to_human_string(self.message, replacements, beautify) def matches(self, matcher, short_circuit=True) -> "MatchResult": base_matched = self._base_matches(matcher) if base_matched is not None: return MatchResult(base_matched, []) if not self._packet_root_matches(matcher.selector[0]): return MatchResult(False, []) message = self.message selector_len = len(matcher.selector) # name, block_name, var_name(, subfield_name)? if selector_len not in (3, 4): return MatchResult(False, []) found_field_keys = [] for block_name in message.blocks: if not fnmatch.fnmatchcase(block_name, matcher.selector[1]): continue for block_num, block in enumerate(message[block_name]): for var_name in block.vars.keys(): if not fnmatch.fnmatchcase(var_name, matcher.selector[2]): continue # So we know where the match happened field_key = (message.name, block_name, block_num, var_name) if selector_len == 3: # We're just matching on the var existing, not having any particular value if matcher.value is None: found_field_keys.append(field_key) elif self._val_matches(matcher.operator, block[var_name], matcher.value): found_field_keys.append(field_key) # Need to invoke a special unpacker elif selector_len == 4: try: deserialized = block.deserialize_var(var_name) except KeyError: continue # Discard the tag if this is a tagged union, we only want the value if isinstance(deserialized, TaggedUnion): deserialized = deserialized.value if not isinstance(deserialized, dict): continue for key in deserialized.keys(): if fnmatch.fnmatchcase(str(key), matcher.selector[3]): if matcher.value is None: # Short-circuiting checking individual subfields is fine since # we only highlight fields anyway. found_field_keys.append(field_key) break elif self._val_matches(matcher.operator, deserialized[key], matcher.value): found_field_keys.append(field_key) break if short_circuit and found_field_keys: return MatchResult(True, found_field_keys) return MatchResult(bool(found_field_keys), found_field_keys) @property def summary(self): if self._summary is None: self._summary = self.message.to_summary()[:500] return self._summary @property def seq(self): if self._message: self._seq = self._message.packet_id return self._seq def to_dict(self): val = super().to_dict() val['message'] = llsd.format_notation(self.message.to_dict(extended=True)) return val @classmethod def from_dict(cls, val: dict): ev = cls(Message.from_dict(llsd.parse_notation(val['message'])), None, None) ev.apply_dict(val) return ev def export_log_entries(entries: typing.Iterable[AbstractMessageLogEntry]) -> bytes: return gzip.compress(repr([e.to_dict() for e in entries]).encode("utf8")) _TYPE_CLASSES = { "HTTP": HTTPMessageLogEntry, "LLUDP": LLUDPMessageLogEntry, "EQ": EQMessageLogEntry, } def import_log_entries(data: bytes) -> typing.List[AbstractMessageLogEntry]: entries = ast.literal_eval(gzip.decompress(data).decode("utf8")) return [_TYPE_CLASSES[e['type']].from_dict(e) for e in entries]