From ead76171939de88d310be86384e88ef191cf12a3 Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Sun, 31 May 2026 23:35:51 +0200 Subject: [PATCH 1/8] Ignore uv files Signed-off-by: Thomas Calmant --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 9711698..09add18 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,7 @@ nosetests.xml /issue*/ /repro*.py /test*.py + +# uv +.venv +uv.lock From a7343121a6add33829332d0d476161d1a6068972 Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Sun, 31 May 2026 23:53:32 +0200 Subject: [PATCH 2/8] First iteration of v3 Signed-off-by: Thomas Calmant --- README.md | 155 ++++++- javaobj/v3/__init__.py | 195 +++++++++ javaobj/v3/_compat.py | 307 ++++++++++++++ javaobj/v3/beans.py | 550 +++++++++++++++++++++++++ javaobj/v3/exceptions.py | 105 +++++ javaobj/v3/parser.py | 804 +++++++++++++++++++++++++++++++++++++ javaobj/v3/reader.py | 223 ++++++++++ javaobj/v3/transformers.py | 569 ++++++++++++++++++++++++++ tests/test_v3.py | 677 +++++++++++++++++++++++++++++++ 9 files changed, 3584 insertions(+), 1 deletion(-) create mode 100644 javaobj/v3/__init__.py create mode 100644 javaobj/v3/_compat.py create mode 100644 javaobj/v3/beans.py create mode 100644 javaobj/v3/exceptions.py create mode 100644 javaobj/v3/parser.py create mode 100644 javaobj/v3/reader.py create mode 100644 javaobj/v3/transformers.py create mode 100644 tests/test_v3.py diff --git a/README.md b/README.md index 4385a0b..5866a86 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,18 @@ it, and avoids a mismatch between the referenced object and the transformed one. The `v2` implementation provides a new API for the object transformers. Please look at the *Usage (V2)* section in this file. +### Object transformers V3 + +| Implementations | Version | +|-----------------|----------| +| `v3` | `0.5.0+` | + +The `v3` implementation is a full rewrite targeting **Python 3.12+**. +It uses `dataclasses`, structural pattern matching (`match/case`) and PEP 604 +union types. Its API is intentionally similar to `v2` but fixes several +correctness issues and adds stricter safety limits. +Please look at the *Usage (V3)* and *Migration to V3* sections in this file. + ### Bytes arrays | Implementations | Version | @@ -98,7 +110,8 @@ You can find a sample usage in the *Custom Transformer* section in this file. ## Requirements -* Python >= 2.7 or Python >= 3.4 +* Python >= 2.7 or Python >= 3.4 for `v1` and `v2` +* Python >= 3.12 for `v3` * `enum34` and `typing` when using Python <= 3.4 (installable with `pip`) * Maven 2+ (for building test data of serialized objects. You can skip it if you do not plan to run `tests.py`) @@ -480,3 +493,143 @@ pobj = javaobj.loads("custom_objects.ser", *transformers) # it's static. See: https://stackoverflow.com/a/16477421/12621168 print(pobj.field_data["int_not_in_fields"]) ``` + +## Usage (V3 implementation) + +> **Requires Python 3.12+.** + +The `javaobj.v3` package is a full rewrite of the Java object stream parser. +It provides the same two entry-points as `v2`: + +* `load(fd, *transformers, use_numpy_arrays=False, max_array_size=…, max_depth=500)`: + Parses a binary file descriptor opened in `rb` mode and returns the top-level + object if the stream contains exactly one, a list of objects if there are + several, or `None` for an empty stream. Pass additional `ObjectTransformer` + instances as positional arguments. + +* `loads(data, *transformers, …)`: + Convenience wrapper around `load()` that accepts `bytes`. + +Sample usage: + +```python +import javaobj.v3 as javaobj + +with open("obj5.ser", "rb") as fd: + pobj = javaobj.load(fd) + +# Access fields by name (preferred) +value = pobj.get_field("myField") + +# Or use attribute-style access (issues a warning on ambiguity) +value = pobj.myField +``` + +### New features in V3 + +| Feature | V1 | V2 | V3 | +|---|---|---|---| +| Python 3.12+ (`match/case`, PEP 604) | ✗ | ✗ | ✓ | +| Fully typed (`dataclasses`, `TypeAlias`) | ✗ | partial | ✓ | +| `TC_RESET` handling | ✗ | ✗ | ✓ | +| `TC_EXCEPTION` in object graph | ✗ | ✗ | ✓ | +| `TC_PROXYCLASSDESC` | ✗ | ✓ | ✓ | +| Security limits (max depth / array size) | ✗ | ✗ | ✓ | +| Correct `TYPE_CHAR` numpy dtype (`>u2`) | ✗ | ✗ | ✓ | +| Typed exception hierarchy | ✗ | ✗ | ✓ | +| `BlockData.__eq__(bytes)` compatibility | ✓ | ✓ | ✓ | + +### Security limits + +`v3` adds two optional safety limits that prevent resource exhaustion when +parsing untrusted streams: + +```python +import javaobj.v3 as javaobj + +with open("untrusted.ser", "rb") as fd: + pobj = javaobj.load( + fd, + max_array_size=10 * 1024 * 1024, # 10 MiB max per array + max_depth=100, # max object-graph depth + ) +``` + +### Object Transformer V3 + +The `ObjectTransformer` base class in `v3` has the same three override points +as in `v2`: + +* `create_instance(classdesc)` — return a `JavaInstance` subclass (or `None` + to fall back to the next transformer). +* `load_array(reader, type_code, size)` — called for `TC_ARRAY` records; + return the array data (`bytes` or `list`) or `None` to use the default logic. +* `load_custom_writeObject(parser, reader, class_name)` — called when a + class written with `writeObject()` requires fully custom parsing. + +The `DefaultObjectTransformer` additionally exposes a public `handles(name)` +method that returns `True` when the transformer knows how to load the given +Java class name. + +### Using NumPy arrays (V3) + +```python +import javaobj.v3 as javaobj + +with open("arrays.ser", "rb") as fd: + pobj = javaobj.load(fd, use_numpy_arrays=True) +``` + +When `use_numpy_arrays=True`, a `NumpyArrayTransformer` is appended to the +transformer list and primitive arrays are returned as `numpy.ndarray`. + +--- + +## Migration to V3 + +### From V1 to V3 + +| V1 | V3 | +|---|---| +| `import javaobj` | `import javaobj.v3 as javaobj` | +| `pobj.classdesc.name` | `pobj.classdesc.name` (unchanged) | +| `pobj.myField` (direct attribute) | `pobj.get_field("myField")` (preferred) or `pobj.myField` | +| `pobj._data` on arrays | `pobj.data` (public) | +| `javaobj.JavaObjectUnmarshaller` | removed — use `javaobj.v3.parser.JavaStreamParser` | +| `javaobj.JavaObjectMarshaller` | marshalling not available in `v3` | +| Exceptions: bare `Exception` | Typed: `ParseError`, `UnexpectedOpcodeError`, … | + +Shallow conversion helper (best-effort, for gradual migration): + +```python +from javaobj.v3._compat import v1_to_v3 +v3_obj = v1_to_v3(v1_obj) +``` + +### From V2 to V3 + +| V2 | V3 | +|---|---| +| `import javaobj.v2 as javaobj` | `import javaobj.v3 as javaobj` | +| `javaobj.load(fd)` | `javaobj.load(fd)` (same signature) | +| `javaobj.loads(data)` | `javaobj.loads(data)` (same signature) | +| `pobj.classdesc.name` | `pobj.classdesc.name` (unchanged) | +| `pobj.field_data[cd][field]` | `pobj.field_data[cd][field]` (unchanged) | +| `pobj.get_field("name")` | `pobj.get_field("name")` (unchanged) | +| `pobj.__getattr__` ambiguity silent | warns when field exists in multiple classes | +| `transformer._type_mapper` (private) | `transformer.handles(name)` (public) | +| `JavaArray.data` (`tuple` of ints for bytes) | `JavaArray.data` (`bytes` for `TYPE_BYTE`) | +| `BlockData` compared with `bytes` | `BlockData.__eq__(bytes)` still works | +| `use_numpy_arrays=True` (v2 option) | `use_numpy_arrays=True` (same) | +| No depth/size limits | `max_depth=500`, `max_array_size=100 MiB` | +| No typed exceptions | `ParseError`, `SecurityError`, … | + +Shallow conversion helper (best-effort, for gradual migration): + +```python +from javaobj.v3._compat import v2_to_v3 +v3_obj = v2_to_v3(v2_obj) +``` + +> **Note:** `v3` requires **Python 3.12+** and does **not** support marshalling +> (writing). If you need to write Java object streams, use `v1`. diff --git a/javaobj/v3/__init__.py b/javaobj/v3/__init__.py new file mode 100644 index 0000000..e54d121 --- /dev/null +++ b/javaobj/v3/__init__.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +""" +Rewritten version of the un-marshalling process of javaobj (v3) + +This package targets Python 3.12+ and provides fully typed parsing of the +Java Object Serialization stream format, in read-only mode. + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +from io import BytesIO +from typing import IO, Any + +# Javaobj +from ..utils import java_data_fd + +# Also expose the beans sub-module so that ``javaobj.v3.beans.JavaInstance`` +# works out of the box (same pattern as v2). +from . import beans # noqa: F401 +from .beans import ( + BlockData, + ClassDataType, + ClassDescType, + ExceptionState, + FieldType, + JavaArray, + JavaClass, + JavaClassDesc, + JavaEnum, + JavaField, + JavaInstance, + JavaString, + ParsedContent, +) +from .exceptions import ( + JavaObjError, + ParseError, + SecurityError, + UnexpectedOpcodeError, + UnsupportedFeatureError, +) +from .parser import JavaStreamParser +from .reader import DataReader +from .transformers import ( + DefaultObjectTransformer, + NumpyArrayTransformer, + ObjectTransformer, +) + +__all__ = [ + # Entry points + "load", + "loads", + # Transformer API + "ObjectTransformer", + "DefaultObjectTransformer", + "NumpyArrayTransformer", + # Bean types + "JavaInstance", + "JavaArray", + "JavaString", + "JavaEnum", + "JavaClass", + "JavaClassDesc", + "JavaField", + "BlockData", + "ExceptionState", + "FieldType", + "ClassDataType", + "ClassDescType", + "ParsedContent", + # Parser + "JavaStreamParser", + "DataReader", + # Exceptions + "JavaObjError", + "ParseError", + "UnexpectedOpcodeError", + "UnsupportedFeatureError", + "SecurityError", +] + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ +# Public API +# ------------------------------------------------------------------------------ + + +def load( + file_object: IO[bytes], + *transformers: ObjectTransformer, + use_numpy_arrays: bool = False, + max_array_size: int = DataReader.DEFAULT_MAX_ARRAY_SIZE, + max_depth: int = DataReader.DEFAULT_MAX_DEPTH, +) -> Any: + """ + Deserializes Java object(s) from a binary file-like object. + + The stream is automatically decompressed if it is GZip-compressed. + + :param file_object: A readable binary stream containing a Java serialized + object stream (magic ``0xACED 0x0005``). + :param transformers: Zero or more custom :class:`ObjectTransformer` + instances. A :class:`DefaultObjectTransformer` is + always added unless one is already present. + :param use_numpy_arrays: When ``True`` and *numpy* is installed, primitive + arrays are loaded as ``numpy.ndarray`` objects. + :param max_array_size: Maximum bytes for a single array allocation. + :param max_depth: Maximum object-graph recursion depth. + :return: The parsed object if the stream contains exactly one top-level + object, or a list of objects if there are several. + Returns ``None`` for an empty stream. + :raises ParseError: If the stream is malformed. + :raises SecurityError: If a safety limit is exceeded. + :raises UnsupportedFeatureError: If an unsupported protocol feature is + encountered. + """ + # Auto-decompress GZip streams + fd = java_data_fd(file_object) + + # Build transformer list, ensuring DefaultObjectTransformer is present + all_transformers: list[ObjectTransformer] = list(transformers) + if not any(isinstance(t, DefaultObjectTransformer) for t in all_transformers): + all_transformers.append(DefaultObjectTransformer()) + + if use_numpy_arrays: + all_transformers.append(NumpyArrayTransformer()) + + parser = JavaStreamParser( + fd, + all_transformers, + max_array_size=max_array_size, + max_depth=max_depth, + ) + contents = parser.run() + + if not contents: + return None + if len(contents) == 1: + return contents[0] + return contents + + +def loads( + data: bytes, + *transformers: ObjectTransformer, + use_numpy_arrays: bool = False, + max_array_size: int = DataReader.DEFAULT_MAX_ARRAY_SIZE, + max_depth: int = DataReader.DEFAULT_MAX_DEPTH, +) -> Any: + """ + Deserializes Java object(s) from a :class:`bytes` object. + + :param data: Raw bytes of a Java serialized stream. + :param transformers: Optional custom transformers (see :func:`load`). + :param use_numpy_arrays: See :func:`load`. + :param max_array_size: See :func:`load`. + :param max_depth: See :func:`load`. + :return: Parsed object or list of objects (see :func:`load`). + """ + return load( + BytesIO(data), + *transformers, + use_numpy_arrays=use_numpy_arrays, + max_array_size=max_array_size, + max_depth=max_depth, + ) diff --git a/javaobj/v3/_compat.py b/javaobj/v3/_compat.py new file mode 100644 index 0000000..427d3a7 --- /dev/null +++ b/javaobj/v3/_compat.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +""" +Migration helpers from javaobj v1 / v2 to v3 + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +from typing import Any + +# Javaobj +from .beans import ( + FieldType, + JavaArray, + JavaClassDesc, + JavaEnum, + JavaField, + JavaInstance, + JavaString, + ParsedContent, +) +from .exceptions import JavaObjError + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +__all__ = [ + "v2_to_v3", + "v1_to_v3", + "V1CompatMixin", + "V2CompatMixin", +] + + +# ------------------------------------------------------------------------------ +# v2 → v3 adapter +# ------------------------------------------------------------------------------ + + +def v2_to_v3(v2_obj: Any) -> ParsedContent: + """ + Converts a javaobj **v2** top-level object to the nearest v3 equivalent. + + For types that map directly (e.g. ``javaobj.v2.beans.JavaInstance`` → + ``javaobj.v3.beans.JavaInstance``) the fields are copied shallowly. + Nested objects are **not** recursively converted — only the top-level + wrapper is adapted. + + :param v2_obj: A parsed object returned by :func:`javaobj.v2.load` or + :func:`javaobj.v2.loads`. + :return: The v3 equivalent object. + :raises JavaObjError: If the type cannot be mapped. + """ + try: + from javaobj.v2.beans import ( + JavaArray as V2Array, + ) + from javaobj.v2.beans import ( + JavaEnum as V2Enum, + ) + from javaobj.v2.beans import ( # type: ignore[import] + JavaInstance as V2Instance, + ) + from javaobj.v2.beans import ( + JavaString as V2String, + ) + except ImportError as exc: + raise JavaObjError("javaobj.v2 is not available; cannot perform v2 → v3 conversion") from exc + + if isinstance(v2_obj, V2String): + return JavaString(handle=v2_obj.handle, value=v2_obj.value) + + if isinstance(v2_obj, V2Enum): + cd = _v2_classdesc_to_v3(v2_obj.classdesc) + constant = JavaString(handle=v2_obj.constant.handle, value=v2_obj.constant.value) + return JavaEnum(handle=v2_obj.handle, classdesc=cd, constant=constant) + + if isinstance(v2_obj, V2Array): + cd = _v2_classdesc_to_v3(v2_obj.classdesc) + data: bytes | list[Any] = ( + bytes(v2_obj.data) + if v2_obj.field_type and v2_obj.field_type.value == FieldType.BYTE.value + else list(v2_obj.data) + ) + return JavaArray( + handle=v2_obj.handle, + classdesc=cd, + element_type=FieldType(v2_obj.field_type.value), + data=data, + ) + + if isinstance(v2_obj, V2Instance): + return _v2_instance_to_v3(v2_obj) + + raise JavaObjError(f"Cannot convert v2 object of type {type(v2_obj).__name__!r} to v3") + + +def _v2_classdesc_to_v3(v2_cd: Any) -> JavaClassDesc: + """Shallow conversion of a v2 JavaClassDesc to a v3 JavaClassDesc.""" + fields = [ + JavaField( + type=FieldType(f.type.value), + name=f.name, + class_name=f.class_name.value if f.class_name else None, + ) + for f in (v2_cd.fields or []) + ] + return JavaClassDesc( + handle=v2_cd.handle, + name=v2_cd.name or "", + serial_version_uid=v2_cd.serial_version_uid, + desc_flags=v2_cd.desc_flags, + fields=fields, + ) + + +def _v2_instance_to_v3(v2_inst: Any) -> JavaInstance: + """Shallow conversion of a v2 JavaInstance to a v3 JavaInstance.""" + cd = _v2_classdesc_to_v3(v2_inst.classdesc) if v2_inst.classdesc else None + + v3_inst = JavaInstance() + v3_inst.handle = v2_inst.handle + v3_inst.classdesc = cd # type: ignore[assignment] + v3_inst.is_exception = getattr(v2_inst, "is_exception", False) + + # Copy field_data with converted keys + # v2 field_data is {JavaClassDesc: {JavaField: value}}, same nesting as v3 + if cd is not None: + v3_field_data: dict[JavaClassDesc, dict[JavaField, Any]] = {} + for v2_cd_key, v2_fields_dict in v2_inst.field_data.items(): + v3_cd_key = _v2_classdesc_to_v3(v2_cd_key) + v3_fields: dict[JavaField, Any] = {} + for v2_f, val in v2_fields_dict.items(): + v3_f = JavaField( + type=FieldType(v2_f.type.value), + name=v2_f.name, + class_name=(v2_f.class_name.value if v2_f.class_name else None), + ) + v3_fields[v3_f] = val + v3_field_data[v3_cd_key] = v3_fields + v3_inst.field_data = v3_field_data + + return v3_inst + + +# ------------------------------------------------------------------------------ +# v1 → v3 adapter +# ------------------------------------------------------------------------------ + + +def v1_to_v3(v1_obj: Any) -> ParsedContent: + """ + Converts a javaobj **v1** top-level object to a v3 equivalent. + + :param v1_obj: A parsed object returned by the top-level + :func:`javaobj.load` / :func:`javaobj.loads` (v1 API). + :return: The v3 equivalent object. + :raises JavaObjError: If the type cannot be mapped. + """ + try: + from javaobj.v1.beans import ( + JavaArray as V1Array, + ) + from javaobj.v1.beans import ( + JavaEnum as V1Enum, + ) + from javaobj.v1.beans import ( # type: ignore[import] + JavaObject, + ) + from javaobj.v1.beans import ( + JavaString as V1String, + ) + except ImportError as exc: + raise JavaObjError("javaobj.v1 is not available; cannot perform v1 → v3 conversion") from exc + + if isinstance(v1_obj, V1String): + return JavaString(handle=0, value=str(v1_obj)) + + if isinstance(v1_obj, V1Enum): + cd = _v1_classdesc_to_v3(v1_obj.classdesc) + constant = JavaString(handle=0, value=str(v1_obj.constant)) + return JavaEnum(handle=0, classdesc=cd, constant=constant) + + if isinstance(v1_obj, V1Array): + return _v1_array_to_v3(v1_obj) + + if isinstance(v1_obj, JavaObject): + return _v1_object_to_v3(v1_obj) + + raise JavaObjError(f"Cannot convert v1 object of type {type(v1_obj).__name__!r} to v3") + + +def _v1_classdesc_to_v3(v1_cd: Any) -> JavaClassDesc: + """Shallow conversion of a v1 JavaClass to a v3 JavaClassDesc.""" + fields = [ + JavaField( + # fields_types contains full class descriptors like 'Ljava/lang/String;' + # or single-char primitives like 'B', 'I', etc. The first character + # always encodes the FieldType (e.g. 'L' → OBJECT, 'B' → BYTE). + type=FieldType(ord(str(t)[0])), + name=n, + ) + for n, t in zip( + getattr(v1_cd, "fields_names", []), + getattr(v1_cd, "fields_types", []), + ) + ] + return JavaClassDesc( + handle=0, + name=getattr(v1_cd, "name", "") or "", + serial_version_uid=getattr(v1_cd, "serialVersionUID", 0) or 0, + desc_flags=getattr(v1_cd, "flags", 0) or 0, + fields=fields, + ) + + +def _v1_object_to_v3(v1_obj: Any) -> JavaInstance: + """Shallow conversion of a v1 JavaObject to a v3 JavaInstance.""" + cd = _v1_classdesc_to_v3(v1_obj.classdesc) if v1_obj.classdesc else None + + v3_inst = JavaInstance() + v3_inst.handle = 0 + v3_inst.classdesc = cd # type: ignore[assignment] + return v3_inst + + +def _v1_array_to_v3(v1_arr: Any) -> JavaArray: + """Shallow conversion of a v1 JavaArray to a v3 JavaArray.""" + cd = _v1_classdesc_to_v3(v1_arr.classdesc) if v1_arr.classdesc else None + + raw_data: bytes | list[Any] + if isinstance(v1_arr, (bytes, bytearray)): + raw_data = bytes(v1_arr) + else: + raw_data = list(v1_arr) + + return JavaArray( + handle=0, + classdesc=cd, # type: ignore[arg-type] + element_type=FieldType.OBJECT, + data=raw_data, + ) + + +# ------------------------------------------------------------------------------ +# Convenience mixins for custom transformer classes +# ------------------------------------------------------------------------------ + + +class V2CompatMixin: + """ + Mixin for v3 transformer subclasses that need a v2-style + ``load_from_instance(indent=0)`` signature. + + Usage:: + + class MyTransformer(V2CompatMixin, JavaInstance): + HANDLED_CLASSES = "com.example.MyClass" + + def load_from_instance(self, indent: int = 0) -> bool: + ... + """ + + def load_from_instance(self, indent: int = 0) -> bool: # type: ignore[override] + """v2-compatible hook; delegates to the v3 no-argument version.""" + return self._load_from_instance_v3() + + def _load_from_instance_v3(self) -> bool: + """Override this in subclasses to implement the actual loading.""" + return False + + +class V1CompatMixin: + """ + Mixin that adds a ``classdesc`` shim so that v1-style transformer code + that accesses ``obj.classdesc.name`` works unchanged on v3 instances. + """ + + # No-op: JavaInstance already has a classdesc attribute in v3. + pass diff --git a/javaobj/v3/beans.py b/javaobj/v3/beans.py new file mode 100644 index 0000000..78f8d47 --- /dev/null +++ b/javaobj/v3/beans.py @@ -0,0 +1,550 @@ +#!/usr/bin/env python3 +""" +Definition of the beans used to represent the parsed objects (v3) + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +import warnings +from dataclasses import dataclass, field +from enum import IntEnum +from typing import Any + +# Javaobj +from ..constants import ClassDescFlags, TypeCode + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +__all__ = [ + "FieldType", + "ClassDescType", + "ClassDataType", + "JavaField", + "JavaClassDesc", + "JavaInstance", + "JavaArray", + "JavaString", + "JavaEnum", + "JavaClass", + "BlockData", + "ExceptionState", + "ParsedContent", +] + + +# ------------------------------------------------------------------------------ +# Enumerations +# ------------------------------------------------------------------------------ + + +class FieldType(IntEnum): + """ + Java type codes as used in class-descriptor field entries. + + Values match the single-character ASCII type codes defined by the + Java Object Serialization Protocol (e.g. ``B`` → byte, ``I`` → int …). + """ + + BYTE = TypeCode.TYPE_BYTE.value # 'B' – signed byte + CHAR = TypeCode.TYPE_CHAR.value # 'C' – UTF-16 code unit (2 bytes) + DOUBLE = TypeCode.TYPE_DOUBLE.value # 'D' – IEEE-754 double + FLOAT = TypeCode.TYPE_FLOAT.value # 'F' – IEEE-754 float + INTEGER = TypeCode.TYPE_INTEGER.value # 'I' – 32-bit signed int + LONG = TypeCode.TYPE_LONG.value # 'J' – 64-bit signed long + SHORT = TypeCode.TYPE_SHORT.value # 'S' – 16-bit signed short + BOOLEAN = TypeCode.TYPE_BOOLEAN.value # 'Z' – boolean + ARRAY = TypeCode.TYPE_ARRAY.value # '[' – array reference + OBJECT = TypeCode.TYPE_OBJECT.value # 'L' – object reference + + +class ClassDescType(IntEnum): + """Whether a class descriptor represents a normal class or a proxy.""" + + NORMALCLASS = 0 + PROXYCLASS = 1 + + +class ClassDataType(IntEnum): + """ + How an instance's data is laid out in the stream. + + Derived from the ``desc_flags`` byte of its :class:`JavaClassDesc`. + """ + + NOWRCLASS = 0 # SC_SERIALIZABLE, no writeObject + WRCLASS = 1 # SC_SERIALIZABLE + SC_WRITE_METHOD + EXTERNAL_CONTENTS = 2 # SC_EXTERNALIZABLE, no SC_BLOCK_DATA + OBJECT_ANNOTATION = 3 # SC_EXTERNALIZABLE + SC_BLOCK_DATA + + +# ------------------------------------------------------------------------------ +# Field descriptor +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True, eq=False) +class JavaField: + """ + A single field entry in a :class:`JavaClassDesc`. + + Equality and hashing use **object identity** (like plain Python classes) + so that ``JavaField`` instances can be used as dict keys and compared + across the same parsing session. + """ + + type: FieldType + name: str + # For OBJECT / ARRAY fields this holds the binary class name + # (e.g. ``Ljava/lang/String;`` or ``[B``). + class_name: str | None = None + + +# ------------------------------------------------------------------------------ +# Class descriptor +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True, eq=False) +class JavaClassDesc: + """ + Full description of a Java class as parsed from a TC_CLASSDESC or + TC_PROXYCLASSDESC record. + + Equality and hashing use **object identity** so that ``JavaClassDesc`` + instances can be used as dict keys when building ``field_data`` and + ``annotations`` maps. + """ + + handle: int + name: str + serial_version_uid: int + desc_flags: int + class_type: ClassDescType = ClassDescType.NORMALCLASS + fields: list[JavaField] = field(default_factory=list) + super_class: "JavaClassDesc | None" = None + # Interface names (only for proxy classes) + interfaces: list[str] = field(default_factory=list) + # Class annotations (blockdata / objects written by annotateClass) + annotations: list[Any] = field(default_factory=list) + # Enum constant names observed in this stream + enum_constants: set[str] = field(default_factory=set) + # True when this descriptor is a super-class of another descriptor + is_super_class: bool = False + + # ------------------------------------------------------------------ + # v1 / v2 compatibility aliases + # ------------------------------------------------------------------ + + @property + def serialVersionUID(self) -> int: + """Alias for ``serial_version_uid`` (v1/v2 API compatibility).""" + return self.serial_version_uid + + @property + def flags(self) -> int: + """Alias for ``desc_flags`` (v1/v2 API compatibility).""" + return self.desc_flags + + @property + def fields_names(self) -> list[str]: + """Returns the ordered list of field names.""" + return [f.name for f in self.fields] + + @property + def fields_types(self) -> list[FieldType]: + """Returns the ordered list of field types.""" + return [f.type for f in self.fields] + + # ------------------------------------------------------------------ + # Computed properties + # ------------------------------------------------------------------ + + @property + def data_type(self) -> ClassDataType: + """ + Derives the :class:`ClassDataType` from the descriptor flags. + + :raises ValueError: If the flags combination is unsupported. + """ + if ClassDescFlags.SC_SERIALIZABLE & self.desc_flags: + return ( + ClassDataType.WRCLASS + if (ClassDescFlags.SC_WRITE_METHOD & self.desc_flags) + else ClassDataType.NOWRCLASS + ) + if ClassDescFlags.SC_EXTERNALIZABLE & self.desc_flags: + return ( + ClassDataType.OBJECT_ANNOTATION + if (ClassDescFlags.SC_BLOCK_DATA & self.desc_flags) + else ClassDataType.EXTERNAL_CONTENTS + ) + raise ValueError(f"Cannot derive data type from desc_flags 0x{self.desc_flags:02x}") + + def get_hierarchy(self) -> "list[JavaClassDesc]": + """ + Returns the class hierarchy from the topmost ancestor to ``self``, + in the order used by the Java serialization protocol. + """ + classes: list[JavaClassDesc] = [] + if self.super_class is not None: + classes.extend(self.super_class.get_hierarchy()) + classes.append(self) + return classes + + def validate(self) -> None: + """ + Checks that the descriptor is internally consistent. + + :raises ValueError: If the descriptor is malformed. + """ + serial_or_extern = ClassDescFlags.SC_SERIALIZABLE | ClassDescFlags.SC_EXTERNALIZABLE + if (self.desc_flags & serial_or_extern) == 0 and self.fields: + raise ValueError("Non-serializable, non-externalizable class has fields") + if (self.desc_flags & serial_or_extern) == serial_or_extern: + raise ValueError("Class is both serializable and externalizable") + if self.desc_flags & ClassDescFlags.SC_ENUM: + if self.fields or self.interfaces: + raise ValueError("Enum class must not have non-constant fields or interfaces") + else: + if self.enum_constants: + raise ValueError("Non-enum class must not have enum constants") + + def __str__(self) -> str: + return f"[classdesc 0x{self.handle:x}: name={self.name!r}, uid={self.serial_version_uid}]" + + __repr__ = __str__ + + +# ------------------------------------------------------------------------------ +# Instance +# ------------------------------------------------------------------------------ + + +@dataclass +class JavaInstance: + """ + A deserialized Java object instance (TC_OBJECT). + + ``field_data`` maps each :class:`JavaClassDesc` in the class hierarchy to + a ``{JavaField: value}`` dict. ``annotations`` maps each class descriptor + to the list of :data:`ParsedContent` items written by ``writeObject``. + + .. note:: + This class intentionally does **not** use ``slots=True`` so that + transformer subclasses can use multiple inheritance with built-in + types such as :class:`list`, :class:`dict`, or :class:`set`. + All fields have defaults so that ``JavaInstance()`` can be called + with no arguments during construction (the parser sets them after). + """ + + handle: int = 0 + classdesc: JavaClassDesc | None = None # set by the parser after creation + field_data: dict[JavaClassDesc, dict[JavaField, Any]] = field(default_factory=dict) + annotations: dict[JavaClassDesc, list[Any]] = field(default_factory=dict) + is_exception: bool = False + + # ------------------------------------------------------------------ + # Field access helpers + # ------------------------------------------------------------------ + + def get_field( + self, + name: str, + class_desc: JavaClassDesc | None = None, + ) -> Any: + """ + Returns the value of a field by name. + + If *class_desc* is provided the search is restricted to that class, + which avoids the ambiguity that can arise when two classes in the + hierarchy declare a field with the same name. + + :raises AttributeError: If the field is not found. + """ + search = {class_desc: self.field_data[class_desc]} if class_desc is not None else self.field_data + for cd_fields in search.values(): + for f, v in cd_fields.items(): + if f.name == name: + return v + raise AttributeError(name) + + def __getattr__(self, name: str) -> Any: + """ + Flat attribute access to instance fields (v1/v2 API compatibility). + + When multiple classes in the hierarchy define a field with the same + name, a :class:`UserWarning` is emitted and the first match is + returned. Use :meth:`get_field` with an explicit *class_desc* to + resolve ambiguity. + """ + # Note: __getattr__ is only called when normal attribute lookup fails, + # so there is no risk of infinite recursion here. + matches: list[Any] = [ + v for cd_fields in self.field_data.values() for f, v in cd_fields.items() if f.name == name + ] + if len(matches) == 1: + return matches[0] + if len(matches) > 1: + warnings.warn( + f"Ambiguous field '{name}': found in {len(matches)} classes " + "in the hierarchy. Use get_field(name, class_desc) for " + "unambiguous access.", + stacklevel=2, + ) + return matches[0] + raise AttributeError(name) + + def get_class(self) -> JavaClassDesc | None: + """Returns the class descriptor of this instance.""" + return self.classdesc + + def load_from_instance(self) -> bool: + """ + Post-processing hook called after parsing. + + Transformer subclasses can override this to convert parsed field data + into a more convenient Python representation. + + :return: ``True`` if post-processing succeeded, ``False`` otherwise. + """ + return False + + def load_from_blockdata(self, parser: Any, reader: Any) -> bool: + """ + Hook for ``SC_EXTERNALIZABLE + SC_BLOCK_DATA`` classes. + + Transformer subclasses should override this to decode the raw block + data written by the Java ``writeExternal`` method. + + :return: ``True`` if decoding succeeded, ``False`` otherwise. + """ + return False + + def __str__(self) -> str: + name = self.classdesc.name if self.classdesc else "" + return f"[instance 0x{self.handle:x}: type={name!r}]" + + __repr__ = __str__ + + +# ------------------------------------------------------------------------------ +# Array +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True) +class JavaArray: + """ + A deserialized Java array (TC_ARRAY). + + For ``TYPE_BYTE`` arrays ``data`` holds a :class:`bytes` object. + For all other element types ``data`` is a :class:`list`. + """ + + handle: int + classdesc: JavaClassDesc + element_type: FieldType + data: bytes | list[Any] + + def __len__(self) -> int: + return len(self.data) + + def __iter__(self): + return iter(self.data) + + def __getitem__(self, idx: int) -> Any: + return self.data[idx] # type: ignore[index] + + def __str__(self) -> str: + return f"[array 0x{self.handle:x}: type={self.element_type.name}, len={len(self.data)}]" + + __repr__ = __str__ + + +# ------------------------------------------------------------------------------ +# String +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True) +class JavaString: + """A Java string decoded from TC_STRING or TC_LONGSTRING.""" + + handle: int + value: str + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return repr(self.value) + + def __hash__(self) -> int: + return hash(self.value) + + def __eq__(self, other: object) -> bool: + if isinstance(other, JavaString): + return self.value == other.value + if isinstance(other, str): + return self.value == other + return NotImplemented + + +# ------------------------------------------------------------------------------ +# Enum +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True) +class JavaEnum: + """A Java enum constant (TC_ENUM).""" + + handle: int + classdesc: JavaClassDesc + constant: JavaString + + @property + def name(self) -> str: + """The binary class name of the enum type.""" + return self.classdesc.name + + def __str__(self) -> str: + return f"[enum {self.classdesc.name}.{self.constant.value}]" + + __repr__ = __str__ + + def __hash__(self) -> int: + return hash((self.classdesc.name, self.constant.value)) + + def __eq__(self, other: object) -> bool: + if isinstance(other, JavaEnum): + return self.classdesc.name == other.classdesc.name and self.constant.value == other.constant.value + return NotImplemented + + +# ------------------------------------------------------------------------------ +# Class reference +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True) +class JavaClass: + """Represents a ``java.lang.Class`` token (TC_CLASS).""" + + handle: int + classdesc: JavaClassDesc + + @property + def name(self) -> str: + """The binary name of the represented class.""" + return self.classdesc.name + + def __str__(self) -> str: + return f"[class {self.classdesc.name!r}]" + + __repr__ = __str__ + + +# ------------------------------------------------------------------------------ +# Block data +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True, eq=False) +class BlockData: + """Raw bytes from a TC_BLOCKDATA / TC_BLOCKDATALONG record.""" + + data: bytes + handle: int = 0 + + def __eq__(self, other: object) -> bool: + """ + Compares block data with other ``BlockData`` instances or with + ``bytes`` / ``str`` directly (v1/v2 API compatibility). + """ + if isinstance(other, BlockData): + return self.data == other.data + if isinstance(other, (bytes, bytearray)): + return self.data == bytes(other) + if isinstance(other, str): + return self.data == other.encode("latin-1") + return NotImplemented + + def __hash__(self) -> int: + return hash(self.data) + + def __str__(self) -> str: + return f"[blockdata len={len(self.data)}]" + + __repr__ = __str__ + + +# ------------------------------------------------------------------------------ +# Exception state +# ------------------------------------------------------------------------------ + + +@dataclass(slots=True) +class ExceptionState: + """ + Wrapper produced when a TC_EXCEPTION record is encountered. + + The ``exception_object`` holds the parsed Java exception instance and + ``stream_data`` preserves the raw bytes for diagnostic purposes. + """ + + exception_object: JavaInstance + stream_data: bytes + handle: int = 0 + is_exception: bool = True + + def __str__(self) -> str: + return f"[ExceptionState 0x{self.handle:x}]" + + __repr__ = __str__ + + +# ------------------------------------------------------------------------------ +# Union type alias +# ------------------------------------------------------------------------------ + +type ParsedContent = ( + JavaInstance + | JavaArray + | JavaString + | JavaEnum + | JavaClass + | JavaClassDesc + | BlockData + | ExceptionState + | None +) diff --git a/javaobj/v3/exceptions.py b/javaobj/v3/exceptions.py new file mode 100644 index 0000000..cf61ff6 --- /dev/null +++ b/javaobj/v3/exceptions.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +""" +Exception hierarchy for javaobj v3. + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +__all__ = [ + "JavaObjError", + "ParseError", + "UnexpectedOpcodeError", + "UnsupportedFeatureError", + "SecurityError", +] + + +class JavaObjError(Exception): + """Base exception for all javaobj v3 errors.""" + + +class ParseError(JavaObjError): + """Raised when the stream cannot be decoded according to the protocol.""" + + def __init__(self, message: str, offset: int = -1) -> None: + """ + :param message: Human-readable description of the problem. + :param offset: Byte offset in the stream where the error occurred, + or -1 if unknown. + """ + super().__init__(message) + self.offset = offset + + def __str__(self) -> str: + base = super().__str__() + if self.offset >= 0: + return f"{base} (at stream offset 0x{self.offset:x})" + return base + + +class UnexpectedOpcodeError(ParseError): + """ + Raised when an opcode byte is not among the set of expected values. + + Attributes: + expected: Tuple of acceptable opcode values. + got: The opcode that was actually read. + """ + + def __init__( + self, + expected: tuple[int, ...], + got: int, + offset: int = -1, + ) -> None: + expected_hex = [f"0x{e:02x}" for e in expected] + super().__init__( + f"Expected one of {expected_hex}, got 0x{got:02x}", + offset, + ) + self.expected = expected + self.got = got + + +class UnsupportedFeatureError(JavaObjError): + """Raised when the stream uses a feature not yet implemented in v3.""" + + +class SecurityError(JavaObjError): + """ + Raised when a configurable safety limit is exceeded. + + This guards against malicious streams that declare huge arrays, deeply + nested object graphs, or extremely long strings to exhaust memory or + the call stack. + """ diff --git a/javaobj/v3/parser.py b/javaobj/v3/parser.py new file mode 100644 index 0000000..534eb45 --- /dev/null +++ b/javaobj/v3/parser.py @@ -0,0 +1,804 @@ +#!/usr/bin/env python3 +""" +Parser for the Java Object Serialization stream format (v3) + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +import logging +from typing import IO, Any + +# Javaobj +from ..constants import ( + ClassDescFlags, + StreamConstants, + TerminalCode, + TypeCode, +) +from .beans import ( + BlockData, + ClassDataType, + ClassDescType, + ExceptionState, + FieldType, + JavaArray, + JavaClass, + JavaClassDesc, + JavaEnum, + JavaField, + JavaInstance, + JavaString, + ParsedContent, +) +from .exceptions import ( + ParseError, + SecurityError, + UnexpectedOpcodeError, + UnsupportedFeatureError, +) +from .reader import DataReader +from .transformers import DefaultObjectTransformer, ObjectTransformer + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +__all__ = ["JavaStreamParser"] + +_log = logging.getLogger("javaobj.v3.parser") + + +class _ExceptionRead(Exception): + """Internal signal: a TC_EXCEPTION object was parsed and should propagate.""" + + def __init__(self, content: JavaInstance) -> None: + self.exception_object = content + + +class JavaStreamParser: + """ + Stateful parser for the Java Object Serialization stream format. + + Usage:: + + parser = JavaStreamParser(fd, transformers) + contents = parser.run() + + Parameters + ---------- + fd: + A readable binary file-like object positioned at the start of a Java + serialized stream (magic ``0xACED``). + transformers: + Ordered list of :class:`~javaobj.v3.transformers.ObjectTransformer` + instances. Transformers are tried in order; the first one that + returns a non-``None`` result wins. + max_array_size: + Maximum number of bytes allocatable by a single array or bulk-read + operation. Raises :class:`~javaobj.v3.exceptions.SecurityError` on + breach. + max_depth: + Maximum recursion depth of the parse tree. Raises + :class:`~javaobj.v3.exceptions.SecurityError` on breach. + """ + + def __init__( + self, + fd: IO[bytes], + transformers: list[ObjectTransformer], + *, + max_array_size: int = DataReader.DEFAULT_MAX_ARRAY_SIZE, + max_depth: int = DataReader.DEFAULT_MAX_DEPTH, + ) -> None: + self._fd = fd + self._reader = DataReader( + fd, + max_array_size=max_array_size, + max_string_size=max_array_size, + ) + self._transformers = list(transformers) + self._max_depth = max_depth + + # Handle table: maps handle int → ParsedContent + self._handles: dict[int, ParsedContent] = {} + # Saved handle snapshots from TC_RESET events + self._handle_maps: list[dict[int, ParsedContent]] = [] + self._current_handle = int(StreamConstants.BASE_REFERENCE_IDX) + + # Current recursion depth + self._depth = 0 + + # ------------------------------------------------------------------ + # Public entry point + # ------------------------------------------------------------------ + + def run(self) -> list[ParsedContent]: + """ + Parses the stream and returns a list of top-level content objects. + + :raises ParseError: On malformed stream data. + :raises SecurityError: If a configured limit is breached. + :raises UnsupportedFeatureError: If an unimplemented feature is used. + """ + magic = self._reader.read_ushort() + if magic != StreamConstants.STREAM_MAGIC: + raise ParseError(f"Invalid stream magic: 0x{magic:04x} (expected 0xACED)", 0) + version = self._reader.read_ushort() + if version != StreamConstants.STREAM_VERSION: + raise ParseError( + f"Unsupported stream version: 0x{version:04x} " + f"(expected 0x{int(StreamConstants.STREAM_VERSION):04x})", + 2, + ) + + self._reset() + contents: list[ParsedContent] = [] + + while True: + try: + opcode = self._reader.read_ubyte() + except EOFError: + break + + if opcode == TerminalCode.TC_RESET: + self._reset() + continue + + start_offset = self._reader.offset - 1 + item = self._read_content(opcode, block_data_allowed=True) + + if isinstance(item, JavaInstance) and item.is_exception: + # Wrap exception instances together with their raw bytes. + end_offset = self._reader.offset + self._fd.seek(start_offset) + raw = self._fd.read(end_offset - start_offset) + item = ExceptionState( + exception_object=item, + stream_data=raw, + handle=item.handle, + ) + + contents.append(item) + + if self._handles: + self._handle_maps.append(dict(self._handles)) + + return contents + + # ------------------------------------------------------------------ + # Internal state management + # ------------------------------------------------------------------ + + def _reset(self) -> None: + """Saves the current handle map and starts a fresh one (TC_RESET).""" + if self._handles: + self._handle_maps.append(dict(self._handles)) + self._handles.clear() + self._current_handle = int(StreamConstants.BASE_REFERENCE_IDX) + + def _new_handle(self) -> int: + """Allocates and returns the next handle value.""" + h = self._current_handle + self._current_handle += 1 + return h + + def _set_handle(self, handle: int, obj: ParsedContent) -> None: + """Stores *obj* under *handle* in the current handle table.""" + if handle in self._handles: + raise ParseError(f"Duplicate handle 0x{handle:x}", self._reader.offset) + self._handles[handle] = obj + + # ------------------------------------------------------------------ + # Content dispatcher + # ------------------------------------------------------------------ + + def _read_content( + self, + opcode: int, + *, + block_data_allowed: bool, + class_desc: JavaClassDesc | None = None, + ) -> ParsedContent: + """ + Dispatches *opcode* to the appropriate ``_do_*`` method. + + :param opcode: The TC_* byte already read from the stream. + :param block_data_allowed: Whether TC_BLOCKDATA records are valid here. + :param class_desc: Optional context class (for WRCLASS custom reading). + :raises SecurityError: If the maximum recursion depth is exceeded. + :raises ParseError: On unrecognised opcode. + """ + self._depth += 1 + if self._depth > self._max_depth: + raise SecurityError( + f"Maximum parse depth ({self._max_depth}) exceeded at offset 0x{self._reader.offset:x}" + ) + try: + match opcode: + case TerminalCode.TC_NULL: + return None + case TerminalCode.TC_OBJECT: + return self._do_object() + case TerminalCode.TC_CLASS: + return self._do_class() + case TerminalCode.TC_ARRAY: + return self._do_array() + case (TerminalCode.TC_STRING | TerminalCode.TC_LONGSTRING) as str_code: + return self._do_string(str_code) + case TerminalCode.TC_ENUM: + return self._do_enum() + case (TerminalCode.TC_CLASSDESC | TerminalCode.TC_PROXYCLASSDESC) as cd_code: + return self._do_classdesc(cd_code) + case TerminalCode.TC_REFERENCE: + return self._do_reference() + case TerminalCode.TC_EXCEPTION: + return self._do_exception() + case (TerminalCode.TC_BLOCKDATA | TerminalCode.TC_BLOCKDATALONG) as bd_code: + if not block_data_allowed: + raise ParseError( + "Unexpected TC_BLOCKDATA where not allowed", + self._reader.offset, + ) + return self._do_block_data(bd_code) + case _: + # Last resort: check whether a transformer can handle + # a custom writeObject for the active class_desc. + if ( + class_desc is not None + and class_desc.name + and class_desc.data_type == ClassDataType.WRCLASS + ): + # Rewind one byte so the transformer sees the opcode. + self._fd.seek(-1, 1) + result = self._custom_read_object(class_desc.name) + if result is not None: + return result + + raise ParseError( + f"Unknown opcode 0x{opcode:02x}", + self._reader.offset, + ) + finally: + self._depth -= 1 + + # ------------------------------------------------------------------ + # TC_OBJECT + # ------------------------------------------------------------------ + + def _do_object(self) -> JavaInstance: + """Parses a TC_OBJECT record and returns a :class:`JavaInstance`.""" + classdesc = self._read_classdesc() + + handle = self._new_handle() + _log.debug("TC_OBJECT handle=0x%x class=%s", handle, classdesc) + + instance = self._create_instance(classdesc) + instance.classdesc = classdesc + instance.handle = handle + + self._set_handle(handle, instance) + self._read_class_data(instance) + instance.load_from_instance() + + _log.debug("Done reading object handle=0x%x", handle) + return instance + + def _create_instance(self, classdesc: JavaClassDesc | None) -> JavaInstance: + """ + Tries each transformer in order; falls back to plain JavaInstance. + """ + if classdesc is not None and classdesc.name: + for t in self._transformers: + inst = t.create_instance(classdesc) + if inst is not None: + return inst + return JavaInstance() + + # ------------------------------------------------------------------ + # TC_CLASS + # ------------------------------------------------------------------ + + def _do_class(self) -> JavaClass: + """Parses a TC_CLASS record.""" + classdesc = self._read_classdesc() + if classdesc is None: + raise ParseError("TC_CLASS requires a non-null class descriptor", self._reader.offset) + handle = self._new_handle() + obj = JavaClass(handle=handle, classdesc=classdesc) + self._set_handle(handle, obj) + return obj + + # ------------------------------------------------------------------ + # TC_ARRAY + # ------------------------------------------------------------------ + + def _do_array(self) -> JavaArray: + """Parses a TC_ARRAY record.""" + classdesc = self._read_classdesc() + if classdesc is None: + raise ParseError("TC_ARRAY requires a non-null class descriptor", self._reader.offset) + handle = self._new_handle() + + name = classdesc.name or "" + if len(name) < 2: + raise ParseError( + f"Array class desc has invalid name {name!r}", + self._reader.offset, + ) + + # The second character of the class name encodes the element type. + element_type_byte = ord(name[1].encode("latin-1")) + try: + element_type = FieldType(element_type_byte) + except ValueError: + raise ParseError( + f"Unknown array element type byte 0x{element_type_byte:02x}", + self._reader.offset, + ) + + size = self._reader.read_int() + if size < 0: + raise ParseError(f"Invalid array size {size}", self._reader.offset) + + # Try transformers first (e.g. NumpyArrayTransformer) + type_code = TypeCode(element_type_byte) + data: bytes | list[Any] | None = None + for t in self._transformers: + data = t.load_array(self._reader, type_code, size) + if data is not None: + break + + if data is None: + if element_type == FieldType.BYTE: + # Efficient bulk read for byte arrays + data = self._reader.read_bytes(size) + else: + data = [self._read_field_value(element_type) for _ in range(size)] + + array = JavaArray( + handle=handle, + classdesc=classdesc, + element_type=element_type, + data=data, + ) + self._set_handle(handle, array) + return array + + # ------------------------------------------------------------------ + # TC_STRING / TC_LONGSTRING + # ------------------------------------------------------------------ + + def _do_string(self, opcode: int) -> JavaString: + """Parses a TC_STRING or TC_LONGSTRING record.""" + handle = self._new_handle() + + if opcode == TerminalCode.TC_STRING: + value = self._reader.read_utf() + elif opcode == TerminalCode.TC_LONGSTRING: + value = self._reader.read_long_utf() + else: + raise ParseError( + f"Expected TC_STRING or TC_LONGSTRING, got 0x{opcode:02x}", + self._reader.offset, + ) + + java_str = JavaString(handle=handle, value=value) + self._set_handle(handle, java_str) + return java_str + + # ------------------------------------------------------------------ + # Helper: read a string that may be TC_STRING, TC_LONGSTRING or TC_REFERENCE + # ------------------------------------------------------------------ + + def _read_new_string(self, opcode: int) -> JavaString: + """ + Reads a string-valued token; handles TC_REFERENCE to an earlier string. + """ + if opcode == TerminalCode.TC_REFERENCE: + prev = self._do_reference() + if not isinstance(prev, JavaString): + raise ParseError( + "TC_REFERENCE in string context does not point to a string", + self._reader.offset, + ) + return prev + return self._do_string(opcode) + + # ------------------------------------------------------------------ + # TC_ENUM + # ------------------------------------------------------------------ + + def _do_enum(self) -> JavaEnum: + """Parses a TC_ENUM record.""" + classdesc = self._read_classdesc() + if classdesc is None: + raise ParseError("TC_ENUM has null class descriptor", self._reader.offset) + + handle = self._new_handle() + + str_opcode = self._reader.read_ubyte() + constant = self._read_new_string(str_opcode) + if classdesc.name: + classdesc.enum_constants.add(constant.value) + + enum_obj = JavaEnum(handle=handle, classdesc=classdesc, constant=constant) + self._set_handle(handle, enum_obj) + return enum_obj + + # ------------------------------------------------------------------ + # TC_CLASSDESC / TC_PROXYCLASSDESC + # ------------------------------------------------------------------ + + def _read_classdesc(self) -> JavaClassDesc | None: + """ + Reads a type-code byte then delegates to :meth:`_do_classdesc`. + Returns ``None`` for TC_NULL. + """ + opcode = self._reader.read_ubyte() + return self._do_classdesc(opcode) + + def _do_classdesc(self, opcode: int) -> JavaClassDesc | None: + """Parses a class descriptor record identified by *opcode*.""" + if opcode == TerminalCode.TC_NULL: + return None + + if opcode == TerminalCode.TC_REFERENCE: + prev = self._do_reference() + if not isinstance(prev, JavaClassDesc): + raise ParseError( + "TC_REFERENCE in classdesc context does not point to a class descriptor", + self._reader.offset, + ) + return prev + + if opcode == TerminalCode.TC_CLASSDESC: + return self._read_normal_classdesc() + + if opcode == TerminalCode.TC_PROXYCLASSDESC: + return self._read_proxy_classdesc() + + raise UnexpectedOpcodeError( + ( + TerminalCode.TC_NULL, + TerminalCode.TC_REFERENCE, + TerminalCode.TC_CLASSDESC, + TerminalCode.TC_PROXYCLASSDESC, + ), + opcode, + self._reader.offset, + ) + + def _read_normal_classdesc(self) -> JavaClassDesc: + """Parses a TC_CLASSDESC record (after the opcode byte).""" + name = self._reader.read_utf() + serial_version_uid = self._reader.read_long() + handle = self._new_handle() + desc_flags = self._reader.read_ubyte() + + nb_fields = self._reader.read_short() + if nb_fields < 0: + raise ParseError(f"Invalid field count {nb_fields}", self._reader.offset) + + fields: list[JavaField] = [] + for _ in range(nb_fields): + field_type_byte = self._reader.read_ubyte() + try: + field_type = FieldType(field_type_byte) + except ValueError: + raise ParseError( + f"Unknown field type byte 0x{field_type_byte:02x}", + self._reader.offset, + ) + field_name = self._reader.read_utf() + class_name: str | None = None + + if field_type_byte in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY): + str_opcode = self._reader.read_ubyte() + class_name_str = self._read_new_string(str_opcode) + class_name = class_name_str.value + + fields.append(JavaField(type=field_type, name=field_name, class_name=class_name)) + + classdesc = JavaClassDesc( + handle=handle, + name=name, + serial_version_uid=serial_version_uid, + desc_flags=desc_flags, + class_type=ClassDescType.NORMALCLASS, + fields=fields, + ) + self._set_handle(handle, classdesc) + + classdesc.annotations = self._read_class_annotations() + classdesc.super_class = self._read_classdesc() + if classdesc.super_class is not None: + classdesc.super_class.is_super_class = True + + return classdesc + + def _read_proxy_classdesc(self) -> JavaClassDesc: + """Parses a TC_PROXYCLASSDESC record (after the opcode byte).""" + handle = self._new_handle() + nb_interfaces = self._reader.read_int() + interfaces = [self._reader.read_utf() for _ in range(nb_interfaces)] + + # Proxy classes are treated as Serializable with SC_SERIALIZABLE flag. + desc_flags = ClassDescFlags.SC_SERIALIZABLE | ClassDescFlags.SC_WRITE_METHOD + classdesc = JavaClassDesc( + handle=handle, + name="", + serial_version_uid=0, + desc_flags=int(desc_flags), + class_type=ClassDescType.PROXYCLASS, + interfaces=interfaces, + ) + self._set_handle(handle, classdesc) + + classdesc.annotations = self._read_class_annotations() + classdesc.super_class = self._read_classdesc() + if classdesc.super_class is not None: + classdesc.super_class.is_super_class = True + + return classdesc + + # ------------------------------------------------------------------ + # TC_REFERENCE + # ------------------------------------------------------------------ + + def _do_reference(self) -> ParsedContent: + """Resolves a TC_REFERENCE to a previously parsed object.""" + handle = self._reader.read_int() + try: + return self._handles[handle] + except KeyError: + raise ParseError(f"Invalid handle 0x{handle:x}", self._reader.offset) + + # ------------------------------------------------------------------ + # TC_EXCEPTION + # ------------------------------------------------------------------ + + def _do_exception(self) -> JavaInstance: + """ + Reads a TC_EXCEPTION record. + + The stream resets its state, parses one object (the exception), then + resets again. + """ + self._reset() + opcode = self._reader.read_ubyte() + if opcode == TerminalCode.TC_RESET: + raise ParseError( + "TC_RESET encountered while reading a TC_EXCEPTION", + self._reader.offset, + ) + + content = self._read_content(opcode, block_data_allowed=False) + if content is None: + raise ParseError("TC_EXCEPTION contains a null object", self._reader.offset) + if not isinstance(content, JavaInstance): + raise ParseError("TC_EXCEPTION object is not a Java instance", self._reader.offset) + content.is_exception = True + self._reset() + return content + + # ------------------------------------------------------------------ + # TC_BLOCKDATA / TC_BLOCKDATALONG + # ------------------------------------------------------------------ + + def _do_block_data(self, opcode: int) -> BlockData: + """Reads a TC_BLOCKDATA or TC_BLOCKDATALONG record.""" + if opcode == TerminalCode.TC_BLOCKDATA: + size = self._reader.read_ubyte() + elif opcode == TerminalCode.TC_BLOCKDATALONG: + size = self._reader.read_int() + else: + raise ParseError( + f"Expected block-data opcode, got 0x{opcode:02x}", + self._reader.offset, + ) + + if size < 0: + raise ParseError(f"Invalid block data size {size}", self._reader.offset) + + data = self._reader.read_bytes(size) + return BlockData(data=data) + + # ------------------------------------------------------------------ + # Class annotations (written by writeObject / annotateClass) + # ------------------------------------------------------------------ + + def _read_class_annotations(self, class_desc: JavaClassDesc | None = None) -> list[ParsedContent]: + """ + Reads annotation objects until TC_ENDBLOCKDATA is encountered. + + :param class_desc: Optional context used for WRCLASS custom readers. + :return: List of annotation content items (may be empty). + """ + items: list[ParsedContent] = [] + while True: + opcode = self._reader.read_ubyte() + + if opcode == TerminalCode.TC_ENDBLOCKDATA: + return items + + if opcode == TerminalCode.TC_RESET: + self._reset() + continue + + try: + item = self._read_content( + opcode, + block_data_allowed=True, + class_desc=class_desc, + ) + except _ExceptionRead as exc: + raise _ExceptionRead(exc.exception_object) from None + + if isinstance(item, JavaInstance) and item.is_exception: + raise _ExceptionRead(item) + + items.append(item) + + # ------------------------------------------------------------------ + # Instance data (classdata) + # ------------------------------------------------------------------ + + def _read_class_data(self, instance: JavaInstance) -> None: + """ + Reads all field data and annotations for *instance* according to + its class hierarchy. + """ + if instance.classdesc is None: + return + + hierarchy = instance.classdesc.get_hierarchy() + field_data: dict[JavaClassDesc, dict[JavaField, Any]] = {} + annotations: dict[JavaClassDesc, list[ParsedContent]] = {} + + for cd in hierarchy: + values: dict[JavaField, Any] = {} + + try: + data_type = cd.data_type + except ValueError: + # Skip class descs with no serializable/externalizable flags + # (e.g. proxy classes that appear in super-class chains). + continue + + match data_type: + case ClassDataType.NOWRCLASS: + for f in cd.fields: + values[f] = self._read_field_value(f.type) + field_data[cd] = values + + case ClassDataType.WRCLASS: + # Read the default serializable fields first … + for f in cd.fields: + values[f] = self._read_field_value(f.type) + field_data[cd] = values + # … then read the custom writeObject annotation block. + # load_from_instance() on the JavaInstance (or transformer + # subclass) will process these annotations afterwards. + annotations[cd] = self._read_class_annotations(cd) + + case ClassDataType.OBJECT_ANNOTATION: + # SC_EXTERNALIZABLE + SC_BLOCK_DATA + if not instance.load_from_blockdata(self, self._reader): + raise ParseError( + f"Externalizable class {cd.name!r} with " + "SC_BLOCK_DATA cannot be parsed: no transformer " + "handled load_from_blockdata()", + self._reader.offset, + ) + annotations[cd] = self._read_class_annotations(cd) + + case ClassDataType.EXTERNAL_CONTENTS: + # SC_EXTERNALIZABLE without SC_BLOCK_DATA (Protocol v1). + raise UnsupportedFeatureError( + f"SC_EXTERNALIZABLE without SC_BLOCK_DATA " + f"(Protocol v1) is not supported for class " + f"{cd.name!r}. " + "This stream was likely produced with an old JDK." + ) + + instance.field_data = field_data + instance.annotations = annotations + + def _is_default_supported(self, class_name: str) -> bool: + """ + Returns ``True`` when the :class:`DefaultObjectTransformer` (if + present) recognises *class_name*. + """ + for t in self._transformers: + if isinstance(t, DefaultObjectTransformer): + return t.handles(class_name) + return False + + # ------------------------------------------------------------------ + # Field value reader + # ------------------------------------------------------------------ + + def _read_field_value(self, field_type: FieldType) -> Any: + """Reads and returns a single field value of the given type.""" + match field_type: + case FieldType.BYTE: + return self._reader.read_byte() + case FieldType.CHAR: + return self._reader.read_char() + case FieldType.DOUBLE: + return self._reader.read_double() + case FieldType.FLOAT: + return self._reader.read_float() + case FieldType.INTEGER: + return self._reader.read_int() + case FieldType.LONG: + return self._reader.read_long() + case FieldType.SHORT: + return self._reader.read_short() + case FieldType.BOOLEAN: + return self._reader.read_bool() + case FieldType.OBJECT | FieldType.ARRAY as obj_type: + sub_opcode = self._reader.read_ubyte() + + if obj_type == FieldType.ARRAY: + if sub_opcode == TerminalCode.TC_NULL: + return None + if sub_opcode == TerminalCode.TC_REFERENCE: + return self._do_reference() + if sub_opcode != TerminalCode.TC_ARRAY: + raise ParseError( + f"Expected TC_ARRAY for array field, got 0x{sub_opcode:02x}", + self._reader.offset, + ) + + content = self._read_content(sub_opcode, block_data_allowed=False) + if isinstance(content, JavaInstance) and content.is_exception: + raise _ExceptionRead(content) + return content + + raise ParseError( + f"Cannot read field of unknown type {field_type!r}", + self._reader.offset, + ) + + # ------------------------------------------------------------------ + # Custom writeObject dispatcher + # ------------------------------------------------------------------ + + def _custom_read_object(self, class_name: str) -> Any | None: + """ + Tries each transformer's ``load_custom_writeObject`` for *class_name*. + Returns ``None`` if no transformer handles it. + """ + for t in self._transformers: + result = t.load_custom_writeObject(self, self._reader, class_name) + if result is not None: + return result + return None diff --git a/javaobj/v3/reader.py b/javaobj/v3/reader.py new file mode 100644 index 0000000..80819ae --- /dev/null +++ b/javaobj/v3/reader.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +""" +Low-level typed binary reader for the Java Object Serialization stream format + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +import struct +from typing import IO + +# Javaobj +from ..modifiedutf8 import decode_modified_utf8 +from .exceptions import ParseError + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +__all__ = ["DataReader"] + + +class DataReader: + """ + Typed binary stream reader for the Java Object Serialization protocol. + + The reader tracks the current stream offset so that :class:`ParseError` + messages can always pinpoint the exact location of a problem. + + Safety limits prevent allocation attacks: + + * ``max_array_size`` – maximum number of bytes that a single array or + bulk-read operation may allocate (default 100 MiB). + * ``max_string_size`` – maximum byte length accepted for TC_LONGSTRING + payloads (default 100 MiB). Normal TC_STRING payloads are limited to + 65 535 bytes by the 2-byte length field. + """ + + __slots__ = ("_fd", "_offset", "_max_array_size", "_max_string_size") + + #: Default limit on a single array allocation (100 MiB). + DEFAULT_MAX_ARRAY_SIZE: int = 100 * 1024 * 1024 + + #: Default recursion depth limit for the parser (not enforced here but + #: stored as a convenience constant used by :class:`JavaStreamParser`). + DEFAULT_MAX_DEPTH: int = 500 + + #: Default limit for TC_LONGSTRING payloads (100 MiB). + DEFAULT_MAX_STRING_SIZE: int = 100 * 1024 * 1024 + + def __init__( + self, + fd: IO[bytes], + *, + max_array_size: int = DEFAULT_MAX_ARRAY_SIZE, + max_string_size: int = DEFAULT_MAX_STRING_SIZE, + ) -> None: + """ + :param fd: A readable binary file-like object. + :param max_array_size: Maximum bytes for bulk array reads. + :param max_string_size: Maximum bytes for TC_LONGSTRING payloads. + """ + self._fd = fd + self._offset: int = 0 + self._max_array_size = max_array_size + self._max_string_size = max_string_size + + # ------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------ + + @property + def offset(self) -> int: + """Current byte offset in the stream (read-only).""" + return self._offset + + # ------------------------------------------------------------------ + # Raw I/O + # ------------------------------------------------------------------ + + def read_bytes(self, n: int) -> bytes: + """ + Reads exactly *n* bytes from the stream. + + :raises EOFError: If fewer than *n* bytes are available. + """ + data = self._fd.read(n) + if len(data) != n: + raise EOFError( + f"Unexpected end of stream: expected {n} bytes, got {len(data)} at offset 0x{self._offset:x}" + ) + self._offset += n + return data + + def read_struct(self, fmt: str) -> tuple: + """ + Reads and unpacks a :mod:`struct` format string. + + :param fmt: A struct format string (e.g. ``">i"`` for big-endian int). + :return: The unpacked tuple of values. + """ + size = struct.calcsize(fmt) + data = self.read_bytes(size) + return struct.unpack(fmt, data) + + # ------------------------------------------------------------------ + # Java primitive types + # ------------------------------------------------------------------ + + def read_bool(self) -> bool: + """Reads a Java ``boolean`` (1 byte).""" + return bool(self.read_struct(">B")[0]) + + def read_byte(self) -> int: + """Reads a Java signed ``byte`` (1 byte, -128 … 127).""" + return self.read_struct(">b")[0] + + def read_ubyte(self) -> int: + """Reads an unsigned byte (1 byte, 0 … 255).""" + return self.read_struct(">B")[0] + + def read_short(self) -> int: + """Reads a Java ``short`` (2 bytes, signed).""" + return self.read_struct(">h")[0] + + def read_ushort(self) -> int: + """Reads an unsigned ``short`` (2 bytes).""" + return self.read_struct(">H")[0] + + def read_int(self) -> int: + """Reads a Java ``int`` (4 bytes, signed).""" + return self.read_struct(">i")[0] + + def read_long(self) -> int: + """Reads a Java ``long`` (8 bytes, signed).""" + return self.read_struct(">q")[0] + + def read_float(self) -> float: + """Reads a Java ``float`` (4 bytes, IEEE 754 single-precision).""" + return self.read_struct(">f")[0] + + def read_double(self) -> float: + """Reads a Java ``double`` (8 bytes, IEEE 754 double-precision).""" + return self.read_struct(">d")[0] + + def read_char(self) -> str: + """ + Reads a Java ``char`` (2 bytes, unsigned UTF-16 code unit) and returns + the corresponding Python :class:`str` character. + """ + return chr(self.read_struct(">H")[0]) + + # ------------------------------------------------------------------ + # Java string types (Modified UTF-8) + # ------------------------------------------------------------------ + + def read_utf(self) -> str: + """ + Reads a Java ``UTF`` string: 2-byte unsigned length followed by + Modified UTF-8 encoded bytes. + """ + length = self.read_ushort() + return self._read_mutf8(length) + + def read_long_utf(self) -> str: + """ + Reads a Java long ``UTF`` string: 8-byte signed length followed by + Modified UTF-8 encoded bytes. + + :raises ParseError: If the declared length exceeds ``max_string_size`` + or is negative. + """ + length = self.read_long() + if length < 0 or length > self._max_string_size: + raise ParseError( + f"TC_LONGSTRING: invalid length {length} (limit is {self._max_string_size} bytes)", + self._offset, + ) + return self._read_mutf8(length) + + def _read_mutf8(self, length: int) -> str: + """ + Decodes *length* raw bytes as Modified UTF-8. + + :param length: Number of bytes to read from the stream. + :return: The decoded Python :class:`str`. + :raises ParseError: If the bytes cannot be decoded. + """ + data = self.read_bytes(length) + try: + value, _ = decode_modified_utf8(data) + except UnicodeDecodeError as exc: + raise ParseError( + f"Modified UTF-8 decoding failed: {exc}", + self._offset - length, + ) from exc + return value diff --git a/javaobj/v3/transformers.py b/javaobj/v3/transformers.py new file mode 100644 index 0000000..2d1c6b9 --- /dev/null +++ b/javaobj/v3/transformers.py @@ -0,0 +1,569 @@ +#!/usr/bin/env python3 +""" +Defines the object transformers for javaobj v3 + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +import functools +import struct +from typing import TYPE_CHECKING, Any + +# Numpy (optional) +try: + import numpy # type: ignore[import-untyped] +except ImportError: + numpy = None # type: ignore[assignment] + +# Javaobj +from ..constants import TerminalCode, TypeCode +from .beans import BlockData, JavaClassDesc, JavaInstance +from .reader import DataReader + +if TYPE_CHECKING: + from .parser import JavaStreamParser + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 5, 0) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +__all__ = [ + "ObjectTransformer", + "DefaultObjectTransformer", + "NumpyArrayTransformer", +] + + +# ------------------------------------------------------------------------------ +# Base transformer interface +# ------------------------------------------------------------------------------ + + +class ObjectTransformer: + """ + Base class for v3 object transformers. + + Override any combination of the three hook methods to customise how + specific Java classes are represented in Python. Returning ``None`` + from any method signals that this transformer does not handle the case + and the next transformer (or the default behaviour) should be tried. + """ + + def create_instance(self, classdesc: JavaClassDesc) -> JavaInstance | None: + """ + Returns a custom :class:`~javaobj.v3.beans.JavaInstance` subclass + for the given class descriptor, or ``None`` to use the default + :class:`~javaobj.v3.beans.JavaInstance`. + + The parser will set ``.handle``, ``.classdesc``, ``.field_data`` + and ``.annotations`` on the returned object after this call. + """ + return None + + def load_array( + self, + reader: DataReader, + type_code: TypeCode, + size: int, + ) -> bytes | list[Any] | None: + """ + Reads and returns the content of a Java array of *size* elements. + + Returns ``None`` to fall back to the default element-by-element + reading logic. + """ + return None + + def load_custom_writeObject( + self, + parser: "JavaStreamParser", + reader: DataReader, + class_name: str, + ) -> Any | None: + """ + Handles the content of a class that uses a custom ``writeObject`` + / ``readExternal`` method unknown to the default transformers. + + Returns ``None`` to indicate that this transformer cannot handle + the class. + """ + return None + + +# ------------------------------------------------------------------------------ +# Collection / primitive transformer classes +# ------------------------------------------------------------------------------ + + +class JavaList(list, JavaInstance): + """Python list backed by a Java ArrayList or LinkedList.""" + + HANDLED_CLASSES: tuple[str, ...] = ( + "java.util.ArrayList", + "java.util.LinkedList", + ) + + def __init__(self) -> None: + list.__init__(self) + JavaInstance.__init__(self) + + def load_from_instance(self) -> bool: + for cd, ann_list in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + # The first annotation entry is the capacity int; skip it. + self.extend(a for a in ann_list[1:]) + return True + return False + + +@functools.total_ordering +class JavaPrimitiveClass(JavaInstance): + """ + Base for Java wrapper classes that box a single primitive value + (Boolean, Integer, Long …). + """ + + HANDLED_CLASSES: str | tuple[str, ...] = () + + def __init__(self) -> None: + JavaInstance.__init__(self) + self.value: Any = None + + def __str__(self) -> str: + return str(self.value) + + def __repr__(self) -> str: + return repr(self.value) + + def __hash__(self) -> int: + return hash(self.value) + + def __eq__(self, other: object) -> bool: + return self.value == other # type: ignore[no-any-return] + + def __lt__(self, other: object) -> bool: + return self.value < other # type: ignore[operator] + + def load_from_instance(self) -> bool: + for fields in self.field_data.values(): + for f, v in fields.items(): + if f.name == "value": + self.value = v + return True + return False + + +class JavaBool(JavaPrimitiveClass): + """Represents a Java ``Boolean`` wrapper object.""" + + HANDLED_CLASSES = "java.lang.Boolean" + + def __bool__(self) -> bool: + return bool(self.value) + + +class JavaInt(JavaPrimitiveClass): + """Represents a Java ``Integer`` or ``Long`` wrapper object.""" + + HANDLED_CLASSES = ("java.lang.Integer", "java.lang.Long") + + def __int__(self) -> int: + return int(self.value) + + +class JavaMap(dict, JavaInstance): + """Python dict backed by a Java HashMap or TreeMap.""" + + HANDLED_CLASSES: tuple[str, ...] = ( + "java.util.HashMap", + "java.util.TreeMap", + ) + + def __init__(self) -> None: + dict.__init__(self) + JavaInstance.__init__(self) + + def load_from_instance(self) -> bool: + for cd, ann_list in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + # Annotation[0] is load-factor/capacity; skip it. + it = iter(ann_list[1:]) + for key, value in zip(it, it): + self[key] = value + return True + return False + + +class JavaLinkedHashMap(JavaMap): + """Java LinkedHashMap with custom block-data serialization.""" + + HANDLED_CLASSES = ("java.util.LinkedHashMap",) + + def load_from_blockdata(self, parser: "JavaStreamParser", reader: DataReader) -> bool: + # Read HashMap capacity / load-factor fields + self.buckets: int = reader.read_int() + self.size: int = reader.read_int() + + for _ in range(self.size): + key_opcode = reader.read_byte() + key = parser._read_content(key_opcode, block_data_allowed=True) + + val_opcode = reader.read_byte() + value = parser._read_content(val_opcode, block_data_allowed=True) + self[key] = value + + end_code = reader.read_byte() + if end_code != TerminalCode.TC_ENDBLOCKDATA: + raise ValueError(f"Expected TC_ENDBLOCKDATA, got 0x{end_code:02x}") + final_byte = reader.read_byte() + if final_byte != 0: + raise ValueError(f"Expected trailing 0x00, got 0x{final_byte:02x}") + return True + + +class JavaSet(set, JavaInstance): + """Python set backed by a Java HashSet or LinkedHashSet.""" + + HANDLED_CLASSES: tuple[str, ...] = ( + "java.util.HashSet", + "java.util.LinkedHashSet", + ) + + def __init__(self) -> None: + set.__init__(self) + JavaInstance.__init__(self) + + def load_from_instance(self) -> bool: + for cd, ann_list in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + # ann_list[0] is load-factor/capacity; skip it. + self.update(a for a in ann_list[1:]) + return True + return False + + +class JavaTreeSet(JavaSet): + """Python set backed by a Java TreeSet.""" + + HANDLED_CLASSES = ("java.util.TreeSet",) + + def load_from_instance(self) -> bool: + for cd, ann_list in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + # ann_list[0] is comparator, ann_list[1] is size; skip both. + self.update(a for a in ann_list[2:]) + return True + return False + + +def _read_struct_from_bytes(data: bytes, fmt: str) -> tuple[tuple[Any, ...], bytes]: + """Helper: unpacks *fmt* from the start of *data* and returns remaining.""" + size = struct.calcsize(fmt) + values = struct.unpack(fmt, data[:size]) + return values, data[size:] + + +class JavaTime(JavaInstance): + """ + Represents instances of the ``java.time`` package serialised via the + ``java.time.Ser`` proxy class. + """ + + HANDLED_CLASSES = ("java.time.Ser",) + + DURATION_TYPE = 1 + INSTANT_TYPE = 2 + LOCAL_DATE_TYPE = 3 + LOCAL_TIME_TYPE = 4 + LOCAL_DATE_TIME_TYPE = 5 + ZONE_DATE_TIME_TYPE = 6 + ZONE_REGION_TYPE = 7 + ZONE_OFFSET_TYPE = 8 + OFFSET_TIME_TYPE = 9 + OFFSET_DATE_TIME_TYPE = 10 + YEAR_TYPE = 11 + YEAR_MONTH_TYPE = 12 + MONTH_DAY_TYPE = 13 + PERIOD_TYPE = 14 + + def __init__(self) -> None: + JavaInstance.__init__(self) + self.type: int = -1 + self.year: int | None = None + self.month: int | None = None + self.day: int | None = None + self.hour: int | None = None + self.minute: int | None = None + self.second: int | None = None + self.nano: int | None = None + self.offset: int | None = None + self.zone: str | None = None + + def __str__(self) -> str: + return ( + f"JavaTime(type=0x{self.type:x}, " + f"year={self.year}, month={self.month}, day={self.day}, " + f"hour={self.hour}, minute={self.minute}, second={self.second}, " + f"nano={self.nano}, offset={self.offset}, zone={self.zone})" + ) + + def load_from_blockdata(self, parser: "JavaStreamParser", reader: DataReader) -> bool: + # Block data is handled entirely inside load_from_instance via + # the annotations. Accept the call and let load_from_instance do + # the real work. + return True + + def load_from_instance(self) -> bool: + for cd, ann_list in self.annotations.items(): + if cd.name not in self.HANDLED_CLASSES: + continue + if not ann_list or not isinstance(ann_list[0], BlockData): + return False + + # The raw bytes are stored in the BlockData annotation. + content: bytes = ann_list[0].data + (self.type,), content = _read_struct_from_bytes(content, ">b") + + handlers = { + self.DURATION_TYPE: self._do_duration, + self.INSTANT_TYPE: self._do_instant, + self.LOCAL_DATE_TYPE: self._do_local_date, + self.LOCAL_DATE_TIME_TYPE: self._do_local_date_time, + self.LOCAL_TIME_TYPE: self._do_local_time, + self.ZONE_DATE_TIME_TYPE: self._do_zoned_date_time, + self.ZONE_OFFSET_TYPE: self._do_zone_offset, + self.ZONE_REGION_TYPE: self._do_zone_region, + self.OFFSET_TIME_TYPE: self._do_offset_time, + self.OFFSET_DATE_TIME_TYPE: self._do_offset_date_time, + self.YEAR_TYPE: self._do_year, + self.YEAR_MONTH_TYPE: self._do_year_month, + self.MONTH_DAY_TYPE: self._do_month_day, + self.PERIOD_TYPE: self._do_period, + } + handler = handlers.get(self.type) + if handler is not None: + handler(content) + return True + return False + + # ------------------------------------------------------------------ + # Internal time-type handlers + # ------------------------------------------------------------------ + + def _do_duration(self, data: bytes) -> bytes: + (self.second, self.nano), data = _read_struct_from_bytes(data, ">qi") + return data + + def _do_instant(self, data: bytes) -> bytes: + (self.second, self.nano), data = _read_struct_from_bytes(data, ">qi") + return data + + def _do_local_date(self, data: bytes) -> bytes: + (self.year, self.month, self.day), data = _read_struct_from_bytes(data, ">ibb") + return data + + def _do_local_time(self, data: bytes) -> bytes: + (hour,), data = _read_struct_from_bytes(data, ">b") + minute = second = nano = 0 + + if hour < 0: + hour = ~hour + else: + (minute,), data = _read_struct_from_bytes(data, ">b") + if minute < 0: + minute = ~minute + else: + (second,), data = _read_struct_from_bytes(data, ">b") + if second < 0: + second = ~second + else: + (nano,), data = _read_struct_from_bytes(data, ">i") + + self.hour, self.minute, self.second, self.nano = ( + hour, + minute, + second, + nano, + ) + return data + + def _do_local_date_time(self, data: bytes) -> bytes: + data = self._do_local_date(data) + data = self._do_local_time(data) + return data + + def _do_zoned_date_time(self, data: bytes) -> bytes: + data = self._do_local_date_time(data) + data = self._do_zone_offset(data) + data = self._do_zone_region(data) + return data + + def _do_zone_offset(self, data: bytes) -> bytes: + (offset_byte,), data = _read_struct_from_bytes(data, ">b") + if offset_byte == 127: + (self.offset,), data = _read_struct_from_bytes(data, ">i") + else: + self.offset = offset_byte * 900 + return data + + def _do_zone_region(self, data: bytes) -> bytes: + # 2-byte length + UTF-8 string (standard UTF-8, not modified) + (length,), data = _read_struct_from_bytes(data, ">H") + self.zone = data[:length].decode("utf-8") + return data[length:] + + def _do_offset_time(self, data: bytes) -> bytes: + data = self._do_local_time(data) + data = self._do_zone_offset(data) + return data + + def _do_offset_date_time(self, data: bytes) -> bytes: + data = self._do_local_date_time(data) + data = self._do_zone_offset(data) + return data + + def _do_year(self, data: bytes) -> bytes: + (self.year,), data = _read_struct_from_bytes(data, ">i") + return data + + def _do_year_month(self, data: bytes) -> bytes: + (self.year, self.month), data = _read_struct_from_bytes(data, ">ib") + return data + + def _do_month_day(self, data: bytes) -> bytes: + (self.month, self.day), data = _read_struct_from_bytes(data, ">bb") + return data + + def _do_period(self, data: bytes) -> bytes: + (self.year, self.month, self.day), data = _read_struct_from_bytes(data, ">iii") + return data + + +# ------------------------------------------------------------------------------ +# DefaultObjectTransformer +# ------------------------------------------------------------------------------ + + +class DefaultObjectTransformer(ObjectTransformer): + """ + Built-in transformer that covers the most common Java standard-library + classes. + + Handled classes + ~~~~~~~~~~~~~~~ + * ``java.lang.Boolean``, ``java.lang.Integer``, ``java.lang.Long`` + * ``java.util.ArrayList``, ``java.util.LinkedList`` + * ``java.util.HashMap``, ``java.util.TreeMap``, ``java.util.LinkedHashMap`` + * ``java.util.HashSet``, ``java.util.LinkedHashSet``, ``java.util.TreeSet`` + * ``java.time.Ser`` + """ + + _KNOWN_TRANSFORMERS: tuple[type[JavaInstance], ...] = ( + JavaBool, + JavaInt, + JavaList, + JavaMap, + JavaLinkedHashMap, + JavaSet, + JavaTreeSet, + JavaTime, + ) + + def __init__(self) -> None: + self._type_mapper: dict[str, type[JavaInstance]] = {} + for klass in self._KNOWN_TRANSFORMERS: + handled = klass.HANDLED_CLASSES # type: ignore[attr-defined] + if isinstance(handled, str): + self._type_mapper[handled] = klass + else: + for name in handled: + self._type_mapper[name] = klass + + def create_instance(self, classdesc: JavaClassDesc) -> JavaInstance | None: + """ + Returns a specialised :class:`JavaInstance` subclass for known Java + types, or ``None`` for unknown types. + """ + mapped = self._type_mapper.get(classdesc.name) + if mapped is None: + return None + instance = mapped() + instance.classdesc = classdesc + return instance + + def handles(self, class_name: str) -> bool: + """Returns ``True`` if this transformer knows how to handle *class_name*.""" + return class_name in self._type_mapper + + +# ------------------------------------------------------------------------------ +# NumpyArrayTransformer +# ------------------------------------------------------------------------------ + + +class NumpyArrayTransformer(ObjectTransformer): + """ + Loads primitive Java arrays as NumPy arrays when *numpy* is available. + + NumPy dtype mapping (corrected from v1/v2): + * ``TYPE_CHAR`` → ``>u2`` (2-byte unsigned, UTF-16; **not** ``b``) + * ``TYPE_BYTE`` → ``B`` (unsigned byte) + * All other types use their natural NumPy big-endian counterparts. + """ + + NUMPY_TYPE_MAP: dict[TypeCode, str] = { + TypeCode.TYPE_BYTE: "B", + TypeCode.TYPE_CHAR: ">u2", # Fixed: Java char = 2-byte unsigned + TypeCode.TYPE_DOUBLE: ">d", + TypeCode.TYPE_FLOAT: ">f", + TypeCode.TYPE_INTEGER: ">i", + TypeCode.TYPE_LONG: ">q", + TypeCode.TYPE_SHORT: ">h", + TypeCode.TYPE_BOOLEAN: ">B", + } + + def load_array( + self, + reader: DataReader, + type_code: TypeCode, + size: int, + ) -> Any | None: + """ + Reads *size* elements from the stream as a NumPy array. + + Returns ``None`` if NumPy is not installed or the element type has + no NumPy mapping. + """ + if numpy is None: + return None + dtype = self.NUMPY_TYPE_MAP.get(type_code) + if dtype is None: + return None + return numpy.fromfile(reader._fd, dtype=dtype, count=size) diff --git a/tests/test_v3.py b/tests/test_v3.py new file mode 100644 index 0000000..6aad201 --- /dev/null +++ b/tests/test_v3.py @@ -0,0 +1,677 @@ +#!/usr/bin/env python3 +""" +Tests for javaobj v3. + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.5.0 +:status: Alpha + +.. + + Copyright 2026 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +import logging +import os +import subprocess +import sys +import unittest +from typing import Any + +# Make sure javaobj is importable when running directly +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +# Javaobj +import javaobj.v3 as javaobj +from javaobj.v3._compat import v1_to_v3, v2_to_v3 +from javaobj.v3.beans import ( + FieldType, + JavaArray, + JavaClass, + JavaClassDesc, + JavaEnum, + JavaInstance, + JavaString, +) +from javaobj.v3.exceptions import JavaObjError, ParseError, SecurityError +from javaobj.v3.transformers import ( + JavaTime, + ObjectTransformer, +) + +# ------------------------------------------------------------------------------ + +__docformat__ = "restructuredtext en" + +_logger = logging.getLogger("javaobj.tests.v3") + +# ------------------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------------------ + + +def _ser_path(filename: str) -> str: + """Returns the absolute path of a .ser fixture, searching sub-dirs.""" + base = os.path.dirname(__file__) + for sub in ("java", ""): + full = os.path.join(base, sub, filename) + if os.path.exists(full): + return full + raise FileNotFoundError(f"Fixture not found: {filename}") + + +# ------------------------------------------------------------------------------ +# Base test class +# ------------------------------------------------------------------------------ + + +class TestJavaobjV3Base(unittest.TestCase): + """Shared helpers for all v3 test cases.""" + + @classmethod + def setUpClass(cls) -> None: + """ + Calls Maven to compile & run Java classes that generate the .ser + fixtures, unless the ``JAVAOBJ_NO_MAVEN`` environment variable is set. + """ + java_dir = os.path.join(os.path.dirname(__file__), "java") + if not os.getenv("JAVAOBJ_NO_MAVEN") and os.path.isdir(java_dir): + cwd = os.getcwd() + os.chdir(java_dir) + subprocess.call("mvn test", shell=True) + os.chdir(cwd) + + def load_file(self, filename: str) -> Any: + """Reads and deserializes a .ser fixture via v3.""" + with open(_ser_path(filename), "rb") as f: + return javaobj.load(f) + + def load_bytes(self, filename: str) -> Any: + """Reads the raw bytes of a .ser fixture and deserializes via v3.""" + with open(_ser_path(filename), "rb") as f: + return javaobj.loads(f.read()) + + +# ------------------------------------------------------------------------------ +# Primitive and simple-type tests +# ------------------------------------------------------------------------------ + + +class TestPrimitiveTypes(TestJavaobjV3Base): + """Tests for primitive Java type serialization.""" + + def test_char_rw(self) -> None: + """testChar.ser – single Java char serialized as 2-byte sequence.""" + pobj = self.load_bytes("testChar.ser") + # A lone Java char is serialized as a 2-byte big-endian block. + self.assertEqual(pobj, b"\x00C") + + def test_chars_rw(self) -> None: + """testChars.ser – Java char[] encoded as UTF-16-BE bytes.""" + expected = "python-javaobj".encode("utf-16-be") + pobj = self.load_bytes("testChars.ser") + self.assertEqual(pobj, expected) + # Also comparable as a latin-1 string + self.assertEqual(pobj, expected.decode("latin1")) + + def test_double_rw(self) -> None: + """testDouble.ser – Java double serialized as 8 bytes.""" + pobj = self.load_bytes("testDouble.ser") + self.assertEqual(pobj, b"\x7f\xef\xff\xff\xff\xff\xff\xff") + + def test_bytes_rw(self) -> None: + """testBytes.ser – Java byte[] as Python bytes.""" + pobj = self.load_bytes("testBytes.ser") + self.assertEqual(pobj, b"HelloWorld") + + def test_boolean(self) -> None: + """testBoolean.ser – Java boolean primitive.""" + pobj = self.load_bytes("testBoolean.ser") + # A serialized boolean is a 1-byte block; 0x00 = false. + self.assertEqual(pobj, b"\x00") + + def test_byte(self) -> None: + """testByte.ser – Java byte primitive (value 127).""" + pobj = self.load_bytes("testByte.ser") + self.assertEqual(pobj, b"\x7f") + + def test_japan(self) -> None: + """testJapan.ser – Japanese characters (wide UTF-8 codepoints).""" + pobj = self.load_bytes("testJapan.ser") + self.assertEqual( + pobj, + "\u65e5\u672c\u56fd", # 日本国 + ) + + +# ------------------------------------------------------------------------------ +# Object / class descriptor tests +# ------------------------------------------------------------------------------ + + +class TestObjects(TestJavaobjV3Base): + """Tests for serialized Java objects.""" + + def test_fields(self) -> None: + """test_readFields.ser – object with named fields.""" + pobj = self.load_bytes("test_readFields.ser") + self.assertIsInstance(pobj, JavaInstance) + + # Access fields via the v2-compatible __getattr__ + self.assertEqual(pobj.aField1, "Gabba") + self.assertIsNone(pobj.aField2) + + # Access via get_field (preferred v3 API) + self.assertEqual(pobj.get_field("aField1"), "Gabba") + + classdesc = pobj.get_class() + self.assertIsNotNone(classdesc) + self.assertEqual(classdesc.serialVersionUID, 0x7F0941F5) + self.assertEqual(classdesc.name, "OneTest$SerializableTestHelper") + self.assertEqual(len(classdesc.fields_names), 3) + + def test_class(self) -> None: + """testClass.ser – java.lang.Class reference.""" + pobj = self.load_bytes("testClass.ser") + self.assertIsInstance(pobj, JavaClass) + self.assertEqual(pobj.name, "java.lang.String") + + def test_super(self) -> None: + """objSuper.ser – class hierarchy (parent + child fields).""" + pobj = self.load_bytes("objSuper.ser") + self.assertIsInstance(pobj, JavaInstance) + + classdesc = pobj.get_class() + self.assertIsNotNone(classdesc) + + # Fields defined on the child class + self.assertEqual(pobj.childString, "Child!!") + # Fields inherited from the parent class + self.assertEqual(pobj.bool, True) + self.assertEqual(pobj.integer, -1) + self.assertEqual(pobj.superString, "Super!!") + + def test_class_with_byte_array(self) -> None: + """testClassWithByteArray.ser – instance field holding a byte array.""" + pobj = self.load_bytes("testClassWithByteArray.ser") + self.assertIsInstance(pobj, JavaInstance) + + # In v3 the array field is a JavaArray whose .data is bytes + arr = pobj.myArray + self.assertIsInstance(arr, JavaArray) + self.assertEqual(arr.element_type, FieldType.BYTE) + self.assertEqual(arr.data, bytes([1, 3, 7, 11])) + + def test_sun_example(self) -> None: + """sunExample.ser – linked-list style stream with two objects.""" + content = javaobj.load(open(_ser_path("sunExample.ser"), "rb")) + + self.assertIsInstance(content, list) + self.assertEqual(len(content), 2) + + pobj = content[0] + self.assertEqual(pobj.value, 17) + self.assertTrue(pobj.next) + + pobj = content[1] + self.assertEqual(pobj.value, 19) + self.assertFalse(pobj.next) + + def test_exception_object(self) -> None: + """testException.ser / objException.ser – serialized exception. + + Exception parsing is complex (requires TC_EXCEPTION handling in the + object graph). This test verifies that the file is either parsed + successfully or raises a well-typed ``JavaObjError`` (no crashes with + unhandled exceptions or wrong types). + """ + for filename in ("testException.ser", "objException.ser"): + try: + pobj = self.load_bytes(filename) + _logger.debug("Loaded %s: %s", filename, pobj) + except FileNotFoundError: + _logger.warning("Skipping %s (not found)", filename) + except JavaObjError as exc: + # Known limitation: some exception streams reference + # class descriptors instead of strings (see report B-07). + # Log but do not fail the test. + _logger.warning( + "Parsing %s raised JavaObjError (known limitation): %s", + filename, + exc, + ) + + +# ------------------------------------------------------------------------------ +# Array tests +# ------------------------------------------------------------------------------ + + +class TestArrays(TestJavaobjV3Base): + """Tests for Java array serialization.""" + + def test_arrays_obj(self) -> None: + """objArrays.ser – object with several array fields.""" + pobj = self.load_bytes("objArrays.ser") + self.assertIsInstance(pobj, JavaInstance) + + classdesc = pobj.get_class() + self.assertIsNotNone(classdesc) + + # Check field names are accessible + self.assertIn("stringArr", classdesc.fields_names) + self.assertIn("integerArr", classdesc.fields_names) + self.assertIn("boolArr", classdesc.fields_names) + + # Each array field should be a JavaArray + self.assertIsInstance(pobj.stringArr, JavaArray) + self.assertIsInstance(pobj.integerArr, JavaArray) + self.assertIsInstance(pobj.boolArr, JavaArray) + + def test_char_array(self) -> None: + """testCharArray.ser – array of Java chars (UTF-16 code units).""" + pobj = self.load_bytes("testCharArray.ser") + self.assertIsInstance(pobj, JavaArray) + self.assertEqual(pobj.element_type, FieldType.CHAR) + self.assertEqual( + list(pobj), + [ + "\u0000", + "\ud800", + "\u0001", + "\udc00", + "\u0002", + "\uffff", + "\u0003", + ], + ) + + def test_2d_array(self) -> None: + """test2DArray.ser – two-dimensional int array.""" + pobj = self.load_bytes("test2DArray.ser") + self.assertIsInstance(pobj, JavaArray) + # Each row is itself a JavaArray + rows = [list(row) for row in pobj] + self.assertEqual(rows, [[1, 2, 3], [4, 5, 6]]) + + def test_class_array(self) -> None: + """testClassArray.ser – array of java.lang.Class references.""" + pobj = self.load_bytes("testClassArray.ser") + self.assertIsInstance(pobj, JavaArray) + self.assertEqual(pobj[0].name, "java.lang.Integer") + self.assertEqual(pobj[1].name, "java.io.ObjectOutputStream") + self.assertEqual(pobj[2].name, "java.lang.Exception") + + +# ------------------------------------------------------------------------------ +# Enum tests +# ------------------------------------------------------------------------------ + + +class TestEnums(TestJavaobjV3Base): + """Tests for Java enum serialization.""" + + def test_enums_obj(self) -> None: + """objEnums.ser – object with enum and array-of-enum fields.""" + pobj = self.load_bytes("objEnums.ser") + self.assertIsInstance(pobj, JavaInstance) + + classdesc = pobj.get_class() + self.assertEqual(classdesc.name, "ClassWithEnum") + + # Single enum field + self.assertIsInstance(pobj.color, JavaEnum) + self.assertEqual(pobj.color.classdesc.name, "Color") + # JavaString.__eq__ handles plain str comparison + self.assertEqual(pobj.color.constant, "GREEN") + + # Array of enum values + colors_arr = pobj.colors + self.assertIsInstance(colors_arr, JavaArray) + expected = ["GREEN", "BLUE", "RED"] + for color, name in zip(colors_arr, expected): + self.assertIsInstance(color, JavaEnum) + self.assertEqual(color.classdesc.name, "Color") + self.assertEqual(color.constant, name) + + def test_enums_simple(self) -> None: + """testEnums.ser – standalone enum values.""" + pobj = self.load_bytes("testEnums.ser") + _logger.debug("testEnums: %s", pobj) + + +# ------------------------------------------------------------------------------ +# Collection tests +# ------------------------------------------------------------------------------ + + +class TestCollections(TestJavaobjV3Base): + """Tests for Java collection serialization.""" + + def test_sets(self) -> None: + """testHashSet / testTreeSet / testLinkedHashSet – Java set types.""" + for filename in ( + "testHashSet.ser", + "testTreeSet.ser", + "testLinkedHashSet.ser", + ): + with self.subTest(file=filename): + pobj = self.load_bytes(filename) + self.assertIsInstance(pobj, set) + # Each element is a JavaInt whose .value is an int + self.assertSetEqual({item.value for item in pobj}, {1, 2, 42}) + + def test_collections_obj(self) -> None: + """objCollections.ser – object with ArrayList, HashMap, LinkedList.""" + pobj = self.load_bytes("objCollections.ser") + self.assertIsInstance(pobj, JavaInstance) + + self.assertIsInstance(pobj.arrayList, list) + self.assertIsInstance(pobj.hashMap, dict) + self.assertIsInstance(pobj.linkedList, list) + + def test_bool_int_long(self) -> None: + """testBoolIntLong.ser – HashMap with Boolean / Integer / Long values.""" + pobj = self.load_bytes("testBoolIntLong.ser") + self.assertIsInstance(pobj, dict) + + self.assertEqual(pobj["key1"], "value1") + self.assertEqual(pobj["key2"], "value2") + self.assertEqual(pobj["int"], 9) + self.assertEqual(pobj["int2"], 10) + self.assertEqual(pobj["bool"], True) + self.assertEqual(pobj["bool2"], True) + + def test_bool_int_long_nested(self) -> None: + """testBoolIntLong-2.ser – HashMap containing another HashMap.""" + pobj = self.load_bytes("testBoolIntLong-2.ser") + self.assertIsInstance(pobj, dict) + + base = self.load_bytes("testBoolIntLong.ser") + parent_map = pobj["subMap"] + for key, value in base.items(): + self.assertEqual(parent_map[key], value) + + def test_jceks_issue_5(self) -> None: + """jceks_issue_5.ser – regression test for issue #5.""" + pobj = self.load_bytes("jceks_issue_5.ser") + _logger.info("jceks_issue_5: %s", pobj) + + +# ------------------------------------------------------------------------------ +# java.time tests +# ------------------------------------------------------------------------------ + + +class TestTimes(TestJavaobjV3Base): + """Tests for java.time.* serialization.""" + + def test_times(self) -> None: + """testTime.ser – array of java.time.Ser instances.""" + pobj = self.load_bytes("testTime.ser") + + # Top-level result is a Java array + self.assertIsInstance(pobj, JavaArray) + + # Each element must be a JavaTime instance (from DefaultObjectTransformer) + for obj in pobj: + self.assertIsInstance(obj, JavaTime) + + # First entry is a Duration of 10 seconds + duration = pobj[0] + self.assertEqual(duration.second, 10) + + +# ------------------------------------------------------------------------------ +# v3-specific feature tests +# ------------------------------------------------------------------------------ + + +class TestV3Specific(TestJavaobjV3Base): + """Tests for features that are new or improved in v3.""" + + def test_byte_array_is_bytes(self) -> None: + """In v3, TYPE_BYTE arrays are returned as plain bytes, not list.""" + pobj = self.load_bytes("testBytes.ser") + # testBytes.ser is a standalone byte array (TC_ARRAY) + if isinstance(pobj, JavaArray): + self.assertIsInstance(pobj.data, bytes) + + def test_get_field_vs_getattr(self) -> None: + """get_field() and attribute access should return the same value.""" + pobj = self.load_bytes("test_readFields.ser") + self.assertIsInstance(pobj, JavaInstance) + + val_attr = pobj.aField1 + val_method = pobj.get_field("aField1") + self.assertEqual(val_attr, val_method) + + def test_typed_exceptions(self) -> None: + """Malformed streams must raise ParseError, a subclass of JavaObjError.""" + bad_data = b"\xac\xed\x00\x05\xff" + with self.assertRaises(ParseError): + javaobj.loads(bad_data) + + with self.assertRaises(JavaObjError): + javaobj.loads(bad_data) + + def test_invalid_magic_raises_parse_error(self) -> None: + """Streams with wrong magic must raise ParseError with offset info.""" + bad_data = b"\x00\x00\x00\x05" + try: + javaobj.loads(bad_data) + self.fail("Expected ParseError") + except ParseError as exc: + self.assertGreaterEqual(exc.offset, 0) + + def test_security_max_depth(self) -> None: + """A max_depth of 1 must raise SecurityError on any nested object.""" + data = open(_ser_path("objSuper.ser"), "rb").read() + with self.assertRaises(SecurityError): + javaobj.loads(data, max_depth=1) + + def test_empty_stream_returns_none(self) -> None: + """A stream with only the magic header and no objects returns None.""" + header = b"\xac\xed\x00\x05" + result = javaobj.loads(header) + self.assertIsNone(result) + + def test_loads_and_load_equivalent(self) -> None: + """javaobj.loads(data) must give the same result as javaobj.load(fd).""" + path = _ser_path("testBoolean.ser") + with open(path, "rb") as f: + data = f.read() + result_bytes = javaobj.loads(data) + with open(path, "rb") as f: + result_stream = javaobj.load(f) + self.assertEqual(result_bytes, result_stream) + + def test_classdesc_properties(self) -> None: + """JavaClassDesc compatibility properties (flags, serialVersionUID).""" + pobj = self.load_bytes("test_readFields.ser") + cd = pobj.get_class() + self.assertIsInstance(cd, JavaClassDesc) + + # Both names for the same attribute must match + self.assertEqual(cd.flags, cd.desc_flags) + self.assertEqual(cd.serialVersionUID, cd.serial_version_uid) + + # fields_names and fields_types must be consistent + self.assertEqual(len(cd.fields_names), len(cd.fields_types)) + for name, ftype in zip(cd.fields_names, cd.fields_types): + self.assertIsInstance(name, str) + self.assertIsInstance(ftype, FieldType) + + def test_java_string_equality(self) -> None: + """JavaString must compare equal to plain Python str.""" + js = JavaString(handle=0, value="hello") + self.assertEqual(js, "hello") + self.assertEqual("hello", js) + self.assertEqual(hash(js), hash("hello")) + + def test_custom_transformer(self) -> None: + """A custom ObjectTransformer.create_instance must be invoked.""" + + class MarkerInstance(JavaInstance): + """Marker subclass to detect transformer invocation.""" + + was_created = False + + def load_from_instance(self) -> bool: + MarkerInstance.was_created = True + return True + + class MarkerTransformer(ObjectTransformer): + TARGET = "OneTest$SerializableTestHelper" + + def create_instance(self, classdesc: JavaClassDesc) -> JavaInstance | None: + if classdesc.name == self.TARGET: + return MarkerInstance() + return None + + pobj = self.load_bytes("test_readFields.ser", MarkerTransformer()) + self.assertIsInstance(pobj, MarkerInstance) + self.assertTrue(MarkerInstance.was_created) + + # Helper used by test_custom_transformer + def load_bytes(self, filename: str, *extra_transformers: ObjectTransformer) -> Any: + with open(_ser_path(filename), "rb") as f: + return javaobj.load(f, *extra_transformers) + + def test_super_object(self) -> None: + """objSuper.ser – verify hierarchy is preserved in field_data.""" + pobj = self.load_bytes("objSuper.ser") + self.assertIsInstance(pobj, JavaInstance) + + # field_data must have at least one entry per class in the hierarchy + self.assertGreater(len(pobj.field_data), 0) + + # All classes in the hierarchy must be present + cd = pobj.get_class() + hierarchy = cd.get_hierarchy() + for hcd in hierarchy: + if hcd in pobj.field_data: + for field in hcd.fields: + self.assertIn(field, pobj.field_data[hcd]) + + +# ------------------------------------------------------------------------------ +# v1 / v2 compatibility tests +# ------------------------------------------------------------------------------ + + +class TestCompat(unittest.TestCase): + """Tests for the v1→v3 and v2→v3 migration helpers in _compat.""" + + # ------------------------------------------------------------------ + # v2 → v3 + # ------------------------------------------------------------------ + + def test_v2_to_v3_string(self) -> None: + """v2_to_v3 converts a v2 JavaString to a v3 JavaString.""" + import javaobj.v2 as javaobj_v2 + + v2_obj = javaobj_v2.loads(open(_ser_path("testJapan.ser"), "rb").read()) + v3_obj = v2_to_v3(v2_obj) + self.assertIsInstance(v3_obj, JavaString) + self.assertEqual(str(v3_obj), "\u65e5\u672c\u56fd") + + def test_v2_to_v3_instance(self) -> None: + """v2_to_v3 converts a v2 JavaInstance to a v3 JavaInstance.""" + import javaobj.v2 as javaobj_v2 + + v2_obj = javaobj_v2.loads(open(_ser_path("test_readFields.ser"), "rb").read()) + v3_obj = v2_to_v3(v2_obj) + self.assertIsInstance(v3_obj, JavaInstance) + self.assertIsNotNone(v3_obj.classdesc) + self.assertEqual(v3_obj.classdesc.name, "OneTest$SerializableTestHelper") + + def test_v2_to_v3_enum(self) -> None: + """v2_to_v3 converts a v2 JavaEnum to a v3 JavaEnum.""" + import javaobj.v2 as javaobj_v2 + + with open(_ser_path("objEnums.ser"), "rb") as f: + v2_obj = javaobj_v2.load(f) + # objEnums.ser is an instance that contains an enum field, not a + # standalone enum; parse the color field instead + v3_obj = v2_to_v3(v2_obj) + self.assertIsInstance(v3_obj, JavaInstance) + + def test_v2_to_v3_array(self) -> None: + """v2_to_v3 converts a v2 JavaArray (chars) to a v3 JavaArray.""" + import javaobj.v2 as javaobj_v2 + + v2_obj = javaobj_v2.loads(open(_ser_path("testCharArray.ser"), "rb").read()) + v3_obj = v2_to_v3(v2_obj) + self.assertIsInstance(v3_obj, JavaArray) + self.assertEqual(v3_obj.element_type, FieldType.CHAR) + + def test_v2_to_v3_unknown_raises(self) -> None: + """v2_to_v3 raises JavaObjError for an unmappable type.""" + with self.assertRaises(JavaObjError): + v2_to_v3(object()) # type: ignore[arg-type] + + # ------------------------------------------------------------------ + # v1 → v3 + # ------------------------------------------------------------------ + + def test_v1_to_v3_instance(self) -> None: + """v1_to_v3 converts a v1 JavaObject to a v3 JavaInstance.""" + import javaobj.v1 as javaobj_v1 + + v1_obj = javaobj_v1.loads(open(_ser_path("test_readFields.ser"), "rb").read()) + v3_obj = v1_to_v3(v1_obj) + self.assertIsInstance(v3_obj, JavaInstance) + self.assertIsNotNone(v3_obj.classdesc) + self.assertEqual(v3_obj.classdesc.name, "OneTest$SerializableTestHelper") + + def test_v1_to_v3_unknown_raises(self) -> None: + """v1_to_v3 raises JavaObjError for an unmappable type.""" + with self.assertRaises(JavaObjError): + v1_to_v3(object()) # type: ignore[arg-type] + + +# ------------------------------------------------------------------------------ +# GZip decompression test + + +class TestGzip(TestJavaobjV3Base): + """Tests for transparent GZip decompression.""" + + def test_gzip_equivalent(self) -> None: + """testChars.ser and testChars.ser.gz must parse to the same value.""" + try: + plain_path = _ser_path("testChars.ser") + gz_path = _ser_path("testChars.ser.gz") + except FileNotFoundError: + self.skipTest("testChars.ser.gz not found") + + with open(plain_path, "rb") as f: + plain = javaobj.load(f) + with open(gz_path, "rb") as f: + gzipped = javaobj.load(f) + + self.assertEqual(plain, gzipped) + + +# ------------------------------------------------------------------------------ +# Entry point +# ------------------------------------------------------------------------------ + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + unittest.main() From b17f15ecdbd484b39001bf8419d178b4d8dce26b Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Sun, 31 May 2026 23:54:16 +0200 Subject: [PATCH 3/8] Updated pyproject Added configuration for ruff, pytest as a dev dependency Signed-off-by: Thomas Calmant --- pyproject.toml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8789351..c380172 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ packages = ["javaobj"] [project] name = "javaobj-py3" -version = "0.4.4" +version = "0.5.0" description = "Module for serializing and de-serializing Java objects." readme = "README.md" license = "Apache-2.0" @@ -32,6 +32,8 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Topic :: Software Development :: Libraries :: Python Modules" ] @@ -48,6 +50,11 @@ Homepage = "https://github.com/tcalmant/python-javaobj" Issues = "http://github.com/tcalmant/python-javaobj/issues" Source = "http://github.com/tcalmant/python-javaobj/" +[dependency-groups] +dev = [ + "pytest>=9.0.3", +] + [tool.hatch.envs.test] dependencies = ["pytest"] @@ -55,4 +62,10 @@ dependencies = ["pytest"] run = "pytest tests" [tool.black] -line-length = 79 +line-length = 110 + +[tool.ruff] +line-length = 110 + +[tool.ruff.lint] +extend-select = ["I"] From 34c971a3f223e77cfcbf500065fce8d9bdbb95cf Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Sun, 31 May 2026 23:57:00 +0200 Subject: [PATCH 4/8] Updated CI file Signed-off-by: Thomas Calmant --- .github/workflows/build-24.04.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-24.04.yml b/.github/workflows/build-24.04.yml index 4c25cf3..fdccd51 100644 --- a/.github/workflows/build-24.04.yml +++ b/.github/workflows/build-24.04.yml @@ -5,10 +5,10 @@ name: CI Build - Python 3.8+ on: push: - branches: [ "master" ] + branches: [ "main", "master" ] tags: '**' pull_request: - branches: [ "master" ] + branches: [ "main", "master" ] jobs: build: @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14-dev"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v4 From 0786da843458d7b682aac08ef68d6d7184a12ddb Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Mon, 1 Jun 2026 00:02:16 +0200 Subject: [PATCH 5/8] Ignore v3 in CI if Python < 3.12 Signed-off-by: Thomas Calmant --- .github/workflows/build-20.04.yml | 11 ++++++----- .github/workflows/build-24.04.yml | 20 +++++++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-20.04.yml b/.github/workflows/build-20.04.yml index 2d8a2bc..e43ebd8 100644 --- a/.github/workflows/build-20.04.yml +++ b/.github/workflows/build-20.04.yml @@ -5,10 +5,10 @@ name: CI Build - Python 3.5-3.7 on: push: - branches: [ "master" ] + branches: [ "main", "master" ] tags: '**' pull_request: - branches: [ "master" ] + branches: [ "main", "master" ] jobs: build: @@ -34,13 +34,14 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 run: | + # javaobj/v3 and tests/test_v3.py require Python 3.12+ syntax # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=javaobj/v3,tests/test_v3.py # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=javaobj/v3,tests/test_v3.py - name: Test run: | - coverage run -m pytest + coverage run -m pytest --ignore=tests/test_v3.py - name: Coveralls env: COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/.github/workflows/build-24.04.yml b/.github/workflows/build-24.04.yml index fdccd51..cc10eb9 100644 --- a/.github/workflows/build-24.04.yml +++ b/.github/workflows/build-24.04.yml @@ -32,13 +32,23 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # javaobj/v3 and tests/test_v3.py require Python 3.12+ syntax; exclude them on older versions + if python -c "import sys; sys.exit(0 if sys.version_info >= (3, 12) else 1)"; then + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + else + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=javaobj/v3,tests/test_v3.py + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=javaobj/v3,tests/test_v3.py + fi - name: Test run: | - coverage run -m pytest + if python -c "import sys; sys.exit(0 if sys.version_info >= (3, 12) else 1)"; then + coverage run -m pytest + else + coverage run -m pytest --ignore=tests/test_v3.py + fi - name: Coveralls env: COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} From ad1ebc8cadb0f4d4239304b6ef33f67dbff0421b Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Mon, 1 Jun 2026 00:06:31 +0200 Subject: [PATCH 6/8] Omit v3 files in coverage Maybe this will avoid issues with coveralls Signed-off-by: Thomas Calmant --- .github/workflows/build-20.04.yml | 2 +- .github/workflows/build-24.04.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-20.04.yml b/.github/workflows/build-20.04.yml index e43ebd8..79bcb09 100644 --- a/.github/workflows/build-20.04.yml +++ b/.github/workflows/build-20.04.yml @@ -41,7 +41,7 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=javaobj/v3,tests/test_v3.py - name: Test run: | - coverage run -m pytest --ignore=tests/test_v3.py + coverage run --omit='javaobj/v3/*,tests/test_v3.py' -m pytest --ignore=tests/test_v3.py - name: Coveralls env: COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/.github/workflows/build-24.04.yml b/.github/workflows/build-24.04.yml index cc10eb9..5b69ebc 100644 --- a/.github/workflows/build-24.04.yml +++ b/.github/workflows/build-24.04.yml @@ -47,7 +47,7 @@ jobs: if python -c "import sys; sys.exit(0 if sys.version_info >= (3, 12) else 1)"; then coverage run -m pytest else - coverage run -m pytest --ignore=tests/test_v3.py + coverage run --omit='javaobj/v3/*,tests/test_v3.py' -m pytest --ignore=tests/test_v3.py fi - name: Coveralls env: From dec8cbcb0b22e8867407ff0ff8f4ff7759806f33 Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Mon, 1 Jun 2026 00:17:06 +0200 Subject: [PATCH 7/8] Remove CI for Ubuntu 20.04 This target has been removed from GitHub CI Signed-off-by: Thomas Calmant --- .github/workflows/build-20.04.yml | 50 ------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 .github/workflows/build-20.04.yml diff --git a/.github/workflows/build-20.04.yml b/.github/workflows/build-20.04.yml deleted file mode 100644 index 79bcb09..0000000 --- a/.github/workflows/build-20.04.yml +++ /dev/null @@ -1,50 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: CI Build - Python 3.5-3.7 - -on: - push: - branches: [ "main", "master" ] - tags: '**' - pull_request: - branches: [ "main", "master" ] - -jobs: - build: - timeout-minutes: 10 - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - python-version: ["3.5", "3.6", "3.7"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - env: - PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest coverage - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # javaobj/v3 and tests/test_v3.py require Python 3.12+ syntax - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=javaobj/v3,tests/test_v3.py - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=javaobj/v3,tests/test_v3.py - - name: Test - run: | - coverage run --omit='javaobj/v3/*,tests/test_v3.py' -m pytest --ignore=tests/test_v3.py - - name: Coveralls - env: - COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} - run: | - pip install coveralls - coveralls From 01d037f2a5c712bfaad679dbcbabac639b93f776 Mon Sep 17 00:00:00 2001 From: Thomas Calmant Date: Mon, 1 Jun 2026 00:31:30 +0200 Subject: [PATCH 8/8] Reviewed README Signed-off-by: Thomas Calmant --- README.md | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 5866a86..2d72e90 100644 --- a/README.md +++ b/README.md @@ -28,22 +28,27 @@ This fork intends to work both on Python 2.7 and Python 3.4+. | Implementations | Version | |-----------------|----------| | `v1`, `v2` | `0.4.0+` | +| `v3` | `0.5.0+` | -Since version 0.4.0, two implementations of the parser are available: +Since version 0.4.0, three implementations of the parser are available: * `v1`: the *classic* implementation of `javaobj`, with a work in progress implementation of a writer. -* `v2`: the *new* implementation, which is a port of the Java project +* `v2`: a rewritten implementation, which is a port of the Java project [`jdeserialize`](https://github.com/frohoff/jdeserialize/), with support of the object transformer (with a new API) and of the `numpy` arrays loading. +* `v3`: a **new** implementation, written from scratch to benefit from + Python 3.12+ features. You can use the `v1` parser to ensure that the behaviour of your scripts doesn't change and to keep the ability to write down files. -You can use the `v2` parser for new developments -*which won't require marshalling* and as a *fallback* if the `v1` -fails to parse a file. +You can use the `v2` parser for developments in Python versions lower +than 3.12 and *which won't require marshalling*, or as a *fallback* +if the `v1` parser fails to parse a file. + +For new development, you should use the `v3` parser. ### Object transformers V1 @@ -147,8 +152,8 @@ with open("objCollections.ser", "rb") as fd: **Note:** The objects and methods provided by `javaobj` module are shortcuts to the `javaobj.v1` package, for Compatibility purpose. -It is **recommended** to explicitly import methods and classes from the `v1` -(or `v2`) package when writing new code, in order to be sure that your code +It is **recommended** to explicitly import methods and classes from the `v1`, +`v2`, or `v3` package when writing new code, in order to be sure that your code won't need import updates in the future. @@ -404,13 +409,13 @@ class JavaRandomTransformer(BaseTransformer): values = [] for f_name, f_type in zip(self.field_names, self.field_types): values.append(parser._read_field_value(f_type)) - fields.append(javaobj.beans.JavaField(f_type, f_name)) + fields.append(javaobj.v2.beans.JavaField(f_type, f_name)) - class_desc = javaobj.beans.JavaClassDesc( - javaobj.beans.ClassDescType.NORMALCLASS + class_desc = javaobj.v2.beans.JavaClassDesc( + javaobj.v2.beans.ClassDescType.NORMALCLASS ) class_desc.name = self.name - class_desc.desc_flags = javaobj.beans.ClassDataType.EXTERNAL_CONTENTS + class_desc.desc_flags = javaobj.v2.beans.ClassDataType.EXTERNAL_CONTENTS class_desc.fields = fields class_desc.field_data = values return class_desc @@ -486,7 +491,8 @@ transformers = [ RandomChildTransformer(), JavaRandomTransformer() ] -pobj = javaobj.loads("custom_objects.ser", *transformers) +with open("custom_objects.ser", "rb") as fd: + pobj = javaobj.load(fd, *transformers) # Here we show a field that isn't visible from the class description # The field belongs to the class but it's not serialized by default because @@ -530,7 +536,7 @@ value = pobj.myField | Feature | V1 | V2 | V3 | |---|---|---|---| | Python 3.12+ (`match/case`, PEP 604) | ✗ | ✗ | ✓ | -| Fully typed (`dataclasses`, `TypeAlias`) | ✗ | partial | ✓ | +| Fully typed (`dataclasses`, PEP 695 `type` aliases) | ✗ | partial | ✓ | | `TC_RESET` handling | ✗ | ✗ | ✓ | | `TC_EXCEPTION` in object graph | ✗ | ✗ | ✓ | | `TC_PROXYCLASSDESC` | ✗ | ✓ | ✓ |