diff --git a/README.md b/README.md
index 019e3b5..a9622b2 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@
 - 🚀 **Enhanced by Default**: Main API provides smart datetime parsing and type detection automatically
 - ⚡ **Dual API Strategy**: Choose stdlib compatibility (`datason.json`) or enhanced features (`datason`)
 - 🛠️ **Zero Migration**: Existing `json.loads/dumps` code works immediately with optional enhancements
+- 📚 **Need help choosing?** See [Two Modes: When to Use Which](docs/two-modes.md)
 
 ### 🧠 **Intelligent Processing**
 - 🧠 **Smart Type Detection**: Automatically handles pandas DataFrames, NumPy arrays, datetime objects, and more
diff --git a/datason/__init__.py b/datason/__init__.py
index 7ecf09f..cfe1358 100644
--- a/datason/__init__.py
+++ b/datason/__init__.py
@@ -52,10 +52,12 @@
     load_typed,
     loads,  # Enhanced default (smart parsing, datetime support)
     loads_json,  # JSON compatibility (exact stdlib behavior)
+    save,
     save_api,
     save_chunked,
     save_ml,
     save_secure,
+    save_string,
     stream_dump,
     stream_load,  # Streaming deserialization for large files
     stream_save_ml,
@@ -228,6 +230,8 @@ def _get_version() -> str:
     # Enhanced DataSON API (default recommended usage)
     "dump",  # Enhanced file writing with smart features
     "dumps",  # Enhanced serialization returning dict
+    "save",  # Alias for dump
+    "save_string",  # Enhanced JSON string serialization
     "load",  # Enhanced file reading with smart parsing
     "loads",  # Enhanced string parsing with smart features
     "serialize",  # Enhanced serialization (returns dict)
@@ -276,6 +280,8 @@ def _get_version() -> str:
     "dump_chunked",
     "stream_dump",
     # File I/O Operations - Modern API Integration
+    "save",
+    "save_string",
     "save_ml",
     "save_secure",
     "save_api",
diff --git a/datason/api.py b/datason/api.py
index 324c3f8..d418f02 100644
--- a/datason/api.py
+++ b/datason/api.py
@@ -102,6 +102,19 @@ def dump(obj: Any, fp: Any, **kwargs: Any) -> None:
     save_ml(obj, fp, **kwargs)
 
 
+def save(obj: Any, fp: Any, **kwargs: Any) -> None:
+    """Enhanced file serialization alias.
+
+    This function mirrors :func:`dump` but uses a name that emphasizes
+    DataSON's role as an enhanced saver rather than a drop-in
+    replacement. Behaviour is identical to :func:`dump` and therefore
+    differs from :func:`json.dump` by performing type preservation and
+    datetime handling.
+    """
+
+    dump(obj, fp, **kwargs)
+
+
 def dump_json(
     obj: Any,
     fp: Any,
@@ -956,6 +969,18 @@ def dumps_json(
     return dumps_json_stdlib(serialized, **json_params)
 
 
+def save_string(obj: Any, **kwargs: Any) -> str:
+    """Enhanced serialization to JSON string.
+
+    This is an alias for :func:`dumps_json` that highlights the
+    difference from :func:`json.dumps`. DataSON performs type
+    inference and normalization before encoding the result as a JSON
+    string.
+    """
+
+    return dumps_json(obj, **kwargs)
+
+
 # =============================================================================
 # MIGRATION HELPERS - For smooth transition from old API
 # =============================================================================
diff --git a/datason/json.py b/datason/json.py
deleted file mode 100644
index 5db56d9..0000000
--- a/datason/json.py
+++ /dev/null
@@ -1,181 +0,0 @@
-"""
-JSON Compatibility Module - DataSON configured for stdlib json compatibility.
-
-This module uses DataSON's core functionality but configured to behave exactly
-like Python's built-in json module. This proves DataSON can be configured for
-perfect compatibility while using our unified codebase.
-
-For enhanced features (smart datetime parsing, dict output, ML types),
-use the main datason API instead.
-
-Examples:
-    # Drop-in replacement for json module (using DataSON core)
-    import datason.json as json
-    json_str = json.dumps(data)  # Returns string (like stdlib json)
-    obj = json.load(file)  # Basic parsing (like stdlib json)
-
-    # Enhanced DataSON features
-    import datason
-    result = datason.dumps(data)  # Returns dict with smart features
-"""
-
-import json as _json
-from typing import Any
-
-from .config import OutputType, SerializationConfig, TypeCoercion
-
-# Import DataSON's core functionality
-from .core_new import serialize as _core_serialize
-
-# Note: _basic_deserialize imported lazily to avoid circular imports
-
-# Re-export json module constants and exceptions for compatibility
-JSONDecodeError = _json.JSONDecodeError
-JSONEncoder = _json.JSONEncoder
-JSONDecoder = _json.JSONDecoder
-
-# Configuration for JSON compatibility - disable all DataSON enhancements
-_JSON_COMPAT_CONFIG = SerializationConfig(
-    # Disable smart features to match stdlib json exactly
-    uuid_format="string",  # UUIDs as strings, not objects
-    parse_uuids=False,  # Don't auto-convert UUID strings
-    datetime_output=OutputType.JSON_SAFE,  # Keep datetimes as strings
-    series_output=OutputType.JSON_SAFE,  # Basic pandas Series output
-    dataframe_output=OutputType.JSON_SAFE,  # Basic DataFrame output
-    numpy_output=OutputType.JSON_SAFE,  # Basic numpy array output
-    type_coercion=TypeCoercion.SAFE,  # Safe type handling
-    include_type_hints=False,  # No type metadata
-    auto_detect_types=False,  # No auto-detection of complex types
-    check_if_serialized=False,  # Always process
-    # Basic serialization only
-    sort_keys=False,
-    ensure_ascii=False,  # Let JSON handle this
-)
-
-
-def dumps(
-    obj: Any,
-    *,
-    skipkeys: bool = False,
-    ensure_ascii: bool = True,
-    check_circular: bool = True,
-    allow_nan: bool = True,
-    cls=None,
-    indent=None,
-    separators=None,
-    default=None,
-    sort_keys: bool = False,
-    **kwargs: Any,
-) -> str:
-    """
-    Serialize obj to JSON string using DataSON core (stdlib json compatible).
-
-    This uses DataSON's serialization engine but configured to behave exactly
-    like stdlib json.dumps() - proving DataSON can be configured for compatibility.
-    """
-    # Use DataSON's core serialization with compatibility config
-    serialized = _core_serialize(obj, config=_JSON_COMPAT_CONFIG)
-
-    # Convert to JSON string using stdlib json with exact same parameters
-    json_params = {
-        "skipkeys": skipkeys,
-        "ensure_ascii": ensure_ascii,
-        "check_circular": check_circular,
-        "allow_nan": allow_nan,
-        "cls": cls,
-        "indent": indent,
-        "separators": separators,
-        "default": default,
-        "sort_keys": sort_keys,
-    }
-
-    return _json.dumps(serialized, **json_params)
-
-
-def loads(s: str, **kwargs: Any) -> Any:
-    """
-    Parse JSON string using DataSON core (stdlib json compatible).
-
-    This uses DataSON's deserialization engine but configured to behave exactly
-    like stdlib json.loads() - no smart parsing, just basic JSON.
- """ - # First parse with stdlib json to get basic structure - parsed = _json.loads(s, **kwargs) - - # Then process with DataSON's basic deserializer (no smart features) - # Lazy import to avoid circular dependency - from .deserializers_new import deserialize as _basic_deserialize - - return _basic_deserialize(parsed, parse_dates=False, parse_uuids=False) - - -def dump( - obj: Any, - fp, - *, - skipkeys: bool = False, - ensure_ascii: bool = True, - check_circular: bool = True, - allow_nan: bool = True, - cls=None, - indent=None, - separators=None, - default=None, - sort_keys: bool = False, - **kwargs: Any, -) -> None: - """ - Serialize obj to JSON and write to file using DataSON core (stdlib compatible). - """ - # Use DataSON's core serialization with compatibility config - serialized = _core_serialize(obj, config=_JSON_COMPAT_CONFIG) - - # Write to file using stdlib json with exact same parameters - json_params = { - "skipkeys": skipkeys, - "ensure_ascii": ensure_ascii, - "check_circular": check_circular, - "allow_nan": allow_nan, - "cls": cls, - "indent": indent, - "separators": separators, - "default": default, - "sort_keys": sort_keys, - } - - _json.dump(serialized, fp, **json_params) - - -def load(fp, **kwargs: Any) -> Any: - """ - Parse JSON from file using DataSON core (stdlib json compatible). - """ - # First parse with stdlib json - parsed = _json.load(fp, **kwargs) - - # Then process with DataSON's basic deserializer (no smart features) - # Lazy import to avoid circular dependency - from .deserializers_new import deserialize as _basic_deserialize - - return _basic_deserialize(parsed, parse_dates=False, parse_uuids=False) - - -# Module-level documentation -__doc__ = """ -DataSON JSON Compatibility Module - -This module uses DataSON's core functionality configured to provide 100% API -compatibility with Python's built-in json module. This proves DataSON can be -configured for perfect compatibility using our unified codebase. - -Functions: - dumps(obj, **kwargs) -> str: Serialize using DataSON core (stdlib compatible) - loads(s, **kwargs) -> Any: Parse using DataSON core (stdlib compatible) - dump(obj, fp, **kwargs) -> None: Write using DataSON core (stdlib compatible) - load(fp, **kwargs) -> Any: Read using DataSON core (stdlib compatible) - -For enhanced DataSON features, use the main datason module instead: - import datason # Enhanced API with smart features -""" - -__all__ = ["dumps", "loads", "dump", "load", "JSONDecodeError", "JSONEncoder", "JSONDecoder"] diff --git a/datason/json/__init__.py b/datason/json/__init__.py new file mode 100644 index 0000000..182f00e --- /dev/null +++ b/datason/json/__init__.py @@ -0,0 +1,13 @@ +"""Standard library JSON compatibility module. + +This package re-exports Python's built-in :mod:`json` module so that +``import datason.json as json`` provides a strict drop-in replacement. +All functions, classes, and exceptions behave exactly like the +standard library version. +""" + +import json as _json +from json import * # noqa: F401,F403 + +__all__ = getattr(_json, "__all__", []) # type: ignore[attr-defined] +__doc__ = _json.__doc__ diff --git a/docs/api/modern-deserialization.md b/docs/api/modern-deserialization.md index 5fa5802..bfa6d4c 100644 --- a/docs/api/modern-deserialization.md +++ b/docs/api/modern-deserialization.md @@ -102,6 +102,10 @@ basic_data = ds.load_basic(json_data) # Basic types only, minimal processing ``` +!!! 
warning "Breaking behavior" + Unlike `json.loads`, `load_basic()` may convert ISO formatted + strings to :class:`datetime.datetime` objects. + ### load_smart() Intelligent deserialization with good accuracy for production use. @@ -120,6 +124,10 @@ smart_data = ds.load_smart(json_data) print(type(smart_data["timestamp"])) # ``` +!!! warning "Breaking behavior" + Performs aggressive type inference (datetimes, UUIDs, pandas types) + beyond what `json.loads` provides. + ### load_perfect() Perfect accuracy deserialization using templates for mission-critical applications. @@ -143,6 +151,10 @@ template = { perfect_data = ds.load_perfect(json_data, template) ``` +!!! warning "Breaking behavior" + Requires a template and can reconstruct complex Python types; the + standard library has no equivalent. + ### load_typed() High-accuracy deserialization using embedded type metadata. diff --git a/docs/api/modern-serialization.md b/docs/api/modern-serialization.md index c34995f..b1b407e 100644 --- a/docs/api/modern-serialization.md +++ b/docs/api/modern-serialization.md @@ -6,6 +6,8 @@ Intention-revealing dump functions for different use cases and optimization need | Function | Purpose | Best For | |----------|---------|----------| +| `save()` | Enhanced file writer (alias of `dump`) | Smart file output | +| `save_string()` | JSON string with type inference | String output | | `dump()` | General-purpose with composable options | Flexible workflows | | `dump_ml()` | ML-optimized for models and tensors | Data science | | `dump_api()` | Clean JSON for web APIs | Web development | @@ -21,6 +23,34 @@ Intention-revealing dump functions for different use cases and optimization need ## 📦 Detailed Function Documentation +### save() + +Enhanced file serialization to path or file object. + +::: datason.save + options: + show_source: true + show_signature: true + show_signature_annotations: true + +!!! warning "Breaking behavior" + Unlike `json.dump`, `save()` performs type preservation and datetime + conversion before writing to disk. + +### save_string() + +Enhanced serialization that returns a JSON string. + +::: datason.save_string + options: + show_source: true + show_signature: true + show_signature_annotations: true + +!!! warning "Breaking behavior" + `save_string()` applies DataSON's type inference before producing a + JSON string, while `json.dumps` only handles basic types. + ### dump() General-purpose serialization with composable options. diff --git a/docs/two-modes.md b/docs/two-modes.md new file mode 100644 index 0000000..c98e642 --- /dev/null +++ b/docs/two-modes.md @@ -0,0 +1,40 @@ +# 🔀 Two Modes: When to Use Which + +DataSON offers two distinct ways to work with JSON data: + +1. **Compat Mode** – a strict drop-in replacement for Python's `json` module. +2. **Enhanced Mode** – smarter serialization with type inference and modern helpers. + +Use Compat Mode when you need guaranteed parity with the standard library. +Switch to Enhanced Mode when you want DataSON's advanced features. 
+## Choosing a Mode
+
+| Use case | Recommended mode |
+|----------|-----------------|
+| Legacy code expecting `json` behavior | Compat Mode |
+| Need datetime parsing or complex types | Enhanced Mode |
+| Migrating gradually | Start with Compat, upgrade to Enhanced |
+
+The two modes can coexist in the same project, letting you migrate at your own pace.
diff --git a/mkdocs.yml b/mkdocs.yml
index a3e5a1c..0de511b 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -124,6 +124,7 @@ markdown_extensions:
 # Fixed navigation using files that actually exist
 nav:
   - Home: index.md
+  - Two Modes: two-modes.md
 
   # User Guide Section
   - User Guide:
diff --git a/scripts/simple_json_check.py b/scripts/simple_json_check.py
index 399c878..0c25f6b 100644
--- a/scripts/simple_json_check.py
+++ b/scripts/simple_json_check.py
@@ -8,7 +8,8 @@
 
 # Files where stdlib json import is legitimate
 ALLOWED_FILES = {
-    "datason/json.py",  # Drop-in compatibility module
+    "datason/json.py",  # Legacy path (kept for backward compatibility)
+    "datason/json/__init__.py",  # Drop-in compatibility package
     "datason/integrity.py",  # Canonical JSON output needed
     "datason/core_new.py",  # Core module with well-documented legitimate usage
     "tests/unit/test_json_compatibility_requirement.py",
diff --git a/tests/test_json_compat.py b/tests/test_json_compat.py
new file mode 100644
index 0000000..187323f
--- /dev/null
+++ b/tests/test_json_compat.py
@@ -0,0 +1,83 @@
+import json as std_json
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+import datason.json as ds_json
+
+
+def sample_data() -> dict[str, Any]:
+    return {"a": 1, "b": [1, 2, 3], "c": {"d": "text"}}
+
+
+def test_function_identity() -> None:
+    """datason.json should expose the exact stdlib functions and errors."""
+    assert ds_json.dumps is std_json.dumps
+    assert ds_json.loads is std_json.loads
+    assert ds_json.JSONDecodeError is std_json.JSONDecodeError
+
+
+@pytest.mark.parametrize(
+    "kwargs,data",
+    [
+        ({}, sample_data()),
+        ({"indent": 2}, {"a": 1}),
+        ({"separators": (",", ":")}, {"a": 1, "b": 2}),
+        ({"sort_keys": True}, {"b": 2, "a": 1}),
+        ({"ensure_ascii": False}, {"snowman": "☃"}),
+    ],
+)
+def test_dumps_parity(kwargs: dict[str, Any], data: Any) -> None:
+    """dumps should match stdlib json byte-for-byte across flags."""
+    assert ds_json.dumps(data, **kwargs) == std_json.dumps(data, **kwargs)
+
+
+def test_allow_nan_parity() -> None:
+    data = {"nan": float("nan")}
+    assert ds_json.dumps(data, allow_nan=True) == std_json.dumps(data, allow_nan=True)
+    with pytest.raises(ValueError) as ds_err:
+        ds_json.dumps(data, allow_nan=False)
+    with pytest.raises(ValueError) as std_err:
+        std_json.dumps(data, allow_nan=False)
+    assert str(ds_err.value) == str(std_err.value)
+
+
+def test_dump_and_load_parity(tmp_path: Path) -> None:
+    data = sample_data()
+    ds_file = tmp_path / "ds.json"
+    std_file = tmp_path / "std.json"
+
+    with ds_file.open("w") as f:
+        ds_json.dump(data, f, sort_keys=True, indent=2)
+    with std_file.open("w") as f:
+        std_json.dump(data, f, sort_keys=True, indent=2)
+
+    assert ds_file.read_text() == std_file.read_text()
+
+    with ds_file.open() as f:
+        ds_loaded = ds_json.load(f)
+    with std_file.open() as f:
+        std_loaded = std_json.load(f)
+    assert ds_loaded == std_loaded
+
+
+def test_dumps_non_serializable_error() -> None:
+    class Foo:
+        pass
+
+    obj = {"foo": Foo()}
+    with pytest.raises(TypeError) as ds_err:
+        ds_json.dumps(obj)
+    with pytest.raises(TypeError) as std_err:
+        std_json.dumps(obj)
+    assert str(ds_err.value) == str(std_err.value)
+
+
+def test_loads_malformed_json_error() -> None:
+    bad = '{"a": }'
+    with pytest.raises(ds_json.JSONDecodeError) as ds_err:
+        ds_json.loads(bad)
+    with pytest.raises(std_json.JSONDecodeError) as std_err:
+        std_json.loads(bad)
+    assert str(ds_err.value) == str(std_err.value)
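+
+
+# A minimal extra check, assuming the compat package simply re-exports the
+# stdlib json module: the remaining public objects should be identical too.
+def test_reexport_object_identity() -> None:
+    assert ds_json.dump is std_json.dump
+    assert ds_json.load is std_json.load
+    assert ds_json.JSONEncoder is std_json.JSONEncoder
+    assert ds_json.JSONDecoder is std_json.JSONDecoder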