diff --git a/README.md b/README.md index 6466b9e7..f4a6f835 100644 --- a/README.md +++ b/README.md @@ -146,30 +146,57 @@ DataJoint (). ### Prerequisites -- [Docker](https://docs.docker.com/get-docker/) for MySQL and MinIO services +- [Docker](https://docs.docker.com/get-docker/) (Docker daemon must be running) - Python 3.10+ -### Running Tests - -Tests are organized into `unit/` (no external services) and `integration/` (requires MySQL + MinIO): +### Quick Start ```bash -# Install dependencies +# Clone and install +git clone https://github.com/datajoint/datajoint-python.git +cd datajoint-python pip install -e ".[test]" -# Run unit tests only (fast, no Docker needed) -pytest tests/unit/ +# Run all tests (containers start automatically via testcontainers) +pytest tests/ -# Start MySQL and MinIO for integration tests -docker compose up -d db minio +# Install and run pre-commit hooks +pip install pre-commit +pre-commit install +pre-commit run --all-files +``` -# Run all tests +### Running Tests + +Tests use [testcontainers](https://testcontainers.com/) to automatically manage MySQL and MinIO containers. +**No manual `docker-compose up` required** - containers start when tests run and stop afterward. + +```bash +# Run all tests (recommended) pytest tests/ +# Run with coverage report +pytest --cov-report term-missing --cov=datajoint tests/ + # Run specific test file pytest tests/integration/test_blob.py -v -# Stop services when done +# Run only unit tests (no containers needed) +pytest tests/unit/ +``` + +### Alternative: External Containers + +For development/debugging, you may prefer persistent containers that survive test runs: + +```bash +# Start containers manually +docker compose up -d db minio + +# Run tests using external containers +DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/ + +# Stop containers when done docker compose down ``` @@ -183,24 +210,46 @@ docker compose --profile test up djtest --build ### Alternative: Using pixi -[pixi](https://pixi.sh) users can run tests with automatic service management: +[pixi](https://pixi.sh) users can run tests with: ```bash pixi install # First time setup -pixi run test # Starts services and runs tests -pixi run services-down # Stop services +pixi run test # Runs tests (testcontainers manages containers) ``` ### Pre-commit Hooks +Pre-commit hooks run automatically on `git commit` to check code quality. +**All hooks must pass before committing.** + ```bash -pre-commit install # Install hooks (first time) -pre-commit run --all-files # Run all checks +# Install hooks (first time only) +pip install pre-commit +pre-commit install + +# Run all checks manually +pre-commit run --all-files + +# Run specific hook +pre-commit run ruff --all-files +pre-commit run codespell --all-files ``` +Hooks include: +- **ruff**: Python linting and formatting +- **codespell**: Spell checking +- **YAML/JSON/TOML validation** +- **Large file detection** + +### Before Submitting a PR + +1. **Run all tests**: `pytest tests/` +2. **Run pre-commit**: `pre-commit run --all-files` +3. 
**Check coverage**: `pytest --cov-report term-missing --cov=datajoint tests/` + ### Environment Variables -Tests use these defaults (configured in `pyproject.toml`): +For external container mode (`DJ_USE_EXTERNAL_CONTAINERS=1`): | Variable | Default | Description | |----------|---------|-------------| diff --git a/docker-compose.yaml b/docker-compose.yaml index 98a16f16..2c48ffd1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,10 +1,15 @@ # Development environment with MySQL and MinIO services # -# Quick start: -# docker compose up -d db minio # Start services -# pytest tests/ # Run tests (uses localhost defaults) +# NOTE: docker-compose is OPTIONAL for running tests. +# Tests use testcontainers to automatically manage containers. +# Just run: pytest tests/ # -# Full Docker testing: +# Use docker-compose for development/debugging when you want +# persistent containers that survive test runs: +# docker compose up -d db minio # Start services manually +# pytest tests/ # Tests will use these containers +# +# Full Docker testing (CI): # docker compose --profile test up djtest --build services: db: diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml index 4de4f58e..03c10f69 100644 --- a/docs/mkdocs.yaml +++ b/docs/mkdocs.yaml @@ -33,7 +33,7 @@ nav: - Blobs: design/tables/blobs.md - Attachments: design/tables/attach.md - Filepaths: design/tables/filepath.md - - Custom Datatypes: design/tables/customtype.md + - Custom Codecs: design/tables/codecs.md - Dependencies: design/tables/dependencies.md - Indexes: design/tables/indexes.md - Master-Part Relationships: design/tables/master-part.md diff --git a/docs/src/design/tables/attributes.md b/docs/src/design/tables/attributes.md index 2e8105e7..39a80ff6 100644 --- a/docs/src/design/tables/attributes.md +++ b/docs/src/design/tables/attributes.md @@ -5,109 +5,150 @@ To conserve database resources, use the smallest and most restrictive datatype sufficient for your data. This also ensures that only valid data are entered into the pipeline. -## Most common datatypes - -- `tinyint`: an 8-bit integer number, ranging from -128 to 127. -- `tinyint unsigned`: an 8-bit positive integer number, ranging from 0 to 255. -- `smallint`: a 16-bit integer number, ranging from -32,768 to 32,767. -- `smallint unsigned`: a 16-bit positive integer, ranging from 0 to 65,535. -- `int`: a 32-bit integer number, ranging from -2,147,483,648 to 2,147,483,647. -- `int unsigned`: a 32-bit positive integer, ranging from 0 to 4,294,967,295. -- `enum`: one of several explicitly enumerated values specified as strings. - Use this datatype instead of text strings to avoid spelling variations and to save - storage space. - For example, the datatype for an anesthesia attribute could be - `enum("urethane", "isoflurane", "fentanyl")`. - Do not use enums in primary keys due to the difficulty of changing their definitions - consistently in multiple tables. +## Core datatypes (recommended) + +Use these portable, scientist-friendly types for cross-database compatibility. + +### Integers + +- `int8`: 8-bit signed integer (-128 to 127) +- `uint8`: 8-bit unsigned integer (0 to 255) +- `int16`: 16-bit signed integer (-32,768 to 32,767) +- `uint16`: 16-bit unsigned integer (0 to 65,535) +- `int32`: 32-bit signed integer +- `uint32`: 32-bit unsigned integer +- `int64`: 64-bit signed integer +- `uint64`: 64-bit unsigned integer +- `bool`: boolean value (True/False, stored as 0/1) + +### Floating-point + +- `float32`: 32-bit single-precision floating-point. 
Sufficient for many measurements. +- `float64`: 64-bit double-precision floating-point. + Avoid using floating-point types in primary keys due to equality comparison issues. +- `decimal(n,f)`: fixed-point number with *n* total digits and *f* fractional digits. + Use for exact decimal representation (e.g., currency, coordinates). + Safe for primary keys due to well-defined precision. + +### Strings + +- `char(n)`: fixed-length string of exactly *n* characters. +- `varchar(n)`: variable-length string up to *n* characters. +- `text`: unlimited-length text for long-form content (notes, descriptions, abstracts). +- `enum(...)`: one of several enumerated values, e.g., `enum("low", "medium", "high")`. + Do not use enums in primary keys due to difficulty changing definitions. + +**Encoding policy:** All strings use UTF-8 encoding (`utf8mb4` in MySQL, `UTF8` in PostgreSQL). +Character encoding and collation are database-level configuration, not part of type definitions. +Comparisons are case-sensitive by default. + +### Date/Time - `date`: date as `'YYYY-MM-DD'`. -- `time`: time as `'HH:MM:SS'`. -- `datetime`: Date and time to the second as `'YYYY-MM-DD HH:MM:SS'` -- `timestamp`: Date and time to the second as `'YYYY-MM-DD HH:MM:SS'`. - The default value may be set to `CURRENT_TIMESTAMP`. - Unlike `datetime`, a `timestamp` value will be adjusted to the local time zone. - -- `char(N)`: a character string up to *N* characters (but always takes the entire *N* -bytes to store). -- `varchar(N)`: a text string of arbitrary length up to *N* characters that takes -*M+1* or *M+2* bytes of storage, where *M* is the actual length of each stored string. -- `float`: a single-precision floating-point number. - Takes 4 bytes. - Single precision is sufficient for many measurements. - -- `double`: a double-precision floating-point number. - Takes 8 bytes. - Because equality comparisons are error-prone, neither `float` nor `double` should be - used in primary keys. -- `decimal(N,F)`: a fixed-point number with *N* total decimal digits and *F* -fractional digits. - This datatype is well suited to represent numbers whose magnitude is well defined - and does not warrant the use of floating-point representation or requires precise - decimal representations (e.g. dollars and cents). - Because of its well-defined precision, `decimal` values can be used in equality - comparison and be included in primary keys. - -- `longblob`: raw binary data, up to 4 -[GiB](http://en.wikipedia.org/wiki/Gibibyte) in size. - Stores and returns raw bytes without serialization. - For serialized Python objects (arrays, dicts, etc.), use `` instead. - The `longblob` and other `blob` datatypes can be configured to store data - [externally](../../sysadmin/external-store.md) by using the `blob@store` syntax. - -## Less common (but supported) datatypes - -- `decimal(N,F) unsigned`: same as `decimal`, but limited to nonnegative values. -- `mediumint` a 24-bit integer number, ranging from -8,388,608 to 8,388,607. -- `mediumint unsigned`: a 24-bit positive integer, ranging from 0 to 16,777,216. -- `mediumblob`: arbitrary numeric array, up to 16 -[MiB](http://en.wikipedia.org/wiki/Mibibyte) -- `blob`: arbitrary numeric array, up to 64 -[KiB](http://en.wikipedia.org/wiki/Kibibyte) -- `tinyblob`: arbitrary numeric array, up to 256 bytes (actually smaller due to header -info). - -## Special DataJoint-only datatypes - -These types abstract certain kinds of non-database data to facilitate use -together with DataJoint. 
- -- ``: DataJoint's native serialization format for Python objects. Supports -NumPy arrays, dicts, lists, datetime objects, and nested structures. Compatible with -MATLAB. See [custom types](customtype.md) for details. - -- `object`: managed [file and folder storage](object.md) with support for direct writes -(Zarr, HDF5) and fsspec integration. Recommended for new pipelines. - -- `attach`: a [file attachment](attach.md) similar to email attachments facillitating -sending/receiving an opaque data file to/from a DataJoint pipeline. - -- `filepath@store`: a [filepath](filepath.md) used to link non-DataJoint managed files -into a DataJoint pipeline. - -- ``: a [custom attribute type](customtype.md) that defines bidirectional -conversion between Python objects and database storage formats. Use this to store -complex data types like graphs, domain-specific objects, or custom data structures. - -## Numeric type aliases - -DataJoint provides convenient type aliases that map to standard MySQL numeric types. +- `datetime`: date and time as `'YYYY-MM-DD HH:MM:SS'`. + Use `CURRENT_TIMESTAMP` as default for auto-populated timestamps. + +**Timezone policy:** All `datetime` values should be stored as **UTC**. Timezone +conversion is a presentation concern handled by the application layer. This ensures +reproducible computations regardless of server location or timezone settings. + +### Binary + +- `bytes`: raw binary data (up to 4 GiB). Stores and returns raw bytes without + serialization. For serialized Python objects (arrays, dicts, etc.), use ``. + +### Other + +- `uuid`: 128-bit universally unique identifier. +- `json`: JSON document for structured data. + +## Native datatypes (advanced) + +Native database types are available for advanced use cases but are **not recommended** +for portable pipelines. Using native types will generate a warning. + +- `tinyint`, `smallint`, `int`, `bigint` (with optional `unsigned`) +- `float`, `double`, `real` +- `tinyblob`, `blob`, `mediumblob`, `longblob` +- `tinytext`, `mediumtext`, `longtext` (size variants) +- `time`, `timestamp`, `year` +- `mediumint`, `serial`, `int auto_increment` + +See the [storage types spec](storage-types-spec.md) for complete mappings. + +## Codec types (special datatypes) + +Codecs provide `encode()`/`decode()` semantics for complex data that doesn't +fit native database types. They are denoted with angle brackets: ``. + +### Storage mode: `@` convention + +The `@` character indicates **external storage** (object store vs database): + +- **No `@`**: Internal storage (database) - e.g., ``, `` +- **`@` present**: External storage (object store) - e.g., ``, `` +- **`@` alone**: Use default store - e.g., `` +- **`@name`**: Use named store - e.g., `` + +### Built-in codecs + +**Serialization types** - for Python objects: + +- ``: DataJoint's native serialization format for Python objects. Supports + NumPy arrays, dicts, lists, datetime objects, and nested structures. Stores in + database. Compatible with MATLAB. See [custom codecs](codecs.md) for details. + +- `` / ``: Like `` but stores externally with hash- + addressed deduplication. Use for large arrays that may be duplicated across rows. + +**File storage types** - for managed files: + +- `` / ``: Managed file and folder storage with path derived + from primary key. Supports Zarr, HDF5, and direct writes via fsspec. Returns + `ObjectRef` for lazy access. External only. See [object storage](object.md). + +- `` / ``: Hash-addressed storage for raw bytes with + MD5 deduplication. 
External only. Use via `` or `` rather than directly. + +**File attachment types** - for file transfer: + +- ``: File attachment stored in database with filename preserved. Similar + to email attachments. Good for small files (<16MB). See [attachments](attach.md). + +- `` / ``: Like `` but stores externally with + deduplication. Use for large files. + +**File reference types** - for external files: + +- ``: Reference to existing file in a configured store. No file + copying occurs. Returns `ObjectRef` for lazy access. External only. See [filepath](filepath.md). + +### User-defined codecs + +- ``: Define your own [custom codec](codecs.md) with + bidirectional conversion between Python objects and database storage. Use for + graphs, domain-specific objects, or custom data structures. + +## Core type aliases + +DataJoint provides convenient type aliases that map to standard database types. These aliases use familiar naming conventions from NumPy and other numerical computing -libraries, making table definitions more readable and explicit about data precision. - -| Alias | MySQL Type | Description | -|-------|------------|-------------| -| `bool` | `tinyint` | Boolean value (0 or 1) | -| `int8` | `tinyint` | 8-bit signed integer (-128 to 127) | -| `uint8` | `tinyint unsigned` | 8-bit unsigned integer (0 to 255) | -| `int16` | `smallint` | 16-bit signed integer (-32,768 to 32,767) | -| `uint16` | `smallint unsigned` | 16-bit unsigned integer (0 to 65,535) | -| `int32` | `int` | 32-bit signed integer | -| `uint32` | `int unsigned` | 32-bit unsigned integer | -| `int64` | `bigint` | 64-bit signed integer | -| `uint64` | `bigint unsigned` | 64-bit unsigned integer | -| `float32` | `float` | 32-bit single-precision floating point | -| `float64` | `double` | 64-bit double-precision floating point | +libraries, making table definitions more readable and portable across database backends. 
+ +| Alias | MySQL | PostgreSQL | Description | +|-------|-------|------------|-------------| +| `bool` | `TINYINT` | `BOOLEAN` | Boolean value (0 or 1) | +| `int8` | `TINYINT` | `SMALLINT` | 8-bit signed integer (-128 to 127) | +| `uint8` | `TINYINT UNSIGNED` | `SMALLINT` | 8-bit unsigned integer (0 to 255) | +| `int16` | `SMALLINT` | `SMALLINT` | 16-bit signed integer | +| `uint16` | `SMALLINT UNSIGNED` | `INTEGER` | 16-bit unsigned integer | +| `int32` | `INT` | `INTEGER` | 32-bit signed integer | +| `uint32` | `INT UNSIGNED` | `BIGINT` | 32-bit unsigned integer | +| `int64` | `BIGINT` | `BIGINT` | 64-bit signed integer | +| `uint64` | `BIGINT UNSIGNED` | `NUMERIC(20)` | 64-bit unsigned integer | +| `float32` | `FLOAT` | `REAL` | 32-bit single-precision float | +| `float64` | `DOUBLE` | `DOUBLE PRECISION` | 64-bit double-precision float | +| `bytes` | `LONGBLOB` | `BYTEA` | Raw binary data | Example usage: @@ -115,22 +156,24 @@ Example usage: @schema class Measurement(dj.Manual): definition = """ - measurement_id : int + measurement_id : int32 --- temperature : float32 # single-precision temperature reading precise_value : float64 # double-precision measurement sample_count : uint32 # unsigned 32-bit counter sensor_flags : uint8 # 8-bit status flags is_valid : bool # boolean flag + raw_data : bytes # raw binary data + processed : # serialized Python object + large_array : # external storage with deduplication """ ``` ## Datatypes not (yet) supported -- `binary` -- `text` -- `longtext` -- `bit` +- `binary(n)` / `varbinary(n)` - use `bytes` instead +- `bit(n)` - use `int` types with bitwise operations +- `set(...)` - use `json` for multiple selections For additional information about these datatypes, see http://dev.mysql.com/doc/refman/5.6/en/data-types.html diff --git a/docs/src/design/tables/codec-spec.md b/docs/src/design/tables/codec-spec.md new file mode 100644 index 00000000..a3eefa57 --- /dev/null +++ b/docs/src/design/tables/codec-spec.md @@ -0,0 +1,766 @@ +# Codec Specification + +This document specifies the DataJoint Codec API for creating custom attribute types +that extend DataJoint's native type system. + +## Overview + +Codecs define bidirectional conversion between Python objects and database storage. +They enable storing complex data types (graphs, models, custom formats) while +maintaining DataJoint's query capabilities. + +``` +┌─────────────────┐ ┌─────────────────┐ +│ Python Object │ ──── encode ────► │ Storage Type │ +│ (e.g. Graph) │ │ (e.g. 
bytes) │ +│ │ ◄─── decode ──── │ │ +└─────────────────┘ └─────────────────┘ +``` + +## Quick Start + +```python +import datajoint as dj +import networkx as nx + +class GraphCodec(dj.Codec): + """Store NetworkX graphs.""" + + name = "graph" # Use as in definitions + + def get_dtype(self, is_external: bool) -> str: + return "" # Delegate to blob for serialization + + def encode(self, graph, *, key=None, store_name=None): + return { + 'nodes': list(graph.nodes(data=True)), + 'edges': list(graph.edges(data=True)), + } + + def decode(self, stored, *, key=None): + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G + +# Use in table definition +@schema +class Connectivity(dj.Manual): + definition = ''' + conn_id : int + --- + network : + ''' +``` + +## The Codec Base Class + +All custom codecs inherit from `dj.Codec`: + +```python +class Codec(ABC): + """Base class for codec types.""" + + name: str | None = None # Required: unique identifier + + def get_dtype(self, is_external: bool) -> str: + """Return the storage dtype.""" + raise NotImplementedError + + @abstractmethod + def encode(self, value, *, key=None, store_name=None) -> Any: + """Encode Python value for storage.""" + ... + + @abstractmethod + def decode(self, stored, *, key=None) -> Any: + """Decode stored value back to Python.""" + ... + + def validate(self, value) -> None: + """Optional: validate value before encoding.""" + pass +``` + +## Required Components + +### 1. The `name` Attribute + +The `name` class attribute is a unique identifier used in table definitions with +`` syntax: + +```python +class MyCodec(dj.Codec): + name = "mycodec" # Use as in definitions +``` + +Naming conventions: +- Use lowercase with underscores: `spike_train`, `graph_embedding` +- Avoid generic names that might conflict: prefer `lab_model` over `model` +- Names must be unique across all registered codecs + +### 2. The `get_dtype()` Method + +Returns the underlying storage type. The `is_external` parameter indicates whether +the `@` modifier is present in the table definition: + +```python +def get_dtype(self, is_external: bool) -> str: + """ + Args: + is_external: True if @ modifier present (e.g., ) + + Returns: + - A core type: "bytes", "json", "varchar(N)", "int32", etc. + - Another codec: "", "", etc. + + Raises: + DataJointError: If external storage not supported but @ is present + """ +``` + +Examples: + +```python +# Simple: always store as bytes +def get_dtype(self, is_external: bool) -> str: + return "bytes" + +# Different behavior for internal/external +def get_dtype(self, is_external: bool) -> str: + return "" if is_external else "bytes" + +# External-only codec +def get_dtype(self, is_external: bool) -> str: + if not is_external: + raise DataJointError(" requires @ (external storage only)") + return "json" +``` + +### 3. The `encode()` Method + +Converts Python objects to the format expected by `get_dtype()`: + +```python +def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> Any: + """ + Args: + value: The Python object to store + key: Primary key values (for context-dependent encoding) + store_name: Target store name (for external storage) + + Returns: + Value in the format expected by get_dtype() + """ +``` + +### 4. 
The `decode()` Method + +Converts stored values back to Python objects: + +```python +def decode(self, stored: Any, *, key: dict | None = None) -> Any: + """ + Args: + stored: Data retrieved from storage + key: Primary key values (for context-dependent decoding) + + Returns: + The reconstructed Python object + """ +``` + +### 5. The `validate()` Method (Optional) + +Called automatically before `encode()` during INSERT operations: + +```python +def validate(self, value: Any) -> None: + """ + Args: + value: The value to validate + + Raises: + TypeError: If the value has an incompatible type + ValueError: If the value fails domain validation + """ + if not isinstance(value, ExpectedType): + raise TypeError(f"Expected ExpectedType, got {type(value).__name__}") +``` + +## Auto-Registration + +Codecs automatically register when their class is defined. No decorator needed: + +```python +# This codec is registered automatically when the class is defined +class MyCodec(dj.Codec): + name = "mycodec" + # ... +``` + +### Skipping Registration + +For abstract base classes that shouldn't be registered: + +```python +class BaseCodec(dj.Codec, register=False): + """Abstract base - not registered.""" + name = None # Or omit entirely + +class ConcreteCodec(BaseCodec): + name = "concrete" # This one IS registered + # ... +``` + +### Registration Timing + +Codecs are registered at class definition time. Ensure your codec classes are +imported before any table definitions that use them: + +```python +# myproject/codecs.py +class GraphCodec(dj.Codec): + name = "graph" + ... + +# myproject/tables.py +import myproject.codecs # Ensure codecs are registered + +@schema +class Networks(dj.Manual): + definition = ''' + id : int + --- + network : + ''' +``` + +## Codec Composition (Chaining) + +Codecs can delegate to other codecs by returning `` from `get_dtype()`. +This enables layered functionality: + +```python +class CompressedJsonCodec(dj.Codec): + """Compress JSON data with zlib.""" + + name = "zjson" + + def get_dtype(self, is_external: bool) -> str: + return "" # Delegate serialization to blob codec + + def encode(self, value, *, key=None, store_name=None): + import json, zlib + json_bytes = json.dumps(value).encode('utf-8') + return zlib.compress(json_bytes) + + def decode(self, stored, *, key=None): + import json, zlib + json_bytes = zlib.decompress(stored) + return json.loads(json_bytes.decode('utf-8')) +``` + +### How Chaining Works + +When DataJoint encounters ``: + +1. Calls `ZjsonCodec.get_dtype(is_external=False)` → returns `""` +2. Calls `BlobCodec.get_dtype(is_external=False)` → returns `"bytes"` +3. Final storage type is `bytes` (LONGBLOB in MySQL) + +During INSERT: +1. `ZjsonCodec.encode()` converts Python dict → compressed bytes +2. `BlobCodec.encode()` packs bytes → DJ blob format +3. Stored in database + +During FETCH: +1. Read from database +2. `BlobCodec.decode()` unpacks DJ blob → compressed bytes +3. `ZjsonCodec.decode()` decompresses → Python dict + +### Built-in Codec Chains + +DataJoint's built-in codecs form these chains: + +``` + → bytes (internal) + → json (external) + + → bytes (internal) + → json (external) + + → json (external only) + → json (external only) + → json (external only) +``` + +### Store Name Propagation + +When using external storage (`@`), the store name propagates through the chain: + +```python +# Table definition +data : + +# Resolution: +# 1. MyCodec.get_dtype(is_external=True) → "" +# 2. BlobCodec.get_dtype(is_external=True) → "" +# 3. 
HashCodec.get_dtype(is_external=True) → "json" +# 4. store_name="coldstore" passed to HashCodec.encode() +``` + +## Plugin System (Entry Points) + +Codecs can be distributed as installable packages using Python entry points. + +### Package Structure + +``` +dj-graph-codecs/ +├── pyproject.toml +└── src/ + └── dj_graph_codecs/ + ├── __init__.py + └── codecs.py +``` + +### pyproject.toml + +```toml +[project] +name = "dj-graph-codecs" +version = "1.0.0" +dependencies = ["datajoint>=2.0", "networkx"] + +[project.entry-points."datajoint.codecs"] +graph = "dj_graph_codecs.codecs:GraphCodec" +weighted_graph = "dj_graph_codecs.codecs:WeightedGraphCodec" +``` + +### Codec Implementation + +```python +# src/dj_graph_codecs/codecs.py +import datajoint as dj +import networkx as nx + +class GraphCodec(dj.Codec): + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return { + 'nodes': list(graph.nodes(data=True)), + 'edges': list(graph.edges(data=True)), + } + + def decode(self, stored, *, key=None): + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G + +class WeightedGraphCodec(dj.Codec): + name = "weighted_graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return { + 'nodes': list(graph.nodes(data=True)), + 'edges': [(u, v, d) for u, v, d in graph.edges(data=True)], + } + + def decode(self, stored, *, key=None): + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + for u, v, d in stored['edges']: + G.add_edge(u, v, **d) + return G +``` + +### Usage After Installation + +```bash +pip install dj-graph-codecs +``` + +```python +# Codecs are automatically discovered and available +@schema +class Networks(dj.Manual): + definition = ''' + network_id : int + --- + topology : + weights : + ''' +``` + +### Entry Point Discovery + +DataJoint loads entry points lazily when a codec is first requested: + +1. Check explicit registry (codecs defined in current process) +2. Load entry points from `datajoint.codecs` group +3. Also checks legacy `datajoint.types` group for compatibility + +## API Reference + +### Module Functions + +```python +import datajoint as dj + +# List all registered codec names +dj.list_codecs() # Returns: ['blob', 'hash', 'object', 'attach', 'filepath', ...] 
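+
+# A hedged usage sketch: check the registry before requesting a codec to avoid the
+# "Unknown codec" error ("graph" is the example codec defined in Quick Start above)
+if "graph" in dj.list_codecs():
+    graph_codec = dj.get_codec("graph")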
+ +# Get a codec instance by name +codec = dj.get_codec("blob") +codec = dj.get_codec("") # Angle brackets are optional +codec = dj.get_codec("") # Store parameter is stripped +``` + +### Internal Functions (for advanced use) + +```python +from datajoint.codecs import ( + is_codec_registered, # Check if codec exists + unregister_codec, # Remove codec (testing only) + resolve_dtype, # Resolve codec chain + parse_type_spec, # Parse "" syntax +) +``` + +## Built-in Codecs + +DataJoint provides these built-in codecs: + +| Codec | Internal | External | Description | +|-------|----------|----------|-------------| +| `` | `bytes` | `` | DataJoint serialization for Python objects | +| `` | N/A | `json` | Content-addressed storage with MD5 deduplication | +| `` | N/A | `json` | Path-addressed storage for files/folders | +| `` | `bytes` | `` | File attachments with filename preserved | +| `` | N/A | `json` | Reference to existing files in store | + +## Complete Examples + +### Example 1: Simple Serialization + +```python +import datajoint as dj +import numpy as np + +class SpikeTrainCodec(dj.Codec): + """Efficient storage for sparse spike timing data.""" + + name = "spike_train" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def validate(self, value): + if not isinstance(value, np.ndarray): + raise TypeError("Expected numpy array of spike times") + if value.ndim != 1: + raise ValueError("Spike train must be 1-dimensional") + if len(value) > 1 and not np.all(np.diff(value) >= 0): + raise ValueError("Spike times must be sorted") + + def encode(self, spike_times, *, key=None, store_name=None): + # Store as differences (smaller values, better compression) + return np.diff(spike_times, prepend=0).astype(np.float32) + + def decode(self, stored, *, key=None): + # Reconstruct original spike times + return np.cumsum(stored).astype(np.float64) +``` + +### Example 2: External Storage + +```python +import datajoint as dj +import pickle + +class ModelCodec(dj.Codec): + """Store ML models with optional external storage.""" + + name = "model" + + def get_dtype(self, is_external: bool) -> str: + # Use hash-addressed storage for large models + return "" if is_external else "" + + def encode(self, model, *, key=None, store_name=None): + return pickle.dumps(model, protocol=pickle.HIGHEST_PROTOCOL) + + def decode(self, stored, *, key=None): + return pickle.loads(stored) + + def validate(self, value): + # Check that model has required interface + if not hasattr(value, 'predict'): + raise TypeError("Model must have a predict() method") +``` + +Usage: +```python +@schema +class Models(dj.Manual): + definition = ''' + model_id : int + --- + small_model : # Internal storage + large_model : # External (default store) + archive_model : # External (specific store) + ''' +``` + +### Example 3: JSON with Schema Validation + +```python +import datajoint as dj +import jsonschema + +class ConfigCodec(dj.Codec): + """Store validated JSON configuration.""" + + name = "config" + + SCHEMA = { + "type": "object", + "properties": { + "version": {"type": "integer", "minimum": 1}, + "settings": {"type": "object"}, + }, + "required": ["version", "settings"], + } + + def get_dtype(self, is_external: bool) -> str: + return "json" + + def validate(self, value): + jsonschema.validate(value, self.SCHEMA) + + def encode(self, config, *, key=None, store_name=None): + return config # JSON type handles serialization + + def decode(self, stored, *, key=None): + return stored +``` + +### Example 4: Context-Dependent Encoding + 
+```python +import datajoint as dj + +class VersionedDataCodec(dj.Codec): + """Handle different encoding versions based on primary key.""" + + name = "versioned" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + version = key.get("schema_version", 1) if key else 1 + if version >= 2: + return {"v": 2, "data": self._encode_v2(value)} + return {"v": 1, "data": self._encode_v1(value)} + + def decode(self, stored, *, key=None): + version = stored.get("v", 1) + if version >= 2: + return self._decode_v2(stored["data"]) + return self._decode_v1(stored["data"]) + + def _encode_v1(self, value): + return value + + def _decode_v1(self, data): + return data + + def _encode_v2(self, value): + # New encoding format + return {"optimized": True, "payload": value} + + def _decode_v2(self, data): + return data["payload"] +``` + +### Example 5: External-Only Codec + +```python +import datajoint as dj +from pathlib import Path + +class ZarrCodec(dj.Codec): + """Store Zarr arrays in object storage.""" + + name = "zarr" + + def get_dtype(self, is_external: bool) -> str: + if not is_external: + raise dj.DataJointError(" requires @ (external storage only)") + return "" # Delegate to object storage + + def encode(self, value, *, key=None, store_name=None): + import zarr + import tempfile + + # If already a path, pass through + if isinstance(value, (str, Path)): + return str(value) + + # If zarr array, save to temp and return path + if isinstance(value, zarr.Array): + tmpdir = tempfile.mkdtemp() + path = Path(tmpdir) / "data.zarr" + zarr.save(path, value) + return str(path) + + raise TypeError(f"Expected zarr.Array or path, got {type(value)}") + + def decode(self, stored, *, key=None): + # ObjectCodec returns ObjectRef, use its fsmap for zarr + import zarr + return zarr.open(stored.fsmap, mode='r') +``` + +## Best Practices + +### 1. Choose Appropriate Storage Types + +| Data Type | Recommended `get_dtype()` | +|-----------|---------------------------| +| Python objects (dicts, arrays) | `""` | +| Large binary data | `""` (external) | +| Files/folders (Zarr, HDF5) | `""` (external) | +| Simple JSON-serializable | `"json"` | +| Short strings | `"varchar(N)"` | +| Numeric identifiers | `"int32"`, `"int64"` | + +### 2. Handle None Values + +Nullable columns may pass `None` to your codec: + +```python +def encode(self, value, *, key=None, store_name=None): + if value is None: + return None # Pass through for nullable columns + return self._actual_encode(value) + +def decode(self, stored, *, key=None): + if stored is None: + return None + return self._actual_decode(stored) +``` + +### 3. Test Round-Trips + +Always verify that `decode(encode(x)) == x`: + +```python +def test_codec_roundtrip(): + codec = MyCodec() + + test_values = [ + {"key": "value"}, + [1, 2, 3], + np.array([1.0, 2.0]), + ] + + for original in test_values: + encoded = codec.encode(original) + decoded = codec.decode(encoded) + assert decoded == original or np.array_equal(decoded, original) +``` + +### 4. Include Validation + +Catch errors early with `validate()`: + +```python +def validate(self, value): + if not isinstance(value, ExpectedType): + raise TypeError(f"Expected ExpectedType, got {type(value).__name__}") + + if not self._is_valid(value): + raise ValueError("Value fails validation constraints") +``` + +### 5. Document Expected Formats + +Include docstrings explaining input/output formats: + +```python +class MyCodec(dj.Codec): + """ + Store MyType objects. 
+ + Input format (encode): + MyType instance with attributes: x, y, z + + Storage format: + Dict with keys: 'x', 'y', 'z' + + Output format (decode): + MyType instance reconstructed from storage + """ +``` + +### 6. Consider Versioning + +If your encoding format might change: + +```python +def encode(self, value, *, key=None, store_name=None): + return { + "_version": 2, + "_data": self._encode_v2(value), + } + +def decode(self, stored, *, key=None): + version = stored.get("_version", 1) + data = stored.get("_data", stored) + + if version == 1: + return self._decode_v1(data) + return self._decode_v2(data) +``` + +## Error Handling + +### Common Errors + +| Error | Cause | Solution | +|-------|-------|----------| +| `Unknown codec: ` | Codec not registered | Import module defining codec before table definition | +| `Codec already registered` | Duplicate name | Use unique names; check for conflicts | +| ` requires @` | External-only codec used without @ | Add `@` or `@store` to attribute type | +| `Circular codec reference` | Codec chain forms a loop | Check `get_dtype()` return values | + +### Debugging + +```python +# Check what codecs are registered +print(dj.list_codecs()) + +# Inspect a codec +codec = dj.get_codec("mycodec") +print(f"Name: {codec.name}") +print(f"Internal dtype: {codec.get_dtype(is_external=False)}") +print(f"External dtype: {codec.get_dtype(is_external=True)}") + +# Resolve full chain +from datajoint.codecs import resolve_dtype +final_type, chain, store = resolve_dtype("") +print(f"Final storage type: {final_type}") +print(f"Codec chain: {[c.name for c in chain]}") +print(f"Store: {store}") +``` diff --git a/docs/src/design/tables/codecs.md b/docs/src/design/tables/codecs.md new file mode 100644 index 00000000..ccc9db1f --- /dev/null +++ b/docs/src/design/tables/codecs.md @@ -0,0 +1,553 @@ +# Custom Codecs + +In modern scientific research, data pipelines often involve complex workflows that +generate diverse data types. From high-dimensional imaging data to machine learning +models, these data types frequently exceed the basic representations supported by +traditional relational databases. For example: + ++ A lab working on neural connectivity might use graph objects to represent brain + networks. ++ Researchers processing raw imaging data might store custom objects for pre-processing + configurations. ++ Computational biologists might store fitted machine learning models or parameter + objects for downstream predictions. + +To handle these diverse needs, DataJoint provides the **Codec** system. It +enables researchers to store and retrieve complex, non-standard data types—like Python +objects or data structures—in a relational database while maintaining the +reproducibility, modularity, and query capabilities required for scientific workflows. + +## Overview + +Custom codecs define bidirectional conversion between: + +- **Python objects** (what your code works with) +- **Storage format** (what gets stored in the database) + +``` +┌─────────────────┐ encode() ┌─────────────────┐ +│ Python Object │ ───────────────► │ Storage Type │ +│ (e.g. Graph) │ │ (e.g. bytes) │ +└─────────────────┘ decode() └─────────────────┘ + ◄─────────────── +``` + +## Defining Custom Codecs + +Create a custom codec by subclassing `dj.Codec` and implementing the required +methods. 
Codecs auto-register when their class is defined: + +```python +import datajoint as dj +import networkx as nx + +class GraphCodec(dj.Codec): + """Custom codec for storing networkx graphs.""" + + # Required: unique identifier used in table definitions + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + """Return the underlying storage type.""" + return "" # Delegate to blob for serialization + + def encode(self, graph, *, key=None, store_name=None): + """Convert graph to storable format (called on INSERT).""" + return { + 'nodes': list(graph.nodes(data=True)), + 'edges': list(graph.edges(data=True)), + } + + def decode(self, stored, *, key=None): + """Convert stored data back to graph (called on FETCH).""" + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G +``` + +### Required Components + +| Component | Description | +|-----------|-------------| +| `name` | Unique identifier used in table definitions with `` syntax | +| `get_dtype(is_external)` | Returns underlying storage type (e.g., `""`, `"bytes"`, `"json"`) | +| `encode(value, *, key=None, store_name=None)` | Converts Python object to storable format | +| `decode(stored, *, key=None)` | Converts stored data back to Python object | + +### Using Custom Codecs in Tables + +Once defined, use the codec in table definitions with angle brackets: + +```python +@schema +class Connectivity(dj.Manual): + definition = """ + conn_id : int + --- + conn_graph = null : # Uses the GraphCodec we defined + """ +``` + +Insert and fetch work seamlessly: + +```python +import networkx as nx + +# Insert - encode() is called automatically +g = nx.lollipop_graph(4, 2) +Connectivity.insert1({"conn_id": 1, "conn_graph": g}) + +# Fetch - decode() is called automatically +result = (Connectivity & "conn_id = 1").fetch1("conn_graph") +assert isinstance(result, nx.Graph) +``` + +## Auto-Registration + +Codecs automatically register when their class is defined. No decorator needed: + +```python +# This codec is registered automatically when the class is defined +class MyCodec(dj.Codec): + name = "mycodec" + ... +``` + +### Skipping Registration + +For abstract base classes that shouldn't be registered: + +```python +class BaseCodec(dj.Codec, register=False): + """Abstract base - not registered.""" + name = None + +class ConcreteCodec(BaseCodec): + name = "concrete" # This one IS registered + ... +``` + +### Listing Registered Codecs + +```python +# List all registered codec names +print(dj.list_codecs()) +``` + +## Validation + +Add data validation by overriding the `validate()` method. It's called automatically +before `encode()` during INSERT operations: + +```python +class PositiveArrayCodec(dj.Codec): + name = "positive_array" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def validate(self, value): + """Ensure all values are positive.""" + import numpy as np + if not isinstance(value, np.ndarray): + raise TypeError(f"Expected numpy array, got {type(value).__name__}") + if np.any(value < 0): + raise ValueError("Array must contain only positive values") + + def encode(self, array, *, key=None, store_name=None): + return array + + def decode(self, stored, *, key=None): + return stored +``` + +## The `get_dtype()` Method + +The `get_dtype()` method specifies how data is stored. 
The `is_external` parameter +indicates whether the `@` modifier is present: + +```python +def get_dtype(self, is_external: bool) -> str: + """ + Args: + is_external: True if @ modifier present (e.g., ) + + Returns: + - A core type: "bytes", "json", "varchar(N)", etc. + - Another codec: "", "", etc. + """ +``` + +### Storage Type Options + +| Return Value | Use Case | Database Type | +|--------------|----------|---------------| +| `"bytes"` | Raw binary data | LONGBLOB | +| `"json"` | JSON-serializable data | JSON | +| `"varchar(N)"` | String representations | VARCHAR(N) | +| `"int32"` | Integer identifiers | INT | +| `""` | Serialized Python objects | Depends on internal/external | +| `""` | Large objects with deduplication | JSON (external only) | +| `""` | Chain to another codec | Varies | + +### External Storage + +For large data, use external storage with the `@` modifier: + +```python +class LargeArrayCodec(dj.Codec): + name = "large_array" + + def get_dtype(self, is_external: bool) -> str: + # Use hash-addressed external storage for large data + return "" if is_external else "" + + def encode(self, array, *, key=None, store_name=None): + import pickle + return pickle.dumps(array) + + def decode(self, stored, *, key=None): + import pickle + return pickle.loads(stored) +``` + +Usage: +```python +@schema +class Data(dj.Manual): + definition = ''' + id : int + --- + small_array : # Internal (in database) + big_array : # External (default store) + archive : # External (specific store) + ''' +``` + +## Codec Chaining + +Custom codecs can build on other codecs by returning `` from `get_dtype()`: + +```python +class CompressedGraphCodec(dj.Codec): + name = "compressed_graph" + + def get_dtype(self, is_external: bool) -> str: + return "" # Chain to the GraphCodec + + def encode(self, graph, *, key=None, store_name=None): + # Compress before passing to GraphCodec + return self._compress(graph) + + def decode(self, stored, *, key=None): + # GraphCodec's decode already ran, decompress result + return self._decompress(stored) +``` + +DataJoint automatically resolves the chain to find the final storage type. + +### How Chaining Works + +When DataJoint encounters ``: + +1. `CompressedGraphCodec.get_dtype()` returns `""` +2. `GraphCodec.get_dtype()` returns `""` +3. `BlobCodec.get_dtype()` returns `"bytes"` +4. Final storage type is `bytes` (LONGBLOB in MySQL) + +During INSERT, encoders run outer → inner: +1. `CompressedGraphCodec.encode()` → compressed graph +2. `GraphCodec.encode()` → edge list dict +3. `BlobCodec.encode()` → serialized bytes + +During FETCH, decoders run inner → outer (reverse order). + +## The Key Parameter + +The `key` parameter provides access to primary key values during encode/decode +operations. This is useful when the conversion depends on record context: + +```python +class ContextAwareCodec(dj.Codec): + name = "context_aware" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + if key and key.get("version") == 2: + return self._encode_v2(value) + return self._encode_v1(value) + + def decode(self, stored, *, key=None): + if key and key.get("version") == 2: + return self._decode_v2(stored) + return self._decode_v1(stored) +``` + +## Publishing Codecs as Packages + +Custom codecs can be distributed as installable packages using Python entry points. +This allows codecs to be automatically discovered when the package is installed. 
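+
+As a quick check after installing such a package, request the codec by name — a
+minimal sketch, assuming the hypothetical `dj-graph-codecs` package described in the
+sections below has been installed:
+
+```python
+import datajoint as dj
+
+# Discovered automatically from the package's "datajoint.codecs" entry points
+graph_codec = dj.get_codec("graph")
+print(graph_codec.name)  # "graph"
+```
+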
+ +### Package Structure + +``` +dj-graph-codecs/ +├── pyproject.toml +└── src/ + └── dj_graph_codecs/ + ├── __init__.py + └── codecs.py +``` + +### pyproject.toml + +```toml +[project] +name = "dj-graph-codecs" +version = "1.0.0" +dependencies = ["datajoint>=2.0", "networkx"] + +[project.entry-points."datajoint.codecs"] +graph = "dj_graph_codecs.codecs:GraphCodec" +weighted_graph = "dj_graph_codecs.codecs:WeightedGraphCodec" +``` + +### Codec Implementation + +```python +# src/dj_graph_codecs/codecs.py +import datajoint as dj +import networkx as nx + +class GraphCodec(dj.Codec): + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return { + 'nodes': list(graph.nodes(data=True)), + 'edges': list(graph.edges(data=True)), + } + + def decode(self, stored, *, key=None): + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G + +class WeightedGraphCodec(dj.Codec): + name = "weighted_graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return [(u, v, d) for u, v, d in graph.edges(data=True)] + + def decode(self, edges, *, key=None): + g = nx.Graph() + for u, v, d in edges: + g.add_edge(u, v, **d) + return g +``` + +### Usage After Installation + +```bash +pip install dj-graph-codecs +``` + +```python +# Codecs are automatically available after package installation +@schema +class MyTable(dj.Manual): + definition = """ + id : int + --- + network : + weighted_network : + """ +``` + +## Complete Example + +Here's a complete example demonstrating custom codecs for a neuroscience workflow: + +```python +import datajoint as dj +import numpy as np + +# Define custom codecs +class SpikeTrainCodec(dj.Codec): + """Efficient storage for sparse spike timing data.""" + name = "spike_train" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def validate(self, value): + if not isinstance(value, np.ndarray): + raise TypeError("Expected numpy array of spike times") + if value.ndim != 1: + raise ValueError("Spike train must be 1-dimensional") + if len(value) > 1 and not np.all(np.diff(value) >= 0): + raise ValueError("Spike times must be sorted") + + def encode(self, spike_times, *, key=None, store_name=None): + # Store as differences (smaller values, better compression) + return np.diff(spike_times, prepend=0).astype(np.float32) + + def decode(self, stored, *, key=None): + # Reconstruct original spike times + return np.cumsum(stored).astype(np.float64) + + +class WaveformCodec(dj.Codec): + """Storage for spike waveform templates with metadata.""" + name = "waveform" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, waveform_dict, *, key=None, store_name=None): + return { + "data": waveform_dict["data"].astype(np.float32), + "sampling_rate": waveform_dict["sampling_rate"], + "channel_ids": list(waveform_dict["channel_ids"]), + } + + def decode(self, stored, *, key=None): + return { + "data": stored["data"].astype(np.float64), + "sampling_rate": stored["sampling_rate"], + "channel_ids": np.array(stored["channel_ids"]), + } + + +# Create schema and tables +schema = dj.schema("ephys_analysis") + +@schema +class Unit(dj.Manual): + definition = """ + unit_id : int + --- + spike_times : + waveform : + quality : enum('good', 'mua', 'noise') + """ + + +# Usage +spike_times = np.array([0.1, 0.15, 0.23, 0.45, 0.67, 0.89]) +waveform = { + "data": np.random.randn(82, 
4), + "sampling_rate": 30000, + "channel_ids": [10, 11, 12, 13], +} + +Unit.insert1({ + "unit_id": 1, + "spike_times": spike_times, + "waveform": waveform, + "quality": "good", +}) + +# Fetch - automatically decoded +result = (Unit & "unit_id = 1").fetch1() +print(f"Spike times: {result['spike_times']}") +print(f"Waveform shape: {result['waveform']['data'].shape}") +``` + +## Built-in Codecs + +DataJoint includes several built-in codecs: + +### `` - DataJoint Blob Serialization + +The `` codec provides DataJoint's native binary serialization. It supports: + +- NumPy arrays (compatible with MATLAB) +- Python dicts, lists, tuples, sets +- datetime objects, Decimals, UUIDs +- Nested data structures +- Optional compression + +```python +@schema +class ProcessedData(dj.Manual): + definition = """ + data_id : int + --- + results : # Internal (serialized in database) + large_results : # External (hash-addressed storage) + """ +``` + +### `` - Content-Addressed Storage + +Stores raw bytes using MD5 content hashing with automatic deduplication. +External storage only. + +### `` - Path-Addressed Storage + +Stores files and folders at paths derived from primary keys. Ideal for +Zarr arrays, HDF5 files, and multi-file outputs. External storage only. + +### `` - File Attachments + +Stores files with filename preserved. Supports internal and external storage. + +### `` - File References + +References existing files in configured stores without copying. +External storage only. + +## Best Practices + +1. **Choose descriptive codec names**: Use lowercase with underscores (e.g., `spike_train`, `graph_embedding`) + +2. **Select appropriate storage types**: Use `` for complex objects, `json` for simple structures, `` or `` for large data + +3. **Add validation**: Use `validate()` to catch data errors early + +4. **Document your codecs**: Include docstrings explaining the expected input/output formats + +5. **Handle None values**: Your encode/decode methods may receive `None` for nullable attributes + +6. **Consider versioning**: If your encoding format might change, include version information + +7. **Test round-trips**: Ensure `decode(encode(x)) == x` for all valid inputs + +```python +def test_graph_codec_roundtrip(): + import networkx as nx + g = nx.lollipop_graph(4, 2) + codec = GraphCodec() + + encoded = codec.encode(g) + decoded = codec.decode(encoded) + + assert set(g.edges) == set(decoded.edges) +``` + +## API Reference + +```python +import datajoint as dj + +# List all registered codecs +dj.list_codecs() + +# Get a codec instance +codec = dj.get_codec("blob") +codec = dj.get_codec("") # Angle brackets optional +codec = dj.get_codec("") # Store parameter stripped +``` + +For the complete Codec API specification, see [Codec Specification](codec-spec.md). diff --git a/docs/src/design/tables/customtype.md b/docs/src/design/tables/customtype.md deleted file mode 100644 index 267e0420..00000000 --- a/docs/src/design/tables/customtype.md +++ /dev/null @@ -1,614 +0,0 @@ -# Custom Attribute Types - -In modern scientific research, data pipelines often involve complex workflows that -generate diverse data types. From high-dimensional imaging data to machine learning -models, these data types frequently exceed the basic representations supported by -traditional relational databases. For example: - -+ A lab working on neural connectivity might use graph objects to represent brain - networks. -+ Researchers processing raw imaging data might store custom objects for pre-processing - configurations. 
-+ Computational biologists might store fitted machine learning models or parameter - objects for downstream predictions. - -To handle these diverse needs, DataJoint provides the **AttributeType** system. It -enables researchers to store and retrieve complex, non-standard data types—like Python -objects or data structures—in a relational database while maintaining the -reproducibility, modularity, and query capabilities required for scientific workflows. - -## Overview - -Custom attribute types define bidirectional conversion between: - -- **Python objects** (what your code works with) -- **Storage format** (what gets stored in the database) - -``` -┌─────────────────┐ encode() ┌─────────────────┐ -│ Python Object │ ───────────────► │ Storage Type │ -│ (e.g. Graph) │ │ (e.g. blob) │ -└─────────────────┘ decode() └─────────────────┘ - ◄─────────────── -``` - -## Defining Custom Types - -Create a custom type by subclassing `dj.AttributeType` and implementing the required -methods: - -```python -import datajoint as dj -import networkx as nx - -@dj.register_type -class GraphType(dj.AttributeType): - """Custom type for storing networkx graphs.""" - - # Required: unique identifier used in table definitions - type_name = "graph" - - # Required: underlying DataJoint storage type - dtype = "longblob" - - def encode(self, graph, *, key=None): - """Convert graph to storable format (called on INSERT).""" - return list(graph.edges) - - def decode(self, edges, *, key=None): - """Convert stored data back to graph (called on FETCH).""" - return nx.Graph(edges) -``` - -### Required Components - -| Component | Description | -|-----------|-------------| -| `type_name` | Unique identifier used in table definitions with `` syntax | -| `dtype` | Underlying DataJoint type for storage (e.g., `"longblob"`, `"varchar(255)"`, `"json"`) | -| `encode(value, *, key=None)` | Converts Python object to storable format | -| `decode(stored, *, key=None)` | Converts stored data back to Python object | - -### Using Custom Types in Tables - -Once registered, use the type in table definitions with angle brackets: - -```python -@schema -class Connectivity(dj.Manual): - definition = """ - conn_id : int - --- - conn_graph = null : # Uses the GraphType we defined - """ -``` - -Insert and fetch work seamlessly: - -```python -import networkx as nx - -# Insert - encode() is called automatically -g = nx.lollipop_graph(4, 2) -Connectivity.insert1({"conn_id": 1, "conn_graph": g}) - -# Fetch - decode() is called automatically -result = (Connectivity & "conn_id = 1").fetch1("conn_graph") -assert isinstance(result, nx.Graph) -``` - -## Type Registration - -### Decorator Registration - -The simplest way to register a type is with the `@dj.register_type` decorator: - -```python -@dj.register_type -class MyType(dj.AttributeType): - type_name = "my_type" - ... -``` - -### Direct Registration - -You can also register types explicitly: - -```python -class MyType(dj.AttributeType): - type_name = "my_type" - ... - -dj.register_type(MyType) -``` - -### Listing Registered Types - -```python -# List all registered type names -print(dj.list_types()) -``` - -## Validation - -Add data validation by overriding the `validate()` method. 
It's called automatically -before `encode()` during INSERT operations: - -```python -@dj.register_type -class PositiveArrayType(dj.AttributeType): - type_name = "positive_array" - dtype = "longblob" - - def validate(self, value): - """Ensure all values are positive.""" - import numpy as np - if not isinstance(value, np.ndarray): - raise TypeError(f"Expected numpy array, got {type(value).__name__}") - if np.any(value < 0): - raise ValueError("Array must contain only positive values") - - def encode(self, array, *, key=None): - return array - - def decode(self, stored, *, key=None): - return stored -``` - -## Storage Types (dtype) - -The `dtype` property specifies how data is stored in the database: - -| dtype | Use Case | Stored Format | -|-------|----------|---------------| -| `"longblob"` | Complex Python objects, arrays | Serialized binary | -| `"blob"` | Smaller objects | Serialized binary | -| `"json"` | JSON-serializable data | JSON string | -| `"varchar(N)"` | String representations | Text | -| `"int"` | Integer identifiers | Integer | -| `"blob@store"` | Large objects in external storage | UUID reference | -| `"object"` | Files/folders in object storage | JSON metadata | -| `""` | Chain to another custom type | Varies | - -### External Storage - -For large data, use external blob storage: - -```python -@dj.register_type -class LargeArrayType(dj.AttributeType): - type_name = "large_array" - dtype = "blob@mystore" # Uses external store named "mystore" - - def encode(self, array, *, key=None): - return array - - def decode(self, stored, *, key=None): - return stored -``` - -## Type Chaining - -Custom types can build on other custom types by referencing them in `dtype`: - -```python -@dj.register_type -class CompressedGraphType(dj.AttributeType): - type_name = "compressed_graph" - dtype = "" # Chain to the GraphType - - def encode(self, graph, *, key=None): - # Compress before passing to GraphType - return self._compress(graph) - - def decode(self, stored, *, key=None): - # GraphType's decode already ran - return self._decompress(stored) -``` - -DataJoint automatically resolves the chain to find the final storage type. - -## The Key Parameter - -The `key` parameter provides access to primary key values during encode/decode -operations. This is useful when the conversion depends on record context: - -```python -@dj.register_type -class ContextAwareType(dj.AttributeType): - type_name = "context_aware" - dtype = "longblob" - - def encode(self, value, *, key=None): - if key and key.get("version") == 2: - return self._encode_v2(value) - return self._encode_v1(value) - - def decode(self, stored, *, key=None): - if key and key.get("version") == 2: - return self._decode_v2(stored) - return self._decode_v1(stored) -``` - -## Publishing Custom Types as Packages - -Custom types can be distributed as installable packages using Python entry points. -This allows types to be automatically discovered when the package is installed. 
- -### Package Structure - -``` -dj-graph-types/ -├── pyproject.toml -└── src/ - └── dj_graph_types/ - ├── __init__.py - └── types.py -``` - -### pyproject.toml - -```toml -[project] -name = "dj-graph-types" -version = "1.0.0" - -[project.entry-points."datajoint.types"] -graph = "dj_graph_types.types:GraphType" -weighted_graph = "dj_graph_types.types:WeightedGraphType" -``` - -### Type Implementation - -```python -# src/dj_graph_types/types.py -import datajoint as dj -import networkx as nx - -class GraphType(dj.AttributeType): - type_name = "graph" - dtype = "longblob" - - def encode(self, graph, *, key=None): - return list(graph.edges) - - def decode(self, edges, *, key=None): - return nx.Graph(edges) - -class WeightedGraphType(dj.AttributeType): - type_name = "weighted_graph" - dtype = "longblob" - - def encode(self, graph, *, key=None): - return [(u, v, d) for u, v, d in graph.edges(data=True)] - - def decode(self, edges, *, key=None): - g = nx.Graph() - g.add_weighted_edges_from(edges) - return g -``` - -### Usage After Installation - -```bash -pip install dj-graph-types -``` - -```python -# Types are automatically available after package installation -@schema -class MyTable(dj.Manual): - definition = """ - id : int - --- - network : - weighted_network : - """ -``` - -## Complete Example - -Here's a complete example demonstrating custom types for a neuroscience workflow: - -```python -import datajoint as dj -import numpy as np - -# Configure DataJoint -dj.config["database.host"] = "localhost" -dj.config["database.user"] = "root" -dj.config["database.password"] = "password" - -# Define custom types -@dj.register_type -class SpikeTrainType(dj.AttributeType): - """Efficient storage for sparse spike timing data.""" - type_name = "spike_train" - dtype = "longblob" - - def validate(self, value): - if not isinstance(value, np.ndarray): - raise TypeError("Expected numpy array of spike times") - if value.ndim != 1: - raise ValueError("Spike train must be 1-dimensional") - if not np.all(np.diff(value) >= 0): - raise ValueError("Spike times must be sorted") - - def encode(self, spike_times, *, key=None): - # Store as differences (smaller values, better compression) - return np.diff(spike_times, prepend=0).astype(np.float32) - - def decode(self, stored, *, key=None): - # Reconstruct original spike times - return np.cumsum(stored).astype(np.float64) - - -@dj.register_type -class WaveformType(dj.AttributeType): - """Storage for spike waveform templates with metadata.""" - type_name = "waveform" - dtype = "longblob" - - def encode(self, waveform_dict, *, key=None): - return { - "data": waveform_dict["data"].astype(np.float32), - "sampling_rate": waveform_dict["sampling_rate"], - "channel_ids": list(waveform_dict["channel_ids"]), - } - - def decode(self, stored, *, key=None): - return { - "data": stored["data"].astype(np.float64), - "sampling_rate": stored["sampling_rate"], - "channel_ids": np.array(stored["channel_ids"]), - } - - -# Create schema and tables -schema = dj.schema("ephys_analysis") - -@schema -class Unit(dj.Manual): - definition = """ - unit_id : int - --- - spike_times : - waveform : - quality : enum('good', 'mua', 'noise') - """ - - -# Usage -spike_times = np.array([0.1, 0.15, 0.23, 0.45, 0.67, 0.89]) -waveform = { - "data": np.random.randn(82, 4), - "sampling_rate": 30000, - "channel_ids": [10, 11, 12, 13], -} - -Unit.insert1({ - "unit_id": 1, - "spike_times": spike_times, - "waveform": waveform, - "quality": "good", -}) - -# Fetch - automatically decoded -result = (Unit & "unit_id = 
1").fetch1() -print(f"Spike times: {result['spike_times']}") -print(f"Waveform shape: {result['waveform']['data'].shape}") -``` - -## Migration from AttributeAdapter - -The `AttributeAdapter` class is deprecated. Migrate to `AttributeType`: - -### Before (deprecated) - -```python -class GraphAdapter(dj.AttributeAdapter): - attribute_type = "longblob" - - def put(self, obj): - return list(obj.edges) - - def get(self, value): - return nx.Graph(value) - -# Required context-based registration -graph = GraphAdapter() -schema = dj.schema("mydb", context={"graph": graph}) -``` - -### After (recommended) - -```python -@dj.register_type -class GraphType(dj.AttributeType): - type_name = "graph" - dtype = "longblob" - - def encode(self, obj, *, key=None): - return list(obj.edges) - - def decode(self, value, *, key=None): - return nx.Graph(value) - -# Global registration - no context needed -schema = dj.schema("mydb") -``` - -### Key Differences - -| Aspect | AttributeAdapter (deprecated) | AttributeType (recommended) | -|--------|-------------------------------|----------------------------| -| Methods | `put()` / `get()` | `encode()` / `decode()` | -| Storage type | `attribute_type` | `dtype` | -| Type name | Variable name in context | `type_name` property | -| Registration | Context dict per schema | Global `@register_type` decorator | -| Validation | Manual | Built-in `validate()` method | -| Distribution | Copy adapter code | Entry point packages | -| Key access | Not available | Optional `key` parameter | - -## Best Practices - -1. **Choose descriptive type names**: Use lowercase with underscores (e.g., `spike_train`, `graph_embedding`) - -2. **Select appropriate storage types**: Use `` for complex objects, `json` for simple structures, external storage for large data - -3. **Add validation**: Use `validate()` to catch data errors early - -4. **Document your types**: Include docstrings explaining the expected input/output formats - -5. **Handle None values**: Your encode/decode methods may receive `None` for nullable attributes - -6. **Consider versioning**: If your encoding format might change, include version information - -7. **Test round-trips**: Ensure `decode(encode(x)) == x` for all valid inputs - -```python -def test_graph_type_roundtrip(): - g = nx.lollipop_graph(4, 2) - t = GraphType() - - encoded = t.encode(g) - decoded = t.decode(encoded) - - assert set(g.edges) == set(decoded.edges) -``` - -## Built-in Types - -DataJoint includes a built-in type for explicit blob serialization: - -### `` - DataJoint Blob Serialization - -The `` type provides explicit control over DataJoint's native binary -serialization. It supports: - -- NumPy arrays (compatible with MATLAB) -- Python dicts, lists, tuples, sets -- datetime objects, Decimals, UUIDs -- Nested data structures -- Optional compression - -```python -@schema -class ProcessedData(dj.Manual): - definition = """ - data_id : int - --- - results : # Serialized Python objects - raw_bytes : longblob # Raw bytes (no serialization) - """ -``` - -#### When to Use `` - -- **Serialized data**: When storing Python objects (dicts, arrays, etc.) 
-
-- **New tables**: Prefer `<djblob>` for automatic serialization
-- **Migration**: Existing schemas with implicit serialization must migrate
-
-#### Raw Blob Behavior
-
-Plain `longblob` (and other blob variants) columns now store and return
-**raw bytes** without automatic serialization:
-
-```python
-@schema
-class RawData(dj.Manual):
-    definition = """
-    id : int
-    ---
-    raw_bytes : longblob     # Stores/returns raw bytes
-    serialized : <djblob>    # Stores Python objects with serialization
-    """
-
-# Raw bytes - no serialization
-RawData.insert1({"id": 1, "raw_bytes": b"raw binary data", "serialized": {"key": "value"}})
-
-row = (RawData & "id=1").fetch1()
-row["raw_bytes"]   # Returns: b"raw binary data"
-row["serialized"]  # Returns: {"key": "value"}
-```
-
-**Important**: Existing schemas that relied on implicit blob serialization
-must be migrated to `<djblob>` to preserve their behavior.
-
-## Schema Migration
-
-When upgrading existing schemas to use explicit type declarations, DataJoint
-provides migration utilities.
-
-### Analyzing Blob Columns
-
-```python
-import datajoint as dj
-
-schema = dj.schema("my_database")
-
-# Check migration status
-status = dj.migrate.check_migration_status(schema)
-print(f"Blob columns: {status['total_blob_columns']}")
-print(f"Already migrated: {status['migrated']}")
-print(f"Pending migration: {status['pending']}")
-```
-
-### Generating Migration SQL
-
-```python
-# Preview migration (dry run)
-result = dj.migrate.migrate_blob_columns(schema, dry_run=True)
-for sql in result['sql_statements']:
-    print(sql)
-```
-
-### Applying Migration
-
-```python
-# Apply migration
-result = dj.migrate.migrate_blob_columns(schema, dry_run=False)
-print(f"Migrated {result['migrated']} columns")
-```
-
-### Migration Details
-
-The migration updates MySQL column comments to include the type declaration.
-This is a **metadata-only** change - the actual blob data format is unchanged.
-
-All blob type variants are handled: `tinyblob`, `blob`, `mediumblob`, `longblob`.
-
-Before migration:
-- Column: `longblob` (or `blob`, `mediumblob`, etc.)
-- Comment: `user comment`
-- Behavior: Auto-serialization (implicit)
-
-After migration:
-- Column: `longblob` (unchanged)
-- Comment: `:<djblob>:user comment`
-- Behavior: Explicit serialization via `<djblob>`
-
-### Updating Table Definitions
-
-After database migration, update your Python table definitions for consistency:
-
-```python
-# Before
-class MyTable(dj.Manual):
-    definition = """
-    id : int
-    ---
-    data : longblob    # stored data
-    """
-
-# After
-class MyTable(dj.Manual):
-    definition = """
-    id : int
-    ---
-    data : <djblob>    # stored data
-    """
-```
-
-Both definitions work identically after migration, but using `<djblob>` makes
-the serialization explicit and documents the intended behavior.
diff --git a/docs/src/design/tables/object-type-spec.md b/docs/src/design/tables/object-type-spec.md
deleted file mode 100644
index 24fb2b4a..00000000
--- a/docs/src/design/tables/object-type-spec.md
+++ /dev/null
@@ -1,1473 +0,0 @@
-# Object Column Type Specification
-
-## Overview
-
-The `object` type introduces a new paradigm for managed file storage in DataJoint. Unlike existing `attach@store` and `filepath@store` types that reference named stores, the `object` type uses a **unified storage backend** that is tightly coupled with the schema and configured at the pipeline level.
-
-The `object` type supports both **files and folders**. Content is copied to storage at insert time, referenced via handle on fetch, and deleted when the record is deleted.
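-
-The sections below specify this behavior in detail. As a quick orientation, here is a
-minimal sketch of the full lifecycle (schema and attribute names are illustrative and a
-configured default store is assumed):
-
-```python
-import datajoint as dj
-
-schema = dj.schema("my_schema")
-
-@schema
-class Recording(dj.Manual):
-    definition = """
-    subject_id : int
-    session_id : int
-    ---
-    raw_data : object    # managed file or folder in the default store
-    """
-
-# Insert (copy mode): the local file is copied into managed storage
-Recording.insert1({"subject_id": 123, "session_id": 45,
-                   "raw_data": "/local/path/to/recording.dat"})
-
-# Fetch: returns an ObjectRef handle; content is read from the store, not copied locally
-ref = (Recording & {"subject_id": 123, "session_id": 45}).fetch1("raw_data")
-content = ref.read()
-
-# Delete: removing the record also removes the stored object
-(Recording & {"subject_id": 123, "session_id": 45}).delete()
-```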
-
-### Immutability Contract
-
-Objects stored via the `object` type are **immutable after finalization**. Users agree to:
-- **Insert (copy)**: Copy existing content to storage
-- **Insert (staged)**: Reserve path, write directly, then finalize
-- **Fetch**: Read content via handle (no modification)
-- **Delete**: Remove content when record is deleted (only way to remove)
-
-Once an object is **finalized** (either via copy-insert or staged-insert completion), users must not directly modify it in the object store.
-
-#### Two Insert Modes
-
-| Mode | Use Case | Workflow |
-|------|----------|----------|
-| **Copy** | Small files, existing data | Local file → copy to storage → insert record |
-| **Staged** | Large objects, Zarr, TileDB | Reserve path → write directly to storage → finalize record |
-
-### Augmented Schema vs External References
-
-The `object` type implements an **object-augmented schema (OAS)** — a paradigm where the object store becomes a true extension of the relational database:
-
-- **DataJoint fully controls** the object store lifecycle
-- **Only DataJoint writes** to the object store (users may have direct read access)
-- **Tight coupling** between database and object store
-- **Joint transaction management** on objects and database records
-- **Single backend per pipeline** — all managed objects live together
-
-This is fundamentally different from **external references**, where DataJoint merely points to user-managed data:
-
-| Aspect | `object` (Augmented Schema) | `filepath@store` (External Reference) |
-|--------|----------------------------|--------------------------------------|
-| **Ownership** | DataJoint owns the data | User owns the data |
-| **Writes** | Only via DataJoint | User writes directly |
-| **Deletion** | DataJoint deletes on record delete | User manages lifecycle |
-| **Multi-backend** | Single backend per pipeline | Multiple named stores |
-| **Use case** | Pipeline-generated data | Collaborator data, legacy assets |
-
-**When to use each:**
-
-- Use `object` for data that DataJoint should own and manage as part of the schema (e.g., processed results, derived datasets)
-- Use `filepath@store` for referencing externally-managed data across multiple backends (e.g., collaborator data on different cloud providers, legacy data that shouldn't be moved)
-
-## Storage Architecture
-
-### Default and Named Stores
-
-Each DataJoint pipeline has a **default storage backend** plus optional **named stores**, all configured in `datajoint.json`. DataJoint fully controls the path structure within each store.
- -```python -@schema -class Recording(dj.Manual): - definition = """ - subject_id : int - session_id : int - --- - raw_data : object # uses default store - published : object@public # uses 'public' named store - """ -``` - -**All stores follow OAS principles:** -- DataJoint owns the lifecycle (insert/delete/fetch as a unit) -- Same deterministic path structure (`project/schema/Table/objects/...`) -- Same access control alignment with database -- Each store has its own `datajoint_store.json` metadata file - -**Why support multiple stores?** -- Different access policies (private vs public buckets) -- Different storage tiers (hot vs cold storage) -- Organizational requirements (data sovereignty, compliance) - -**Why require explicit store configuration?** -- All stores must be registered for OAS semantics -- Credential management aligns with database access control (platform-managed) -- Orphan cleanup operates per-store with full knowledge of configured stores - -### Access Control Patterns - -The deterministic path structure (`project/schema/Table/objects/pk=val/...`) enables **prefix-based access control policies** on each storage backend. - -**Supported access control levels:** - -| Level | Implementation | Example Policy Prefix | -|-------|---------------|----------------------| -| Project-level | IAM/bucket policy | `my-bucket/my_project/*` | -| Schema-level | IAM/bucket policy | `my-bucket/my_project/lab_internal/*` | -| Table-level | IAM/bucket policy | `my-bucket/my_project/schema/SensitiveTable/*` | -| Row-level | Per-object ACL or signed URLs | Future enhancement | - -**Example: Private and public data in separate stores** - -``` -# Default store (private) -s3://internal-bucket/my_project/ -└── lab_schema/ - └── ProcessingResults/ - └── objects/... - -# Named 'public' store -s3://public-bucket/my_project/ -└── lab_schema/ - └── PublishedDatasets/ - └── objects/... -``` - -Alternatively, use prefix-based policies within a single bucket if preferred. - -**Row-level access control** (access to objects for specific primary key values) is not directly supported by object store policies. Future versions may address this via DataJoint-generated signed URLs that project database permissions onto object access. - -### Supported Backends - -DataJoint uses **[`fsspec`](https://filesystem-spec.readthedocs.io/en/latest/)** to ensure compatibility across multiple storage backends: - -- **Local storage** – POSIX-compliant file systems (e.g., NFS, SMB) -- **Cloud-based object storage** – Amazon S3, Google Cloud Storage, Azure Blob, MinIO - -## Project Structure - -A DataJoint project creates a structured hierarchical storage pattern: - -``` -📁 project_name/ -├── datajoint_store.json # store metadata (not client config) -├── 📁 schema_name/ -│ ├── 📁 Table1/ -│ │ ├── data.parquet # tabular data export (future) -│ │ └── 📁 objects/ # object storage for this table -│ │ ├── pk1=val1/pk2=val2/field1_token.dat -│ │ └── pk1=val1/pk2=val2/field2_token.zarr -│ ├── 📁 Table2/ -│ │ ├── data.parquet -│ │ └── 📁 objects/ -│ │ └── ... 
-``` - -### Object Storage Keys - -When using cloud object storage: - -``` -s3://bucket/project_name/schema_name/Table1/objects/pk1=val1/field_token.dat -s3://bucket/project_name/schema_name/Table1/objects/pk1=val1/field_token.zarr -``` - -## Configuration - -### Settings Structure - -Object storage is configured in `datajoint.json` using the existing settings system: - -```json -{ - "database.host": "localhost", - "database.user": "datajoint", - - "object_storage.project_name": "my_project", - "object_storage.protocol": "s3", - "object_storage.endpoint": "s3.amazonaws.com", - "object_storage.bucket": "my-bucket", - "object_storage.location": "my_project", - "object_storage.partition_pattern": "{subject_id}/{session_id}" -} -``` - -For local filesystem storage: - -```json -{ - "object_storage.project_name": "my_project", - "object_storage.protocol": "file", - "object_storage.location": "/data/my_project", - "object_storage.partition_pattern": "{subject_id}/{session_id}" -} -``` - -### Named Stores - -Additional stores can be defined using the `object_storage.stores.` prefix: - -```json -{ - "object_storage.project_name": "my_project", - "object_storage.protocol": "s3", - "object_storage.bucket": "internal-bucket", - "object_storage.location": "my_project", - - "object_storage.stores.public.protocol": "s3", - "object_storage.stores.public.bucket": "public-bucket", - "object_storage.stores.public.location": "my_project" -} -``` - -Named stores inherit `project_name` from the default configuration but can override all other settings. Use named stores with the `object@store_name` syntax: - -```python -@schema -class Dataset(dj.Manual): - definition = """ - dataset_id : int - --- - internal_data : object # default store (internal-bucket) - published_data : object@public # public store (public-bucket) - """ -``` - -Each named store: -- Must be explicitly configured (no ad-hoc URLs) -- Has its own `datajoint_store.json` metadata file -- Follows the same OAS lifecycle semantics as the default store -- Credentials are managed at the platform level, aligned with database access control - -### Settings Schema - -| Setting | Type | Required | Description | -|---------|------|----------|-------------| -| `object_storage.project_name` | string | Yes | Unique project identifier (must match store metadata) | -| `object_storage.protocol` | string | Yes | Storage backend: `file`, `s3`, `gcs`, `azure` | -| `object_storage.location` | string | Yes | Base path or bucket prefix | -| `object_storage.bucket` | string | For cloud | Bucket name (S3, GCS, Azure) | -| `object_storage.endpoint` | string | For S3 | S3 endpoint URL | -| `object_storage.partition_pattern` | string | No | Path pattern with `{attribute}` placeholders | -| `object_storage.token_length` | int | No | Random suffix length for filenames (default: 8, range: 4-16) | -| `object_storage.access_key` | string | For cloud | Access key (can use secrets file) | -| `object_storage.secret_key` | string | For cloud | Secret key (can use secrets file) | - -### Configuration Immutability - -**CRITICAL**: Once a project has been instantiated (i.e., `datajoint_store.json` has been created and the first object stored), the following settings MUST NOT be changed: - -- `object_storage.project_name` -- `object_storage.protocol` -- `object_storage.bucket` -- `object_storage.location` -- `object_storage.partition_pattern` - -Changing these settings after objects have been stored will result in **broken references**—existing paths stored in the database will no 
longer resolve to valid storage locations. - -DataJoint validates `project_name` against `datajoint_store.json` on connect, but administrators must ensure other settings remain consistent across all clients for the lifetime of the project. - -### Environment Variables - -Settings can be overridden via environment variables: - -```bash -DJ_OBJECT_STORAGE_PROTOCOL=s3 -DJ_OBJECT_STORAGE_BUCKET=my-bucket -DJ_OBJECT_STORAGE_LOCATION=my_project -DJ_OBJECT_STORAGE_PARTITION_PATTERN="subject{subject_id}/session{session_id}" -``` - -### Secrets - -Credentials can be stored in the `.secrets/` directory: - -``` -.secrets/ -├── object_storage.access_key -└── object_storage.secret_key -``` - -### Partition Pattern - -The partition pattern is configured **per pipeline** (one per settings file). Placeholders use `{attribute_name}` syntax and are replaced with primary key values. - -```json -{ - "object_storage.partition_pattern": "subject{subject_id}/session{session_id}" -} -``` - -**Example with partitioning:** - -``` -s3://my-bucket/my_project/subject_id=123/session_id=45/schema_name/Recording/objects/raw_data_Ax7bQ2kM.dat -``` - -If no partition pattern is specified, files are organized directly under `{location}/{schema}/{Table}/objects/`. - -## Store Metadata (`datajoint_store.json`) - -Each object store contains a metadata file at its root that identifies the store and enables verification by DataJoint clients. This file is named `datajoint_store.json` to distinguish it from client configuration files (`datajoint.json`). - -### Location - -``` -{location}/datajoint_store.json -``` - -For cloud storage: -``` -s3://bucket/my_project/datajoint_store.json -``` - -### Content - -```json -{ - "project_name": "my_project", - "created": "2025-01-15T10:30:00Z", - "format_version": "1.0", - "datajoint_version": "0.15.0", - "database_host": "db.example.com", - "database_name": "my_project_db" -} -``` - -### Schema - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `project_name` | string | Yes | Unique project identifier | -| `created` | string | Yes | ISO 8601 timestamp of store creation | -| `format_version` | string | Yes | Store format version for compatibility | -| `datajoint_version` | string | Yes | DataJoint version that created the store | -| `database_host` | string | No | Database server hostname (for bidirectional mapping) | -| `database_name` | string | No | Database name on the server (for bidirectional mapping) | - -The `database_name` field exists for DBMS platforms that support multiple databases on a single server (e.g., PostgreSQL, MySQL). The object storage configuration is **shared across all schemas comprising the pipeline**—it's a pipeline-level setting, not a per-schema setting. - -The optional `database_host` and `database_name` fields enable bidirectional mapping between object stores and databases: - -- **Forward**: Client settings → object store location -- **Reverse**: Object store metadata → originating database - -This is informational only—not enforced at runtime. Administrators can alternatively ensure unique `project_name` values across their namespace, and managed platforms may handle this mapping externally. - -### Store Initialization - -The store metadata file is created when the first `object` attribute is used: - -``` -┌─────────────────────────────────────────────────────────┐ -│ 1. Client attempts first file operation │ -├─────────────────────────────────────────────────────────┤ -│ 2. 
Check if datajoint_store.json exists │ -│ ├─ If exists: verify project_name matches │ -│ └─ If not: create with current project_name │ -├─────────────────────────────────────────────────────────┤ -│ 3. On mismatch: raise DataJointError │ -└─────────────────────────────────────────────────────────┘ -``` - -### Client Verification - -DataJoint performs a basic verification on connect to ensure store-database cohesion: - -1. **On connect**: Client reads `datajoint_store.json` from store -2. **Verify**: `project_name` in client settings matches store metadata -3. **On mismatch**: Raise `DataJointError` with descriptive message - -```python -# Example error -DataJointError: Object store project name mismatch. - Client configured: "project_a" - Store metadata: "project_b" - Ensure all clients use the same object_storage.project_name setting. -``` - -### Administrative Responsibility - -A 1:1 correspondence is assumed between: -- Database location + `project_name` in client settings -- Object store + `project_name` in store metadata - -DataJoint performs basic verification but does **not** enforce this mapping. Administrators are responsible for ensuring correct configuration across all clients. - -## Syntax - -```python -@schema -class Recording(dj.Manual): - definition = """ - subject_id : int - session_id : int - --- - raw_data : object # uses default store - processed : object # another object attribute (default store) - published : object@public # uses named 'public' store - """ -``` - -- `object` — uses the default storage backend -- `object@store_name` — uses a named store (must be configured in settings) - -## Database Storage - -The `object` type is stored as a `JSON` column in MySQL containing: - -**File in default store:** -```json -{ - "store": null, - "url": "s3://my-bucket/my_project/my_schema/Recording/objects/subject_id=123/session_id=45/raw_data_Ax7bQ2kM.dat", - "path": "my_schema/Recording/objects/subject_id=123/session_id=45/raw_data_Ax7bQ2kM.dat", - "size": 12345, - "hash": null, - "ext": ".dat", - "is_dir": false, - "timestamp": "2025-01-15T10:30:00Z", - "mime_type": "application/octet-stream" -} -``` - -**File in named store:** -```json -{ - "store": "public", - "url": "s3://public-bucket/my_project/my_schema/Dataset/objects/dataset_id=1/published_data_Bx8cD3kM.dat", - "path": "my_schema/Dataset/objects/dataset_id=1/published_data_Bx8cD3kM.dat", - "size": 12345, - "hash": "sha256:abcdef1234...", - "ext": ".dat", - "is_dir": false, - "timestamp": "2025-01-15T10:30:00Z", - "mime_type": "application/octet-stream" -} -``` - -**Folder example:** -```json -{ - "store": null, - "url": "s3://my-bucket/my_project/my_schema/Recording/objects/subject_id=123/session_id=45/raw_data_pL9nR4wE", - "path": "my_schema/Recording/objects/subject_id=123/session_id=45/raw_data_pL9nR4wE", - "size": 567890, - "hash": null, - "ext": null, - "is_dir": true, - "timestamp": "2025-01-15T10:30:00Z", - "item_count": 42 -} -``` - -**Zarr example (large dataset, metadata fields omitted for performance):** -```json -{ - "store": null, - "url": "s3://my-bucket/my_project/my_schema/Recording/objects/subject_id=123/session_id=45/neural_data_kM3nP2qR.zarr", - "path": "my_schema/Recording/objects/subject_id=123/session_id=45/neural_data_kM3nP2qR.zarr", - "size": null, - "hash": null, - "ext": ".zarr", - "is_dir": true, - "timestamp": "2025-01-15T10:30:00Z" -} -``` - -### JSON Schema - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `store` | string/null | Yes | Store name 
(e.g., `"public"`), or `null` for default store | -| `url` | string | Yes | Full URL including protocol and bucket (e.g., `s3://bucket/path`) | -| `path` | string | Yes | Relative path within store (excludes protocol/bucket, includes token) | -| `size` | integer/null | No | Total size in bytes (sum for folders), or null if not computed. See [Performance Considerations](#performance-considerations). | -| `hash` | string/null | Yes | Content hash with algorithm prefix, or null (default) | -| `ext` | string/null | Yes | File extension as tooling hint (e.g., `.dat`, `.zarr`) or null. See [Extension Field](#extension-field). | -| `is_dir` | boolean | Yes | True if stored content is a directory/key-prefix (e.g., Zarr store) | -| `timestamp` | string | Yes | ISO 8601 upload timestamp | -| `mime_type` | string | No | MIME type (files only, auto-detected from extension) | -| `item_count` | integer | No | Number of files (folders only), or null if not computed. See [Performance Considerations](#performance-considerations). | - -**Why both `url` and `path`?** -- `url`: Self-describing, enables cross-validation, robust to config changes -- `path`: Enables store name re-derivation at migration time, consistent structure across stores -- At migration, the store name can be derived by matching `url` against configured stores - -### Extension Field - -The `ext` field is a **tooling hint** that preserves the original file extension or provides a conventional suffix for directory-based formats. It is: - -- **Not a content-type declaration**: Unlike `mime_type`, it does not attempt to describe the internal content format -- **Useful for tooling**: Enables file browsers, IDEs, and other tools to display appropriate icons or suggest applications -- **Conventional for formats like Zarr**: The `.zarr` extension is recognized by the ecosystem even though a Zarr store contains mixed content (JSON metadata + binary chunks) - -For single files, `ext` is extracted from the source filename. For staged inserts (like Zarr), it can be explicitly provided. - -### Performance Considerations - -For large hierarchical data like Zarr stores, computing certain metadata can be expensive: - -- **`size`**: Requires listing all objects and summing their sizes. For stores with millions of chunks, this can take minutes or hours. -- **`item_count`**: Requires listing all objects. Same performance concern as `size`. -- **`hash`**: Requires reading all content. Explicitly not supported for staged inserts. - -**These fields are optional** and default to `null` for staged inserts. Users can explicitly request computation when needed, understanding the performance implications. - -### Content Hashing - -By default, **no content hash is computed** to avoid performance overhead for large objects. Storage backend integrity is trusted. 
- -**Explicit hash control** via insert kwarg: - -```python -# Default - no hash (fast) -Recording.insert1({..., "raw_data": "/path/to/large.dat"}) - -# Explicit hash request - user specifies algorithm -Recording.insert1({..., "raw_data": "/path/to/important.dat"}, hash="sha256") - -# Other supported algorithms -Recording.insert1({..., "raw_data": "/path/to/data.bin"}, hash="md5") -Recording.insert1({..., "raw_data": "/path/to/large.bin"}, hash="xxhash") # xxh3, faster for large files -``` - -**Design principles:** - -- **Explicit over implicit**: No automatic hashing based on file size or other heuristics -- **User controls the tradeoff**: User decides when integrity verification is worth the performance cost -- **Files only**: Hash applies to files, not folders (folders use manifests for integrity) -- **Staged inserts**: Hash is always `null` regardless of kwarg—data flows directly to storage without a local copy to hash - -Supported hash algorithms: `sha256`, `md5`, `xxhash` (xxh3, faster for large files) - -### Folder Manifests - -For folders (directories), a **manifest file** is created alongside the folder in the object store to enable integrity verification without computing content hashes: - -``` -raw_data_pL9nR4wE/ -raw_data_pL9nR4wE.manifest.json -``` - -**Manifest content:** -```json -{ - "files": [ - {"path": "file1.dat", "size": 1234}, - {"path": "subdir/file2.dat", "size": 5678}, - {"path": "subdir/file3.dat", "size": 91011} - ], - "total_size": 567890, - "item_count": 42, - "created": "2025-01-15T10:30:00Z" -} -``` - -**Design rationale:** -- Stored in object store (not database) to avoid bloating the JSON for folders with many files -- Placed alongside folder (not inside) to avoid polluting folder contents and interfering with tools like Zarr -- Enables self-contained verification without database access - -The manifest enables: -- Quick verification that all expected files exist -- Size validation without reading file contents -- Detection of missing or extra files - -### Filename Convention - -The stored filename is **always derived from the field name**: -- **Base name**: The attribute/field name (e.g., `raw_data`) -- **Extension**: Adopted from source file (copy insert) or optionally provided (staged insert) -- **Token**: Random suffix for collision avoidance - -``` -Stored filename = {field}_{token}{ext} - -Examples: - raw_data_Ax7bQ2kM.dat # file with .dat extension - raw_data_pL9nR4wE.zarr # Zarr directory with .zarr extension - raw_data_kM3nP2qR # directory without extension -``` - -This convention ensures: -- Consistent, predictable naming across all objects -- Field name visible in storage for easier debugging -- Extension preserved for MIME type detection and tooling compatibility - -## Path Generation - -Storage paths are **deterministically constructed** from record metadata, enabling bidirectional lookup between database records and stored files. - -### Path Components - -1. **Location** - from configuration (`object_storage.location`) -2. **Partition attributes** - promoted PK attributes (if `partition_pattern` configured) -3. **Schema name** - from the table's schema -4. **Table name** - the table class name -5. **Object directory** - `objects/` -6. **Primary key encoding** - remaining PK attributes and values -7. 
**Suffixed filename** - `{field}_{token}{ext}` - -### Path Template - -**Without partitioning:** -``` -{location}/{schema}/{Table}/objects/{pk_attr1}={pk_val1}/{pk_attr2}={pk_val2}/.../{field}_{token}{ext} -``` - -**With partitioning:** -``` -{location}/{partition_attr}={val}/.../schema/{Table}/objects/{remaining_pk_attrs}/.../{field}_{token}{ext} -``` - -Note: The `objects/` directory follows the table name, allowing each table folder to also contain tabular data exports (e.g., `data.parquet`) alongside the objects. - -### Partitioning - -The **partition pattern** allows promoting certain primary key attributes to the beginning of the path (after `location`). This organizes storage by high-level attributes like subject or experiment, enabling: -- Efficient data locality for related records -- Easier manual browsing of storage -- Potential for storage tiering by partition - -**Configuration:** -```json -{ - "object_storage.partition_pattern": "{subject_id}/{experiment_id}" -} -``` - -Partition attributes are extracted from the primary key and placed at the path root. Remaining PK attributes appear in their normal position. - -### Example Without Partitioning - -For a table: -```python -@schema -class Recording(dj.Manual): - definition = """ - subject_id : int - session_id : int - --- - raw_data : object - """ -``` - -Inserting `{"subject_id": 123, "session_id": 45, "raw_data": "/path/to/recording.dat"}` produces: - -``` -my_project/my_schema/Recording/objects/subject_id=123/session_id=45/raw_data_Ax7bQ2kM.dat -``` - -Note: The filename is `raw_data` (field name) with `.dat` extension (from source file). - -### Example With Partitioning - -With `partition_pattern = "{subject_id}"`: - -``` -my_project/subject_id=123/my_schema/Recording/objects/session_id=45/raw_data_Ax7bQ2kM.dat -``` - -The `subject_id` is promoted to the path root, grouping all files for subject 123 together regardless of schema or table. - -### Deterministic Bidirectional Mapping - -The path structure (excluding the random token) is fully deterministic: -- **Record → File**: Given a record's primary key, construct the path prefix to locate its file -- **File → Record**: Parse the path to extract schema, table, field, and primary key values - -This enables: -- Finding all files for a specific record -- Identifying which record a file belongs to -- Auditing storage against database contents - -The **random token** is stored in the JSON metadata to complete the full path. 
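-
-For illustration, a hypothetical helper (not DataJoint's implementation) that builds the
-record-to-path direction under these rules, assuming no partition pattern and omitting
-the configured `location` prefix:
-
-```python
-import secrets
-
-def object_path(schema, table, key, field, ext="", token_length=8):
-    """Return {schema}/{Table}/objects/{pk}={val}/.../{field}_{token}{ext}."""
-    pk_part = "/".join(f"{attr}={val}" for attr, val in key.items())
-    # URL-safe random token, as generated by secrets.token_urlsafe()
-    token = secrets.token_urlsafe(token_length)[:token_length]
-    return f"{schema}/{table}/objects/{pk_part}/{field}_{token}{ext}"
-
-# e.g. "my_schema/Recording/objects/subject_id=123/session_id=45/raw_data_Ax7bQ2kM.dat"
-print(object_path("my_schema", "Recording",
-                  {"subject_id": 123, "session_id": 45}, "raw_data", ".dat"))
-```
-
-Parsing such a path back into schema, table, field, and primary key values gives the
-file-to-record direction described above.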
- -### Primary Key Value Encoding - -Primary key values are encoded directly in paths when they are simple, path-safe types: -- **Integers**: Used directly (`subject_id=123`) -- **Dates**: ISO format (`session_date=2025-01-15`) -- **Timestamps**: ISO format with safe separators (`created=2025-01-15T10-30-00`) -- **Simple strings**: Used directly if path-safe (`experiment=baseline`) - -**Conversion to path-safe strings** is applied only when necessary: -- Strings containing `/`, `\`, or other path-unsafe characters -- Very long strings (truncated with hash suffix) -- Binary or complex types (hashed) - -```python -# Direct encoding (no conversion needed) -subject_id=123 -session_date=2025-01-15 -trial_type=control - -# Converted encoding (path-unsafe characters) -filename=my%2Ffile.dat # "/" encoded -description=a1b2c3d4_abc123 # long string truncated + hash -``` - -### Filename Collision Avoidance - -To prevent filename collisions, each stored object receives a **random token suffix** appended to the field name: - -``` -field: raw_data, source: recording.dat -stored: raw_data_Ax7bQ2kM.dat - -field: image, source: scan.tiff -stored: image_pL9nR4wE.tiff - -field: neural_data (staged with .zarr) -stored: neural_data_kM3nP2qR.zarr -``` - -#### Token Suffix Specification - -- **Alphabet**: URL-safe and filename-safe Base64 characters: `A-Z`, `a-z`, `0-9`, `-`, `_` -- **Length**: Configurable via `object_storage.token_length` (default: 8, range: 4-16) -- **Generation**: Cryptographically random using `secrets.token_urlsafe()` - -At 8 characters with 64 possible values per character: 64^8 = 281 trillion combinations. - -#### Rationale - -- Avoids collisions without requiring existence checks -- Field name visible in storage for easier debugging/auditing -- URL-safe for web-based access to cloud storage -- Filesystem-safe across all supported platforms - -### No Deduplication - -Each insert stores a separate copy of the file, even if identical content was previously stored. This ensures: -- Clear 1:1 relationship between records and files -- Simplified delete behavior -- No reference counting complexity - -## Insert Behavior - -At insert time, the `object` attribute accepts: - -1. **Local file path** (string or `Path`): Path to an existing local file (extension extracted) -2. **Local folder path** (string or `Path`): Path to an existing local directory -3. **Remote URL** (string): URL to remote file or folder (`s3://`, `gs://`, `az://`, `http://`, `https://`) -4. 
**Tuple of (ext, stream)**: File-like object with explicit extension - -```python -# From local file path - extension (.dat) extracted from source -Recording.insert1({ - "subject_id": 123, - "session_id": 45, - "raw_data": "/local/path/to/recording.dat" -}) -# Stored as: raw_data_Ax7bQ2kM.dat - -# From local folder path - no extension -Recording.insert1({ - "subject_id": 123, - "session_id": 45, - "raw_data": "/local/path/to/data_folder/" -}) -# Stored as: raw_data_pL9nR4wE/ - -# From remote URL - copies from source to managed storage -Recording.insert1({ - "subject_id": 123, - "session_id": 45, - "raw_data": "s3://source-bucket/path/to/data.dat" -}) -# Stored as: raw_data_kM3nP2qR.dat - -# From remote Zarr store (e.g., collaborator data on GCS) -Recording.insert1({ - "subject_id": 123, - "session_id": 45, - "neural_data": "gs://collaborator-bucket/shared/experiment.zarr" -}) -# Copied to managed storage as: neural_data_pL9nR4wE.zarr - -# From stream with explicit extension -with open("/local/path/data.bin", "rb") as f: - Recording.insert1({ - "subject_id": 123, - "session_id": 45, - "raw_data": (".bin", f) - }) -# Stored as: raw_data_xY8zW3vN.bin -``` - -### Remote URL Support - -Remote URLs are detected by protocol prefix and handled via fsspec: - -| Protocol | Example | Notes | -|----------|---------|-------| -| `s3://` | `s3://bucket/path/file.dat` | AWS S3, MinIO | -| `gs://` | `gs://bucket/path/file.dat` | Google Cloud Storage | -| `az://` | `az://container/path/file.dat` | Azure Blob Storage | -| `http://` | `http://server/path/file.dat` | HTTP (read-only source) | -| `https://` | `https://server/path/file.dat` | HTTPS (read-only source) | - -**Authentication**: Remote sources may require credentials. fsspec uses standard credential discovery (environment variables, config files, IAM roles). For cross-cloud copies, ensure credentials are configured for both source and destination. - -**Performance note**: For large remote-to-remote copies, data flows through the client. This is acceptable for most use cases but may be slow for very large datasets. Future optimizations could include server-side copy for same-provider transfers. - -### Insert Processing Steps - -1. Validate input (file/folder exists, stream is readable) -2. Generate deterministic storage path with random token -3. **Copy content to storage backend** via `fsspec` -4. **If copy fails: abort insert** (no database operation attempted) -5. Compute content hash if requested (optional, default: no hash) -6. Build JSON metadata structure -7. Execute database INSERT - -### Copy-First Semantics - -The file/folder is copied to storage **before** the database insert is attempted: -- If the copy fails, the insert does not proceed -- If the copy succeeds but the database insert fails, an orphaned file may remain -- Orphaned files are acceptable due to the random token (no collision with future inserts) - -### Staged Insert (Direct Write Mode) - -For large objects like Zarr arrays, copying from local storage is inefficient. **Staged insert** allows writing directly to the destination. - -#### Why a Separate Method? - -Staged insert uses a dedicated `staged_insert1` method rather than co-opting `insert1` because: - -1. **Explicit over implicit** - Staged inserts have fundamentally different semantics (file creation happens during context, commit on exit). A separate method makes this explicit. -2. **Backward compatibility** - `insert1` returns `None` and doesn't support context manager protocol. 
Changing this could break existing code. -3. **Clear error handling** - The context manager semantics (success = commit, exception = rollback) are obvious with `staged_insert1`. -4. **Type safety** - The staged context exposes `.store()` for object fields. A dedicated method can return a properly-typed `StagedInsert` object. - -**Staged inserts are limited to `insert1`** (one row at a time). Multi-row inserts are not supported for staged operations. - -#### Basic Usage - -```python -# Stage an insert with direct object storage writes -with Recording.staged_insert1 as staged: - # Set primary key values - staged.rec['subject_id'] = 123 - staged.rec['session_id'] = 45 - - # Create object storage directly using store() - # Extension is optional - .zarr is conventional for Zarr arrays - z = zarr.open(staged.store('raw_data', '.zarr'), mode='w', shape=(10000, 10000), dtype='f4') - z[:] = compute_large_array() - - # Assign the created object to the record - staged.rec['raw_data'] = z - -# On successful exit: metadata computed, record inserted -# On exception: storage cleaned up, no record inserted -# Stored as: raw_data_Ax7bQ2kM.zarr -``` - -#### StagedInsert Interface - -```python -class StagedInsert: - """Context manager for staged insert operations.""" - - rec: dict[str, Any] # Record dict for setting attribute values - - def store(self, field: str, ext: str = "") -> fsspec.FSMap: - """ - Get an FSMap store for direct writes to an object field. - - Args: - field: Name of the object attribute - ext: Optional extension (e.g., ".zarr", ".hdf5") - - Returns: - fsspec.FSMap suitable for Zarr/xarray - """ - ... - - def open(self, field: str, ext: str = "", mode: str = "wb") -> IO: - """ - Open a file for direct writes to an object field. - - Args: - field: Name of the object attribute - ext: Optional extension (e.g., ".bin", ".dat") - mode: File mode (default: "wb") - - Returns: - File-like object for writing - """ - ... - - @property - def fs(self) -> fsspec.AbstractFileSystem: - """Return fsspec filesystem for advanced operations.""" - ... -``` - -#### Staged Insert Flow - -``` -┌─────────────────────────────────────────────────────────┐ -│ 1. Enter context: create StagedInsert with empty rec │ -├─────────────────────────────────────────────────────────┤ -│ 2. User sets primary key values in staged.rec │ -├─────────────────────────────────────────────────────────┤ -│ 3. User calls store()/open() to get storage handles │ -│ - Path reserved with random token on first call │ -│ - User writes data directly via fsspec │ -├─────────────────────────────────────────────────────────┤ -│ 4. User assigns object references to staged.rec │ -├─────────────────────────────────────────────────────────┤ -│ 5. On context exit (success): │ -│ - Build metadata (size/item_count optional, no hash) │ -│ - Execute database INSERT │ -├─────────────────────────────────────────────────────────┤ -│ 6. 
On context exit (exception): │ -│ - Delete any written data │ -│ - Re-raise exception │ -└─────────────────────────────────────────────────────────┘ -``` - -#### Zarr Example - -```python -import zarr -import numpy as np - -# Create a large Zarr array directly in object storage -with Recording.staged_insert1 as staged: - staged.rec['subject_id'] = 123 - staged.rec['session_id'] = 45 - - # Create Zarr hierarchy directly in object storage - # .zarr extension is optional but conventional - root = zarr.open(staged.store('neural_data', '.zarr'), mode='w') - root.create_dataset('timestamps', data=np.arange(1000000)) - root.create_dataset('waveforms', shape=(1000000, 82), chunks=(10000, 82)) - - # Write in chunks (streaming from acquisition) - for i, chunk in enumerate(data_stream): - root['waveforms'][i*10000:(i+1)*10000] = chunk - - # Assign to record - staged.rec['neural_data'] = root - -# Record automatically inserted with computed metadata -# Stored as: neural_data_kM3nP2qR.zarr -``` - -#### Multiple Object Fields - -```python -with Recording.staged_insert1 as staged: - staged.rec['subject_id'] = 123 - staged.rec['session_id'] = 45 - - # Write multiple object fields - extension optional - raw = zarr.open(staged.store('raw_data', '.zarr'), mode='w', shape=(1000, 1000)) - raw[:] = raw_array - - processed = zarr.open(staged.store('processed', '.zarr'), mode='w', shape=(100, 100)) - processed[:] = processed_array - - staged.rec['raw_data'] = raw - staged.rec['processed'] = processed - -# Stored as: raw_data_Ax7bQ2kM.zarr, processed_pL9nR4wE.zarr -``` - -#### Comparison: Copy vs Staged Insert - -| Aspect | Copy Insert | Staged Insert | -|--------|-------------|---------------| -| Data location | Must exist locally first | Written directly to storage | -| Efficiency | Copy overhead | No copy needed | -| Use case | Small files, existing data | Large arrays, streaming data | -| Cleanup on failure | Orphan possible | Cleaned up | -| API | `insert1({..., "field": path})` | `staged_insert1` context manager | -| Multi-row | Supported | Not supported (insert1 only) | - -## Transaction Handling - -Since storage backends don't support distributed transactions with MySQL, DataJoint uses a **copy-first** strategy. - -### Insert Transaction Flow - -``` -┌─────────────────────────────────────────────────────────┐ -│ 1. Validate input and generate storage path with token │ -├─────────────────────────────────────────────────────────┤ -│ 2. Copy file/folder to storage backend │ -│ └─ On failure: raise error, INSERT not attempted │ -├─────────────────────────────────────────────────────────┤ -│ 3. Compute hash (if requested) and build JSON metadata │ -├─────────────────────────────────────────────────────────┤ -│ 4. Execute database INSERT │ -│ └─ On failure: orphaned file remains (acceptable) │ -├─────────────────────────────────────────────────────────┤ -│ 5. Commit database transaction │ -│ └─ On failure: orphaned file remains (acceptable) │ -└─────────────────────────────────────────────────────────┘ -``` - -### Failure Scenarios - -| Scenario | Result | Orphaned File? 
| -|----------|--------|----------------| -| Copy fails | Clean failure, no INSERT | No | -| DB insert fails | Error raised | Yes (acceptable) | -| DB commit fails | Error raised | Yes (acceptable) | - -### Orphaned Files - -Orphaned files (files in storage without corresponding database records) may accumulate due to: -- Failed database inserts after successful copy -- Process crashes -- Network failures - -**This is acceptable** because: -- Random tokens prevent collisions with future inserts -- Orphaned files can be identified by comparing storage contents with database records -- A separate cleanup procedure removes orphaned files during maintenance - -### Orphan Cleanup Procedure - -Orphan cleanup is a **separate maintenance operation** provided via the `schema.object_storage` utility object. Cleanup operates **per-store**, iterating through all configured stores. - -```python -# Maintenance utility methods (not a hidden table) -schema.object_storage.find_orphaned(grace_period_minutes=30) # List orphaned files (all stores) -schema.object_storage.find_orphaned(store="public") # List orphaned files (specific store) -schema.object_storage.cleanup_orphaned(dry_run=True) # Delete orphaned files -schema.object_storage.verify_integrity() # Check all objects exist -schema.object_storage.stats() # Storage usage statistics -``` - -**Note**: `schema.object_storage` is a utility object, not a hidden table. Unlike `attach@store` which uses `~external_*` tables, the `object` type stores all metadata inline in JSON columns and has no hidden tables. - -**Efficient listing for Zarr and large stores:** - -For stores with Zarr arrays (potentially millions of chunk objects), cleanup uses **delimiter-based listing** to enumerate only root object names, not individual chunks: - -```python -# S3 API with delimiter - lists "directories" only -response = s3.list_objects_v2( - Bucket=bucket, - Prefix='project/schema/Table/objects/', - Delimiter='/' -) -# Returns: ['neural_data_kM3nP2qR.zarr/', 'raw_data_Ax7bQ2kM.dat'] -# NOT millions of individual chunk keys -``` - -Orphan deletion uses recursive delete to remove entire Zarr stores efficiently. - -**Grace period for in-flight inserts:** - -While random tokens prevent filename collisions, there's a race condition with in-flight inserts: - -1. Insert starts: file copied to storage with token `Ax7bQ2kM` -2. Orphan cleanup runs: lists storage, queries DB for references -3. File `Ax7bQ2kM` not yet in DB (INSERT not committed) -4. Cleanup identifies it as orphan and deletes it -5. Insert commits: DB now references deleted file! - -**Solution**: The `grace_period_minutes` parameter (default: 30) excludes files created within that window, assuming they are in-flight inserts. - -**Important considerations:** -- Cleanup enumerates all configured stores (default + named) -- Uses delimiter-based listing for efficiency with Zarr stores -- Grace period handles race conditions—cleanup is safe to run anytime -- `dry_run=True` previews deletions before execution -- Compares storage contents against JSON metadata in table columns - -## Fetch Behavior - -On fetch, the `object` type returns a **handle** (`ObjectRef` object) to the stored content. **The file is not copied** - all operations access the storage backend directly. 
- -```python -record = Recording.fetch1() -file_ref = record["raw_data"] - -# Access metadata (no I/O) -print(file_ref.path) # Full storage path -print(file_ref.size) # File size in bytes -print(file_ref.hash) # Content hash (if computed) or None -print(file_ref.ext) # File extension (e.g., ".dat") or None -print(file_ref.is_dir) # True if stored content is a folder - -# Read content directly from storage backend -content = file_ref.read() # Returns bytes (files only) - -# Open as fsspec file object (files only) -with file_ref.open() as f: - data = f.read() - -# List contents (folders only) -contents = file_ref.listdir() # Returns list of relative paths - -# Access specific file within folder -with file_ref.open("subdir/file.dat") as f: - data = f.read() -``` - -### No Automatic Download - -Unlike `attach@store`, the `object` type does **not** automatically download content to a local path. Users access content directly through the `ObjectRef` handle, which streams from the storage backend. - -For local copies, users explicitly download: - -```python -# Download file to local destination -local_path = file_ref.download("/local/destination/") - -# Download specific file from folder -local_path = file_ref.download("/local/destination/", "subdir/file.dat") -``` - -## Implementation Components - -### 1. Settings Extension (`settings.py`) - -New `ObjectStorageSettings` class: - -```python -class ObjectStorageSettings(BaseSettings): - """Object storage configuration for object columns.""" - - model_config = SettingsConfigDict( - env_prefix="DJ_OBJECT_STORAGE_", - extra="forbid", - validate_assignment=True, - ) - - project_name: str | None = None # Must match store metadata - protocol: Literal["object", "s3", "gcs", "azure"] | None = None - location: str | None = None - bucket: str | None = None - endpoint: str | None = None - partition_pattern: str | None = None - token_length: int = Field(default=8, ge=4, le=16) - access_key: str | None = None - secret_key: SecretStr | None = None -``` - -Add to main `Config` class: - -```python -object_storage: ObjectStorageSettings = Field(default_factory=ObjectStorageSettings) -``` - -### 2. Storage Backend (`storage.py` - new module) - -- `StorageBackend` class wrapping `fsspec` -- Methods: `upload()`, `download()`, `open()`, `exists()`, `delete()` -- Path generation with partition support - -### 3. Type Declaration (`declare.py`) - -- Add `OBJECT` pattern: `object$` -- Add to `SPECIAL_TYPES` -- Substitute to `JSON` type in database - -### 4. Schema Integration (`schemas.py`) - -- Associate storage backend with schema -- Validate storage configuration on schema creation - -### 5. Insert Processing (`table.py`) - -- New `__process_file_attribute()` method -- Path generation using primary key and partition pattern -- Upload via storage backend - -### 6. Fetch Processing (`fetch.py`) - -- New `ObjectRef` class -- Lazy loading from storage backend -- Metadata access interface - -### 7. ObjectRef Class (`objectref.py` - new module) - -```python -@dataclass -class ObjectRef: - """Handle to a file or folder stored in the pipeline's storage backend.""" - - path: str - size: int - hash: str | None # content hash (if computed) or None - ext: str | None # file extension (e.g., ".dat") or None - is_dir: bool - timestamp: datetime - mime_type: str | None # files only, derived from ext - item_count: int | None # folders only - _backend: StorageBackend # internal reference - - # fsspec access (for Zarr, xarray, etc.) 
- @property - def fs(self) -> fsspec.AbstractFileSystem: - """Return fsspec filesystem for direct access.""" - ... - - @property - def store(self) -> fsspec.FSMap: - """Return FSMap suitable for Zarr/xarray.""" - ... - - @property - def full_path(self) -> str: - """Return full URI (e.g., 's3://bucket/path').""" - ... - - # File operations - def read(self) -> bytes: ... - def open(self, subpath: str | None = None, mode: str = "rb") -> IO: ... - - # Folder operations - def listdir(self, subpath: str = "") -> list[str]: ... - def walk(self) -> Iterator[tuple[str, list[str], list[str]]]: ... - - # Common operations - def download(self, destination: Path | str, subpath: str | None = None) -> Path: ... - def exists(self, subpath: str | None = None) -> bool: ... - - # Integrity verification - def verify(self) -> bool: - """ - Verify object integrity. - - For files: checks size matches, and hash if available. - For folders: validates manifest (all files exist with correct sizes). - - Returns True if valid, raises IntegrityError with details if not. - """ - ... -``` - -#### fsspec Integration - -The `ObjectRef` provides direct fsspec access for integration with array libraries: - -```python -import zarr -import xarray as xr - -record = Recording.fetch1() -obj_ref = record["raw_data"] - -# Direct Zarr access -z = zarr.open(obj_ref.store, mode='r') -print(z.shape) - -# Direct xarray access -ds = xr.open_zarr(obj_ref.store) - -# Use fsspec filesystem directly -fs = obj_ref.fs -files = fs.ls(obj_ref.full_path) -``` - -## Dependencies - -New dependency: `fsspec` with optional backend-specific packages: - -```toml -[project.dependencies] -fsspec = ">=2023.1.0" - -[project.optional-dependencies] -s3 = ["s3fs"] -gcs = ["gcsfs"] -azure = ["adlfs"] -``` - -### Storage Access Architecture - -The `object` type separates **data declaration** (the JSON metadata stored in the database) from **storage access** (the library used to read/write objects): - -- **Data declaration**: The JSON schema (path, size, hash, etc.) is a pure data structure with no library dependencies -- **Storage access**: Currently uses `fsspec` as the default accessor, but the architecture supports alternative backends - -**Why this matters**: While `fsspec` is a mature and widely-used library, alternatives like [`obstore`](https://github.com/developmentseed/obstore) offer performance advantages for certain workloads. By keeping the data model independent of the access library, future versions can support pluggable storage accessors without schema changes. - -**Current implementation**: The `ObjectRef` class provides fsspec-based accessors (`fs`, `store` properties). 
Future versions may add: -- Pluggable accessor interface -- Alternative backends (obstore, custom implementations) -- Backend selection per-operation or per-configuration - -## Comparison with Existing Types - -| Feature | `attach@store` | `filepath@store` | `object` | -|---------|----------------|------------------|--------| -| Store config | Per-attribute | Per-attribute | Per-pipeline | -| Path control | DataJoint | User-managed | DataJoint | -| DB column | binary(16) UUID | binary(16) UUID | JSON | -| Hidden tables | Yes (external) | Yes (external) | **No** | -| Backend | File/S3 only | File/S3 only | fsspec (any) | -| Partitioning | Hash-based | User path | Configurable | -| Metadata storage | External table | External table | Inline JSON | -| Deduplication | By content | By path | None | - -### No Hidden Tables - -A key architectural difference: the `object` type does **not** use hidden external tables. - -The legacy `attach@store` and `filepath@store` types store a UUID in the table column and maintain a separate hidden `~external_*` table containing: -- File paths/keys -- Checksums -- Size information -- Reference counts - -The `object` type eliminates this complexity by storing all metadata **inline** in the JSON column. This provides: -- **Simpler schema** - no hidden tables to manage or migrate -- **Self-contained records** - all information in one place -- **Easier debugging** - metadata visible directly in queries -- **No reference counting** - each record owns its object exclusively - -### Legacy Type Deprecation - -The existing `attach@store` and `filepath@store` types will be: -- **Maintained** for backward compatibility with existing pipelines -- **Deprecated** in future releases with migration warnings -- **Eventually removed** after a transition period - -New pipelines should use the `object` type exclusively. - -## Delete Behavior - -When a record with a `object` attribute is deleted: - -1. **Database delete executes first** (within transaction) -2. **File delete is attempted** after successful DB commit -3. **File delete is best-effort** - the delete transaction succeeds even if file deletion fails - -### Delete Transaction Flow - -``` -┌─────────────────────────────────────────────────────────┐ -│ 1. Execute database DELETE │ -├─────────────────────────────────────────────────────────┤ -│ 2. Commit database transaction │ -│ └─ On failure: rollback, files unchanged │ -├─────────────────────────────────────────────────────────┤ -│ 3. Issue delete command to storage backend │ -│ └─ On failure: log warning, transaction still OK │ -└─────────────────────────────────────────────────────────┘ -``` - -### Stale Files - -If file deletion fails (network error, permissions, etc.), **stale files** may remain in storage. This is acceptable because: -- The database record is already deleted (authoritative source) -- Random tokens prevent any collision with future inserts -- Stale files can be identified and cleaned via orphan detection utilities - -### No Reference Counting - -Each record owns its file exclusively. There is no deduplication or reference counting, simplifying delete logic. - -## Migration Path - -- Existing `attach@store` and `filepath@store` remain unchanged -- `object` type is additive - new tables only -- Future: Migration utilities to convert existing external storage - -## Zarr, TileDB, and Large Hierarchical Data - -The `object` type is designed with **chunk-based formats** like Zarr and TileDB in mind. 
These formats store each chunk as a separate object, which maps naturally to object storage. - -### Staged Insert Compatibility - -**Staged inserts work with formats that support chunk-based writes:** - -| Format | Staged Insert | Why | -|--------|---------------|-----| -| **Zarr** | ✅ Yes | Each chunk is a separate object | -| **TileDB** | ✅ Yes | Fragment-based storage maps to objects | -| **HDF5** | ❌ No | Single monolithic file requires random-access seek/write | - -**HDF5 limitation**: HDF5 files have internal B-tree structures that require random-access modifications. Object storage only supports full object PUT/GET operations, not partial updates. For HDF5, use **copy insert**: - -```python -# HDF5: Write locally, then copy to object storage -import h5py -import tempfile - -with tempfile.NamedTemporaryFile(suffix='.h5', delete=False) as f: - with h5py.File(f.name, 'w') as h5: - h5.create_dataset('data', data=large_array) - Recording.insert1({..., 'data_file': f.name}) -``` - -For cloud-native workflows with large arrays, **Zarr is recommended** over HDF5. - -### Recommended Workflow (Zarr) - -For large Zarr stores, use **staged insert** to write directly to object storage: - -```python -import zarr -import numpy as np - -with Recording.staged_insert1 as staged: - staged.rec['subject_id'] = 123 - staged.rec['session_id'] = 45 - - # Write Zarr directly to object storage - store = staged.store('neural_data', '.zarr') - root = zarr.open(store, mode='w') - root.create_dataset('spikes', shape=(1000000, 384), chunks=(10000, 384), dtype='f4') - - # Stream data without local intermediate copy - for i, chunk in enumerate(acquisition_stream): - root['spikes'][i*10000:(i+1)*10000] = chunk - - staged.rec['neural_data'] = root - -# Metadata recorded, no expensive size/hash computation -``` - -### JSON Metadata for Zarr - -For Zarr stores, the recommended JSON metadata omits expensive-to-compute fields: - -```json -{ - "path": "schema/Recording/objects/subject_id=123/session_id=45/neural_data_kM3nP2qR.zarr", - "size": null, - "hash": null, - "ext": ".zarr", - "is_dir": true, - "timestamp": "2025-01-15T10:30:00Z" -} -``` - -**Field notes for Zarr:** -- **`size`**: Set to `null` - computing total size requires listing all chunks -- **`hash`**: Always `null` for staged inserts - no merkle tree support currently -- **`ext`**: Set to `.zarr` as a conventional tooling hint -- **`is_dir`**: Set to `true` - Zarr stores are key prefixes (logical directories) -- **`item_count`**: Omitted - counting chunks is expensive and rarely useful -- **`mime_type`**: Omitted - Zarr contains mixed content types - -### Reading Zarr Data - -The `ObjectRef` provides direct access compatible with Zarr and xarray: - -```python -record = Recording.fetch1() -obj_ref = record['neural_data'] - -# Direct Zarr access -z = zarr.open(obj_ref.store, mode='r') -print(z['spikes'].shape) - -# xarray integration -ds = xr.open_zarr(obj_ref.store) - -# Dask integration (lazy loading) -import dask.array as da -arr = da.from_zarr(obj_ref.store, component='spikes') -``` - -### Performance Tips - -1. **Use chunked writes**: Write data in chunks that match your Zarr chunk size -2. **Avoid metadata computation**: Let `size` and `item_count` default to `null` -3. **Use appropriate chunk sizes**: Balance between too many small files (overhead) and too few large files (memory) -4. 
**Consider compression**: Configure Zarr compression (blosc, zstd) to reduce storage costs - -## Future Extensions - -- [ ] Compression options (gzip, lz4, zstd) -- [ ] Encryption at rest -- [ ] Versioning support -- [ ] Streaming upload for large files -- [ ] Checksum verification on fetch -- [ ] Cache layer for frequently accessed files -- [ ] Parallel upload/download for large folders -- [ ] Row-level object access control via signed URLs (project DB permissions onto object access) diff --git a/docs/src/design/tables/storage-types-implementation-plan.md b/docs/src/design/tables/storage-types-implementation-plan.md deleted file mode 100644 index c15a2292..00000000 --- a/docs/src/design/tables/storage-types-implementation-plan.md +++ /dev/null @@ -1,464 +0,0 @@ -# DataJoint Storage Types Redesign - Implementation Plan - -## Executive Summary - -This plan describes the implementation of a three-layer type architecture for DataJoint, building on the existing `AttributeType` infrastructure. The key goals are: - -1. Establish a clean three-layer type hierarchy (native DB types, core DataJoint types, AttributeTypes) -2. Implement content-addressed storage with deduplication -3. Provide composable, user-friendly types (``, ``, ``) -4. Enable project-wide garbage collection -5. Maintain backward compatibility with existing schemas - ---- - -## Implementation Status - -| Phase | Status | Notes | -|-------|--------|-------| -| Phase 1: Core Type System | ✅ Complete | CORE_TYPES dict, type chain resolution | -| Phase 2: Content-Addressed Storage | ✅ Complete | Function-based, no registry table | -| Phase 2b: Path-Addressed Storage | ✅ Complete | ObjectType for files/folders | -| Phase 3: User-Defined AttributeTypes | ✅ Complete | AttachType, XAttachType, FilepathType | -| Phase 4: Insert and Fetch Integration | ✅ Complete | Type chain encoding/decoding | -| Phase 5: Garbage Collection | ✅ Complete | gc.py with scan/collect functions | -| Phase 6: Documentation and Testing | ✅ Complete | Test files for all new types | - ---- - -## Phase 1: Core Type System Foundation ✅ - -**Status**: Complete - -### Implemented in `src/datajoint/declare.py`: - -```python -CORE_TYPES = { - # Numeric types (aliased to native SQL) - "float32": (r"float32$", "float"), - "float64": (r"float64$", "double"), - "int64": (r"int64$", "bigint"), - "uint64": (r"uint64$", "bigint unsigned"), - "int32": (r"int32$", "int"), - "uint32": (r"uint32$", "int unsigned"), - "int16": (r"int16$", "smallint"), - "uint16": (r"uint16$", "smallint unsigned"), - "int8": (r"int8$", "tinyint"), - "uint8": (r"uint8$", "tinyint unsigned"), - "bool": (r"bool$", "tinyint"), - # UUID (stored as binary) - "uuid": (r"uuid$", "binary(16)"), - # JSON - "json": (r"json$", None), - # Binary (blob maps to longblob) - "blob": (r"blob$", "longblob"), - # Temporal - "date": (r"date$", None), - "datetime": (r"datetime$", None), - # String types (with parameters) - "char": (r"char\s*\(\d+\)$", None), - "varchar": (r"varchar\s*\(\d+\)$", None), - # Enumeration - "enum": (r"enum\s*\(.+\)$", None), -} -``` - -### Key changes: -- Removed `SERIALIZED_TYPES`, `BINARY_TYPES`, `EXTERNAL_TYPES` -- Core types are recorded in field comments with `:type:` syntax -- Non-standard native types pass through with warning -- `parse_type_spec()` handles `` syntax -- `resolve_dtype()` returns `(final_dtype, type_chain, store_name)` tuple - ---- - -## Phase 2: Content-Addressed Storage ✅ - -**Status**: Complete (simplified design) - -### Design Decision: Functions vs Class - -The 
original plan proposed a `ContentRegistry` class with a database table. We implemented a simpler, stateless approach using functions in `content_registry.py`: - -**Why functions instead of a registry table:** -1. **Simpler** - No additional database table to manage -2. **Decoupled** - Content storage is independent of any schema -3. **GC by scanning** - Garbage collection scans tables for references rather than maintaining reference counts -4. **Less state** - No synchronization issues between registry and actual storage - -### Implemented in `src/datajoint/content_registry.py`: - -```python -def compute_content_hash(data: bytes) -> str: - """Compute SHA256 hash of content.""" - return hashlib.sha256(data).hexdigest() - -def build_content_path(content_hash: str) -> str: - """Build path: _content/{hash[:2]}/{hash[2:4]}/{hash}""" - return f"_content/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}" - -def put_content(data: bytes, store_name: str | None = None) -> dict[str, Any]: - """Store content with deduplication. Returns {hash, store, size}.""" - ... - -def get_content(content_hash: str, store_name: str | None = None) -> bytes: - """Retrieve content by hash with verification.""" - ... - -def content_exists(content_hash: str, store_name: str | None = None) -> bool: - """Check if content exists.""" - ... - -def delete_content(content_hash: str, store_name: str | None = None) -> bool: - """Delete content (use with caution - verify no references first).""" - ... -``` - -### Implemented AttributeTypes in `src/datajoint/attribute_type.py`: - -```python -class ContentType(AttributeType): - """Content-addressed storage. Stores bytes, returns JSON metadata.""" - type_name = "content" - dtype = "json" - - def encode(self, value: bytes, *, key=None, store_name=None) -> dict: - return put_content(value, store_name=store_name) - - def decode(self, stored: dict, *, key=None) -> bytes: - return get_content(stored["hash"], store_name=stored.get("store")) - - -class XBlobType(AttributeType): - """External serialized blob using content-addressed storage.""" - type_name = "xblob" - dtype = "" # Composition - - def encode(self, value, *, key=None, store_name=None) -> bytes: - return blob.pack(value, compress=True) - - def decode(self, stored: bytes, *, key=None) -> Any: - return blob.unpack(stored, squeeze=False) -``` - ---- - -## Phase 2b: Path-Addressed Storage (ObjectType) ✅ - -**Status**: Complete - -### Design: Path vs Content Addressing - -| Aspect | `` | `` | -|--------|-------------|------------| -| Addressing | Content-hash (SHA256) | Path (from primary key) | -| Path Format | `_content/{hash[:2]}/{hash[2:4]}/{hash}` | `{schema}/{table}/objects/{pk}/{field}_{token}.ext` | -| Deduplication | Yes (same content = same hash) | No (each row has unique path) | -| Deletion | GC when unreferenced | Deleted with row | -| Use case | Serialized blobs, attachments | Zarr, HDF5, folders | - -### Implemented in `src/datajoint/builtin_types.py`: - -```python -@register_type -class ObjectType(AttributeType): - """Path-addressed storage for files and folders.""" - type_name = "object" - dtype = "json" - - def encode(self, value, *, key=None, store_name=None) -> dict: - # value can be bytes, str path, or Path - # key contains _schema, _table, _field for path construction - path, token = build_object_path(schema, table, field, primary_key, ext) - backend.put_buffer(content, path) # or put_folder for directories - return { - "path": path, - "store": store_name, - "size": size, - "ext": ext, - "is_dir": is_dir, - 
"timestamp": timestamp.isoformat(), - } - - def decode(self, stored: dict, *, key=None) -> ObjectRef: - # Returns lazy handle for fsspec-based access - return ObjectRef.from_json(stored, backend=backend) -``` - -### ObjectRef Features: -- `ref.path` - Storage path -- `ref.read()` - Read file content -- `ref.open()` - Open as file handle -- `ref.fsmap` - For `zarr.open(ref.fsmap)` -- `ref.download(dest)` - Download to local path -- `ref.listdir()` / `ref.walk()` - For directories - -### Staged Insert for Object Types - -For large objects like Zarr arrays, `staged_insert.py` provides direct writes to storage: - -```python -with table.staged_insert1 as staged: - # 1. Set primary key first (required for path construction) - staged.rec['subject_id'] = 123 - staged.rec['session_id'] = 45 - - # 2. Get storage handle and write directly - z = zarr.open(staged.store('raw_data', '.zarr'), mode='w') - z[:] = large_array - - # 3. On exit: metadata computed, record inserted -``` - -**Flow comparison:** - -| Normal Insert | Staged Insert | -|--------------|---------------| -| `ObjectType.encode()` uploads content | Direct writes via `staged.store()` | -| Single operation | Two-phase: write then finalize | -| Good for files/folders | Ideal for Zarr, HDF5, streaming | - -Both produce the same JSON metadata format compatible with `ObjectRef.from_json()`. - -**Key methods:** -- `staged.store(field, ext)` - Returns `FSMap` for Zarr/xarray -- `staged.open(field, ext)` - Returns file handle for binary writes -- `staged.fs` - Raw fsspec filesystem access - ---- - -## Phase 3: User-Defined AttributeTypes ✅ - -**Status**: Complete - -All built-in AttributeTypes are implemented in `src/datajoint/builtin_types.py`. - -### 3.1 XBlobType ✅ -External serialized blobs using content-addressed storage. Composes with ``. - -### 3.2 AttachType ✅ - -```python -@register_type -class AttachType(AttributeType): - """Internal file attachment stored in database.""" - type_name = "attach" - dtype = "longblob" - - def encode(self, filepath, *, key=None, store_name=None) -> bytes: - # Returns: filename (UTF-8) + null byte + contents - return path.name.encode("utf-8") + b"\x00" + path.read_bytes() - - def decode(self, stored, *, key=None) -> str: - # Extracts to download_path, returns local path - ... 
-``` - -### 3.3 XAttachType ✅ - -```python -@register_type -class XAttachType(AttributeType): - """External file attachment using content-addressed storage.""" - type_name = "xattach" - dtype = "" # Composes with ContentType - # Same encode/decode as AttachType, but stored externally with dedup -``` - -### 3.4 FilepathType ✅ - -```python -@register_type -class FilepathType(AttributeType): - """Reference to existing file in configured store.""" - type_name = "filepath" - dtype = "json" - - def encode(self, relative_path: str, *, key=None, store_name=None) -> dict: - # Verifies file exists, returns metadata - return {'path': path, 'store': store_name, 'size': size, ...} - - def decode(self, stored: dict, *, key=None) -> ObjectRef: - # Returns ObjectRef for lazy access - return ObjectRef.from_json(stored, backend=backend) -``` - -### Type Comparison - -| Type | Storage | Copies File | Dedup | Returns | -|------|---------|-------------|-------|---------| -| `` | Database | Yes | No | Local path | -| `` | External | Yes | Yes | Local path | -| `` | Reference | No | N/A | ObjectRef | -| `` | External | Yes | No | ObjectRef | - ---- - -## Phase 4: Insert and Fetch Integration ✅ - -**Status**: Complete - -### Updated in `src/datajoint/table.py`: - -```python -def __make_placeholder(self, name, value, ...): - if attr.adapter: - from .attribute_type import resolve_dtype - attr.adapter.validate(value) - _, type_chain, resolved_store = resolve_dtype( - f"<{attr.adapter.type_name}>", store_name=attr.store - ) - # Apply type chain: outermost → innermost - for attr_type in type_chain: - try: - value = attr_type.encode(value, key=None, store_name=resolved_store) - except TypeError: - value = attr_type.encode(value, key=None) -``` - -### Updated in `src/datajoint/fetch.py`: - -```python -def _get(connection, attr, data, squeeze, download_path): - if attr.adapter: - from .attribute_type import resolve_dtype - final_dtype, type_chain, _ = resolve_dtype(f"<{attr.adapter.type_name}>") - - # Parse JSON if final storage is JSON - if final_dtype.lower() == "json": - data = json.loads(data) - - # Apply type chain in reverse: innermost → outermost - for attr_type in reversed(type_chain): - data = attr_type.decode(data, key=None) - - return data -``` - ---- - -## Phase 5: Garbage Collection ✅ - -**Status**: Complete - -### Implemented in `src/datajoint/gc.py`: - -```python -import datajoint as dj - -# Scan schemas and find orphaned content/objects -stats = dj.gc.scan(schema1, schema2, store_name='mystore') - -# Remove orphaned content/objects (dry_run=False to actually delete) -stats = dj.gc.collect(schema1, schema2, store_name='mystore', dry_run=True) - -# Format statistics for display -print(dj.gc.format_stats(stats)) -``` - -**Supported storage patterns:** - -1. **Content-Addressed Storage** (``, ``, ``): - - Stored at: `_content/{hash[:2]}/{hash[2:4]}/{hash}` - - Referenced by SHA256 hash in JSON metadata - -2. 
**Path-Addressed Storage** (``): - - Stored at: `{schema}/{table}/objects/{pk}/{field}_{token}/` - - Referenced by path in JSON metadata - -**Key functions:** -- `scan_references(*schemas, store_name=None)` - Scan tables for content hashes -- `scan_object_references(*schemas, store_name=None)` - Scan tables for object paths -- `list_stored_content(store_name=None)` - List all content in `_content/` directory -- `list_stored_objects(store_name=None)` - List all objects in `*/objects/` directories -- `scan(*schemas, store_name=None)` - Find orphaned content/objects without deleting -- `collect(*schemas, store_name=None, dry_run=True)` - Remove orphaned content/objects -- `delete_object(path, store_name=None)` - Delete an object directory -- `format_stats(stats)` - Human-readable statistics output - -**GC Process:** -1. Scan all tables in provided schemas for content-type and object-type attributes -2. Extract content hashes and object paths from JSON metadata columns -3. Scan storage for all stored content (`_content/`) and objects (`*/objects/`) -4. Compute orphaned = stored - referenced (for both types) -5. Optionally delete orphaned items (when `dry_run=False`) - ---- - -## Phase 6: Documentation and Testing ✅ - -**Status**: Complete - -### Test files created: -- `tests/test_content_storage.py` - Content-addressed storage functions -- `tests/test_type_composition.py` - Type chain encoding/decoding -- `tests/test_gc.py` - Garbage collection -- `tests/test_attribute_type.py` - AttributeType registry and DJBlobType (existing) - ---- - -## Critical Files Summary - -| File | Status | Changes | -|------|--------|---------| -| `src/datajoint/declare.py` | ✅ | CORE_TYPES, type parsing, SQL generation | -| `src/datajoint/heading.py` | ✅ | Simplified attribute properties | -| `src/datajoint/attribute_type.py` | ✅ | Base class, registry, type chain resolution | -| `src/datajoint/builtin_types.py` | ✅ | DJBlobType, ContentType, XBlobType, ObjectType | -| `src/datajoint/content_registry.py` | ✅ | Content storage functions (put, get, delete) | -| `src/datajoint/objectref.py` | ✅ | ObjectRef handle for lazy access | -| `src/datajoint/storage.py` | ✅ | StorageBackend, build_object_path | -| `src/datajoint/staged_insert.py` | ✅ | Staged insert for direct object storage writes | -| `src/datajoint/table.py` | ✅ | Type chain encoding on insert | -| `src/datajoint/fetch.py` | ✅ | Type chain decoding on fetch | -| `src/datajoint/blob.py` | ✅ | Removed bypass_serialization | -| `src/datajoint/gc.py` | ✅ | Garbage collection for content storage | -| `tests/test_content_storage.py` | ✅ | Tests for content_registry.py | -| `tests/test_type_composition.py` | ✅ | Tests for type chain encoding/decoding | -| `tests/test_gc.py` | ✅ | Tests for garbage collection | - ---- - -## Removed/Deprecated - -- `src/datajoint/attribute_adapter.py` - Deleted (hard deprecated) -- `bypass_serialization` flag in `blob.py` - Removed -- `database` field in Attribute - Removed (unused) -- `SERIALIZED_TYPES`, `BINARY_TYPES`, `EXTERNAL_TYPES` - Removed -- `is_attachment`, `is_filepath`, `is_object`, `is_external` flags - Removed - ---- - -## Architecture Summary - -``` -Layer 3: AttributeTypes (user-facing) - , , , , , , - ↓ encode() / ↑ decode() - -Layer 2: Core DataJoint Types - float32, int64, uuid, json, blob, varchar(n), etc. - ↓ SQL mapping - -Layer 1: Native Database Types - FLOAT, BIGINT, BINARY(16), JSON, LONGBLOB, VARCHAR(n), etc. 
-``` - -**Built-in AttributeTypes:** -``` - → longblob (internal serialized storage) - → longblob (internal file attachment) - → json (path-addressed, for Zarr/HDF5/folders) - → json (reference to existing file in store) - → json (content-addressed with deduplication) - → json (external serialized with dedup) - → json (external file attachment with dedup) -``` - -**Type Composition Example:** -``` - → json (in DB) - -Insert: Python object → blob.pack() → put_content() → JSON metadata -Fetch: JSON metadata → get_content() → blob.unpack() → Python object -``` diff --git a/docs/src/design/tables/storage-types-spec.md b/docs/src/design/tables/storage-types-spec.md index 668fdfdf..f7aead7d 100644 --- a/docs/src/design/tables/storage-types-spec.md +++ b/docs/src/design/tables/storage-types-spec.md @@ -6,20 +6,20 @@ This document defines a three-layer type architecture: 1. **Native database types** - Backend-specific (`FLOAT`, `TINYINT UNSIGNED`, `LONGBLOB`). Discouraged for direct use. 2. **Core DataJoint types** - Standardized across backends, scientist-friendly (`float32`, `uint8`, `bool`, `json`). -3. **AttributeTypes** - Programmatic types with `encode()`/`decode()` semantics. Composable. +3. **Codec Types** - Programmatic types with `encode()`/`decode()` semantics. Composable. ``` ┌───────────────────────────────────────────────────────────────────┐ -│ AttributeTypes (Layer 3) │ +│ Codec Types (Layer 3) │ │ │ -│ Built-in: │ +│ Built-in: │ │ User: ... │ ├───────────────────────────────────────────────────────────────────┤ │ Core DataJoint Types (Layer 2) │ │ │ │ float32 float64 int64 uint64 int32 uint32 int16 uint16 │ -│ int8 uint8 bool uuid json blob date datetime │ -│ char(n) varchar(n) enum(...) │ +│ int8 uint8 bool uuid json bytes date datetime text │ +│ char(n) varchar(n) enum(...) decimal(n,f) │ ├───────────────────────────────────────────────────────────────────┤ │ Native Database Types (Layer 1) │ │ │ @@ -31,14 +31,15 @@ This document defines a three-layer type architecture: **Syntax distinction:** - Core types: `int32`, `float64`, `varchar(255)` - no brackets -- AttributeTypes: ``, ``, `` - angle brackets +- Codec types: ``, ``, `` - angle brackets +- The `@` character indicates external storage (object store vs database) ### OAS Storage Regions | Region | Path Pattern | Addressing | Use Case | |--------|--------------|------------|----------| | Object | `{schema}/{table}/{pk}/` | Primary key | Large objects, Zarr, HDF5 | -| Content | `_content/{hash}` | Content hash | Deduplicated blobs/files | +| Hash | `_hash/{hash}` | MD5 hash | Deduplicated blobs/files | ### External References @@ -54,70 +55,300 @@ MySQL and PostgreSQL backends. 
Users should prefer these over native database ty ### Numeric Types -| Core Type | Description | MySQL | -|-----------|-------------|-------| -| `int8` | 8-bit signed | `TINYINT` | -| `int16` | 16-bit signed | `SMALLINT` | -| `int32` | 32-bit signed | `INT` | -| `int64` | 64-bit signed | `BIGINT` | -| `uint8` | 8-bit unsigned | `TINYINT UNSIGNED` | -| `uint16` | 16-bit unsigned | `SMALLINT UNSIGNED` | -| `uint32` | 32-bit unsigned | `INT UNSIGNED` | -| `uint64` | 64-bit unsigned | `BIGINT UNSIGNED` | -| `float32` | 32-bit float | `FLOAT` | -| `float64` | 64-bit float | `DOUBLE` | +| Core Type | Description | MySQL | PostgreSQL | +|-----------|-------------|-------|------------| +| `int8` | 8-bit signed | `TINYINT` | `SMALLINT` | +| `int16` | 16-bit signed | `SMALLINT` | `SMALLINT` | +| `int32` | 32-bit signed | `INT` | `INTEGER` | +| `int64` | 64-bit signed | `BIGINT` | `BIGINT` | +| `uint8` | 8-bit unsigned | `TINYINT UNSIGNED` | `SMALLINT` | +| `uint16` | 16-bit unsigned | `SMALLINT UNSIGNED` | `INTEGER` | +| `uint32` | 32-bit unsigned | `INT UNSIGNED` | `BIGINT` | +| `uint64` | 64-bit unsigned | `BIGINT UNSIGNED` | `NUMERIC(20)` | +| `float32` | 32-bit float | `FLOAT` | `REAL` | +| `float64` | 64-bit float | `DOUBLE` | `DOUBLE PRECISION` | +| `decimal(n,f)` | Fixed-point | `DECIMAL(n,f)` | `NUMERIC(n,f)` | ### String Types -| Core Type | Description | MySQL | -|-----------|-------------|-------| -| `char(n)` | Fixed-length | `CHAR(n)` | -| `varchar(n)` | Variable-length | `VARCHAR(n)` | +| Core Type | Description | MySQL | PostgreSQL | +|-----------|-------------|-------|------------| +| `char(n)` | Fixed-length | `CHAR(n)` | `CHAR(n)` | +| `varchar(n)` | Variable-length | `VARCHAR(n)` | `VARCHAR(n)` | +| `text` | Unlimited text | `TEXT` | `TEXT` | + +**Encoding:** All strings use UTF-8 (`utf8mb4` in MySQL, `UTF8` in PostgreSQL). +See [Encoding and Collation Policy](#encoding-and-collation-policy) for details. ### Boolean -| Core Type | Description | MySQL | -|-----------|-------------|-------| -| `bool` | True/False | `TINYINT` | +| Core Type | Description | MySQL | PostgreSQL | +|-----------|-------------|-------|------------| +| `bool` | True/False | `TINYINT` | `BOOLEAN` | ### Date/Time Types -| Core Type | Description | MySQL | -|-----------|-------------|-------| -| `date` | Date only | `DATE` | -| `datetime` | Date and time | `DATETIME` | +| Core Type | Description | MySQL | PostgreSQL | +|-----------|-------------|-------|------------| +| `date` | Date only | `DATE` | `DATE` | +| `datetime` | Date and time | `DATETIME` | `TIMESTAMP` | + +**Timezone policy:** All `datetime` values should be stored as **UTC**. Timezone conversion is a +presentation concern handled by the application layer, not the database. This ensures: +- Reproducible computations regardless of server or client timezone settings +- Simple arithmetic on temporal values (no DST ambiguity) +- Portable data across systems and regions + +Use `CURRENT_TIMESTAMP` for auto-populated creation times: +``` +created_at : datetime = CURRENT_TIMESTAMP +``` ### Binary Types -The core `blob` type stores raw bytes without any serialization. Use `` AttributeType +The core `bytes` type stores raw bytes without any serialization. Use the `` codec for serialized Python objects. 
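+
+For illustration only, a minimal (hypothetical) table definition contrasting the two: `raw_payload` keeps exactly the bytes it is given, while `features` is serialized by the `<blob>` codec described under Codec Types below.
+
+```python
+class Packet(dj.Manual):
+    definition = """
+    packet_id : int32
+    ---
+    raw_payload : bytes   # raw bytes stored as-is, no serialization
+    features : <blob>     # Python object (e.g., NumPy array) serialized by the blob codec
+    """
+```
+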
-| Core Type | Description | MySQL | -|-----------|-------------|-------| -| `blob` | Raw bytes | `LONGBLOB` | +| Core Type | Description | MySQL | PostgreSQL | +|-----------|-------------|-------|------------| +| `bytes` | Raw bytes | `LONGBLOB` | `BYTEA` | ### Other Types -| Core Type | Description | MySQL | -|-----------|-------------|-------| -| `json` | JSON document | `JSON` | -| `uuid` | UUID | `BINARY(16)` | -| `enum(...)` | Enumeration | `ENUM(...)` | +| Core Type | Description | MySQL | PostgreSQL | +|-----------|-------------|-------|------------| +| `json` | JSON document | `JSON` | `JSONB` | +| `uuid` | UUID | `BINARY(16)` | `UUID` | +| `enum(...)` | Enumeration | `ENUM(...)` | `CREATE TYPE ... AS ENUM` | ### Native Passthrough Types -Users may use native database types directly (e.g., `text`, `mediumint auto_increment`), +Users may use native database types directly (e.g., `mediumint`, `tinyblob`), but these will generate a warning about non-standard usage. Native types are not recorded in field comments and may have portability issues across database backends. -## AttributeTypes (Layer 3) +### Type Modifiers Policy + +DataJoint table definitions have their own syntax for constraints and metadata. SQL type +modifiers are **not allowed** in type specifications because they conflict with DataJoint's +declarative syntax: + +| Modifier | Status | DataJoint Alternative | +|----------|--------|----------------------| +| `NOT NULL` / `NULL` | ❌ Not allowed | Use `= NULL` for nullable; omit default for required | +| `DEFAULT value` | ❌ Not allowed | Use `= value` syntax before the type | +| `PRIMARY KEY` | ❌ Not allowed | Position above `---` line | +| `UNIQUE` | ❌ Not allowed | Use DataJoint index syntax | +| `COMMENT 'text'` | ❌ Not allowed | Use `# comment` syntax | +| `CHARACTER SET` | ❌ Not allowed | Database-level configuration | +| `COLLATE` | ❌ Not allowed | Database-level configuration | +| `AUTO_INCREMENT` | ⚠️ Discouraged | Allowed with native types only, generates warning | +| `UNSIGNED` | ✅ Allowed | Part of type semantics (use `uint*` core types) | + +**Nullability and defaults:** DataJoint handles nullability through the default value syntax. +An attribute is nullable if and only if its default is `NULL`: + +``` +# Required (NOT NULL, no default) +name : varchar(100) + +# Nullable (default is NULL) +nickname = NULL : varchar(100) + +# Required with default value +status = "active" : varchar(20) +``` + +**Auto-increment policy:** DataJoint discourages `AUTO_INCREMENT` / `SERIAL` because: +- Breaks reproducibility (IDs depend on insertion order) +- Makes pipelines non-deterministic +- Complicates data migration and replication +- Primary keys should be meaningful, not arbitrary + +If required, use native types: `int auto_increment` or `serial` (with warning). + +### Encoding and Collation Policy + +Character encoding and collation are **database-level configuration**, not part of type +definitions. This ensures consistent behavior across all tables and simplifies portability. -AttributeTypes provide `encode()`/`decode()` semantics on top of core types. 
They are +**Configuration** (in `dj.config` or `datajoint.json`): +```json +{ + "database.charset": "utf8mb4", + "database.collation": "utf8mb4_bin" +} +``` + +**Defaults:** + +| Setting | MySQL | PostgreSQL | +|---------|-------|------------| +| Charset | `utf8mb4` | `UTF8` | +| Collation | `utf8mb4_bin` | `C` | + +**Policy:** +- **UTF-8 required**: DataJoint validates charset is UTF-8 compatible at connection time +- **Case-sensitive by default**: Binary collation (`utf8mb4_bin` / `C`) ensures predictable comparisons +- **No per-column overrides**: `CHARACTER SET` and `COLLATE` are rejected in type definitions +- **Like timezone**: Encoding is infrastructure configuration, not part of the data model + +## Codec Types (Layer 3) + +Codec types provide `encode()`/`decode()` semantics on top of core types. They are composable and can be built-in or user-defined. -### `` / `` - Path-Addressed Storage +### Storage Mode: `@` Convention + +The `@` character in codec syntax indicates **external storage** (object store): + +- **No `@`**: Internal storage (database) - e.g., ``, `` +- **`@` present**: External storage (object store) - e.g., ``, `` +- **`@` alone**: Use default store - e.g., `` +- **`@name`**: Use named store - e.g., `` + +Some codecs support both modes (``, ``), others are external-only (``, ``, ``). + +### Codec Base Class + +Codecs auto-register when subclassed using Python's `__init_subclass__` mechanism. +No decorator is needed. + +```python +from abc import ABC, abstractmethod +from typing import Any + +# Global codec registry +_codec_registry: dict[str, "Codec"] = {} + + +class Codec(ABC): + """ + Base class for codec types. Subclasses auto-register by name. + + Requires Python 3.10+. + """ + name: str | None = None # Must be set by concrete subclasses + + def __init_subclass__(cls, *, register: bool = True, **kwargs): + """Auto-register concrete codecs when subclassed.""" + super().__init_subclass__(**kwargs) + + if not register: + return # Skip registration for abstract bases + + if cls.name is None: + return # Skip registration if no name (abstract) + + if cls.name in _codec_registry: + existing = _codec_registry[cls.name] + if type(existing) is not cls: + raise DataJointError( + f"Codec <{cls.name}> already registered by {type(existing).__name__}" + ) + return # Same class, idempotent + + _codec_registry[cls.name] = cls() + + def get_dtype(self, is_external: bool) -> str: + """ + Return the storage dtype for this codec. + + Args: + is_external: True if @ modifier present (external storage) + + Returns: + A core type (e.g., "bytes", "json") or another codec (e.g., "") + """ + raise NotImplementedError + + @abstractmethod + def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> Any: + """Encode Python value for storage.""" + ... + + @abstractmethod + def decode(self, stored: Any, *, key: dict | None = None) -> Any: + """Decode stored value back to Python.""" + ... + + def validate(self, value: Any) -> None: + """Optional validation before encoding. Override to add constraints.""" + pass + + +def list_codecs() -> list[str]: + """Return list of registered codec names.""" + return sorted(_codec_registry.keys()) + + +def get_codec(name: str) -> Codec: + """Get codec by name. 
Raises DataJointError if not found.""" + if name not in _codec_registry: + raise DataJointError(f"Unknown codec: <{name}>") + return _codec_registry[name] +``` + +**Usage - no decorator needed:** + +```python +class GraphCodec(dj.Codec): + """Auto-registered as .""" + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return {'nodes': list(graph.nodes()), 'edges': list(graph.edges())} + + def decode(self, stored, *, key=None): + import networkx as nx + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G +``` + +**Skip registration for abstract bases:** + +```python +class ExternalOnlyCodec(dj.Codec, register=False): + """Abstract base for external-only codecs. Not registered.""" + + def get_dtype(self, is_external: bool) -> str: + if not is_external: + raise DataJointError(f"<{self.name}> requires @ (external only)") + return "json" +``` + +### Codec Resolution and Chaining + +Codecs resolve to core types through chaining. The `get_dtype(is_external)` method +returns the appropriate dtype based on storage mode: + +``` +Resolution at declaration time: -**Built-in AttributeType.** OAS (Object-Augmented Schema) storage: + → get_dtype(False) → "bytes" → LONGBLOB/BYTEA + → get_dtype(True) → "" → json → JSON/JSONB + → get_dtype(True) → "" → json (store=cold) + + → get_dtype(False) → "bytes" → LONGBLOB/BYTEA + → get_dtype(True) → "" → json → JSON/JSONB + + → get_dtype(True) → "json" → JSON/JSONB + → get_dtype(False) → ERROR (external only) + + → get_dtype(True) → "json" → JSON/JSONB + → get_dtype(True) → "json" → JSON/JSONB +``` + +### `` / `` - Path-Addressed Storage + +**Built-in codec. External only.** + +OAS (Object-Augmented Schema) storage for files and folders: - Path derived from primary key: `{schema}/{table}/{pk}/{attribute}/` - One-to-one relationship with table row @@ -131,7 +362,7 @@ class Analysis(dj.Computed): definition = """ -> Recording --- - results : # default store + results : # default store archive : # specific store """ ``` @@ -139,35 +370,34 @@ class Analysis(dj.Computed): #### Implementation ```python -class ObjectType(AttributeType): - """Built-in AttributeType for path-addressed OAS storage.""" - type_name = "object" - dtype = "json" +class ObjectCodec(dj.Codec): + """Path-addressed OAS storage. External only.""" + name = "object" + + def get_dtype(self, is_external: bool) -> str: + if not is_external: + raise DataJointError(" requires @ (external storage only)") + return "json" def encode(self, value, *, key=None, store_name=None) -> dict: store = get_store(store_name or dj.config['stores']['default']) path = self._compute_path(key) # {schema}/{table}/{pk}/{attr}/ store.put(path, value) - return { - "path": path, - "store": store_name, - # Additional metadata (size, timestamps, etc.) - } + return {"path": path, "store": store_name, ...} def decode(self, stored: dict, *, key=None) -> ObjectRef: - return ObjectRef( - store=get_store(stored["store"]), - path=stored["path"] - ) + return ObjectRef(store=get_store(stored["store"]), path=stored["path"]) ``` -### `` / `` - Content-Addressed Storage +### `` / `` - Hash-Addressed Storage -**Built-in AttributeType.** Content-addressed storage with deduplication: +**Built-in codec. 
External only.** + +Hash-addressed storage with deduplication: - **Single blob only**: stores a single file or serialized object (not folders) - **Per-project scope**: content is shared across all schemas in a project (not per-schema) -- Path derived from content hash: `_content/{hash[:2]}/{hash[2:4]}/{hash}` +- Path derived from content hash: `_hash/{hash[:2]}/{hash[2:4]}/{hash}` - Many-to-one: multiple rows (even across schemas) can reference same content - Reference counted for garbage collection - Deduplication: identical content stored once across the entire project @@ -179,48 +409,44 @@ store_root/ ├── {schema}/{table}/{pk}/ # object storage (path-addressed by PK) │ └── {attribute}/ │ -└── _content/ # content storage (content-addressed) +└── _hash/ # content storage (hash-addressed) └── {hash[:2]}/{hash[2:4]}/{hash} ``` #### Implementation ```python -class ContentType(AttributeType): - """Built-in AttributeType for content-addressed storage.""" - type_name = "content" - dtype = "json" +class HashCodec(dj.Codec): + """Hash-addressed storage. External only.""" + name = "hash" + + def get_dtype(self, is_external: bool) -> str: + if not is_external: + raise DataJointError(" requires @ (external storage only)") + return "json" def encode(self, data: bytes, *, key=None, store_name=None) -> dict: """Store content, return metadata as JSON.""" - content_hash = hashlib.sha256(data).hexdigest() + hash_id = hashlib.md5(data).hexdigest() # 32-char hex store = get_store(store_name or dj.config['stores']['default']) - path = f"_content/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}" + path = f"_hash/{hash_id[:2]}/{hash_id[2:4]}/{hash_id}" if not store.exists(path): store.put(path, data) - ContentRegistry().insert1({ - 'content_hash': content_hash, - 'store': store_name, - 'size': len(data) - }, skip_duplicates=True) - - return { - "hash": content_hash, - "store": store_name, - "size": len(data) - } + + # Metadata stored in JSON column (no separate registry) + return {"hash": hash_id, "store": store_name, "size": len(data)} def decode(self, stored: dict, *, key=None) -> bytes: """Retrieve content by hash.""" store = get_store(stored["store"]) - path = f"_content/{stored['hash'][:2]}/{stored['hash'][2:4]}/{stored['hash']}" + path = f"_hash/{stored['hash'][:2]}/{stored['hash'][2:4]}/{stored['hash']}" return store.get(path) ``` #### Database Column -The `` type stores JSON metadata: +The `` type stores JSON metadata: ```sql -- content column (MySQL) @@ -233,7 +459,9 @@ features JSONB NOT NULL ### `` - Portable External Reference -**Built-in AttributeType.** Relative path references within configured stores: +**Built-in codec. External only (store required).** + +Relative path references within configured stores: - **Relative paths**: paths within a configured store (portable across environments) - **Store-aware**: resolves paths against configured store backend @@ -282,32 +510,23 @@ just use `varchar`. A string is simpler and more transparent. #### Implementation ```python -class FilepathType(AttributeType): - """Built-in AttributeType for store-relative file references.""" - type_name = "filepath" - dtype = "json" +class FilepathCodec(dj.Codec): + """Store-relative file references. 
External only.""" + name = "filepath" - def encode(self, relative_path: str, *, key=None, store_name=None, - compute_checksum: bool = False) -> dict: + def get_dtype(self, is_external: bool) -> str: + if not is_external: + raise DataJointError(" requires @store") + return "json" + + def encode(self, relative_path: str, *, key=None, store_name=None) -> dict: """Register reference to file in store.""" store = get_store(store_name) # store_name required for filepath - metadata = {'path': relative_path, 'store': store_name} - - if compute_checksum: - full_path = store.resolve(relative_path) - if store.exists(full_path): - metadata['checksum'] = compute_file_checksum(store, full_path) - metadata['size'] = store.size(full_path) - - return metadata + return {'path': relative_path, 'store': store_name} def decode(self, stored: dict, *, key=None) -> ObjectRef: """Return ObjectRef for lazy access.""" - return ObjectRef( - store=get_store(stored['store']), - path=stored['path'], - checksum=stored.get('checksum') # optional verification - ) + return ObjectRef(store=get_store(stored['store']), path=stored['path']) ``` #### Database Column @@ -346,69 +565,33 @@ column_name JSONB NOT NULL ``` The `json` database type: -- Used as dtype by built-in AttributeTypes (``, ``, ``) +- Used as dtype by built-in codecs (``, ``, ``) - Stores arbitrary JSON-serializable data - Automatically uses appropriate type for database backend - Supports JSON path queries where available -## Parameterized AttributeTypes - -AttributeTypes can be parameterized with `` syntax. The parameter specifies -which store to use: +## Built-in Codecs -```python -class AttributeType: - type_name: str # Name used in or as bare type - dtype: str # Database type or built-in AttributeType - - # When user writes type_name@param, resolved store becomes param -``` - -**Resolution examples:** -``` - → uses type → default store - → uses type → cold store - → dtype = "longblob" → database (no store) - → uses type → cold store -``` +### `` / `` - Serialized Python Objects -AttributeTypes can use other AttributeTypes as their dtype (composition): -- `` uses `` - adds djblob serialization on top of content-addressed storage -- `` uses `` - adds filename preservation on top of content-addressed storage +**Supports both internal and external storage.** -## User-Defined AttributeTypes +Serializes Python objects (NumPy arrays, dicts, lists, etc.) using DataJoint's +blob format. Compatible with MATLAB. -### `` - Internal Serialized Blob - -Serialized Python object stored in database. +- **``**: Stored in database (`bytes` → `LONGBLOB`/`BYTEA`) +- **``**: Stored externally via `` with deduplication +- **``**: Stored in specific named store ```python -@dj.register_type -class DJBlobType(AttributeType): - type_name = "djblob" - dtype = "longblob" # MySQL type - - def encode(self, value, *, key=None) -> bytes: - from . import blob - return blob.pack(value, compress=True) - - def decode(self, stored, *, key=None) -> Any: - from . import blob - return blob.unpack(stored) -``` - -### `` / `` - External Serialized Blob +class BlobCodec(dj.Codec): + """Serialized Python objects. Supports internal and external.""" + name = "blob" -Serialized Python object stored in content-addressed storage. 
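+    # get_dtype() selects the storage chain: plain <blob> packs into the
+    # database (bytes -> LONGBLOB/BYTEA), while <blob@...> chains to <hash>
+    # for deduplicated external storage.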
+ def get_dtype(self, is_external: bool) -> str: + return "" if is_external else "bytes" -```python -@dj.register_type -class XBlobType(AttributeType): - type_name = "xblob" - dtype = "content" # Core type - uses default store - # dtype = "content@store" for specific store - - def encode(self, value, *, key=None) -> bytes: + def encode(self, value, *, key=None, store_name=None) -> bytes: from . import blob return blob.pack(value, compress=True) @@ -423,55 +606,38 @@ class ProcessedData(dj.Computed): definition = """ -> RawData --- - small_result : # internal (in database) - large_result : # external (default store) - archive_result : # external (specific store) + small_result : # internal (in database) + large_result : # external (default store) + archive_result : # external (specific store) """ ``` -### `` - Internal File Attachment - -File stored in database with filename preserved. - -```python -@dj.register_type -class AttachType(AttributeType): - type_name = "attach" - dtype = "longblob" - - def encode(self, filepath, *, key=None) -> bytes: - path = Path(filepath) - return path.name.encode() + b"\0" + path.read_bytes() +### `` / `` - File Attachments - def decode(self, stored, *, key=None) -> str: - filename, contents = stored.split(b"\0", 1) - filename = filename.decode() - download_path = Path(dj.config['download_path']) / filename - download_path.parent.mkdir(parents=True, exist_ok=True) - download_path.write_bytes(contents) - return str(download_path) -``` +**Supports both internal and external storage.** -### `` / `` - External File Attachment +Stores files with filename preserved. On fetch, extracts to configured download path. -File stored in content-addressed storage with filename preserved. +- **``**: Stored in database (`bytes` → `LONGBLOB`/`BYTEA`) +- **``**: Stored externally via `` with deduplication +- **``**: Stored in specific named store ```python -@dj.register_type -class XAttachType(AttributeType): - type_name = "xattach" - dtype = "content" # Core type +class AttachCodec(dj.Codec): + """File attachment with filename. 
Supports internal and external.""" + name = "attach" + + def get_dtype(self, is_external: bool) -> str: + return "" if is_external else "bytes" - def encode(self, filepath, *, key=None) -> bytes: + def encode(self, filepath, *, key=None, store_name=None) -> bytes: path = Path(filepath) - # Include filename in stored data return path.name.encode() + b"\0" + path.read_bytes() def decode(self, stored, *, key=None) -> str: filename, contents = stored.split(b"\0", 1) filename = filename.decode() download_path = Path(dj.config['download_path']) / filename - download_path.parent.mkdir(parents=True, exist_ok=True) download_path.write_bytes(contents) return str(download_path) ``` @@ -480,93 +646,121 @@ Usage: ```python class Attachments(dj.Manual): definition = """ - attachment_id : int + attachment_id : int32 --- config : # internal (small file in DB) - data_file : # external (default store) - archive : # external (specific store) + data_file : # external (default store) + archive : # external (specific store) """ ``` -## Storage Comparison +## User-Defined Codecs -| Type | dtype | Storage Location | Dedup | Returns | -|------|-------|------------------|-------|---------| -| `` | `json` | `{schema}/{table}/{pk}/` | No | ObjectRef | -| `` | `json` | `{schema}/{table}/{pk}/` | No | ObjectRef | -| `` | `json` | `_content/{hash}` | Yes | bytes | -| `` | `json` | `_content/{hash}` | Yes | bytes | -| `` | `json` | Configured store (relative path) | No | ObjectRef | -| `` | `longblob` | Database | No | Python object | -| `` | `` | `_content/{hash}` | Yes | Python object | -| `` | `` | `_content/{hash}` | Yes | Python object | -| `` | `longblob` | Database | No | Local file path | -| `` | `` | `_content/{hash}` | Yes | Local file path | -| `` | `` | `_content/{hash}` | Yes | Local file path | - -## Reference Counting for Content Type - -The `ContentRegistry` is a **project-level** table that tracks content-addressed objects -across all schemas. This differs from the legacy `~external_*` tables which were per-schema. +Users can define custom codecs for domain-specific data: ```python -class ContentRegistry: - """ - Project-level content registry. - Stored in a designated database (e.g., `{project}_content`). - """ - definition = """ - # Content-addressed object registry (project-wide) - content_hash : char(64) # SHA256 hex - --- - store : varchar(64) # Store name - size : bigint unsigned # Size in bytes - created : timestamp DEFAULT CURRENT_TIMESTAMP - """ +class GraphCodec(dj.Codec): + """Store NetworkX graphs. Internal only (no external support).""" + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + if is_external: + raise DataJointError(" does not support external storage") + return "" # Chain to blob for serialization + + def encode(self, graph, *, key=None, store_name=None): + return {'nodes': list(graph.nodes()), 'edges': list(graph.edges())} + + def decode(self, stored, *, key=None): + import networkx as nx + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G +``` + +Custom codecs can support both modes by returning different dtypes: + +```python +class ImageCodec(dj.Codec): + """Store images. 
Supports both internal and external.""" + name = "image" + + def get_dtype(self, is_external: bool) -> str: + return "" if is_external else "bytes" + + def encode(self, image, *, key=None, store_name=None) -> bytes: + # Convert PIL Image to PNG bytes + buffer = io.BytesIO() + image.save(buffer, format='PNG') + return buffer.getvalue() + + def decode(self, stored: bytes, *, key=None): + return PIL.Image.open(io.BytesIO(stored)) ``` -Garbage collection scans **all schemas** in the project: +## Storage Comparison + +| Type | get_dtype | Resolves To | Storage Location | Dedup | Returns | +|------|-----------|-------------|------------------|-------|---------| +| `` | `bytes` | `LONGBLOB`/`BYTEA` | Database | No | Python object | +| `` | `` | `json` | `_hash/{hash}` | Yes | Python object | +| `` | `` | `json` | `_hash/{hash}` | Yes | Python object | +| `` | `bytes` | `LONGBLOB`/`BYTEA` | Database | No | Local file path | +| `` | `` | `json` | `_hash/{hash}` | Yes | Local file path | +| `` | `` | `json` | `_hash/{hash}` | Yes | Local file path | +| `` | `json` | `JSON`/`JSONB` | `{schema}/{table}/{pk}/` | No | ObjectRef | +| `` | `json` | `JSON`/`JSONB` | `{schema}/{table}/{pk}/` | No | ObjectRef | +| `` | `json` | `JSON`/`JSONB` | `_hash/{hash}` | Yes | bytes | +| `` | `json` | `JSON`/`JSONB` | `_hash/{hash}` | Yes | bytes | +| `` | `json` | `JSON`/`JSONB` | Configured store | No | ObjectRef | + +## Garbage Collection for Hash Storage + +Hash metadata (hash, store, size) is stored directly in each table's JSON column - no separate +registry table is needed. Garbage collection scans all tables to find referenced hashes: ```python -def garbage_collect(project): - """Remove content not referenced by any table in any schema.""" - # Get all registered hashes - registered = set(ContentRegistry().fetch('content_hash', 'store')) +def garbage_collect(store_name): + """Remove hash-addressed data not referenced by any table.""" + # Scan store for all hash files + store = get_store(store_name) + all_hashes = set(store.list_hashes()) # from _hash/ directory - # Get all referenced hashes from ALL schemas in the project + # Scan all tables for referenced hashes referenced = set() for schema in project.schemas: for table in schema.tables: for attr in table.heading.attributes: - if attr.type in ('content', 'content@...'): - hashes = table.fetch(attr.name) - referenced.update((h, attr.store) for h in hashes) - - # Delete orphaned content - for content_hash, store in (registered - referenced): - store_backend = get_store(store) - store_backend.delete(content_path(content_hash)) - (ContentRegistry() & {'content_hash': content_hash}).delete() + if uses_hash_storage(attr): # , , + for row in table.fetch(attr.name): + if row and row.get('store') == store_name: + referenced.add(row['hash']) + + # Delete orphaned files + for hash_id in (all_hashes - referenced): + store.delete(hash_path(hash_id)) ``` -## Built-in AttributeType Comparison +## Built-in Codec Comparison -| Feature | `` | `` | `` | -|---------|------------|-------------|---------------------| -| dtype | `json` | `json` | `json` | -| Location | OAS store | OAS store | Configured store | -| Addressing | Primary key | Content hash | Relative path | -| Path control | DataJoint | DataJoint | User | -| Deduplication | No | Yes | No | -| Structure | Files, folders, Zarr | Single blob only | Any (via fsspec) | -| Access | ObjectRef (lazy) | Transparent (bytes) | ObjectRef (lazy) | -| GC | Deleted with row | Reference counted | N/A (user managed) | -| Integrity | 
DataJoint managed | DataJoint managed | User managed | +| Feature | `` | `` | `` | `` | `` | +|---------|----------|------------|-------------|--------------|---------------| +| Storage modes | Both | Both | External only | External only | External only | +| Internal dtype | `bytes` | `bytes` | N/A | N/A | N/A | +| External dtype | `` | `` | `json` | `json` | `json` | +| Addressing | Hash | Hash | Primary key | Hash | Relative path | +| Deduplication | Yes (external) | Yes (external) | No | Yes | No | +| Structure | Single blob | Single file | Files, folders | Single blob | Any | +| Returns | Python object | Local path | ObjectRef | bytes | ObjectRef | +| GC | Ref counted | Ref counted | With row | Ref counted | User managed | **When to use each:** -- **``**: Large/complex objects where DataJoint controls organization (Zarr, HDF5) -- **``**: Deduplicated serialized data or file attachments via ``, `` -- **``**: Portable references to files in configured stores +- **``**: Serialized Python objects (NumPy arrays, dicts). Use `` for large/duplicated data +- **``**: File attachments with filename preserved. Use `` for large files +- **``**: Large/complex file structures (Zarr, HDF5) where DataJoint controls organization +- **``**: Raw bytes with deduplication (typically used via `` or ``) +- **``**: Portable references to externally-managed files - **`varchar`**: Arbitrary URLs/paths where ObjectRef semantics aren't needed ## Key Design Decisions @@ -574,51 +768,88 @@ def garbage_collect(project): 1. **Three-layer architecture**: - Layer 1: Native database types (backend-specific, discouraged) - Layer 2: Core DataJoint types (standardized, scientist-friendly) - - Layer 3: AttributeTypes (encode/decode, composable) -2. **Core types are scientist-friendly**: `float32`, `uint8`, `bool` instead of `FLOAT`, `TINYINT UNSIGNED`, `TINYINT(1)` -3. **AttributeTypes use angle brackets**: ``, ``, `` - distinguishes from core types -4. **AttributeTypes are composable**: `` uses ``, which uses `json` -5. **Built-in AttributeTypes use JSON dtype**: Stores metadata (path, hash, store name, etc.) -6. **Two OAS regions**: object (PK-addressed) and content (hash-addressed) within managed stores -7. **Filepath for portability**: `` uses relative paths within stores for environment portability -8. **No `uri` type**: For arbitrary URLs, use `varchar`—simpler and more transparent -9. **Content type**: Single-blob, content-addressed, deduplicated storage -10. **Parameterized types**: `` passes store parameter -11. **Naming convention**: - - `` = internal serialized (database) - - `` = external serialized (content-addressed) - - `` = internal file (single file) - - `` = external file (single file) -12. **Transparent access**: AttributeTypes return Python objects or file paths -13. **Lazy access**: ``, ``, and `` return ObjectRef + - Layer 3: Codec types (encode/decode, composable) +2. **Core types are scientist-friendly**: `float32`, `uint8`, `bool`, `bytes` instead of `FLOAT`, `TINYINT UNSIGNED`, `LONGBLOB` +3. **Codecs use angle brackets**: ``, ``, `` - distinguishes from core types +4. **`@` indicates external storage**: No `@` = database, `@` present = object store +5. **`get_dtype(is_external)` method**: Codecs resolve dtype at declaration time based on storage mode +6. **Codecs are composable**: `` uses ``, which uses `json` +7. **Built-in external codecs use JSON dtype**: Stores metadata (path, hash, store name, etc.) +8. 
**Two OAS regions**: object (PK-addressed) and hash (hash-addressed) within managed stores +9. **Filepath for portability**: `` uses relative paths within stores for environment portability +10. **No `uri` type**: For arbitrary URLs, use `varchar`—simpler and more transparent +11. **Naming conventions**: + - `@` = external storage (object store) + - No `@` = internal storage (database) + - `@` alone = default store + - `@name` = named store +12. **Dual-mode codecs**: `` and `` support both internal and external storage +13. **External-only codecs**: ``, ``, `` require `@` +14. **Transparent access**: Codecs return Python objects or file paths +15. **Lazy access**: `` and `` return ObjectRef +16. **MD5 for content hashing**: See [Hash Algorithm Choice](#hash-algorithm-choice) below +17. **No separate registry**: Hash metadata stored in JSON columns, not a separate table +18. **Auto-registration via `__init_subclass__`**: Codecs register automatically when subclassed—no decorator needed. Use `register=False` for abstract bases. Requires Python 3.10+. + +### Hash Algorithm Choice + +Content-addressed storage uses **MD5** (128-bit, 32-char hex) rather than SHA256 (256-bit, 64-char hex). + +**Rationale:** + +1. **Practical collision resistance is sufficient**: The birthday bound for MD5 is ~2^64 operations + before 50% collision probability. No scientific project will store anywhere near 10^19 files. + For content deduplication (not cryptographic verification), MD5 provides adequate uniqueness. + +2. **Storage efficiency**: 32-char hashes vs 64-char hashes in every JSON metadata field. + With millions of records, this halves the storage overhead for hash identifiers. + +3. **Performance**: MD5 is ~2-3x faster than SHA256 for large files. While both are fast, + the difference is measurable when hashing large scientific datasets. + +4. **Legacy compatibility**: DataJoint's existing `uuid_from_buffer()` function uses MD5. + The new system changes only the storage format (hex string in JSON vs binary UUID), + not the underlying hash algorithm. This simplifies migration. + +5. **Consistency with existing codebase**: The `dj.hash` module already uses MD5 for + `key_hash()` (job reservation) and `uuid_from_buffer()` (query caching). + +**Why not SHA256?** + +SHA256 is the modern standard for content-addressable storage (Git, Docker, IPFS). However: +- These systems prioritize cryptographic security against adversarial collision attacks +- Scientific data pipelines face no adversarial threat model +- The practical benefits (storage, speed, compatibility) outweigh theoretical security gains + +**Note**: If cryptographic verification is ever needed (e.g., for compliance or reproducibility +audits), SHA256 checksums can be computed on-demand without changing the storage addressing scheme. ## Migration from Legacy Types | Legacy | New Equivalent | |--------|----------------| -| `longblob` (auto-serialized) | `` | -| `blob@store` | `` | +| `longblob` (auto-serialized) | `` | +| `blob@store` | `` | | `attach` | `` | -| `attach@store` | `` | -| `filepath@store` (copy-based) | `filepath@store` (ObjectRef-based, upgraded) | +| `attach@store` | `` | +| `filepath@store` (copy-based) | `` (ObjectRef-based) | ### Migration from Legacy `~external_*` Stores -Legacy external storage used per-schema `~external_{store}` tables. Migration to the new -per-project `ContentRegistry` requires: +Legacy external storage used per-schema `~external_{store}` tables with UUID references. 
+Migration to the new JSON-based hash storage requires: ```python def migrate_external_store(schema, store_name): """ - Migrate legacy ~external_{store} to new ContentRegistry. + Migrate legacy ~external_{store} to new HashRegistry. 1. Read all entries from ~external_{store} 2. For each entry: - Fetch content from legacy location - - Compute SHA256 hash - - Copy to _content/{hash}/ if not exists - - Update table column from UUID to hash - - Register in ContentRegistry + - Compute MD5 hash + - Copy to _hash/{hash}/ if not exists + - Update table column to new hash format 3. After all schemas migrated, drop ~external_{store} tables """ external_table = schema.external[store_name] @@ -630,22 +861,16 @@ def migrate_external_store(schema, store_name): content = external_table.get(legacy_uuid) # Compute new content hash - content_hash = hashlib.sha256(content).hexdigest() + hash_id = hashlib.md5(content).hexdigest() # Store in new location if not exists - new_path = f"_content/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}" + new_path = f"_hash/{hash_id[:2]}/{hash_id[2:4]}/{hash_id}" store = get_store(store_name) if not store.exists(new_path): store.put(new_path, content) - # Register in project-wide ContentRegistry - ContentRegistry().insert1({ - 'content_hash': content_hash, - 'store': store_name, - 'size': len(content) - }, skip_duplicates=True) - - # Update referencing tables (UUID -> hash) + # Update referencing tables: convert UUID column to JSON with hash metadata + # The JSON column stores {"hash": hash_id, "store": store_name, "size": len(content)} # ... update all tables that reference this UUID ... # After migration complete for all schemas: @@ -653,13 +878,13 @@ def migrate_external_store(schema, store_name): ``` **Migration considerations:** -- Legacy UUIDs were based on content hash but stored as `binary(16)` -- New system uses `char(64)` SHA256 hex strings +- Legacy UUIDs were based on MD5 content hash stored as `binary(16)` (UUID format) +- New system uses `char(32)` MD5 hex strings stored in JSON +- The hash algorithm is unchanged (MD5), only the storage format differs - Migration can be done incrementally per schema - Backward compatibility layer can read both formats during transition ## Open Questions -1. Should `content` without `@store` use a default store, or require explicit store? -2. Should we support `` without `@store` syntax (implying default store)? -3. How long should the backward compatibility layer support legacy `~external_*` format? +1. How long should the backward compatibility layer support legacy `~external_*` format? +2. Should `` (without store name) use a default store or require explicit store name? 
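+
+### Appendix: dtype resolution example (illustrative)
+
+To make the resolution rules above concrete, here is a minimal, self-contained sketch.
+It is **not** the actual `resolve_dtype`/`get_dtype` implementation; the dtype lookup table
+below is illustrative and simply mirrors the built-in codecs described in this document
+(`<blob>`, `<attach>`, `<hash>`, `<object>`, `<filepath>`).
+
+```python
+# Illustrative only: the "@" modifier selects external storage, dual-mode codecs
+# such as <blob> and <attach> chain to <hash> when external, and external codecs
+# store JSON metadata in the database column.
+ILLUSTRATIVE_DTYPES = {
+    ("blob", False): "bytes",     # internal: serialized object in the database
+    ("blob", True): "<hash>",     # external: deduplicated, hash-addressed
+    ("attach", False): "bytes",
+    ("attach", True): "<hash>",
+    ("hash", True): "json",       # JSON metadata: {hash, store, size}
+    ("object", True): "json",
+    ("filepath", True): "json",
+}
+
+def resolve(spec: str) -> tuple[str, str | None]:
+    """Resolve a spec like '<blob@cold>' to (core dtype, store name)."""
+    store = None
+    while spec.startswith("<"):
+        name, _, store_part = spec.strip("<>").partition("@")
+        if "@" in spec:
+            store = store_part or ""  # "" means the default store
+        dtype = ILLUSTRATIVE_DTYPES[(name, "@" in spec)]
+        # Follow the chain, propagating the store: <blob@cold> -> <hash@cold> -> json
+        spec = f"<{dtype.strip('<>')}@{store}>" if dtype.startswith("<") else dtype
+    return spec, store
+
+print(resolve("<blob>"))       # ('bytes', None)  -> LONGBLOB column
+print(resolve("<blob@cold>"))  # ('json', 'cold') -> JSON metadata column
+print(resolve("<object@>"))    # ('json', '')     -> default store
+```
+
+The real resolver performs the same chain-following in `resolve_dtype`, except that each
+codec instance supplies its own `get_dtype(is_external)` instead of a static lookup table.
+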
diff --git a/pyproject.toml b/pyproject.toml index 82cad39e..154a4039 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,9 +84,9 @@ datajoint = "datajoint.cli:cli" test = [ "pytest", "pytest-cov", - "pytest-env", "requests", "graphviz", + "testcontainers[mysql,minio]>=4.0", ] [project.optional-dependencies] @@ -96,9 +96,9 @@ azure = ["adlfs>=2023.1.0"] test = [ "pytest", "pytest-cov", - "pytest-env", "requests", "s3fs>=2023.1.0", + "testcontainers[mysql,minio]>=4.0", ] dev = [ "pre-commit", @@ -158,20 +158,11 @@ skip = ".git,*.pdf,*.svg,*.csv,*.ipynb,*.drawio" # astroid -- Python library name (not "asteroid") ignore-words-list = "rever,numer,astroid" -[tool.pytest_env] -# Default environment variables for tests (D: prefix = only set if not defined) -# These defaults work for local development with `docker compose up -d db minio` -# For devcontainer/docker: override DJ_HOST=db and S3_ENDPOINT=minio:9000 -"D:DJ_HOST" = "localhost" -"D:DJ_PORT" = "3306" -"D:DJ_USER" = "root" -"D:DJ_PASS" = "password" -"D:DJ_TEST_USER" = "datajoint" -"D:DJ_TEST_PASSWORD" = "datajoint" -"D:S3_ENDPOINT" = "localhost:9000" -"D:S3_ACCESS_KEY" = "datajoint" -"D:S3_SECRET_KEY" = "datajoint" -"D:S3_BUCKET" = "datajoint.test" +[tool.pytest.ini_options] +markers = [ + "requires_mysql: marks tests as requiring MySQL database (deselect with '-m \"not requires_mysql\"')", + "requires_minio: marks tests as requiring MinIO object storage (deselect with '-m \"not requires_minio\"')", +] [tool.pixi.workspace] @@ -187,12 +178,13 @@ dev = { features = ["dev"], solve-group = "default" } test = { features = ["test"], solve-group = "default" } [tool.pixi.tasks] -# Start required services (MySQL and MinIO) +# Tests use testcontainers - no manual setup required +test = "pytest tests/" +test-cov = "pytest --cov-report term-missing --cov=datajoint tests/" +# Optional: use external containers (docker-compose) instead of testcontainers services-up = "docker compose up -d db minio" services-down = "docker compose down" -# Run tests (requires services to be running, uses localhost defaults from pytest_env) -test = { cmd = "pytest tests/", depends-on = ["services-up"] } -test-cov = { cmd = "pytest --cov-report term-missing --cov=datajoint tests/", depends-on = ["services-up"] } +test-external = { cmd = "DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/", depends-on = ["services-up"] } [tool.pixi.dependencies] python = ">=3.10,<3.14" diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index a19aae6d..684ffd08 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -45,9 +45,10 @@ "kill", "MatCell", "MatStruct", - "AttributeType", - "register_type", - "list_types", + # Codec API + "Codec", + "list_codecs", + "get_codec", "errors", "migrate", "DataJointError", @@ -61,7 +62,11 @@ from . import errors from . import migrate from .admin import kill -from .attribute_type import AttributeType, list_types, register_type +from .codecs import ( + Codec, + get_codec, + list_codecs, +) from .blob import MatCell, MatStruct from .cli import cli from .connection import Connection, conn diff --git a/src/datajoint/attribute_type.py b/src/datajoint/attribute_type.py deleted file mode 100644 index 37fae88c..00000000 --- a/src/datajoint/attribute_type.py +++ /dev/null @@ -1,497 +0,0 @@ -""" -Custom attribute type system for DataJoint. - -This module provides the AttributeType base class and registration mechanism -for creating custom data types that extend DataJoint's native type system. 
- -Custom types enable seamless integration of complex Python objects (like NumPy arrays, -graphs, or domain-specific structures) with DataJoint's relational storage. - -Example: - @dj.register_type - class GraphType(dj.AttributeType): - type_name = "graph" - dtype = "longblob" - - def encode(self, graph: nx.Graph) -> list: - return list(graph.edges) - - def decode(self, edges: list) -> nx.Graph: - return nx.Graph(edges) - - # Then use in table definitions: - class MyTable(dj.Manual): - definition = ''' - id : int - --- - data : - ''' -""" - -from __future__ import annotations - -import logging -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any - -from .errors import DataJointError - -if TYPE_CHECKING: - pass - -logger = logging.getLogger(__name__.split(".")[0]) - -# Global type registry - maps type_name to AttributeType instance -_type_registry: dict[str, AttributeType] = {} -_entry_points_loaded: bool = False - - -class AttributeType(ABC): - """ - Base class for custom DataJoint attribute types. - - Subclass this to create custom types that can be used in table definitions - with the ```` syntax. Custom types define bidirectional conversion - between Python objects and DataJoint's storage format. - - Attributes: - type_name: Unique identifier used in ```` syntax - dtype: Underlying DataJoint storage type - - Example: - @dj.register_type - class GraphType(dj.AttributeType): - type_name = "graph" - dtype = "longblob" - - def encode(self, graph): - return list(graph.edges) - - def decode(self, edges): - import networkx as nx - return nx.Graph(edges) - - The type can then be used in table definitions:: - - class Connectivity(dj.Manual): - definition = ''' - id : int - --- - graph_data : - ''' - """ - - @property - @abstractmethod - def type_name(self) -> str: - """ - Unique identifier for this type, used in table definitions as ````. - - This name must be unique across all registered types. It should be lowercase - with underscores (e.g., "graph", "zarr_array", "compressed_image"). - - Returns: - The type name string without angle brackets. - """ - ... - - @property - @abstractmethod - def dtype(self) -> str: - """ - The underlying DataJoint type used for storage. - - Can be: - - A native type: ``"longblob"``, ``"blob"``, ``"varchar(255)"``, ``"int"``, ``"json"`` - - An external type: ``"blob@store"``, ``"attach@store"`` - - The object type: ``"object"`` - - Another custom type: ``""`` (enables type chaining) - - Returns: - The storage type specification string. - """ - ... - - @abstractmethod - def encode(self, value: Any, *, key: dict | None = None) -> Any: - """ - Convert a Python object to the storable format. - - Called during INSERT operations to transform user-provided objects - into a format suitable for storage in the underlying ``dtype``. - - Args: - value: The Python object to store. - key: Primary key values as a dict. Available when the dtype uses - object storage and may be needed for path construction. - - Returns: - Value in the format expected by ``dtype``. For example: - - For ``dtype="longblob"``: any picklable Python object - - For ``dtype="object"``: path string or file-like object - - For ``dtype="varchar(N)"``: string - """ - ... - - @abstractmethod - def decode(self, stored: Any, *, key: dict | None = None) -> Any: - """ - Convert stored data back to a Python object. - - Called during FETCH operations to reconstruct the original Python - object from the stored format. - - Args: - stored: Data retrieved from storage. 
Type depends on ``dtype``: - - For ``"object"``: an ``ObjectRef`` handle - - For blob types: the unpacked Python object - - For native types: the native Python value (str, int, etc.) - key: Primary key values as a dict. - - Returns: - The reconstructed Python object. - """ - ... - - def validate(self, value: Any) -> None: - """ - Validate a value before encoding. - - Override this method to add type checking or domain constraints. - Called automatically before ``encode()`` during INSERT operations. - The default implementation accepts any value. - - Args: - value: The value to validate. - - Raises: - TypeError: If the value has an incompatible type. - ValueError: If the value fails domain validation. - """ - pass - - def default(self) -> Any: - """ - Return a default value for this type. - - Override if the type has a sensible default value. The default - implementation raises NotImplementedError, indicating no default exists. - - Returns: - The default value for this type. - - Raises: - NotImplementedError: If no default exists (the default behavior). - """ - raise NotImplementedError(f"No default value for type <{self.type_name}>") - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}(type_name={self.type_name!r}, dtype={self.dtype!r})>" - - -def register_type(cls: type[AttributeType]) -> type[AttributeType]: - """ - Register a custom attribute type with DataJoint. - - Can be used as a decorator or called directly. The type becomes available - for use in table definitions with the ```` syntax. - - Args: - cls: An AttributeType subclass to register. - - Returns: - The same class, unmodified (allows use as decorator). - - Raises: - DataJointError: If a type with the same name is already registered - by a different class. - TypeError: If cls is not an AttributeType subclass. - - Example: - As a decorator:: - - @dj.register_type - class GraphType(dj.AttributeType): - type_name = "graph" - ... - - Or called directly:: - - dj.register_type(GraphType) - """ - if not isinstance(cls, type) or not issubclass(cls, AttributeType): - raise TypeError(f"register_type requires an AttributeType subclass, got {cls!r}") - - instance = cls() - name = instance.type_name - - if not isinstance(name, str) or not name: - raise DataJointError(f"type_name must be a non-empty string, got {name!r}") - - if name in _type_registry: - existing = _type_registry[name] - if type(existing) is not cls: - raise DataJointError( - f"Type <{name}> is already registered by " f"{type(existing).__module__}.{type(existing).__name__}" - ) - # Same class registered twice - idempotent, no error - return cls - - _type_registry[name] = instance - logger.debug(f"Registered attribute type <{name}> from {cls.__module__}.{cls.__name__}") - return cls - - -def parse_type_spec(spec: str) -> tuple[str, str | None]: - """ - Parse a type specification into type name and optional store parameter. - - Handles formats like: - - "" -> ("xblob", None) - - "" -> ("xblob", "cold") - - "xblob@cold" -> ("xblob", "cold") - - "xblob" -> ("xblob", None) - - Args: - spec: Type specification string, with or without angle brackets. - - Returns: - Tuple of (type_name, store_name). store_name is None if not specified. - """ - # Strip angle brackets - spec = spec.strip("<>").strip() - - if "@" in spec: - type_name, store_name = spec.split("@", 1) - return type_name.strip(), store_name.strip() - - return spec, None - - -def unregister_type(name: str) -> None: - """ - Remove a type from the registry. - - Primarily useful for testing. 
Use with caution in production code. - - Args: - name: The type_name to unregister. - - Raises: - DataJointError: If the type is not registered. - """ - name = name.strip("<>") - if name not in _type_registry: - raise DataJointError(f"Type <{name}> is not registered") - del _type_registry[name] - - -def get_type(name: str) -> AttributeType: - """ - Retrieve a registered attribute type by name. - - Looks up the type in the explicit registry first, then attempts - to load from installed packages via entry points. - - Args: - name: The type name, with or without angle brackets. - Store parameters (e.g., "") are stripped. - - Returns: - The registered AttributeType instance. - - Raises: - DataJointError: If the type is not found. - """ - # Strip angle brackets and store parameter - type_name, _ = parse_type_spec(name) - - # Check explicit registry first - if type_name in _type_registry: - return _type_registry[type_name] - - # Lazy-load entry points - _load_entry_points() - - if type_name in _type_registry: - return _type_registry[type_name] - - raise DataJointError( - f"Unknown attribute type: <{type_name}>. " - f"Ensure the type is registered via @dj.register_type or installed as a package." - ) - - -def list_types() -> list[str]: - """ - List all registered type names. - - Returns: - Sorted list of registered type names. - """ - _load_entry_points() - return sorted(_type_registry.keys()) - - -def is_type_registered(name: str) -> bool: - """ - Check if a type name is registered. - - Args: - name: The type name to check (store parameters are ignored). - - Returns: - True if the type is registered. - """ - type_name, _ = parse_type_spec(name) - if type_name in _type_registry: - return True - _load_entry_points() - return type_name in _type_registry - - -def _load_entry_points() -> None: - """ - Load attribute types from installed packages via entry points. - - Types are discovered from the ``datajoint.types`` entry point group. - Packages declare types in pyproject.toml:: - - [project.entry-points."datajoint.types"] - zarr_array = "dj_zarr:ZarrArrayType" - - This function is idempotent - entry points are only loaded once. - """ - global _entry_points_loaded - if _entry_points_loaded: - return - - _entry_points_loaded = True - - try: - from importlib.metadata import entry_points - except ImportError: - # Python < 3.10 fallback - try: - from importlib_metadata import entry_points - except ImportError: - logger.debug("importlib.metadata not available, skipping entry point discovery") - return - - try: - # Python 3.10+ / importlib_metadata 3.6+ - eps = entry_points(group="datajoint.types") - except TypeError: - # Older API - eps = entry_points().get("datajoint.types", []) - - for ep in eps: - if ep.name in _type_registry: - # Already registered explicitly, skip entry point - continue - try: - type_class = ep.load() - register_type(type_class) - logger.debug(f"Loaded attribute type <{ep.name}> from entry point {ep.value}") - except Exception as e: - logger.warning(f"Failed to load attribute type '{ep.name}' from {ep.value}: {e}") - - -def resolve_dtype( - dtype: str, seen: set[str] | None = None, store_name: str | None = None -) -> tuple[str, list[AttributeType], str | None]: - """ - Resolve a dtype string, following type chains. - - If dtype references another custom type (e.g., ""), recursively - resolves to find the ultimate storage type. Store parameters are propagated - through the chain. - - Args: - dtype: The dtype string to resolve (e.g., "", "", "longblob"). 
- seen: Set of already-seen type names (for cycle detection). - store_name: Store name from outer type specification (propagated inward). - - Returns: - Tuple of (final_storage_type, list_of_types_in_chain, resolved_store_name). - The chain is ordered from outermost to innermost type. - - Raises: - DataJointError: If a circular type reference is detected. - - Examples: - >>> resolve_dtype("") - ("json", [XBlobType, ContentType], None) - - >>> resolve_dtype("") - ("json", [XBlobType, ContentType], "cold") - - >>> resolve_dtype("longblob") - ("longblob", [], None) - """ - if seen is None: - seen = set() - - chain: list[AttributeType] = [] - - # Check if dtype is a custom type reference - if dtype.startswith("<") and dtype.endswith(">"): - type_name, dtype_store = parse_type_spec(dtype) - - # Store from this level overrides inherited store - effective_store = dtype_store if dtype_store is not None else store_name - - if type_name in seen: - raise DataJointError(f"Circular type reference detected: <{type_name}>") - - seen.add(type_name) - attr_type = get_type(type_name) - chain.append(attr_type) - - # Recursively resolve the inner dtype, propagating store - inner_dtype, inner_chain, resolved_store = resolve_dtype(attr_type.dtype, seen, effective_store) - chain.extend(inner_chain) - return inner_dtype, chain, resolved_store - - # Not a custom type - check if it has a store suffix (e.g., "blob@store") - if "@" in dtype: - base_type, dtype_store = dtype.split("@", 1) - effective_store = dtype_store if dtype_store else store_name - return base_type, chain, effective_store - - # Plain type - return as-is with propagated store - return dtype, chain, store_name - - -def get_adapter(context: dict | None, adapter_name: str) -> tuple[AttributeType, str | None]: - """ - Get an attribute type by name. - - This is a compatibility function used by heading and declare modules. - - Args: - context: Ignored (legacy parameter, kept for API compatibility). - adapter_name: The type name, with or without angle brackets. - May include store parameter (e.g., ""). - - Returns: - Tuple of (AttributeType instance, store_name or None). - - Raises: - DataJointError: If the type is not found. - """ - type_name, store_name = parse_type_spec(adapter_name) - - if is_type_registered(type_name): - return get_type(type_name), store_name - - raise DataJointError(f"Attribute type <{type_name}> is not registered. " "Use @dj.register_type to register custom types.") - - -# ============================================================================= -# Auto-register built-in types -# ============================================================================= - -# Import builtin_types module to register built-in types (DJBlobType, ContentType, etc.) -# This import has a side effect: it registers the types via @register_type decorators -from . import builtin_types as _builtin_types # noqa: F401, E402 diff --git a/src/datajoint/builtin_types.py b/src/datajoint/builtin_codecs.py similarity index 64% rename from src/datajoint/builtin_types.py rename to src/datajoint/builtin_codecs.py index 3c1654a6..a55494e8 100644 --- a/src/datajoint/builtin_types.py +++ b/src/datajoint/builtin_codecs.py @@ -1,31 +1,30 @@ """ -Built-in DataJoint attribute types. +Built-in DataJoint codecs. -This module defines the standard AttributeTypes that ship with DataJoint. -These serve as both useful built-in types and as examples for users who -want to create their own custom types. +This module defines the standard codecs that ship with DataJoint. 
+These serve as both useful built-in codecs and as examples for users who +want to create their own custom codecs. -Built-in Types: - - ````: Serialize Python objects to DataJoint's blob format (internal storage) - - ````: Content-addressed storage with SHA256 deduplication - - ````: External serialized blobs using content-addressed storage +Built-in Codecs: + - ````: Serialize Python objects (internal) or external with dedup + - ````: Hash-addressed storage with MD5 deduplication - ````: Path-addressed storage for files/folders (Zarr, HDF5) - - ````: Internal file attachment stored in database - - ````: External file attachment with deduplication + - ````: File attachment (internal) or external with dedup - ````: Reference to existing file in store -Example - Creating a Custom Type: - Here's how to define your own AttributeType, modeled after the built-in types:: +Example - Creating a Custom Codec: + Here's how to define your own codec, modeled after the built-in codecs:: import datajoint as dj import networkx as nx - @dj.register_type - class GraphType(dj.AttributeType): + class GraphCodec(dj.Codec): '''Store NetworkX graphs as edge lists.''' - type_name = "graph" # Use as in definitions - dtype = "" # Compose with djblob for serialization + name = "graph" # Use as in definitions + + def get_dtype(self, is_external: bool) -> str: + return "" # Compose with blob for serialization def encode(self, graph, *, key=None, store_name=None): # Convert graph to a serializable format @@ -59,22 +58,26 @@ class Networks(dj.Manual): from typing import Any -from .attribute_type import AttributeType, register_type +from .codecs import Codec +from .errors import DataJointError # ============================================================================= -# DJBlob Types - DataJoint's native serialization +# Blob Codec - DataJoint's native serialization # ============================================================================= -@register_type -class DJBlobType(AttributeType): +class BlobCodec(Codec): """ Serialize Python objects using DataJoint's blob format. - The ```` type handles serialization of arbitrary Python objects + The ```` codec handles serialization of arbitrary Python objects including NumPy arrays, dictionaries, lists, datetime objects, and UUIDs. - Data is stored in a MySQL ``LONGBLOB`` column. + + Supports both internal and external storage: + - ````: Stored in database (bytes → LONGBLOB) + - ````: Stored externally via ```` with deduplication + - ````: Stored in specific named store Format Features: - Protocol headers (``mYm`` for MATLAB-compatible, ``dj0`` for Python-native) @@ -88,19 +91,20 @@ class ProcessedData(dj.Manual): definition = ''' data_id : int --- - results : # Serialized Python objects + small_result : # internal (in database) + large_result : # external (default store) + archive : # external (specific store) ''' # Insert any serializable object - table.insert1({'data_id': 1, 'results': {'scores': [0.9, 0.8], 'labels': ['a', 'b']}}) - - Note: - Plain ``longblob`` columns store raw bytes without serialization. - Use ```` when you need automatic serialization. 
+ table.insert1({'data_id': 1, 'small_result': {'scores': [0.9, 0.8]}}) """ - type_name = "djblob" - dtype = "longblob" + name = "blob" + + def get_dtype(self, is_external: bool) -> str: + """Return bytes for internal, for external storage.""" + return "" if is_external else "bytes" def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes: """Serialize a Python object to DataJoint's blob format.""" @@ -116,22 +120,23 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> Any: # ============================================================================= -# Content-Addressed Storage Types +# Hash-Addressed Storage Codec # ============================================================================= -@register_type -class ContentType(AttributeType): +class HashCodec(Codec): """ - Content-addressed storage with SHA256 deduplication. + Hash-addressed storage with MD5 deduplication. - The ```` type stores raw bytes using content-addressed storage. - Data is identified by its SHA256 hash and stored in a hierarchical directory: - ``_content/{hash[:2]}/{hash[2:4]}/{hash}`` + The ```` codec stores raw bytes using content-addressed storage. + Data is identified by its MD5 hash and stored in a hierarchical directory: + ``_hash/{hash[:2]}/{hash[2:4]}/{hash}`` The database column stores JSON metadata: ``{hash, store, size}``. Duplicate content is automatically deduplicated. + External only - requires @ modifier. + Example:: @schema @@ -139,20 +144,24 @@ class RawContent(dj.Manual): definition = ''' content_id : int --- - data : + data : ''' # Insert raw bytes table.insert1({'content_id': 1, 'data': b'raw binary content'}) Note: - This type accepts only ``bytes``. For Python objects, use ````. - A store must be specified (e.g., ````) unless a default - store is configured. + This codec accepts only ``bytes``. For Python objects, use ````. + Typically used indirectly via ```` or ```` rather than directly. """ - type_name = "content" - dtype = "json" + name = "hash" + + def get_dtype(self, is_external: bool) -> str: + """Hash storage is external only.""" + if not is_external: + raise DataJointError(" requires @ (external storage only)") + return "json" def encode(self, value: bytes, *, key: dict | None = None, store_name: str | None = None) -> dict: """ @@ -188,75 +197,22 @@ def decode(self, stored: dict, *, key: dict | None = None) -> bytes: def validate(self, value: Any) -> None: """Validate that value is bytes.""" if not isinstance(value, bytes): - raise TypeError(f" expects bytes, got {type(value).__name__}") - - -@register_type -class XBlobType(AttributeType): - """ - External serialized blobs with content-addressed storage. - - The ```` type combines DataJoint's blob serialization with - content-addressed storage. Objects are serialized, then stored externally - with automatic deduplication. - - This is ideal for large objects (NumPy arrays, DataFrames) that may be - duplicated across rows. 
- - Example:: - - @schema - class LargeArrays(dj.Manual): - definition = ''' - array_id : int - --- - data : - ''' - - import numpy as np - table.insert1({'array_id': 1, 'data': np.random.rand(1000, 1000)}) - - Type Composition: - ```` composes with ````:: - - Insert: object → blob.pack() → put_content() → JSON metadata - Fetch: JSON → get_content() → blob.unpack() → object - - Note: - - For internal storage, use ```` - - For raw bytes without serialization, use ```` - """ - - type_name = "xblob" - dtype = "" # Composition: uses ContentType - - def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes: - """Serialize object to bytes (passed to ContentType).""" - from . import blob - - return blob.pack(value, compress=True) - - def decode(self, stored: bytes, *, key: dict | None = None) -> Any: - """Deserialize bytes back to Python object.""" - from . import blob - - return blob.unpack(stored, squeeze=False) + raise TypeError(f" expects bytes, got {type(value).__name__}") # ============================================================================= -# Path-Addressed Storage Types (OAS - Object-Augmented Schema) +# Path-Addressed Storage Codec (OAS - Object-Augmented Schema) # ============================================================================= -@register_type -class ObjectType(AttributeType): +class ObjectCodec(Codec): """ Path-addressed storage for files and folders. - The ```` type provides managed file/folder storage where the path - is derived from the primary key: ``{schema}/{table}/objects/{pk}/{field}_{token}.{ext}`` + The ```` codec provides managed file/folder storage where the path + is derived from the primary key: ``{schema}/{table}/{pk}/{field}/`` - Unlike ```` (content-addressed), each row has its own storage path, + Unlike ```` (hash-addressed), each row has its own storage path, and content is deleted when the row is deleted. This is ideal for: - Zarr arrays (hierarchical chunked data) @@ -264,6 +220,8 @@ class ObjectType(AttributeType): - Complex multi-file outputs - Any content that shouldn't be deduplicated + External only - requires @ modifier. + Example:: @schema @@ -287,26 +245,25 @@ def make(self, key): Storage Structure: Objects are stored at:: - {store_root}/{schema}/{table}/objects/{pk}/{field}_{token}.ext - - The token ensures uniqueness even if content is replaced. + {store_root}/{schema}/{table}/{pk}/{field}/ - Comparison with ````:: + Comparison with ````:: - | Aspect | | | + | Aspect | | | |----------------|-------------------|---------------------| | Addressing | Path (by PK) | Hash (by content) | | Deduplication | No | Yes | | Deletion | With row | GC when unreferenced| | Use case | Zarr, HDF5 | Blobs, attachments | - - Note: - A store must be specified (````) unless a default store - is configured. Returns ``ObjectRef`` on fetch for lazy access. 
""" - type_name = "object" - dtype = "json" + name = "object" + + def get_dtype(self, is_external: bool) -> str: + """Object storage is external only.""" + if not is_external: + raise DataJointError(" requires @ (external storage only)") + return "json" def encode( self, @@ -355,7 +312,6 @@ def encode( ext = None size = None item_count = None - source_path = None if isinstance(value, bytes): content = value @@ -371,8 +327,6 @@ def encode( elif isinstance(value, (str, Path)): source_path = Path(value) if not source_path.exists(): - from .errors import DataJointError - raise DataJointError(f"Source path not found: {source_path}") is_dir = source_path.is_dir() ext = source_path.suffix if not is_dir else None @@ -434,8 +388,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any: Returns: ObjectRef for accessing the stored content. """ - from .content_registry import get_store_backend from .objectref import ObjectRef + from .content_registry import get_store_backend store_name = stored.get("store") backend = get_store_backend(store_name) @@ -460,16 +414,19 @@ def validate(self, value: Any) -> None: # ============================================================================= -# File Attachment Types +# File Attachment Codecs # ============================================================================= -@register_type -class AttachType(AttributeType): +class AttachCodec(Codec): """ - Internal file attachment stored in database. + File attachment with filename preserved. + + Supports both internal and external storage: + - ````: Stored in database (bytes → LONGBLOB) + - ````: Stored externally via ```` with deduplication + - ````: Stored in specific named store - The ```` type stores a file directly in the database as a ``LONGBLOB``. The filename is preserved and the file is extracted to the configured download path on fetch. @@ -480,26 +437,27 @@ class Documents(dj.Manual): definition = ''' doc_id : int --- - report : + config : # internal (small file in DB) + dataset : # external (default store) + archive : # external (specific store) ''' # Insert a file - table.insert1({'doc_id': 1, 'report': '/path/to/report.pdf'}) + table.insert1({'doc_id': 1, 'config': '/path/to/config.json'}) # Fetch extracts to download_path and returns local path - local_path = (table & 'doc_id=1').fetch1('report') + local_path = (table & 'doc_id=1').fetch1('config') - Storage Format: + Storage Format (internal): The blob contains: ``filename\\0contents`` - Filename (UTF-8 encoded) + null byte + raw file contents - - Note: - - For large files, use ```` (external storage with deduplication) - - For files that shouldn't be copied, use ```` """ - type_name = "attach" - dtype = "longblob" + name = "attach" + + def get_dtype(self, is_external: bool) -> str: + """Return bytes for internal, for external storage.""" + return "" if is_external else "bytes" def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes: """ @@ -576,138 +534,21 @@ def validate(self, value: Any) -> None: raise TypeError(f" expects a file path, got {type(value).__name__}") -@register_type -class XAttachType(AttributeType): - """ - External file attachment with content-addressed storage. - - The ```` type stores files externally using content-addressed - storage. Like ````, the filename is preserved and the file is - extracted on fetch. Unlike ````, files are stored externally - with automatic deduplication. 
- - Example:: - - @schema - class LargeDocuments(dj.Manual): - definition = ''' - doc_id : int - --- - dataset : - ''' - - # Insert a large file - table.insert1({'doc_id': 1, 'dataset': '/path/to/large_file.h5'}) - - # Fetch downloads and returns local path - local_path = (table & 'doc_id=1').fetch1('dataset') - - Type Composition: - ```` composes with ````:: - - Insert: file → read + encode filename → put_content() → JSON - Fetch: JSON → get_content() → extract → local path - - Comparison:: - - | Type | Storage | Deduplication | Best for | - |------------|----------|---------------|---------------------| - | | Database | No | Small files (<16MB) | - | | External | Yes | Large files | - """ - - type_name = "xattach" - dtype = "" # Composition: uses ContentType - - def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes: - """ - Read file and encode as filename + contents. - - Args: - value: Path to file (str or Path). - key: Primary key values (unused). - store_name: Passed to ContentType for storage. - - Returns: - Bytes: filename (UTF-8) + null byte + file contents - """ - from pathlib import Path - - path = Path(value) - if not path.exists(): - raise FileNotFoundError(f"Attachment file not found: {path}") - if path.is_dir(): - raise IsADirectoryError(f" does not support directories: {path}") - - filename = path.name - contents = path.read_bytes() - return filename.encode("utf-8") + b"\x00" + contents - - def decode(self, stored: bytes, *, key: dict | None = None) -> str: - """ - Extract file to download path and return local path. - - Args: - stored: Bytes containing filename + null + contents. - key: Primary key values (unused). - - Returns: - Path to extracted file as string. - """ - from pathlib import Path - - from .settings import config - - # Split on first null byte - null_pos = stored.index(b"\x00") - filename = stored[:null_pos].decode("utf-8") - contents = stored[null_pos + 1 :] - - # Write to download path - download_path = Path(config.get("download_path", ".")) - download_path.mkdir(parents=True, exist_ok=True) - local_path = download_path / filename - - # Handle filename collision - if file exists with different content, add suffix - if local_path.exists(): - existing_contents = local_path.read_bytes() - if existing_contents != contents: - # Find unique filename - stem = local_path.stem - suffix = local_path.suffix - counter = 1 - while local_path.exists() and local_path.read_bytes() != contents: - local_path = download_path / f"{stem}_{counter}{suffix}" - counter += 1 - - # Only write if file doesn't exist or has different content - if not local_path.exists(): - local_path.write_bytes(contents) - - return str(local_path) - - def validate(self, value: Any) -> None: - """Validate that value is a valid file path.""" - from pathlib import Path - - if not isinstance(value, (str, Path)): - raise TypeError(f" expects a file path, got {type(value).__name__}") - - # ============================================================================= -# Filepath Reference Type +# Filepath Reference Codec # ============================================================================= -@register_type -class FilepathType(AttributeType): +class FilepathCodec(Codec): """ Reference to existing file in configured store. - The ```` type stores a reference to a file that already - exists in the storage backend. Unlike ```` or ````, no + The ```` codec stores a reference to a file that already + exists in the storage backend. 
Unlike ```` or ````, no file copying occurs - only the path is recorded. + External only - requires @store. + This is useful when: - Files are managed externally (e.g., by acquisition software) - Files are too large to copy @@ -739,8 +580,13 @@ class Recordings(dj.Manual): DataJoint does not manage the lifecycle of referenced files. """ - type_name = "filepath" - dtype = "json" + name = "filepath" + + def get_dtype(self, is_external: bool) -> str: + """Filepath is external only.""" + if not is_external: + raise DataJointError(" requires @store") + return "json" def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> dict: """ @@ -790,8 +636,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any: Returns: ObjectRef for accessing the file. """ - from .content_registry import get_store_backend from .objectref import ObjectRef + from .content_registry import get_store_backend store_name = stored.get("store") backend = get_store_backend(store_name) diff --git a/src/datajoint/codecs.py b/src/datajoint/codecs.py new file mode 100644 index 00000000..cc592bad --- /dev/null +++ b/src/datajoint/codecs.py @@ -0,0 +1,450 @@ +""" +Codec type system for DataJoint. + +This module provides the Codec base class for creating custom data types +that extend DataJoint's native type system. Codecs provide encode/decode +semantics for complex Python objects. + +Codecs auto-register when subclassed - no decorator needed (Python 3.10+). + +Example: + class GraphCodec(dj.Codec): + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return {'nodes': list(graph.nodes()), 'edges': list(graph.edges())} + + def decode(self, stored, *, key=None): + import networkx as nx + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G + + # Then use in table definitions: + class MyTable(dj.Manual): + definition = ''' + id : int + --- + data : + ''' +""" + +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import Any + +from .errors import DataJointError + +logger = logging.getLogger(__name__.split(".")[0]) + +# Global codec registry - maps name to Codec instance +_codec_registry: dict[str, Codec] = {} +_entry_points_loaded: bool = False + + +class Codec(ABC): + """ + Base class for codec types. Subclasses auto-register by name. + + Requires Python 3.10+. + + Attributes: + name: Unique identifier used in ```` syntax. Must be set by subclasses. + + Example: + class GraphCodec(dj.Codec): + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, graph, *, key=None, store_name=None): + return {'nodes': list(graph.nodes()), 'edges': list(graph.edges())} + + def decode(self, stored, *, key=None): + import networkx as nx + G = nx.Graph() + G.add_nodes_from(stored['nodes']) + G.add_edges_from(stored['edges']) + return G + + The codec can then be used in table definitions:: + + class Connectivity(dj.Manual): + definition = ''' + id : int + --- + graph_data : + ''' + + To skip auto-registration (for abstract base classes):: + + class ExternalOnlyCodec(dj.Codec, register=False): + '''Abstract base - not registered.''' + ... 
+ """ + + name: str | None = None # Must be set by concrete subclasses + + def __init_subclass__(cls, *, register: bool = True, **kwargs): + """Auto-register concrete codecs when subclassed.""" + super().__init_subclass__(**kwargs) + + if not register: + return # Skip registration for abstract bases + + if cls.name is None: + return # Skip registration if no name (abstract) + + if not isinstance(cls.name, str) or not cls.name: + raise DataJointError(f"Codec name must be a non-empty string, got {cls.name!r}") + + if cls.name in _codec_registry: + existing = _codec_registry[cls.name] + if type(existing) is not cls: + raise DataJointError( + f"Codec <{cls.name}> already registered by " f"{type(existing).__module__}.{type(existing).__name__}" + ) + return # Same class, idempotent + + _codec_registry[cls.name] = cls() + logger.debug(f"Registered codec <{cls.name}> from {cls.__module__}.{cls.__name__}") + + def get_dtype(self, is_external: bool) -> str: + """ + Return the storage dtype for this codec. + + Args: + is_external: True if @ modifier present (external storage) + + Returns: + A core type (e.g., "bytes", "json") or another codec (e.g., "") + + Raises: + NotImplementedError: If not overridden by subclass. + DataJointError: If external storage not supported but requested. + """ + raise NotImplementedError(f"Codec <{self.name}> must implement get_dtype()") + + @abstractmethod + def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> Any: + """ + Encode Python value for storage. + + Args: + value: The Python object to store. + key: Primary key values as a dict. May be needed for path construction. + store_name: Target store name for external storage. + + Returns: + Value in the format expected by the dtype. + """ + ... + + @abstractmethod + def decode(self, stored: Any, *, key: dict | None = None) -> Any: + """ + Decode stored value back to Python. + + Args: + stored: Data retrieved from storage. + key: Primary key values as a dict. + + Returns: + The reconstructed Python object. + """ + ... + + def validate(self, value: Any) -> None: + """ + Validate a value before encoding. + + Override this method to add type checking or domain constraints. + Called automatically before ``encode()`` during INSERT operations. + The default implementation accepts any value. + + Args: + value: The value to validate. + + Raises: + TypeError: If the value has an incompatible type. + ValueError: If the value fails domain validation. + """ + pass + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}(name={self.name!r})>" + + +def parse_type_spec(spec: str) -> tuple[str, str | None]: + """ + Parse a type specification into type name and optional store parameter. + + Handles formats like: + - "" -> ("blob", None) + - "" -> ("blob", "cold") + - "" -> ("blob", "") # default store + - "blob@cold" -> ("blob", "cold") + - "blob" -> ("blob", None) + + Args: + spec: Type specification string, with or without angle brackets. + + Returns: + Tuple of (type_name, store_name). store_name is None if not specified, + empty string if @ present without name (default store). + """ + # Strip angle brackets + spec = spec.strip("<>").strip() + + if "@" in spec: + type_name, store_name = spec.split("@", 1) + return type_name.strip(), store_name.strip() + + return spec, None + + +def unregister_codec(name: str) -> None: + """ + Remove a codec from the registry. + + Primarily useful for testing. Use with caution in production code. + + Args: + name: The codec name to unregister. 
+ + Raises: + DataJointError: If the codec is not registered. + """ + name = name.strip("<>") + if name not in _codec_registry: + raise DataJointError(f"Codec <{name}> is not registered") + del _codec_registry[name] + + +def get_codec(name: str) -> Codec: + """ + Retrieve a registered codec by name. + + Looks up the codec in the explicit registry first, then attempts + to load from installed packages via entry points. + + Args: + name: The codec name, with or without angle brackets. + Store parameters (e.g., "") are stripped. + + Returns: + The registered Codec instance. + + Raises: + DataJointError: If the codec is not found. + """ + # Strip angle brackets and store parameter + type_name, _ = parse_type_spec(name) + + # Check explicit registry first + if type_name in _codec_registry: + return _codec_registry[type_name] + + # Lazy-load entry points + _load_entry_points() + + if type_name in _codec_registry: + return _codec_registry[type_name] + + raise DataJointError( + f"Unknown codec: <{type_name}>. " f"Ensure the codec is defined (inherit from dj.Codec with name='{type_name}')." + ) + + +def list_codecs() -> list[str]: + """ + List all registered codec names. + + Returns: + Sorted list of registered codec names. + """ + _load_entry_points() + return sorted(_codec_registry.keys()) + + +def is_codec_registered(name: str) -> bool: + """ + Check if a codec name is registered. + + Args: + name: The codec name to check (store parameters are ignored). + + Returns: + True if the codec is registered. + """ + type_name, _ = parse_type_spec(name) + if type_name in _codec_registry: + return True + _load_entry_points() + return type_name in _codec_registry + + +def _load_entry_points() -> None: + """ + Load codecs from installed packages via entry points. + + Codecs are discovered from the ``datajoint.codecs`` entry point group + (also checks legacy ``datajoint.types`` for backward compatibility). + + Packages declare codecs in pyproject.toml:: + + [project.entry-points."datajoint.codecs"] + zarr_array = "dj_zarr:ZarrArrayCodec" + + This function is idempotent - entry points are only loaded once. + """ + global _entry_points_loaded + if _entry_points_loaded: + return + + _entry_points_loaded = True + + try: + from importlib.metadata import entry_points + except ImportError: + logger.debug("importlib.metadata not available, skipping entry point discovery") + return + + # Load from both new and legacy entry point groups + for group in ("datajoint.codecs", "datajoint.types"): + try: + eps = entry_points(group=group) + except TypeError: + # Older API fallback + eps = entry_points().get(group, []) + + for ep in eps: + if ep.name in _codec_registry: + # Already registered explicitly, skip entry point + continue + try: + codec_class = ep.load() + # The class should auto-register via __init_subclass__ + # But if it's an old-style class, manually register + if ep.name not in _codec_registry and hasattr(codec_class, "name"): + _codec_registry[ep.name] = codec_class() + logger.debug(f"Loaded codec <{ep.name}> from entry point {ep.value}") + except Exception as e: + logger.warning(f"Failed to load codec '{ep.name}' from {ep.value}: {e}") + + +def resolve_dtype( + dtype: str, seen: set[str] | None = None, store_name: str | None = None +) -> tuple[str, list[Codec], str | None]: + """ + Resolve a dtype string, following codec chains. + + If dtype references another codec (e.g., ""), recursively + resolves to find the ultimate storage type. Store parameters are propagated + through the chain. 
+ + Args: + dtype: The dtype string to resolve (e.g., "", "", "bytes"). + seen: Set of already-seen codec names (for cycle detection). + store_name: Store name from outer type specification (propagated inward). + + Returns: + Tuple of (final_storage_type, list_of_codecs_in_chain, resolved_store_name). + The chain is ordered from outermost to innermost codec. + + Raises: + DataJointError: If a circular type reference is detected. + + Examples: + >>> resolve_dtype("") + ("bytes", [BlobCodec], None) + + >>> resolve_dtype("") + ("", [BlobCodec], "cold") # BlobCodec.get_dtype(True) returns "" + + >>> resolve_dtype("bytes") + ("bytes", [], None) + """ + if seen is None: + seen = set() + + chain: list[Codec] = [] + + # Check if dtype is a codec reference + if dtype.startswith("<") and dtype.endswith(">"): + type_name, dtype_store = parse_type_spec(dtype) + + # Store from this level overrides inherited store + # Empty string means default store (@), None means no store specified + if dtype_store is not None: + effective_store = dtype_store + else: + effective_store = store_name + + if type_name in seen: + raise DataJointError(f"Circular codec reference detected: <{type_name}>") + + seen.add(type_name) + codec = get_codec(type_name) + chain.append(codec) + + # Determine if external based on whether @ is present + is_external = effective_store is not None + + # Get the inner dtype from the codec + inner_dtype = codec.get_dtype(is_external) + + # Recursively resolve the inner dtype, propagating store + final_dtype, inner_chain, resolved_store = resolve_dtype(inner_dtype, seen, effective_store) + chain.extend(inner_chain) + return final_dtype, chain, resolved_store + + # Not a codec - check if it has a store suffix (e.g., "blob@store") + if "@" in dtype: + base_type, dtype_store = dtype.split("@", 1) + effective_store = dtype_store if dtype_store else store_name + return base_type, chain, effective_store + + # Plain type - return as-is with propagated store + return dtype, chain, store_name + + +def lookup_codec(codec_spec: str) -> tuple[Codec, str | None]: + """ + Look up a codec from a type specification string. + + Parses a codec specification (e.g., "") and returns + the codec instance along with any store name. + + Args: + codec_spec: The codec specification, with or without angle brackets. + May include store parameter (e.g., ""). + + Returns: + Tuple of (Codec instance, store_name or None). + + Raises: + DataJointError: If the codec is not found. + """ + type_name, store_name = parse_type_spec(codec_spec) + + if is_codec_registered(type_name): + return get_codec(type_name), store_name + + raise DataJointError(f"Codec <{type_name}> is not registered. " "Define a Codec subclass with name='{type_name}'.") + + +# ============================================================================= +# Auto-register built-in codecs +# ============================================================================= + +# Import builtin_codecs module to register built-in codecs +# This import has a side effect: it registers the codecs via __init_subclass__ +from . import builtin_codecs as _builtin_codecs # noqa: F401, E402 diff --git a/src/datajoint/content_registry.py b/src/datajoint/content_registry.py index 652f35de..abed955a 100644 --- a/src/datajoint/content_registry.py +++ b/src/datajoint/content_registry.py @@ -1,9 +1,9 @@ """ Content-addressed storage registry for DataJoint. -This module provides content-addressed storage with deduplication for the -AttributeType. 
Content is identified by its SHA256 hash and stored in a hierarchical -directory structure: _content/{hash[:2]}/{hash[2:4]}/{hash} +This module provides content-addressed storage with deduplication for the +Codec. Content is identified by its MD5 hash and stored in a hierarchical +directory structure: _hash/{hash[:2]}/{hash[2:4]}/{hash} The ContentRegistry tracks stored content for garbage collection purposes. """ diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index 758c709e..8b6bfda8 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -9,7 +9,7 @@ import pyparsing as pp -from .attribute_type import get_adapter +from .codecs import lookup_codec from .condition import translate_attribute from .errors import DataJointError from .settings import config @@ -34,16 +34,20 @@ "uuid": (r"uuid$", "binary(16)"), # JSON "json": (r"json$", None), # json passes through as-is - # Binary (blob maps to longblob) - "blob": (r"blob$", "longblob"), + # Binary (bytes maps to longblob in MySQL, bytea in PostgreSQL) + "bytes": (r"bytes$", "longblob"), # Temporal "date": (r"date$", None), "datetime": (r"datetime$", None), # String types (with parameters) "char": (r"char\s*\(\d+\)$", None), "varchar": (r"varchar\s*\(\d+\)$", None), + # Unlimited text + "text": (r"text$", None), # Enumeration "enum": (r"enum\s*\(.+\)$", None), + # Fixed-point decimal + "decimal": (r"decimal\s*\(\d+\s*,\s*\d+\)$", None), } # Compile core type patterns @@ -66,14 +70,14 @@ **{name.upper(): pattern for name, (pattern, _) in CORE_TYPES.items()}, # Native SQL types (passthrough with warning for non-standard use) INTEGER=r"((tiny|small|medium|big|)int|integer)(\s*\(.+\))?(\s+unsigned)?(\s+auto_increment)?|serial$", - DECIMAL=r"(decimal|numeric)(\s*\(.+\))?(\s+unsigned)?$", + NUMERIC=r"numeric(\s*\(.+\))?(\s+unsigned)?$", # numeric is SQL alias, use decimal instead FLOAT=r"(double|float|real)(\s*\(.+\))?(\s+unsigned)?$", STRING=r"(var)?char\s*\(.+\)$", # Catches char/varchar not matched by core types TEMPORAL=r"(time|timestamp|year)(\s*\(.+\))?$", # time, timestamp, year (not date/datetime) NATIVE_BLOB=r"(tiny|small|medium|long)blob$", # Specific blob variants - TEXT=r"(tiny|small|medium|long)?text$", # Text types - # AttributeTypes use angle brackets - ADAPTED=r"<.+>$", + NATIVE_TEXT=r"(tiny|small|medium|long)text$", # Text variants (use plain 'text' instead) + # Codecs use angle brackets + CODEC=r"<.+>$", ).items() } @@ -81,7 +85,7 @@ CORE_TYPE_NAMES = {name.upper() for name in CORE_TYPES} # Special types that need comment storage (core types + adapted) -SPECIAL_TYPES = CORE_TYPE_NAMES | {"ADAPTED"} +SPECIAL_TYPES = CORE_TYPE_NAMES | {"CODEC"} # Native SQL types that pass through (with optional warning) NATIVE_TYPES = set(TYPE_PATTERN) - SPECIAL_TYPES @@ -100,23 +104,6 @@ def match_type(attribute_type): logger = logging.getLogger(__name__.split(".")[0]) -def build_foreign_key_parser_old(): - # old-style foreign key parser. Superseded by expression-based syntax. See issue #436 - # This will be deprecated in a future release. 
- left = pp.Literal("(").suppress() - right = pp.Literal(")").suppress() - attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")) - new_attrs = pp.Optional(left + pp.DelimitedList(attribute_name) + right).set_results_name("new_attrs") - arrow = pp.Literal("->").suppress() - lbracket = pp.Literal("[").suppress() - rbracket = pp.Literal("]").suppress() - option = pp.Word(pp.srange("[a-zA-Z]")) - options = pp.Optional(lbracket + pp.DelimitedList(option) + rbracket).set_results_name("options") - ref_table = pp.Word(pp.alphas, pp.alphanums + "._").set_results_name("ref_table") - ref_attrs = pp.Optional(left + pp.DelimitedList(attribute_name) + right).set_results_name("ref_attrs") - return new_attrs + arrow + options + ref_table + ref_attrs - - def build_foreign_key_parser(): arrow = pp.Literal("->").suppress() lbracket = pp.Literal("[").suppress() @@ -140,7 +127,6 @@ def build_attribute_parser(): return attribute_name + pp.Optional(default) + colon + data_type + comment -foreign_key_parser_old = build_foreign_key_parser_old() foreign_key_parser = build_foreign_key_parser() attribute_parser = build_attribute_parser() @@ -454,20 +440,30 @@ def substitute_special_type(match, category, foreign_key_sql, context): Substitute special types with their native SQL equivalents. Special types are: - - Core DataJoint types (float32 → float, uuid → binary(16), blob → longblob, etc.) - - ADAPTED types (AttributeTypes in angle brackets) + - Core DataJoint types (float32 → float, uuid → binary(16), bytes → longblob, etc.) + - CODEC types (Codecs in angle brackets) :param match: dict containing with keys "type" and "comment" -- will be modified in place :param category: attribute type category from TYPE_PATTERN :param foreign_key_sql: list of foreign key declarations to add to - :param context: context for looking up user-defined attribute_type adapters + :param context: context for looking up user-defined codecs (unused, kept for compatibility) """ - if category == "ADAPTED": - # AttributeType - resolve to underlying dtype - attr_type, store_name = get_adapter(context, match["type"]) + if category == "CODEC": + # Codec - resolve to underlying dtype + codec, store_name = lookup_codec(match["type"]) if store_name is not None: match["store"] = store_name - match["type"] = attr_type.dtype + # Determine if external storage is used (store_name is present, even if empty string for default) + is_external = store_name is not None + inner_dtype = codec.get_dtype(is_external=is_external) + + # If inner dtype is a codec without store, propagate the store from outer type + # e.g., returns , we need to resolve as + if inner_dtype.startswith("<") and "@" not in inner_dtype and match.get("store") is not None: + # Append store to the inner dtype + inner_dtype = inner_dtype[:-1] + "@" + match["store"] + ">" + + match["type"] = inner_dtype # Recursively resolve if dtype is also a special type category = match_type(match["type"]) if category in SPECIAL_TYPES: @@ -526,7 +522,7 @@ def compile_attribute(line, in_key, foreign_key_sql, context): category = match_type(match["type"]) if category in SPECIAL_TYPES: - # Core types and AttributeTypes are recorded in comment for reconstruction + # Core types and Codecs are recorded in comment for reconstruction match["comment"] = ":{type}:{comment}".format(**match) substitute_special_type(match, category, foreign_key_sql, context) elif category in NATIVE_TYPES: diff --git a/src/datajoint/errors.py b/src/datajoint/errors.py index 03555bf1..aadc74ca 100644 --- 
a/src/datajoint/errors.py +++ b/src/datajoint/errors.py @@ -2,8 +2,6 @@ Exception classes for the DataJoint library """ -import os - # --- Top Level --- class DataJointError(Exception): @@ -87,43 +85,3 @@ class BucketInaccessible(DataJointError): """ Error raised when a S3 bucket is inaccessible """ - - -# environment variables to control availability of experimental features - -ADAPTED_TYPE_SWITCH = "DJ_SUPPORT_ADAPTED_TYPES" -FILEPATH_FEATURE_SWITCH = "DJ_SUPPORT_FILEPATH_MANAGEMENT" - - -def _switch_adapted_types(on): - """ - Enable (on=True) or disable (on=False) support for AttributeAdapter - """ - if on: - os.environ[ADAPTED_TYPE_SWITCH] = "TRUE" - else: - del os.environ[ADAPTED_TYPE_SWITCH] - - -def _support_adapted_types(): - """ - check if support for AttributeAdapter is enabled - """ - return os.getenv(ADAPTED_TYPE_SWITCH, "FALSE").upper() == "TRUE" - - -def _switch_filepath_types(on): - """ - Enable (on=True) or disable (on=False) support for AttributeAdapter - """ - if on: - os.environ[FILEPATH_FEATURE_SWITCH] = "TRUE" - else: - del os.environ[FILEPATH_FEATURE_SWITCH] - - -def _support_filepath_types(): - """ - check if support for AttributeAdapter is enabled - """ - return os.getenv(FILEPATH_FEATURE_SWITCH, "FALSE").upper() == "TRUE" diff --git a/src/datajoint/fetch.py b/src/datajoint/fetch.py index bd97dfd1..575f3cbf 100644 --- a/src/datajoint/fetch.py +++ b/src/datajoint/fetch.py @@ -39,10 +39,10 @@ def _get(connection, attr, data, squeeze, download_path): - Native types pass through unchanged - JSON types are parsed - UUID types are converted from bytes - - Blob types return raw bytes (unless an adapter handles them) - - Adapters (AttributeTypes) handle all custom encoding/decoding via type chains + - Blob types return raw bytes (unless a codec handles them) + - Codecs handle all custom encoding/decoding via type chains - For composed types (e.g., using ), decoders are applied + For composed types (e.g., using ), decoders are applied in reverse order: innermost first, then outermost. 
:param connection: a dj.Connection object @@ -57,11 +57,17 @@ def _get(connection, attr, data, squeeze, download_path): if data is None: return None - # Get the final storage type and type chain if adapter present - if attr.adapter: - from .attribute_type import resolve_dtype + # Get the final storage type and type chain if codec present + if attr.codec: + from .codecs import resolve_dtype - final_dtype, type_chain, _ = resolve_dtype(f"<{attr.adapter.type_name}>") + # Include store if present to get correct chain for external storage + store = getattr(attr, "store", None) + if store is not None: + dtype_spec = f"<{attr.codec.name}@{store}>" + else: + dtype_spec = f"<{attr.codec.name}>" + final_dtype, type_chain, _ = resolve_dtype(dtype_spec) # First, process the final dtype (what's stored in the database) if final_dtype.lower() == "json": @@ -87,7 +93,7 @@ def _get(connection, attr, data, squeeze, download_path): return data - # No adapter - handle native types + # No codec - handle native types if attr.json: return json.loads(data) @@ -95,7 +101,7 @@ def _get(connection, attr, data, squeeze, download_path): return uuid_module.UUID(bytes=data) if attr.is_blob: - return data # raw bytes (use for automatic deserialization) + return data # raw bytes (use for automatic deserialization) # Native types - pass through unchanged return data diff --git a/src/datajoint/gc.py b/src/datajoint/gc.py index e0b7aaaf..db327f37 100644 --- a/src/datajoint/gc.py +++ b/src/datajoint/gc.py @@ -6,10 +6,10 @@ referencing it are deleted. Supports two storage patterns: -- Content-addressed storage: , , +- Content-addressed storage: , , Stored at: _content/{hash[:2]}/{hash[2:4]}/{hash} -- Path-addressed storage: +- Path-addressed storage: Stored at: {schema}/{table}/objects/{pk}/{field}_{token}/ Usage: @@ -41,10 +41,10 @@ def _uses_content_storage(attr) -> bool: """ Check if an attribute uses content-addressed storage. - This includes types that compose with : - - directly - - (composes with ) - - (composes with ) + This includes types that chain to for external storage: + - directly + - (chains to ) + - (chains to ) Args: attr: Attribute from table heading @@ -52,12 +52,22 @@ def _uses_content_storage(attr) -> bool: Returns: True if the attribute stores content hashes """ - if not attr.adapter: + if not attr.codec: return False - # Check if this type or its composition chain uses content storage - type_name = getattr(attr.adapter, "type_name", "") - return type_name in ("content", "xblob", "xattach") + # Check if this type uses content storage + codec_name = getattr(attr.codec, "name", "") + store = getattr(attr, "store", None) + + # always uses content storage (external only) + if codec_name == "hash": + return True + + # and use content storage when external (has store) + if codec_name in ("blob", "attach") and store is not None: + return True + + return False def _uses_object_storage(attr) -> bool: @@ -70,11 +80,11 @@ def _uses_object_storage(attr) -> bool: Returns: True if the attribute stores object paths """ - if not attr.adapter: + if not attr.codec: return False - type_name = getattr(attr.adapter, "type_name", "") - return type_name == "object" + codec_name = getattr(attr.codec, "name", "") + return codec_name == "object" def _extract_content_refs(value: Any) -> list[tuple[str, str | None]]: @@ -144,7 +154,7 @@ def scan_references( Scan schemas for content references. Examines all tables in the given schemas and extracts content hashes - from columns that use content-addressed storage (, , ). 
+ from columns that use content-addressed storage (, , ). Args: *schemas: Schema instances to scan @@ -384,7 +394,7 @@ def scan( """ Scan for orphaned content and objects without deleting. - Scans both content-addressed storage (for , , ) + Scans both content-addressed storage (for , , ) and path-addressed storage (for ). Args: @@ -542,7 +552,7 @@ def format_stats(stats: dict[str, Any]) -> str: # Show content-addressed storage stats if present if "content_referenced" in stats: lines.append("") - lines.append("Content-Addressed Storage (, , ):") + lines.append("Content-Addressed Storage (, , ):") lines.append(f" Referenced: {stats['content_referenced']}") lines.append(f" Stored: {stats['content_stored']}") lines.append(f" Orphaned: {stats['content_orphaned']}") diff --git a/src/datajoint/hash.py b/src/datajoint/hash.py index f58c6573..88a737fb 100644 --- a/src/datajoint/hash.py +++ b/src/datajoint/hash.py @@ -1,7 +1,5 @@ import hashlib -import io import uuid -from pathlib import Path def key_hash(mapping): @@ -16,24 +14,14 @@ def key_hash(mapping): return hashed.hexdigest() -def uuid_from_stream(stream, *, init_string=""): +def uuid_from_buffer(buffer=b"", *, init_string=""): """ - :return: 16-byte digest of stream data - :stream: stream object or open file handle - :init_string: string to initialize the checksum + Compute MD5 hash of buffer data, returned as UUID. + + :param buffer: bytes to hash + :param init_string: string to initialize the checksum (for namespacing) + :return: UUID based on MD5 digest """ hashed = hashlib.md5(init_string.encode()) - chunk = True - chunk_size = 1 << 14 - while chunk: - chunk = stream.read(chunk_size) - hashed.update(chunk) + hashed.update(buffer) return uuid.UUID(bytes=hashed.digest()) - - -def uuid_from_buffer(buffer=b"", *, init_string=""): - return uuid_from_stream(io.BytesIO(buffer), init_string=init_string) - - -def uuid_from_file(filepath, *, init_string=""): - return uuid_from_stream(Path(filepath).open("rb"), init_string=init_string) diff --git a/src/datajoint/heading.py b/src/datajoint/heading.py index 78b6af77..bc555224 100644 --- a/src/datajoint/heading.py +++ b/src/datajoint/heading.py @@ -5,8 +5,8 @@ import numpy as np -from .attribute_type import get_adapter -from .attribute_type import AttributeType +from .codecs import lookup_codec +from .codecs import Codec from .declare import ( CORE_TYPE_NAMES, SPECIAL_TYPES, @@ -15,33 +15,29 @@ from .errors import DataJointError -class _MissingType(AttributeType): - """Placeholder for missing/unregistered attribute types. Raises error on use.""" +class _MissingType(Codec, register=False): + """Placeholder for missing/unregistered codecs. Raises error on use.""" - def __init__(self, name: str): - self._name = name + def __init__(self, codec_name: str): + self._codec_name = codec_name @property - def type_name(self) -> str: - return self._name + def name(self) -> str: + return self._codec_name - @property - def dtype(self) -> str: + def get_dtype(self, is_external: bool) -> str: raise DataJointError( - f"Attribute type <{self._name}> is not registered. " - "Register it with @dj.register_type or include it in the schema context." + f"Codec <{self._codec_name}> is not registered. " f"Define a Codec subclass with name='{self._codec_name}'." ) - def encode(self, value, *, key=None): + def encode(self, value, *, key=None, store_name=None): raise DataJointError( - f"Attribute type <{self._name}> is not registered. " - "Register it with @dj.register_type or include it in the schema context." 
+ f"Codec <{self._codec_name}> is not registered. " f"Define a Codec subclass with name='{self._codec_name}'." ) def decode(self, stored, *, key=None): raise DataJointError( - f"Attribute type <{self._name}> is not registered. " - "Register it with @dj.register_type or include it in the schema context." + f"Codec <{self._codec_name}> is not registered. " f"Define a Codec subclass with name='{self._codec_name}'." ) @@ -62,7 +58,7 @@ def decode(self, stored, *, key=None): json=None, is_blob=False, is_hidden=False, - adapter=None, + codec=None, store=None, unsupported=False, attribute_expression=None, @@ -283,10 +279,10 @@ def _init_from_database(self): autoincrement=bool(re.search(r"auto_increment", attr["Extra"], flags=re.I)), numeric=any(TYPE_PATTERN[t].match(attr["type"]) for t in ("DECIMAL", "INTEGER", "FLOAT")), string=any(TYPE_PATTERN[t].match(attr["type"]) for t in ("ENUM", "TEMPORAL", "STRING")), - is_blob=any(TYPE_PATTERN[t].match(attr["type"]) for t in ("BLOB", "NATIVE_BLOB")), + is_blob=any(TYPE_PATTERN[t].match(attr["type"]) for t in ("BYTES", "NATIVE_BLOB")), uuid=False, json=bool(TYPE_PATTERN["JSON"].match(attr["type"])), - adapter=None, + codec=None, store=None, attribute_expression=None, is_hidden=attr["name"].startswith("_"), @@ -311,26 +307,26 @@ def _init_from_database(self): # Store the original type name for display but keep db_type for SQL attr["original_type"] = special["type"] - # process AttributeTypes (adapted types in angle brackets) - if special and TYPE_PATTERN["ADAPTED"].match(attr["type"]): + # process Codecs (types in angle brackets) + if special and TYPE_PATTERN["CODEC"].match(attr["type"]): # Context can be None for built-in types that are globally registered - adapter_name = special["type"] + codec_spec = special["type"] try: - adapter_result = get_adapter(context, adapter_name) - # get_adapter returns (adapter, store_name) tuple - if isinstance(adapter_result, tuple): - attr["adapter"], attr["store"] = adapter_result - else: - attr["adapter"] = adapter_result + codec_instance, codec_store = lookup_codec(codec_spec) + attr["codec"] = codec_instance + if codec_store is not None: + attr["store"] = codec_store except DataJointError: - # if no adapter, then delay the error until the first invocation - attr["adapter"] = _MissingType(adapter_name) + # if no codec, then delay the error until the first invocation + attr["codec"] = _MissingType(codec_spec) else: - attr["type"] = attr["adapter"].dtype + # Determine if external storage based on store presence + is_external = attr.get("store") is not None + attr["type"] = attr["codec"].get_dtype(is_external=is_external) if not any(r.match(attr["type"]) for r in TYPE_PATTERN.values()): - raise DataJointError(f"Invalid dtype '{attr['type']}' in attribute type <{adapter_name}>.") - # Update is_blob based on resolved dtype (check both BLOB and NATIVE_BLOB patterns) - attr["is_blob"] = any(TYPE_PATTERN[t].match(attr["type"]) for t in ("BLOB", "NATIVE_BLOB")) + raise DataJointError(f"Invalid dtype '{attr['type']}' in codec <{codec_spec}>.") + # Update is_blob based on resolved dtype (check both BYTES and NATIVE_BLOB patterns) + attr["is_blob"] = any(TYPE_PATTERN[t].match(attr["type"]) for t in ("BYTES", "NATIVE_BLOB")) # Handle core type aliases (uuid, float32, etc.) if special: @@ -365,7 +361,7 @@ def _init_from_database(self): # fill out dtype. 
All floats and non-nullable integers are turned into specific dtypes attr["dtype"] = object - if attr["numeric"] and not attr["adapter"]: + if attr["numeric"] and not attr["codec"]: is_integer = TYPE_PATTERN["INTEGER"].match(attr["type"]) is_float = TYPE_PATTERN["FLOAT"].match(attr["type"]) if is_integer and not attr["nullable"] or is_float: @@ -375,9 +371,9 @@ assert (t, is_unsigned) in numeric_types, "dtype not found for type %s" % t attr["dtype"] = numeric_types[(t, is_unsigned)] - if attr["adapter"]: - # restore adapted type name for display - attr["type"] = adapter_name + if attr["codec"]: + # restore codec type name for display + attr["type"] = codec_spec self._attributes = dict(((q["name"], Attribute(**q)) for q in attributes)) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 18bf5730..b542f936 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -26,9 +26,9 @@ def __init__(self, conn, database): key_hash :char(32) # key hash --- status :enum('reserved','error','ignore') # if tuple is missing, the job is available - key=null : <djblob> # structure containing the key + key=null : <blob> # structure containing the key error_message="" :varchar({error_message_length}) # error message returned if failed - error_stack=null : <djblob> # error stack if failed + error_stack=null : <blob> # error stack if failed user="" :varchar(255) # database user host="" :varchar(255) # system hostname pid=0 :int unsigned # system process id diff --git a/src/datajoint/migrate.py b/src/datajoint/migrate.py index 696ca380..1948cbe0 100644 --- a/src/datajoint/migrate.py +++ b/src/datajoint/migrate.py @@ -2,8 +2,8 @@ Migration utilities for DataJoint schema updates. This module provides tools for migrating existing schemas to use the new -AttributeType system, particularly for upgrading blob columns to use -explicit `<djblob>` type declarations. +Codec system, particularly for upgrading blob columns to use +explicit `<blob>` type declarations. from __future__ import annotations @@ -25,11 +25,11 @@ def analyze_blob_columns(schema: Schema) -> list[dict]: """ - Analyze a schema to find blob columns that could be migrated to <djblob>. + Analyze a schema to find blob columns that could be migrated to <blob>. This function identifies blob columns that: 1. Have a MySQL blob type (tinyblob, blob, mediumblob, longblob) - 2. Do NOT already have an adapter/type specified in their comment + 2. Do NOT already have a codec/type specified in their comment All blob size variants are included in the analysis. @@ -80,8 +80,8 @@ def analyze_blob_columns(schema: Schema) -> list[dict]: columns = connection.query(columns_query, args=(schema.database, table_name)).fetchall() for column_name, column_type, comment in columns: - # Check if comment already has an adapter type (starts with :type:) - has_adapter = comment and comment.startswith(":") + # Check if comment already has a codec type (starts with :type:) + has_codec = comment and comment.startswith(":") results.append( { @@ -89,7 +89,7 @@ "column_name": column_name, "column_type": column_type, "current_comment": comment or "", - "needs_migration": not has_adapter, + "needs_migration": not has_codec, } ) @@ -98,19 +98,19 @@ def generate_migration_sql( schema: Schema, - target_type: str = "djblob", + target_type: str = "blob", dry_run: bool = True, ) -> list[str]: """ - Generate SQL statements to migrate blob columns to use <djblob>. 
+ Generate SQL statements to migrate blob columns to use . This generates ALTER TABLE statements that update column comments to - include the `::` prefix, marking them as using explicit + include the `::` prefix, marking them as using explicit DataJoint blob serialization. Args: schema: The DataJoint schema to migrate. - target_type: The type name to migrate to (default: "djblob"). + target_type: The type name to migrate to (default: "blob"). dry_run: If True, only return SQL without executing. Returns: @@ -156,18 +156,18 @@ def generate_migration_sql( def migrate_blob_columns( schema: Schema, - target_type: str = "djblob", + target_type: str = "blob", dry_run: bool = True, ) -> dict: """ - Migrate blob columns in a schema to use explicit type. + Migrate blob columns in a schema to use explicit type. This updates column comments in the database to include the type declaration. The data format remains unchanged. Args: schema: The DataJoint schema to migrate. - target_type: The type name to migrate to (default: "djblob"). + target_type: The type name to migrate to (default: "blob"). dry_run: If True, only preview changes without applying. Returns: @@ -188,7 +188,7 @@ def migrate_blob_columns( Warning: After migration, table definitions should be updated to use - `` instead of `longblob` for consistency. The migration + `` instead of `longblob` for consistency. The migration only updates database metadata; source code changes are manual. """ columns = analyze_blob_columns(schema) diff --git a/src/datajoint/preview.py b/src/datajoint/preview.py index 7572125e..0ef096d2 100644 --- a/src/datajoint/preview.py +++ b/src/datajoint/preview.py @@ -27,7 +27,7 @@ def _format_object_display(json_data): def preview(query_expression, limit, width): heading = query_expression.heading rel = query_expression.proj(*heading.non_blobs) - # Object fields are AttributeTypes with adapters - not specially handled in simplified model + # Object fields use codecs - not specially handled in simplified model object_fields = [] if limit is None: limit = config["display.limit"] @@ -88,7 +88,7 @@ def get_display_value(tup, f, idx): def repr_html(query_expression): heading = query_expression.heading rel = query_expression.proj(*heading.non_blobs) - # Object fields are AttributeTypes with adapters - not specially handled in simplified model + # Object fields use codecs - not specially handled in simplified model object_fields = [] info = heading.table_status tuples = rel.fetch(limit=config["display.limit"] + 1, format="array") diff --git a/src/datajoint/staged_insert.py b/src/datajoint/staged_insert.py index dbf51c6b..8f9c94d2 100644 --- a/src/datajoint/staged_insert.py +++ b/src/datajoint/staged_insert.py @@ -98,8 +98,8 @@ def _get_storage_path(self, field: str, ext: str = "") -> str: raise DataJointError(f"Attribute '{field}' not found in table heading") attr = self._table.heading[field] - # Check if this is an object AttributeType (has adapter with "object" in type_name) - if not (attr.adapter and hasattr(attr.adapter, "type_name") and "object" in attr.adapter.type_name): + # Check if this is an object Codec (has codec with "object" as name) + if not (attr.codec and attr.codec.name == "object"): raise DataJointError(f"Attribute '{field}' is not an type") # Extract primary key from rec diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 1ce7e816..23648e1d 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -732,7 +732,7 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False, 
row=None): processing by mysql API. In the simplified type system: - - Adapters (AttributeTypes) handle all custom encoding via type chains + - Codecs handle all custom encoding via type chains - UUID values are converted to bytes - JSON values are serialized - Blob values pass through as bytes @@ -748,17 +748,17 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False, row=None): attr = self.heading[name] # Apply adapter encoding with type chain support - if attr.adapter: - from .attribute_type import resolve_dtype + if attr.codec: + from .codecs import resolve_dtype # Skip validation and encoding for None values (nullable columns) if value is None: return name, "DEFAULT", None - attr.adapter.validate(value) + attr.codec.validate(value) # Resolve full type chain - _, type_chain, resolved_store = resolve_dtype(f"<{attr.adapter.type_name}>", store_name=attr.store) + _, type_chain, resolved_store = resolve_dtype(f"<{attr.codec.name}>", store_name=attr.store) # Apply encoders from outermost to innermost for attr_type in type_chain: @@ -790,7 +790,7 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False, row=None): # Numeric - convert to string elif attr.numeric: value = str(int(value) if isinstance(value, bool) else value) - # Blob - pass through as bytes (use for automatic serialization) + # Blob - pass through as bytes (use for automatic serialization) return name, placeholder, value diff --git a/src/datajoint/version.py b/src/datajoint/version.py index 200fd9ba..4684015a 100644 --- a/src/datajoint/version.py +++ b/src/datajoint/version.py @@ -1,4 +1,4 @@ # version bump auto managed by Github Actions: # label_prs.yaml(prep), release.yaml(bump), post_release.yaml(edit) # manually set this version will be eventually overwritten by the above actions -__version__ = "2.0.0a5" +__version__ = "2.0.0a9" diff --git a/tests/conftest.py b/tests/conftest.py index d6440423..14b848d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,19 @@ """ Pytest configuration for DataJoint tests. -Expects MySQL and MinIO services to be running via docker-compose: - docker-compose up -d db minio - -Environment variables (with defaults from docker-compose.yaml): - DJ_HOST=db MySQL host - DJ_USER=root MySQL root user - DJ_PASS=password MySQL root password - S3_ENDPOINT=minio:9000 MinIO endpoint - S3_ACCESS_KEY=datajoint MinIO access key - S3_SECRET_KEY=datajoint MinIO secret key +Tests are organized by their dependencies: +- Unit tests: No external dependencies, run with `pytest -m "not requires_mysql"` +- Integration tests: Require MySQL/MinIO, marked with @pytest.mark.requires_mysql + +Containers are automatically started via testcontainers when needed. +Just run: pytest tests/ + +To use external containers instead (e.g., docker-compose), set: + DJ_USE_EXTERNAL_CONTAINERS=1 + DJ_HOST=localhost DJ_PORT=3306 S3_ENDPOINT=localhost:9000 pytest + +To run only unit tests (no Docker required): + pytest -m "not requires_mysql" """ import logging @@ -21,13 +24,9 @@ import certifi import pytest import urllib3 -from packaging import version import datajoint as dj -from datajoint.errors import ( - FILEPATH_FEATURE_SWITCH, - DataJointError, -) +from datajoint.errors import DataJointError from . import schema, schema_advanced, schema_external, schema_object, schema_simple from . 
import schema_uuid as schema_uuid_module @@ -36,7 +35,120 @@ logger = logging.getLogger(__name__) -# --- Database connection fixtures --- +# ============================================================================= +# Pytest Hooks +# ============================================================================= + + +def pytest_collection_modifyitems(config, items): + """Auto-mark integration tests based on their fixtures.""" + # Tests that use these fixtures require MySQL + mysql_fixtures = { + "connection_root", + "connection_root_bare", + "connection_test", + "schema_any", + "schema_any_fresh", + "schema_simp", + "schema_adv", + "schema_ext", + "schema_uuid", + "schema_type_aliases", + "schema_obj", + "db_creds_root", + "db_creds_test", + } + # Tests that use these fixtures require MinIO + minio_fixtures = { + "minio_client", + "s3fs_client", + "s3_creds", + "stores_config", + "mock_stores", + } + + for item in items: + # Get all fixtures this test uses (directly or indirectly) + try: + fixturenames = set(item.fixturenames) + except AttributeError: + continue + + # Auto-add marks based on fixture usage + if fixturenames & mysql_fixtures: + item.add_marker(pytest.mark.requires_mysql) + if fixturenames & minio_fixtures: + item.add_marker(pytest.mark.requires_minio) + + +# ============================================================================= +# Container Fixtures - Auto-start MySQL and MinIO via testcontainers +# ============================================================================= + +# Check if we should use external containers (for CI or manual docker-compose) +USE_EXTERNAL_CONTAINERS = os.environ.get("DJ_USE_EXTERNAL_CONTAINERS", "").lower() in ("1", "true", "yes") + + +@pytest.fixture(scope="session") +def mysql_container(): + """Start MySQL container for the test session (or use external).""" + if USE_EXTERNAL_CONTAINERS: + # Use external container - return None, credentials come from env + logger.info("Using external MySQL container") + yield None + return + + from testcontainers.mysql import MySqlContainer + + container = MySqlContainer( + image="mysql:8.0", + username="root", + password="password", + dbname="test", + ) + container.start() + + host = container.get_container_host_ip() + port = container.get_exposed_port(3306) + logger.info(f"MySQL container started at {host}:{port}") + + yield container + + container.stop() + logger.info("MySQL container stopped") + + +@pytest.fixture(scope="session") +def minio_container(): + """Start MinIO container for the test session (or use external).""" + if USE_EXTERNAL_CONTAINERS: + # Use external container - return None, credentials come from env + logger.info("Using external MinIO container") + yield None + return + + from testcontainers.minio import MinioContainer + + container = MinioContainer( + image="minio/minio:latest", + access_key="datajoint", + secret_key="datajoint", + ) + container.start() + + host = container.get_container_host_ip() + port = container.get_exposed_port(9000) + logger.info(f"MinIO container started at {host}:{port}") + + yield container + + container.stop() + logger.info("MinIO container stopped") + + +# ============================================================================= +# Credential Fixtures - Derived from containers or environment +# ============================================================================= @pytest.fixture(scope="session") @@ -45,45 +157,92 @@ def prefix(): @pytest.fixture(scope="session") -def db_creds_root() -> Dict: - """Root database credentials from 
environment.""" - host = os.environ.get("DJ_HOST", "db") - port = os.environ.get("DJ_PORT", "3306") - return dict( - host=f"{host}:{port}" if port else host, - user=os.environ.get("DJ_USER", "root"), - password=os.environ.get("DJ_PASS", "password"), - ) +def db_creds_root(mysql_container) -> Dict: + """Root database credentials from container or environment.""" + if mysql_container is not None: + # From testcontainer + host = mysql_container.get_container_host_ip() + port = mysql_container.get_exposed_port(3306) + return dict( + host=f"{host}:{port}", + user="root", + password="password", + ) + else: + # From environment (external container) + host = os.environ.get("DJ_HOST", "localhost") + port = os.environ.get("DJ_PORT", "3306") + return dict( + host=f"{host}:{port}" if port else host, + user=os.environ.get("DJ_USER", "root"), + password=os.environ.get("DJ_PASS", "password"), + ) @pytest.fixture(scope="session") -def db_creds_test() -> Dict: - """Test user database credentials from environment.""" - host = os.environ.get("DJ_HOST", "db") - port = os.environ.get("DJ_PORT", "3306") - return dict( - host=f"{host}:{port}" if port else host, - user=os.environ.get("DJ_TEST_USER", "datajoint"), - password=os.environ.get("DJ_TEST_PASSWORD", "datajoint"), - ) +def db_creds_test(mysql_container) -> Dict: + """Test user database credentials from container or environment.""" + if mysql_container is not None: + # From testcontainer + host = mysql_container.get_container_host_ip() + port = mysql_container.get_exposed_port(3306) + return dict( + host=f"{host}:{port}", + user="datajoint", + password="datajoint", + ) + else: + # From environment (external container) + host = os.environ.get("DJ_HOST", "localhost") + port = os.environ.get("DJ_PORT", "3306") + return dict( + host=f"{host}:{port}" if port else host, + user=os.environ.get("DJ_TEST_USER", "datajoint"), + password=os.environ.get("DJ_TEST_PASSWORD", "datajoint"), + ) @pytest.fixture(scope="session") -def s3_creds() -> Dict: - """S3/MinIO credentials from environment.""" - return dict( - endpoint=os.environ.get("S3_ENDPOINT", "minio:9000"), - access_key=os.environ.get("S3_ACCESS_KEY", "datajoint"), - secret_key=os.environ.get("S3_SECRET_KEY", "datajoint"), - bucket=os.environ.get("S3_BUCKET", "datajoint.test"), - ) +def s3_creds(minio_container) -> Dict: + """S3/MinIO credentials from container or environment.""" + if minio_container is not None: + # From testcontainer + host = minio_container.get_container_host_ip() + port = minio_container.get_exposed_port(9000) + return dict( + endpoint=f"{host}:{port}", + access_key="datajoint", + secret_key="datajoint", + bucket="datajoint.test", + ) + else: + # From environment (external container) + return dict( + endpoint=os.environ.get("S3_ENDPOINT", "localhost:9000"), + access_key=os.environ.get("S3_ACCESS_KEY", "datajoint"), + secret_key=os.environ.get("S3_SECRET_KEY", "datajoint"), + bucket=os.environ.get("S3_BUCKET", "datajoint.test"), + ) + + +# ============================================================================= +# DataJoint Configuration +# ============================================================================= -@pytest.fixture(scope="session", autouse=True) +@pytest.fixture(scope="session") def configure_datajoint(db_creds_root): - """Configure DataJoint to use docker-compose services.""" - host = os.environ.get("DJ_HOST", "db") - port = os.environ.get("DJ_PORT", "3306") + """Configure DataJoint to use test database. 
+ + This fixture is NOT autouse - it only runs when a test requests + a fixture that depends on it (e.g., connection_root_bare). + """ + # Parse host:port from credentials + host_port = db_creds_root["host"] + if ":" in host_port: + host, port = host_port.rsplit(":", 1) + else: + host, port = host_port, "3306" dj.config["database.host"] = host dj.config["database.port"] = int(port) @@ -92,8 +251,13 @@ def configure_datajoint(db_creds_root): logger.info(f"Configured DataJoint to use MySQL at {host}:{port}") +# ============================================================================= +# Connection Fixtures +# ============================================================================= + + @pytest.fixture(scope="session") -def connection_root_bare(db_creds_root): +def connection_root_bare(db_creds_root, configure_datajoint): """Bare root connection without user setup.""" connection = dj.Connection(**db_creds_root) yield connection @@ -104,45 +268,29 @@ def connection_root(connection_root_bare, prefix): """Root database connection with test users created.""" conn_root = connection_root_bare - # Create MySQL users - if version.parse(conn_root.query("select @@version;").fetchone()[0]) >= version.parse("8.0.0"): - conn_root.query( - """ - CREATE USER IF NOT EXISTS 'datajoint'@'%%' - IDENTIFIED BY 'datajoint'; - """ - ) - conn_root.query( - """ - CREATE USER IF NOT EXISTS 'djview'@'%%' - IDENTIFIED BY 'djview'; - """ - ) - conn_root.query( - """ - CREATE USER IF NOT EXISTS 'djssl'@'%%' - IDENTIFIED BY 'djssl' - REQUIRE SSL; - """ - ) - conn_root.query("GRANT ALL PRIVILEGES ON `djtest%%`.* TO 'datajoint'@'%%';") - conn_root.query("GRANT SELECT ON `djtest%%`.* TO 'djview'@'%%';") - conn_root.query("GRANT SELECT ON `djtest%%`.* TO 'djssl'@'%%';") - else: - conn_root.query( - """ - GRANT ALL PRIVILEGES ON `djtest%%`.* TO 'datajoint'@'%%' - IDENTIFIED BY 'datajoint'; - """ - ) - conn_root.query("GRANT SELECT ON `djtest%%`.* TO 'djview'@'%%' IDENTIFIED BY 'djview';") - conn_root.query( - """ - GRANT SELECT ON `djtest%%`.* TO 'djssl'@'%%' - IDENTIFIED BY 'djssl' - REQUIRE SSL; - """ - ) + # Create MySQL users (MySQL 8.0+ syntax - we only support 8.0+) + conn_root.query( + """ + CREATE USER IF NOT EXISTS 'datajoint'@'%%' + IDENTIFIED BY 'datajoint'; + """ + ) + conn_root.query( + """ + CREATE USER IF NOT EXISTS 'djview'@'%%' + IDENTIFIED BY 'djview'; + """ + ) + conn_root.query( + """ + CREATE USER IF NOT EXISTS 'djssl'@'%%' + IDENTIFIED BY 'djssl' + REQUIRE SSL; + """ + ) + conn_root.query("GRANT ALL PRIVILEGES ON `djtest%%`.* TO 'datajoint'@'%%';") + conn_root.query("GRANT SELECT ON `djtest%%`.* TO 'djview'@'%%';") + conn_root.query("GRANT SELECT ON `djtest%%`.* TO 'djssl'@'%%';") yield conn_root @@ -167,27 +315,19 @@ def connection_test(connection_root, prefix, db_creds_test): database = f"{prefix}%%" permission = "ALL PRIVILEGES" - if version.parse(connection_root.query("select @@version;").fetchone()[0]) >= version.parse("8.0.0"): - connection_root.query( - f""" - CREATE USER IF NOT EXISTS '{db_creds_test["user"]}'@'%%' - IDENTIFIED BY '{db_creds_test["password"]}'; - """ - ) - connection_root.query( - f""" - GRANT {permission} ON `{database}`.* - TO '{db_creds_test["user"]}'@'%%'; - """ - ) - else: - connection_root.query( - f""" - GRANT {permission} ON `{database}`.* - TO '{db_creds_test["user"]}'@'%%' - IDENTIFIED BY '{db_creds_test["password"]}'; - """ - ) + # MySQL 8.0+ syntax + connection_root.query( + f""" + CREATE USER IF NOT EXISTS '{db_creds_test["user"]}'@'%%' + IDENTIFIED BY 
'{db_creds_test["password"]}'; + """ + ) + connection_root.query( + f""" + GRANT {permission} ON `{database}`.* + TO '{db_creds_test["user"]}'@'%%'; + """ + ) connection = dj.Connection(**db_creds_test) yield connection @@ -195,7 +335,9 @@ def connection_test(connection_root, prefix, db_creds_test): connection.close() -# --- S3/MinIO fixtures --- +# ============================================================================= +# S3/MinIO Fixtures +# ============================================================================= @pytest.fixture(scope="session") @@ -312,7 +454,9 @@ def minio_client(s3_creds, s3fs_client, teardown=False): pass -# --- Utility fixtures --- +# ============================================================================= +# Utility Fixtures +# ============================================================================= @pytest.fixture(scope="session") @@ -333,14 +477,9 @@ def enable_adapted_types(): yield -@pytest.fixture -def enable_filepath_feature(monkeypatch): - monkeypatch.setenv(FILEPATH_FEATURE_SWITCH, "TRUE") - yield - monkeypatch.delenv(FILEPATH_FEATURE_SWITCH, raising=True) - - -# --- Cleanup fixtures --- +# ============================================================================= +# Cleanup Fixtures +# ============================================================================= @pytest.fixture @@ -374,7 +513,9 @@ def clean_test_tables(test, test_extra, test_no_extra): test_no_extra.delete() -# --- Schema fixtures --- +# ============================================================================= +# Schema Fixtures +# ============================================================================= @pytest.fixture(scope="module") @@ -554,7 +695,7 @@ def schema_adv(connection_test, prefix): @pytest.fixture -def schema_ext(connection_test, enable_filepath_feature, mock_stores, mock_cache, prefix): +def schema_ext(connection_test, mock_stores, mock_cache, prefix): schema = dj.Schema( prefix + "_extern", context=schema_external.LOCALS_EXTERNAL, @@ -601,7 +742,9 @@ def schema_type_aliases(connection_test, prefix): schema.drop() -# --- Table fixtures --- +# ============================================================================= +# Table Fixtures +# ============================================================================= @pytest.fixture @@ -677,7 +820,9 @@ def trash(schema_any): return schema.UberTrash() -# --- Object storage fixtures --- +# ============================================================================= +# Object Storage Fixtures +# ============================================================================= @pytest.fixture @@ -701,6 +846,7 @@ def mock_object_storage(object_storage_config): "protocol": dj.config.object_storage.protocol, "location": dj.config.object_storage.location, "token_length": dj.config.object_storage.token_length, + "stores": dict(dj.config.object_storage.stores), } # Set test values @@ -709,6 +855,12 @@ def mock_object_storage(object_storage_config): dj.config.object_storage.location = object_storage_config["location"] dj.config.object_storage.token_length = object_storage_config.get("token_length", 8) + # Configure 'local' store using same location + dj.config.object_storage.stores["local"] = { + "protocol": "file", + "location": object_storage_config["location"], + } + yield object_storage_config # Restore original values @@ -716,6 +868,8 @@ def mock_object_storage(object_storage_config): dj.config.object_storage.protocol = original["protocol"] dj.config.object_storage.location = original["location"] 
dj.config.object_storage.token_length = original["token_length"] + dj.config.object_storage.stores.clear() + dj.config.object_storage.stores.update(original["stores"]) @pytest.fixture diff --git a/tests/integration/test_autopopulate.py b/tests/integration/test_autopopulate.py index 6bde3b49..de9dc95a 100644 --- a/tests/integration/test_autopopulate.py +++ b/tests/integration/test_autopopulate.py @@ -121,7 +121,7 @@ class Image(dj.Imported): definition = """ -> ImageSource --- - image_data: + image_data: """ def make(self, key): @@ -134,7 +134,7 @@ class Crop(dj.Computed): definition = """ -> Image --- - crop_image: + crop_image: """ def make(self, key): diff --git a/tests/integration/test_blob_matlab.py b/tests/integration/test_blob_matlab.py index 8e5e9235..07f42660 100644 --- a/tests/integration/test_blob_matlab.py +++ b/tests/integration/test_blob_matlab.py @@ -11,7 +11,7 @@ class Blob(dj.Manual): id : int ----- comment : varchar(255) - blob : + blob : """ diff --git a/tests/integration/test_codec_chaining.py b/tests/integration/test_codec_chaining.py new file mode 100644 index 00000000..defbd428 --- /dev/null +++ b/tests/integration/test_codec_chaining.py @@ -0,0 +1,368 @@ +""" +Tests for codec chaining (composition). + +This tests the → json composition pattern +and similar codec chains. +""" + +from datajoint.codecs import ( + Codec, + _codec_registry, + resolve_dtype, +) + + +class TestCodecChainResolution: + """Tests for resolving codec chains.""" + + def setup_method(self): + """Clear test codecs from registry before each test.""" + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def teardown_method(self): + """Clean up test codecs after each test.""" + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def test_single_codec_chain(self): + """Test resolving a single-codec chain.""" + + class TestSingle(Codec): + name = "test_single" + + def get_dtype(self, is_external: bool) -> str: + return "varchar(100)" + + def encode(self, value, *, key=None, store_name=None): + return str(value) + + def decode(self, stored, *, key=None): + return stored + + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "varchar(100)" + assert len(chain) == 1 + assert chain[0].name == "test_single" + assert store is None + + def test_two_codec_chain(self): + """Test resolving a two-codec chain.""" + + class TestInner(Codec): + name = "test_inner" + + def get_dtype(self, is_external: bool) -> str: + return "bytes" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + class TestOuter(Codec): + name = "test_outer" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "bytes" + assert len(chain) == 2 + assert chain[0].name == "test_outer" + assert chain[1].name == "test_inner" + + def test_three_codec_chain(self): + """Test resolving a three-codec chain.""" + + class TestBase(Codec): + name = "test_base" + + def get_dtype(self, is_external: bool) -> str: + return "json" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + class TestMiddle(Codec): + name = "test_middle" + + def get_dtype(self, 
is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + class TestTop(Codec): + name = "test_top" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert len(chain) == 3 + assert chain[0].name == "test_top" + assert chain[1].name == "test_middle" + assert chain[2].name == "test_base" + + +class TestCodecChainEncodeDecode: + """Tests for encode/decode through codec chains.""" + + def setup_method(self): + """Clear test codecs from registry before each test.""" + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def teardown_method(self): + """Clean up test codecs after each test.""" + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def test_encode_order(self): + """Test that encode is applied outer → inner.""" + encode_order = [] + + class TestInnerEnc(Codec): + name = "test_inner_enc" + + def get_dtype(self, is_external: bool) -> str: + return "bytes" + + def encode(self, value, *, key=None, store_name=None): + encode_order.append("inner") + return value + b"_inner" + + def decode(self, stored, *, key=None): + return stored + + class TestOuterEnc(Codec): + name = "test_outer_enc" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + encode_order.append("outer") + return value + b"_outer" + + def decode(self, stored, *, key=None): + return stored + + _, chain, _ = resolve_dtype("") + + # Apply encode in order: outer first, then inner + value = b"start" + for codec in chain: + value = codec.encode(value) + + assert encode_order == ["outer", "inner"] + assert value == b"start_outer_inner" + + def test_decode_order(self): + """Test that decode is applied inner → outer (reverse of encode).""" + decode_order = [] + + class TestInnerDec(Codec): + name = "test_inner_dec" + + def get_dtype(self, is_external: bool) -> str: + return "bytes" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + decode_order.append("inner") + return stored.replace(b"_inner", b"") + + class TestOuterDec(Codec): + name = "test_outer_dec" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + decode_order.append("outer") + return stored.replace(b"_outer", b"") + + _, chain, _ = resolve_dtype("") + + # Apply decode in reverse order: inner first, then outer + value = b"start_outer_inner" + for codec in reversed(chain): + value = codec.decode(value) + + assert decode_order == ["inner", "outer"] + assert value == b"start" + + def test_roundtrip(self): + """Test encode/decode roundtrip through a codec chain.""" + + class TestInnerRt(Codec): + name = "test_inner_rt" + + def get_dtype(self, is_external: bool) -> str: + return "bytes" + + def encode(self, value, *, key=None, store_name=None): + # Compress (just add prefix for testing) + return b"COMPRESSED:" + value + + def decode(self, stored, *, key=None): + # Decompress + return stored.replace(b"COMPRESSED:", b"") + + class TestOuterRt(Codec): + name = 
"test_outer_rt" + + def get_dtype(self, is_external: bool) -> str: + return "" + + def encode(self, value, *, key=None, store_name=None): + # Serialize (just encode string for testing) + return str(value).encode("utf-8") + + def decode(self, stored, *, key=None): + # Deserialize + return stored.decode("utf-8") + + _, chain, _ = resolve_dtype("") + + # Original value + original = "test data" + + # Encode: outer → inner + encoded = original + for codec in chain: + encoded = codec.encode(encoded) + + assert encoded == b"COMPRESSED:test data" + + # Decode: inner → outer (reversed) + decoded = encoded + for codec in reversed(chain): + decoded = codec.decode(decoded) + + assert decoded == original + + +class TestBuiltinCodecChains: + """Tests for built-in codec chains.""" + + def test_blob_internal_resolves_to_bytes(self): + """Test that (internal) → bytes.""" + final_dtype, chain, _ = resolve_dtype("") + + assert final_dtype == "bytes" + assert len(chain) == 1 + assert chain[0].name == "blob" + + def test_blob_external_resolves_to_json(self): + """Test that → json.""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert len(chain) == 2 + assert chain[0].name == "blob" + assert chain[1].name == "hash" + assert store == "store" + + def test_attach_internal_resolves_to_bytes(self): + """Test that (internal) → bytes.""" + final_dtype, chain, _ = resolve_dtype("") + + assert final_dtype == "bytes" + assert len(chain) == 1 + assert chain[0].name == "attach" + + def test_attach_external_resolves_to_json(self): + """Test that → json.""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert len(chain) == 2 + assert chain[0].name == "attach" + assert chain[1].name == "hash" + assert store == "store" + + def test_hash_external_resolves_to_json(self): + """Test that → json (external only).""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert len(chain) == 1 + assert chain[0].name == "hash" + assert store == "store" + + def test_object_external_resolves_to_json(self): + """Test that → json (external only).""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert len(chain) == 1 + assert chain[0].name == "object" + assert store == "store" + + def test_filepath_external_resolves_to_json(self): + """Test that → json (external only).""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert len(chain) == 1 + assert chain[0].name == "filepath" + assert store == "store" + + +class TestStoreNameParsing: + """Tests for store name parsing in codec specs.""" + + def test_codec_with_store(self): + """Test parsing codec with store name.""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert store == "mystore" + + def test_codec_without_store(self): + """Test parsing codec without store name.""" + final_dtype, chain, store = resolve_dtype("") + + assert store is None + + def test_filepath_with_store(self): + """Test parsing filepath with store name.""" + final_dtype, chain, store = resolve_dtype("") + + assert final_dtype == "json" + assert store == "s3store" diff --git a/tests/integration/test_adapted_attributes.py b/tests/integration/test_codecs.py similarity index 69% rename from tests/integration/test_adapted_attributes.py rename to tests/integration/test_codecs.py index ee88c6fc..05b8aabe 100644 --- a/tests/integration/test_adapted_attributes.py +++ b/tests/integration/test_codecs.py @@ -1,7 +1,7 
@@ """ -Tests for adapted/custom attribute types. +Tests for custom codecs. -These tests verify the AttributeType system for custom data types. +These tests verify the Codec system for custom data types. """ from itertools import zip_longest @@ -11,51 +11,51 @@ import datajoint as dj -from tests import schema_adapted -from tests.schema_adapted import Connectivity, Layout +from tests import schema_codecs +from tests.schema_codecs import Connectivity, Layout @pytest.fixture def schema_name(prefix): - return prefix + "_test_custom_datatype" + return prefix + "_test_codecs" @pytest.fixture -def schema_ad( +def schema_codec( connection_test, - enable_filepath_feature, s3_creds, tmpdir, schema_name, ): - dj.config["stores"] = {"repo-s3": dict(s3_creds, protocol="s3", location="adapted/repo", stage=str(tmpdir))} - # Types are registered globally via @dj.register_type decorator in schema_adapted - context = {**schema_adapted.LOCALS_ADAPTED} + dj.config["stores"] = {"repo-s3": dict(s3_creds, protocol="s3", location="codecs/repo", stage=str(tmpdir))} + # Codecs are auto-registered via __init_subclass__ in schema_codecs + context = {**schema_codecs.LOCALS_CODECS} schema = dj.schema(schema_name, context=context, connection=connection_test) - schema(schema_adapted.Connectivity) - schema(schema_adapted.Layout) + schema(schema_codecs.Connectivity) + schema(schema_codecs.Layout) yield schema schema.drop() @pytest.fixture -def local_schema(schema_ad, schema_name): +def local_schema(schema_codec, schema_name): """Fixture for testing spawned classes""" - local_schema = dj.Schema(schema_name, connection=schema_ad.connection) + local_schema = dj.Schema(schema_name, connection=schema_codec.connection) local_schema.spawn_missing_classes() yield local_schema - # Don't drop - schema_ad fixture handles cleanup + # Don't drop - schema_codec fixture handles cleanup @pytest.fixture -def schema_virtual_module(schema_ad, schema_name): +def schema_virtual_module(schema_codec, schema_name): """Fixture for testing virtual modules""" - # Types are registered globally, no need to add_objects for adapters - schema_virtual_module = dj.VirtualModule("virtual_module", schema_name, connection=schema_ad.connection) + # Codecs are registered globally, no need to add_objects + schema_virtual_module = dj.VirtualModule("virtual_module", schema_name, connection=schema_codec.connection) return schema_virtual_module -def test_adapted_type(schema_ad): +def test_codec_graph(schema_codec): + """Test basic codec encode/decode with graph type.""" c = Connectivity() graphs = [ nx.lollipop_graph(4, 2), @@ -72,8 +72,8 @@ def test_adapted_type(schema_ad): c.delete() -def test_adapted_filepath_type(schema_ad, minio_client): - """https://github.com/datajoint/datajoint-python/issues/684""" +def test_codec_chained(schema_codec, minio_client): + """Test codec chaining (layout -> blob).""" c = Connectivity() c.delete() c.insert1((0, nx.lollipop_graph(4, 2))) @@ -89,7 +89,8 @@ def test_adapted_filepath_type(schema_ad, minio_client): c.delete() -def test_adapted_spawned(local_schema): +def test_codec_spawned(local_schema): + """Test codecs work with spawned classes.""" c = Connectivity() # a spawned class graphs = [ nx.lollipop_graph(4, 2), @@ -106,7 +107,8 @@ def test_adapted_spawned(local_schema): c.delete() -def test_adapted_virtual(schema_virtual_module): +def test_codec_virtual_module(schema_virtual_module): + """Test codecs work with virtual modules.""" c = schema_virtual_module.Connectivity() graphs = [ nx.lollipop_graph(4, 2), diff --git 
a/tests/integration/test_fetch_same.py b/tests/integration/test_fetch_same.py index ad830616..886af2b9 100644 --- a/tests/integration/test_fetch_same.py +++ b/tests/integration/test_fetch_same.py @@ -10,7 +10,7 @@ class ProjData(dj.Manual): --- resp : float sim : float - big : + big : blah : varchar(10) """ diff --git a/tests/integration/test_gc.py b/tests/integration/test_gc.py index 2c312bcc..e0c5fafc 100644 --- a/tests/integration/test_gc.py +++ b/tests/integration/test_gc.py @@ -14,41 +14,45 @@ class TestUsesContentStorage: """Tests for _uses_content_storage helper function.""" def test_returns_false_for_no_adapter(self): - """Test that False is returned when attribute has no adapter.""" + """Test that False is returned when attribute has no codec.""" attr = MagicMock() - attr.adapter = None + attr.codec = None assert gc._uses_content_storage(attr) is False - def test_returns_true_for_content_type(self): - """Test that True is returned for type.""" + def test_returns_true_for_hash_type(self): + """Test that True is returned for type.""" attr = MagicMock() - attr.adapter = MagicMock() - attr.adapter.type_name = "content" + attr.codec = MagicMock() + attr.codec.name = "hash" + attr.store = "mystore" assert gc._uses_content_storage(attr) is True - def test_returns_true_for_xblob_type(self): - """Test that True is returned for type.""" + def test_returns_true_for_blob_external(self): + """Test that True is returned for type (external).""" attr = MagicMock() - attr.adapter = MagicMock() - attr.adapter.type_name = "xblob" + attr.codec = MagicMock() + attr.codec.name = "blob" + attr.store = "mystore" assert gc._uses_content_storage(attr) is True - def test_returns_true_for_xattach_type(self): - """Test that True is returned for type.""" + def test_returns_true_for_attach_external(self): + """Test that True is returned for type (external).""" attr = MagicMock() - attr.adapter = MagicMock() - attr.adapter.type_name = "xattach" + attr.codec = MagicMock() + attr.codec.name = "attach" + attr.store = "mystore" assert gc._uses_content_storage(attr) is True - def test_returns_false_for_other_types(self): - """Test that False is returned for non-content types.""" + def test_returns_false_for_blob_internal(self): + """Test that False is returned for internal storage.""" attr = MagicMock() - attr.adapter = MagicMock() - attr.adapter.type_name = "djblob" + attr.codec = MagicMock() + attr.codec.name = "blob" + attr.store = None assert gc._uses_content_storage(attr) is False @@ -89,25 +93,25 @@ class TestUsesObjectStorage: """Tests for _uses_object_storage helper function.""" def test_returns_false_for_no_adapter(self): - """Test that False is returned when attribute has no adapter.""" + """Test that False is returned when attribute has no codec.""" attr = MagicMock() - attr.adapter = None + attr.codec = None assert gc._uses_object_storage(attr) is False def test_returns_true_for_object_type(self): """Test that True is returned for type.""" attr = MagicMock() - attr.adapter = MagicMock() - attr.adapter.type_name = "object" + attr.codec = MagicMock() + attr.codec.name = "object" assert gc._uses_object_storage(attr) is True def test_returns_false_for_other_types(self): """Test that False is returned for non-object types.""" attr = MagicMock() - attr.adapter = MagicMock() - attr.adapter.type_name = "xblob" + attr.codec = MagicMock() + attr.codec.name = "blob" assert gc._uses_object_storage(attr) is False diff --git a/tests/integration/test_relational_operand.py b/tests/integration/test_relational_operand.py 
index d6580ee8..3f15a731 100644 --- a/tests/integration/test_relational_operand.py +++ b/tests/integration/test_relational_operand.py @@ -561,30 +561,42 @@ def test_restrictions_by_top(self, schema_simp_pop): ] def test_top_restriction_with_keywords(self, schema_simp_pop): + # dj.Top only guarantees which elements are selected, not their order select = SelectPK() & dj.Top(limit=9, order_by=["select desc"]) key = KeyPK() & dj.Top(limit=9, order_by="key desc") - assert select.fetch(as_dict=True) == [ - {"id": 2, "select": 8}, - {"id": 2, "select": 6}, - {"id": 1, "select": 4}, - {"id": 2, "select": 4}, - {"id": 1, "select": 3}, - {"id": 1, "select": 2}, - {"id": 2, "select": 2}, - {"id": 1, "select": 1}, - {"id": 0, "select": 0}, - ] - assert key.fetch(as_dict=True) == [ - {"id": 2, "key": 6}, - {"id": 2, "key": 5}, - {"id": 1, "key": 5}, - {"id": 0, "key": 4}, - {"id": 1, "key": 4}, - {"id": 2, "key": 4}, - {"id": 0, "key": 3}, - {"id": 1, "key": 3}, - {"id": 2, "key": 3}, - ] + # Convert to sets of tuples for order-independent comparison + select_result = {tuple(sorted(d.items())) for d in select.fetch(as_dict=True)} + select_expected = { + tuple(sorted(d.items())) + for d in [ + {"id": 2, "select": 8}, + {"id": 2, "select": 6}, + {"id": 1, "select": 4}, + {"id": 2, "select": 4}, + {"id": 1, "select": 3}, + {"id": 1, "select": 2}, + {"id": 2, "select": 2}, + {"id": 1, "select": 1}, + {"id": 0, "select": 0}, + ] + } + assert select_result == select_expected + key_result = {tuple(sorted(d.items())) for d in key.fetch(as_dict=True)} + key_expected = { + tuple(sorted(d.items())) + for d in [ + {"id": 2, "key": 6}, + {"id": 2, "key": 5}, + {"id": 1, "key": 5}, + {"id": 0, "key": 4}, + {"id": 1, "key": 4}, + {"id": 2, "key": 4}, + {"id": 0, "key": 3}, + {"id": 1, "key": 3}, + {"id": 2, "key": 3}, + ] + } + assert key_result == key_expected def test_top_errors(self, schema_simp_pop): with pytest.raises(DataJointError) as err1: diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index 36a15493..d463ccf4 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -66,8 +66,10 @@ def test_schema_list(schema_any): assert schema_any.database in schemas -def test_drop_unauthorized(): - info_schema = dj.schema("information_schema") +@pytest.mark.requires_mysql +def test_drop_unauthorized(connection_test): + """Test that dropping information_schema raises AccessError.""" + info_schema = dj.schema("information_schema", connection=connection_test) with pytest.raises(dj.errors.AccessError): info_schema.drop() diff --git a/tests/integration/test_type_composition.py b/tests/integration/test_type_composition.py deleted file mode 100644 index 0b51b3d6..00000000 --- a/tests/integration/test_type_composition.py +++ /dev/null @@ -1,352 +0,0 @@ -""" -Tests for type composition (type chain encoding/decoding). - -This tests the → json composition pattern -and similar type chains. 
-""" - -from datajoint.attribute_type import ( - AttributeType, - _type_registry, - register_type, - resolve_dtype, -) - - -class TestTypeChainResolution: - """Tests for resolving type chains.""" - - def setup_method(self): - """Clear test types from registry before each test.""" - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def teardown_method(self): - """Clean up test types after each test.""" - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def test_single_type_chain(self): - """Test resolving a single-type chain.""" - - @register_type - class TestSingle(AttributeType): - type_name = "test_single" - dtype = "varchar(100)" - - def encode(self, value, *, key=None, store_name=None): - return str(value) - - def decode(self, stored, *, key=None): - return stored - - final_dtype, chain, store = resolve_dtype("") - - assert final_dtype == "varchar(100)" - assert len(chain) == 1 - assert chain[0].type_name == "test_single" - assert store is None - - def test_two_type_chain(self): - """Test resolving a two-type chain.""" - - @register_type - class TestInner(AttributeType): - type_name = "test_inner" - dtype = "longblob" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - return stored - - @register_type - class TestOuter(AttributeType): - type_name = "test_outer" - dtype = "" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - return stored - - final_dtype, chain, store = resolve_dtype("") - - assert final_dtype == "longblob" - assert len(chain) == 2 - assert chain[0].type_name == "test_outer" - assert chain[1].type_name == "test_inner" - - def test_three_type_chain(self): - """Test resolving a three-type chain.""" - - @register_type - class TestBase(AttributeType): - type_name = "test_base" - dtype = "json" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - return stored - - @register_type - class TestMiddle(AttributeType): - type_name = "test_middle" - dtype = "" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - return stored - - @register_type - class TestTop(AttributeType): - type_name = "test_top" - dtype = "" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - return stored - - final_dtype, chain, store = resolve_dtype("") - - assert final_dtype == "json" - assert len(chain) == 3 - assert chain[0].type_name == "test_top" - assert chain[1].type_name == "test_middle" - assert chain[2].type_name == "test_base" - - -class TestTypeChainEncodeDecode: - """Tests for encode/decode through type chains.""" - - def setup_method(self): - """Clear test types from registry before each test.""" - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def teardown_method(self): - """Clean up test types after each test.""" - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def test_encode_order(self): - """Test that encode is applied outer → inner.""" - encode_order = [] - - @register_type - class TestInnerEnc(AttributeType): - type_name = "test_inner_enc" - dtype = "longblob" - - def encode(self, value, *, key=None, store_name=None): - 
encode_order.append("inner") - return value + b"_inner" - - def decode(self, stored, *, key=None): - return stored - - @register_type - class TestOuterEnc(AttributeType): - type_name = "test_outer_enc" - dtype = "" - - def encode(self, value, *, key=None, store_name=None): - encode_order.append("outer") - return value + b"_outer" - - def decode(self, stored, *, key=None): - return stored - - _, chain, _ = resolve_dtype("") - - # Apply encode in order: outer first, then inner - value = b"start" - for attr_type in chain: - value = attr_type.encode(value) - - assert encode_order == ["outer", "inner"] - assert value == b"start_outer_inner" - - def test_decode_order(self): - """Test that decode is applied inner → outer (reverse of encode).""" - decode_order = [] - - @register_type - class TestInnerDec(AttributeType): - type_name = "test_inner_dec" - dtype = "longblob" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - decode_order.append("inner") - return stored.replace(b"_inner", b"") - - @register_type - class TestOuterDec(AttributeType): - type_name = "test_outer_dec" - dtype = "" - - def encode(self, value, *, key=None, store_name=None): - return value - - def decode(self, stored, *, key=None): - decode_order.append("outer") - return stored.replace(b"_outer", b"") - - _, chain, _ = resolve_dtype("") - - # Apply decode in reverse order: inner first, then outer - value = b"start_outer_inner" - for attr_type in reversed(chain): - value = attr_type.decode(value) - - assert decode_order == ["inner", "outer"] - assert value == b"start" - - def test_roundtrip(self): - """Test encode/decode roundtrip through a type chain.""" - - @register_type - class TestInnerRt(AttributeType): - type_name = "test_inner_rt" - dtype = "longblob" - - def encode(self, value, *, key=None, store_name=None): - # Compress (just add prefix for testing) - return b"COMPRESSED:" + value - - def decode(self, stored, *, key=None): - # Decompress - return stored.replace(b"COMPRESSED:", b"") - - @register_type - class TestOuterRt(AttributeType): - type_name = "test_outer_rt" - dtype = "" - - def encode(self, value, *, key=None, store_name=None): - # Serialize (just encode string for testing) - return str(value).encode("utf-8") - - def decode(self, stored, *, key=None): - # Deserialize - return stored.decode("utf-8") - - _, chain, _ = resolve_dtype("") - - # Original value - original = "test data" - - # Encode: outer → inner - encoded = original - for attr_type in chain: - encoded = attr_type.encode(encoded) - - assert encoded == b"COMPRESSED:test data" - - # Decode: inner → outer (reversed) - decoded = encoded - for attr_type in reversed(chain): - decoded = attr_type.decode(decoded) - - assert decoded == original - - -class TestBuiltinTypeComposition: - """Tests for built-in type composition.""" - - def test_xblob_resolves_to_json(self): - """Test that → json.""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "json" - assert len(chain) == 2 - assert chain[0].type_name == "xblob" - assert chain[1].type_name == "content" - - def test_xattach_resolves_to_json(self): - """Test that → json.""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "json" - assert len(chain) == 2 - assert chain[0].type_name == "xattach" - assert chain[1].type_name == "content" - - def test_djblob_resolves_to_longblob(self): - """Test that → longblob (no chain).""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "longblob" - 
assert len(chain) == 1 - assert chain[0].type_name == "djblob" - - def test_content_resolves_to_json(self): - """Test that → json.""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "json" - assert len(chain) == 1 - assert chain[0].type_name == "content" - - def test_object_resolves_to_json(self): - """Test that → json.""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "json" - assert len(chain) == 1 - assert chain[0].type_name == "object" - - def test_attach_resolves_to_longblob(self): - """Test that → longblob.""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "longblob" - assert len(chain) == 1 - assert chain[0].type_name == "attach" - - def test_filepath_resolves_to_json(self): - """Test that → json.""" - final_dtype, chain, _ = resolve_dtype("") - - assert final_dtype == "json" - assert len(chain) == 1 - assert chain[0].type_name == "filepath" - - -class TestStoreNameParsing: - """Tests for store name parsing in type specs.""" - - def test_type_with_store(self): - """Test parsing type with store name.""" - final_dtype, chain, store = resolve_dtype("") - - assert final_dtype == "json" - assert store == "mystore" - - def test_type_without_store(self): - """Test parsing type without store name.""" - final_dtype, chain, store = resolve_dtype("") - - assert store is None - - def test_filepath_with_store(self): - """Test parsing filepath with store name.""" - final_dtype, chain, store = resolve_dtype("") - - assert final_dtype == "json" - assert store == "s3store" diff --git a/tests/integration/test_update1.py b/tests/integration/test_update1.py index d09f70c4..eb525a6b 100644 --- a/tests/integration/test_update1.py +++ b/tests/integration/test_update1.py @@ -14,8 +14,8 @@ class Thing(dj.Manual): --- number=0 : int frac : float - picture = null : - params = null : + picture = null : + params = null : img_file = null: timestamp = CURRENT_TIMESTAMP : datetime """ @@ -57,7 +57,7 @@ def schema_update1(connection_test, prefix): schema.drop() -def test_update1(tmpdir, enable_filepath_feature, schema_update1, mock_stores_update): +def test_update1(tmpdir, schema_update1, mock_stores_update): """Test normal updates""" # CHECK 1 -- initial insert key = dict(thing=1) @@ -128,19 +128,19 @@ def test_update1(tmpdir, enable_filepath_feature, schema_update1, mock_stores_up assert original_file_data == final_file_data -def test_update1_nonexistent(enable_filepath_feature, schema_update1, mock_stores_update): +def test_update1_nonexistent(schema_update1, mock_stores_update): with pytest.raises(DataJointError): # updating a non-existent entry Thing.update1(dict(thing=100, frac=0.5)) -def test_update1_noprimary(enable_filepath_feature, schema_update1, mock_stores_update): +def test_update1_noprimary(schema_update1, mock_stores_update): with pytest.raises(DataJointError): # missing primary key Thing.update1(dict(number=None)) -def test_update1_misspelled_attribute(enable_filepath_feature, schema_update1, mock_stores_update): +def test_update1_misspelled_attribute(schema_update1, mock_stores_update): key = dict(thing=17) Thing.insert1(dict(key, frac=1.5)) with pytest.raises(DataJointError): diff --git a/tests/schema.py b/tests/schema.py index b4ffa7f0..99a7c457 100644 --- a/tests/schema.py +++ b/tests/schema.py @@ -200,8 +200,8 @@ class Channel(dj.Part): -> master channel :tinyint unsigned # channel number within Ephys ---- - voltage : - current = null : # optional current to test null handling + voltage : + current = null : # optional current 
to test null handling """ def _make_tuples(self, key): @@ -228,7 +228,7 @@ class Image(dj.Manual): # table for testing blob inserts id : int # image identifier --- - img : # image + img : # image """ @@ -454,7 +454,7 @@ class Longblob(dj.Manual): definition = """ id: int --- - data: + data: """ diff --git a/tests/schema_adapted.py b/tests/schema_adapted.py deleted file mode 100644 index a2b3e492..00000000 --- a/tests/schema_adapted.py +++ /dev/null @@ -1,59 +0,0 @@ -import inspect - -import networkx as nx - -import datajoint as dj - - -@dj.register_type -class GraphType(dj.AttributeType): - """Custom type for storing NetworkX graphs as edge lists.""" - - type_name = "graph" - dtype = "" # Use djblob for proper serialization - - def encode(self, obj, *, key=None): - """Convert graph object into an edge list.""" - assert isinstance(obj, nx.Graph) - return list(obj.edges) - - def decode(self, stored, *, key=None): - """Convert edge list into a graph.""" - return nx.Graph(stored) - - -@dj.register_type -class LayoutToFilepathType(dj.AttributeType): - """Custom type that saves a graph layout as serialized JSON blob.""" - - type_name = "layout_to_filepath" - dtype = "" # Use djblob for serialization - - def encode(self, layout, *, key=None): - """Serialize layout dict.""" - return layout # djblob handles serialization - - def decode(self, stored, *, key=None): - """Deserialize layout dict.""" - return stored # djblob handles deserialization - - -class Connectivity(dj.Manual): - definition = """ - connid : int - --- - conn_graph = null : - """ - - -class Layout(dj.Manual): - definition = """ - # stores graph layout - -> Connectivity - --- - layout: - """ - - -LOCALS_ADAPTED = {k: v for k, v in locals().items() if inspect.isclass(v)} -__all__ = list(LOCALS_ADAPTED) diff --git a/tests/schema_alter.py b/tests/schema_alter.py index 6f18448e..ef8b35f0 100644 --- a/tests/schema_alter.py +++ b/tests/schema_alter.py @@ -20,7 +20,7 @@ class Experiment(dj.Imported): experiment_id :smallint # experiment number for this subject --- data_path : int # some number - extra=null : # just testing + extra=null : # just testing -> [nullable] User subject_notes=null :varchar(2048) # {notes} e.g. 
purpose of experiment entry_time=CURRENT_TIMESTAMP :timestamp # automatic timestamp diff --git a/tests/schema_codecs.py b/tests/schema_codecs.py new file mode 100644 index 00000000..6a8d478d --- /dev/null +++ b/tests/schema_codecs.py @@ -0,0 +1,63 @@ +import inspect + +import networkx as nx + +import datajoint as dj + + +class GraphCodec(dj.Codec): + """Custom codec for storing NetworkX graphs as edge lists.""" + + name = "graph" + + def get_dtype(self, is_external: bool) -> str: + """Chain to blob for serialization.""" + return "" + + def encode(self, obj, *, key=None, store_name=None): + """Convert graph object into an edge list.""" + assert isinstance(obj, nx.Graph) + return list(obj.edges) + + def decode(self, stored, *, key=None): + """Convert edge list into a graph.""" + return nx.Graph(stored) + + +class LayoutCodec(dj.Codec): + """Custom codec that saves a graph layout as serialized blob.""" + + name = "layout" + + def get_dtype(self, is_external: bool) -> str: + """Chain to blob for serialization.""" + return "" + + def encode(self, layout, *, key=None, store_name=None): + """Serialize layout dict.""" + return layout # blob handles serialization + + def decode(self, stored, *, key=None): + """Deserialize layout dict.""" + return stored # blob handles deserialization + + +class Connectivity(dj.Manual): + definition = """ + connid : int + --- + conn_graph = null : + """ + + +class Layout(dj.Manual): + definition = """ + # stores graph layout + -> Connectivity + --- + layout: + """ + + +LOCALS_CODECS = {k: v for k, v in locals().items() if inspect.isclass(v)} +__all__ = list(LOCALS_CODECS) diff --git a/tests/schema_external.py b/tests/schema_external.py index 5a2db1e8..ae1803f5 100644 --- a/tests/schema_external.py +++ b/tests/schema_external.py @@ -13,7 +13,7 @@ class Simple(dj.Manual): definition = """ simple : int --- - item : + item : """ @@ -21,7 +21,7 @@ class SimpleRemote(dj.Manual): definition = """ simple : int --- - item : + item : """ @@ -36,7 +36,7 @@ class Dimension(dj.Lookup): definition = """ dim : int --- - dimensions : + dimensions : """ contents = ([0, [100, 50]], [1, [3, 4, 8, 6]]) @@ -47,8 +47,8 @@ class Image(dj.Computed): -> Seed -> Dimension ---- - img : # objects are stored as specified by dj.config['stores']['share'] - neg : # objects are stored as specified by dj.config['stores']['local'] + img : # objects are stored as specified by dj.config['stores']['share'] + neg : # objects are stored as specified by dj.config['stores']['local'] """ def make(self, key): @@ -62,7 +62,7 @@ class Attach(dj.Manual): # table for storing attachments attach : int ---- - img : # attachments are stored as specified by: dj.config['stores']['share'] + img : # attachments are stored as specified by: dj.config['stores']['share'] txt : # attachments are stored directly in the database """ diff --git a/tests/schema_object.py b/tests/schema_object.py index 7caf7e16..ef1d957d 100644 --- a/tests/schema_object.py +++ b/tests/schema_object.py @@ -13,7 +13,7 @@ class ObjectFile(dj.Manual): definition = """ file_id : int --- - data_file : # stored file + data_file : # stored file """ @@ -23,7 +23,7 @@ class ObjectFolder(dj.Manual): definition = """ folder_id : int --- - data_folder : # stored folder + data_folder : # stored folder """ @@ -33,8 +33,8 @@ class ObjectMultiple(dj.Manual): definition = """ record_id : int --- - raw_data : # raw data file - processed : # processed data file + raw_data : # raw data file + processed : # processed data file """ @@ -46,6 +46,6 @@ class 
ObjectWithOther(dj.Manual): session_id : int --- name : varchar(100) - data_file : + data_file : notes : varchar(255) """ diff --git a/tests/schema_simple.py b/tests/schema_simple.py index 0d4ebd53..3ac71469 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -250,7 +250,7 @@ class TTestUpdate(dj.Lookup): --- string_attr : varchar(255) num_attr=null : float - blob_attr : + blob_attr : """ contents = [ diff --git a/tests/unit/test_attribute_type.py b/tests/unit/test_attribute_type.py deleted file mode 100644 index afc6674a..00000000 --- a/tests/unit/test_attribute_type.py +++ /dev/null @@ -1,415 +0,0 @@ -""" -Tests for the new AttributeType system. -""" - -import pytest - -import datajoint as dj -from datajoint.attribute_type import ( - AttributeType, - _type_registry, - get_type, - is_type_registered, - list_types, - register_type, - resolve_dtype, - unregister_type, -) -from datajoint.errors import DataJointError - - -class TestAttributeTypeRegistry: - """Tests for the type registry functionality.""" - - def setup_method(self): - """Clear any test types from registry before each test.""" - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def teardown_method(self): - """Clean up test types after each test.""" - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def test_register_type_decorator(self): - """Test registering a type using the decorator.""" - - @register_type - class TestType(AttributeType): - type_name = "test_decorator" - dtype = "longblob" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - assert is_type_registered("test_decorator") - assert get_type("test_decorator").type_name == "test_decorator" - - def test_register_type_direct(self): - """Test registering a type by calling register_type directly.""" - - class TestType(AttributeType): - type_name = "test_direct" - dtype = "varchar(255)" - - def encode(self, value, *, key=None): - return str(value) - - def decode(self, stored, *, key=None): - return stored - - register_type(TestType) - assert is_type_registered("test_direct") - - def test_register_type_idempotent(self): - """Test that registering the same type twice is idempotent.""" - - @register_type - class TestType(AttributeType): - type_name = "test_idempotent" - dtype = "int" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - # Second registration should not raise - register_type(TestType) - assert is_type_registered("test_idempotent") - - def test_register_duplicate_name_different_class(self): - """Test that registering different classes with same name raises error.""" - - @register_type - class TestType1(AttributeType): - type_name = "test_duplicate" - dtype = "int" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - class TestType2(AttributeType): - type_name = "test_duplicate" - dtype = "varchar(100)" - - def encode(self, value, *, key=None): - return str(value) - - def decode(self, stored, *, key=None): - return stored - - with pytest.raises(DataJointError, match="already registered"): - register_type(TestType2) - - def test_unregister_type(self): - """Test unregistering a type.""" - - @register_type - class TestType(AttributeType): - type_name = "test_unregister" - dtype = "int" - - def encode(self, value, *, key=None): - return value - - 
def decode(self, stored, *, key=None): - return stored - - assert is_type_registered("test_unregister") - unregister_type("test_unregister") - assert not is_type_registered("test_unregister") - - def test_get_type_not_found(self): - """Test that getting an unregistered type raises error.""" - with pytest.raises(DataJointError, match="Unknown attribute type"): - get_type("nonexistent_type") - - def test_list_types(self): - """Test listing registered types.""" - - @register_type - class TestType(AttributeType): - type_name = "test_list" - dtype = "int" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - types = list_types() - assert "test_list" in types - assert types == sorted(types) # Should be sorted - - def test_get_type_strips_brackets(self): - """Test that get_type accepts names with or without angle brackets.""" - - @register_type - class TestType(AttributeType): - type_name = "test_brackets" - dtype = "int" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - assert get_type("test_brackets") is get_type("") - - -class TestAttributeTypeValidation: - """Tests for the validate method.""" - - def setup_method(self): - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def teardown_method(self): - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def test_validate_called_default(self): - """Test that default validate accepts any value.""" - - @register_type - class TestType(AttributeType): - type_name = "test_validate_default" - dtype = "longblob" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - t = get_type("test_validate_default") - # Default validate should not raise for any value - t.validate(None) - t.validate(42) - t.validate("string") - t.validate([1, 2, 3]) - - def test_validate_custom(self): - """Test custom validation logic.""" - - @register_type - class PositiveIntType(AttributeType): - type_name = "test_positive_int" - dtype = "int" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - def validate(self, value): - if not isinstance(value, int): - raise TypeError(f"Expected int, got {type(value).__name__}") - if value < 0: - raise ValueError("Value must be positive") - - t = get_type("test_positive_int") - t.validate(42) # Should pass - - with pytest.raises(TypeError): - t.validate("not an int") - - with pytest.raises(ValueError): - t.validate(-1) - - -class TestTypeChaining: - """Tests for type chaining (dtype referencing another custom type).""" - - def setup_method(self): - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def teardown_method(self): - for name in list(_type_registry.keys()): - if name.startswith("test_"): - del _type_registry[name] - - def test_resolve_native_dtype(self): - """Test resolving a native dtype.""" - final_dtype, chain, store = resolve_dtype("longblob") - assert final_dtype == "longblob" - assert chain == [] - assert store is None - - def test_resolve_custom_dtype(self): - """Test resolving a custom dtype.""" - - @register_type - class TestType(AttributeType): - type_name = "test_resolve" - dtype = "varchar(100)" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return 
stored - - final_dtype, chain, store = resolve_dtype("") - assert final_dtype == "varchar(100)" - assert len(chain) == 1 - assert chain[0].type_name == "test_resolve" - assert store is None - - def test_resolve_chained_dtype(self): - """Test resolving a chained dtype.""" - - @register_type - class InnerType(AttributeType): - type_name = "test_inner" - dtype = "longblob" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - @register_type - class OuterType(AttributeType): - type_name = "test_outer" - dtype = "" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - final_dtype, chain, store = resolve_dtype("") - assert final_dtype == "longblob" - assert len(chain) == 2 - assert chain[0].type_name == "test_outer" - assert chain[1].type_name == "test_inner" - assert store is None - - def test_circular_reference_detection(self): - """Test that circular type references are detected.""" - - @register_type - class TypeA(AttributeType): - type_name = "test_circular_a" - dtype = "" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - @register_type - class TypeB(AttributeType): - type_name = "test_circular_b" - dtype = "" - - def encode(self, value, *, key=None): - return value - - def decode(self, stored, *, key=None): - return stored - - with pytest.raises(DataJointError, match="Circular type reference"): - resolve_dtype("") - - -class TestExportsAndAPI: - """Test that the public API is properly exported.""" - - def test_exports_from_datajoint(self): - """Test that AttributeType and helpers are exported from datajoint.""" - assert hasattr(dj, "AttributeType") - assert hasattr(dj, "register_type") - assert hasattr(dj, "list_types") - - -class TestDJBlobType: - """Tests for the built-in DJBlobType.""" - - def test_djblob_is_registered(self): - """Test that djblob is automatically registered.""" - assert is_type_registered("djblob") - - def test_djblob_properties(self): - """Test DJBlobType properties.""" - blob_type = get_type("djblob") - assert blob_type.type_name == "djblob" - assert blob_type.dtype == "longblob" - - def test_djblob_encode_decode_roundtrip(self): - """Test that encode/decode is a proper roundtrip.""" - import numpy as np - - blob_type = get_type("djblob") - - # Test with various data types - test_data = [ - {"key": "value", "number": 42}, - [1, 2, 3, 4, 5], - np.array([1.0, 2.0, 3.0]), - "simple string", - (1, 2, 3), - None, - ] - - for original in test_data: - encoded = blob_type.encode(original) - assert isinstance(encoded, bytes) - decoded = blob_type.decode(encoded) - if isinstance(original, np.ndarray): - np.testing.assert_array_equal(decoded, original) - else: - assert decoded == original - - def test_djblob_encode_produces_valid_blob_format(self): - """Test that encoded data has valid blob protocol header.""" - blob_type = get_type("djblob") - encoded = blob_type.encode({"test": "data"}) - - # Should start with compression prefix or protocol header - valid_prefixes = (b"ZL123\0", b"mYm\0", b"dj0\0") - assert any(encoded.startswith(p) for p in valid_prefixes) - - def test_djblob_in_list_types(self): - """Test that djblob appears in list_types.""" - types = list_types() - assert "djblob" in types - - def test_djblob_handles_serialization(self): - """Test that DJBlobType handles serialization internally. 
- - With the new design: - - Plain longblob columns store/return raw bytes (no serialization) - - handles pack/unpack in encode/decode - """ - blob_type = get_type("djblob") - - # DJBlobType.encode() should produce packed bytes - data = {"key": "value"} - encoded = blob_type.encode(data) - assert isinstance(encoded, bytes) - - # DJBlobType.decode() should unpack back to original - decoded = blob_type.decode(encoded) - assert decoded == data diff --git a/tests/unit/test_codecs.py b/tests/unit/test_codecs.py new file mode 100644 index 00000000..ada62674 --- /dev/null +++ b/tests/unit/test_codecs.py @@ -0,0 +1,429 @@ +""" +Tests for the Codec system. +""" + +import pytest + +import datajoint as dj +from datajoint.codecs import ( + Codec, + _codec_registry, + get_codec, + is_codec_registered, + list_codecs, + resolve_dtype, + unregister_codec, +) +from datajoint.errors import DataJointError + + +class TestCodecRegistry: + """Tests for the codec registry functionality.""" + + def setup_method(self): + """Clear any test codecs from registry before each test.""" + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def teardown_method(self): + """Clean up test codecs after each test.""" + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def test_register_codec_auto(self): + """Test auto-registration via __init_subclass__.""" + + class TestCodec(Codec): + name = "test_decorator" + + def get_dtype(self, is_external: bool) -> str: + return "bytes" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + assert is_codec_registered("test_decorator") + assert get_codec("test_decorator").name == "test_decorator" + + def test_register_codec_skip(self): + """Test skipping registration with register=False.""" + + class TestCodec(Codec, register=False): + name = "test_skip" + + def get_dtype(self, is_external: bool) -> str: + return "varchar(255)" + + def encode(self, value, *, key=None, store_name=None): + return str(value) + + def decode(self, stored, *, key=None): + return stored + + assert not is_codec_registered("test_skip") + + def test_register_codec_idempotent(self): + """Test that defining the same codec class twice is idempotent.""" + + class TestCodec(Codec): + name = "test_idempotent" + + def get_dtype(self, is_external: bool) -> str: + return "int32" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + # Redefine the same name should not raise (same class) + assert is_codec_registered("test_idempotent") + + def test_register_duplicate_name_different_class(self): + """Test that registering different classes with same name raises error.""" + + class TestCodec1(Codec): + name = "test_duplicate" + + def get_dtype(self, is_external: bool) -> str: + return "int32" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + with pytest.raises(DataJointError, match="already registered"): + + class TestCodec2(Codec): + name = "test_duplicate" + + def get_dtype(self, is_external: bool) -> str: + return "varchar(100)" + + def encode(self, value, *, key=None, store_name=None): + return str(value) + + def decode(self, stored, *, key=None): + return stored + + def test_unregister_codec(self): + """Test unregistering a codec.""" + + class TestCodec(Codec): + name = 
"test_unregister" + + def get_dtype(self, is_external: bool) -> str: + return "int32" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + assert is_codec_registered("test_unregister") + unregister_codec("test_unregister") + assert not is_codec_registered("test_unregister") + + def test_get_codec_not_found(self): + """Test that getting an unregistered codec raises error.""" + with pytest.raises(DataJointError, match="Unknown codec"): + get_codec("nonexistent_codec") + + def test_list_codecs(self): + """Test listing registered codecs.""" + + class TestCodec(Codec): + name = "test_list" + + def get_dtype(self, is_external: bool) -> str: + return "int32" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + codecs = list_codecs() + assert "test_list" in codecs + assert codecs == sorted(codecs) # Should be sorted + + def test_get_codec_strips_brackets(self): + """Test that get_codec accepts names with or without angle brackets.""" + + class TestCodec(Codec): + name = "test_brackets" + + def get_dtype(self, is_external: bool) -> str: + return "int32" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + assert get_codec("test_brackets") is get_codec("") + + +class TestCodecValidation: + """Tests for the validate method.""" + + def setup_method(self): + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def teardown_method(self): + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def test_validate_called_default(self): + """Test that default validate accepts any value.""" + + class TestCodec(Codec): + name = "test_validate_default" + + def get_dtype(self, is_external: bool) -> str: + return "bytes" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + t = get_codec("test_validate_default") + # Default validate should not raise for any value + t.validate(None) + t.validate(42) + t.validate("string") + t.validate([1, 2, 3]) + + def test_validate_custom(self): + """Test custom validation logic.""" + + class PositiveIntCodec(Codec): + name = "test_positive_int" + + def get_dtype(self, is_external: bool) -> str: + return "int32" + + def encode(self, value, *, key=None, store_name=None): + return value + + def decode(self, stored, *, key=None): + return stored + + def validate(self, value): + if not isinstance(value, int): + raise TypeError(f"Expected int, got {type(value).__name__}") + if value < 0: + raise ValueError("Value must be positive") + + t = get_codec("test_positive_int") + t.validate(42) # Should pass + + with pytest.raises(TypeError): + t.validate("not an int") + + with pytest.raises(ValueError): + t.validate(-1) + + +class TestCodecChaining: + """Tests for codec chaining (dtype referencing another codec).""" + + def setup_method(self): + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def teardown_method(self): + for name in list(_codec_registry.keys()): + if name.startswith("test_"): + del _codec_registry[name] + + def test_resolve_native_dtype(self): + """Test resolving a native dtype.""" + final_dtype, chain, store = resolve_dtype("bytes") + assert final_dtype == "bytes" + assert chain == [] + 
assert store is None
+
+    def test_resolve_custom_dtype(self):
+        """Test resolving a custom dtype."""
+
+        class TestCodec(Codec):
+            name = "test_resolve"
+
+            def get_dtype(self, is_external: bool) -> str:
+                return "varchar(100)"
+
+            def encode(self, value, *, key=None, store_name=None):
+                return value
+
+            def decode(self, stored, *, key=None):
+                return stored
+
+        final_dtype, chain, store = resolve_dtype("<test_resolve>")
+        assert final_dtype == "varchar(100)"
+        assert len(chain) == 1
+        assert chain[0].name == "test_resolve"
+        assert store is None
+
+    def test_resolve_chained_dtype(self):
+        """Test resolving a chained dtype."""
+
+        class InnerCodec(Codec):
+            name = "test_inner"
+
+            def get_dtype(self, is_external: bool) -> str:
+                return "bytes"
+
+            def encode(self, value, *, key=None, store_name=None):
+                return value
+
+            def decode(self, stored, *, key=None):
+                return stored
+
+        class OuterCodec(Codec):
+            name = "test_outer"
+
+            def get_dtype(self, is_external: bool) -> str:
+                return "<test_inner>"
+
+            def encode(self, value, *, key=None, store_name=None):
+                return value
+
+            def decode(self, stored, *, key=None):
+                return stored
+
+        final_dtype, chain, store = resolve_dtype("<test_outer>")
+        assert final_dtype == "bytes"
+        assert len(chain) == 2
+        assert chain[0].name == "test_outer"
+        assert chain[1].name == "test_inner"
+        assert store is None
+
+    def test_circular_reference_detection(self):
+        """Test that circular codec references are detected."""
+
+        class CodecA(Codec):
+            name = "test_circular_a"
+
+            def get_dtype(self, is_external: bool) -> str:
+                return "<test_circular_b>"
+
+            def encode(self, value, *, key=None, store_name=None):
+                return value
+
+            def decode(self, stored, *, key=None):
+                return stored
+
+        class CodecB(Codec):
+            name = "test_circular_b"
+
+            def get_dtype(self, is_external: bool) -> str:
+                return "<test_circular_a>"
+
+            def encode(self, value, *, key=None, store_name=None):
+                return value
+
+            def decode(self, stored, *, key=None):
+                return stored
+
+        with pytest.raises(DataJointError, match="Circular codec reference"):
+            resolve_dtype("<test_circular_a>")
+
+
+class TestExportsAndAPI:
+    """Test that the public API is properly exported."""
+
+    def test_exports_from_datajoint(self):
+        """Test that Codec and helpers are exported from datajoint."""
+        assert hasattr(dj, "Codec")
+        assert hasattr(dj, "get_codec")
+        assert hasattr(dj, "list_codecs")
+
+
+class TestBlobCodec:
+    """Tests for the built-in BlobCodec."""
+
+    def test_blob_is_registered(self):
+        """Test that blob is automatically registered."""
+        assert is_codec_registered("blob")
+
+    def test_blob_properties(self):
+        """Test BlobCodec properties."""
+        blob_codec = get_codec("blob")
+        assert blob_codec.name == "blob"
+        assert blob_codec.get_dtype(is_external=False) == "bytes"
+        assert blob_codec.get_dtype(is_external=True) == ""
+
+    def test_blob_encode_decode_roundtrip(self):
+        """Test that encode/decode is a proper roundtrip."""
+        import numpy as np
+
+        blob_codec = get_codec("blob")
+
+        # Test with various data types
+        test_data = [
+            {"key": "value", "number": 42},
+            [1, 2, 3, 4, 5],
+            np.array([1.0, 2.0, 3.0]),
+            "simple string",
+            (1, 2, 3),
+            None,
+        ]
+
+        for original in test_data:
+            encoded = blob_codec.encode(original)
+            assert isinstance(encoded, bytes)
+            decoded = blob_codec.decode(encoded)
+            if isinstance(original, np.ndarray):
+                np.testing.assert_array_equal(decoded, original)
+            else:
+                assert decoded == original
+
+    def test_blob_encode_produces_valid_blob_format(self):
+        """Test that encoded data has valid blob protocol header."""
+        blob_codec = get_codec("blob")
+        encoded = blob_codec.encode({"test": "data"})
+
+        # Should start with compression prefix or protocol header
+        valid_prefixes = (b"ZL123\0", b"mYm\0", b"dj0\0")
+        assert any(encoded.startswith(p) for p in valid_prefixes)
+
+    def test_blob_in_list_codecs(self):
+        """Test that blob appears in list_codecs."""
+        codecs = list_codecs()
+        assert "blob" in codecs
+
+    def test_blob_handles_serialization(self):
+        """Test that BlobCodec handles serialization internally.
+
+        With the new design:
+        - Plain bytes columns store/return raw bytes (no serialization)
+        - <blob> handles pack/unpack in encode/decode
+        """
+        blob_codec = get_codec("blob")
+
+        # BlobCodec.encode() should produce packed bytes
+        data = {"key": "value"}
+        encoded = blob_codec.encode(data)
+        assert isinstance(encoded, bytes)
+
+        # BlobCodec.decode() should unpack back to original
+        decoded = blob_codec.decode(encoded)
+        assert decoded == data
diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py
index d7122969..66d817f0 100644
--- a/tests/unit/test_settings.py
+++ b/tests/unit/test_settings.py
@@ -160,7 +160,9 @@ def test_attribute_access(self):
         # Host can be localhost or db (docker), just verify it's a string
         assert isinstance(dj.config.database.host, str)
         assert len(dj.config.database.host) > 0
-        assert dj.config.database.port == 3306
+        # Port may be 3306 (default) or a random port (testcontainers)
+        assert isinstance(dj.config.database.port, int)
+        assert 1 <= dj.config.database.port <= 65535
         # safemode may be modified by conftest fixtures
         assert isinstance(dj.config.safemode, bool)
@@ -169,7 +171,9 @@ def test_dict_style_access(self):
         # Host can be localhost or db (docker), just verify it's a string
         assert isinstance(dj.config["database.host"], str)
         assert len(dj.config["database.host"]) > 0
-        assert dj.config["database.port"] == 3306
+        # Port may be 3306 (default) or a random port (testcontainers)
+        assert isinstance(dj.config["database.port"], int)
+        assert 1 <= dj.config["database.port"] <= 65535
         # safemode may be modified by conftest fixtures
         assert isinstance(dj.config["safemode"], bool)
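
Usage note (not part of the patch above): the new codec tests imply the following pattern for defining and using a custom `dj.Codec`. This is a minimal sketch reconstructed from `tests/schema_codecs.py` and `tests/unit/test_codecs.py`; the schema name, the insert/fetch calls, and the `<graph>` spelling inside the table definition are illustrative assumptions, while the `Codec` base class, the `name` attribute, `get_dtype(is_external)`, `encode`/`decode`, and automatic registration on subclassing are taken directly from the diff.

```python
import networkx as nx

import datajoint as dj


class GraphCodec(dj.Codec):
    """Store NetworkX graphs as edge lists; registered automatically on subclassing."""

    name = "graph"  # referenced in table definitions as <graph> (assumed spelling)

    def get_dtype(self, is_external: bool) -> str:
        # Chain onto the built-in blob codec, which handles serialization of the edge list.
        return "<blob>"

    def encode(self, obj, *, key=None, store_name=None):
        # Reduce the graph to a plain edge list before blob serialization.
        assert isinstance(obj, nx.Graph)
        return list(obj.edges)

    def decode(self, stored, *, key=None):
        # Rebuild the graph from the stored edge list.
        return nx.Graph(stored)


schema = dj.schema("codec_demo")  # hypothetical schema name


@schema
class Connectivity(dj.Manual):
    definition = """
    connid : int
    ---
    conn_graph = null : <graph>
    """


# Round trip: encode runs on insert, decode on fetch.
Connectivity.insert1(dict(connid=0, conn_graph=nx.lollipop_graph(4, 2)))
graph = (Connectivity & dict(connid=0)).fetch1("conn_graph")  # returns an nx.Graph
```

Because `get_dtype` chains to the blob codec, the edge list itself is serialized by the built-in blob machinery rather than by `GraphCodec`, which only converts between `nx.Graph` objects and plain edge lists.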