Skip to content

Commit 78434ff

Browse files
authored
Implement readers for compressed lists objects (#68)
1 parent ad74bdf commit 78434ff

25 files changed

+360
-35
lines changed

.github/workflows/run-tests.yml

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,74 @@ name: Test the library
33
on:
44
push:
55
branches:
6-
- master
6+
- master # for legacy repos
7+
- main
78
pull_request:
9+
branches:
10+
- master # for legacy repos
11+
- main
12+
workflow_dispatch: # Allow manually triggering the workflow
13+
schedule:
14+
# Run roughly every 15 days at 00:00 UTC
15+
# (useful to check if updates on dependencies break the package)
16+
- cron: "0 0 1,16 * *"
17+
18+
permissions:
19+
contents: read
20+
21+
concurrency:
22+
group: >-
23+
${{ github.workflow }}-${{ github.ref_type }}-
24+
${{ github.event.pull_request.number || github.sha }}
25+
cancel-in-progress: true
826

927
jobs:
1028
test:
11-
runs-on: ubuntu-latest
1229
strategy:
1330
matrix:
14-
python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
15-
16-
name: Python ${{ matrix.python-version }}
31+
python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
32+
platform:
33+
- ubuntu-latest
34+
- macos-latest
35+
# - windows-latest
36+
runs-on: ${{ matrix.platform }}
37+
name: Python ${{ matrix.python }}, ${{ matrix.platform }}
1738
steps:
1839
- uses: actions/checkout@v4
19-
with:
20-
submodules: true
2140

22-
- name: Setup Python
23-
uses: actions/setup-python@v5
41+
- uses: actions/setup-python@v5
42+
id: setup-python
2443
with:
25-
python-version: ${{ matrix.python-version }}
26-
cache: 'pip'
44+
python-version: ${{ matrix.python }}
2745

2846
- name: Get latest CMake
2947
uses: lukka/get-cmake@latest
3048

31-
- name: Test with tox
49+
- name: Install dependencies
3250
run: |
33-
pip install tox
51+
python -m pip install --upgrade pip
52+
pip install tox coverage
53+
54+
- name: Run tests
55+
run: >-
56+
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
3457
tox
58+
-- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args
59+
60+
- name: Check for codecov token availability
61+
id: codecov-check
62+
shell: bash
63+
run: |
64+
if [ ${{ secrets.CODECOV_TOKEN }} != '' ]; then
65+
echo "codecov=true" >> $GITHUB_OUTPUT;
66+
else
67+
echo "codecov=false" >> $GITHUB_OUTPUT;
68+
fi
69+
70+
- name: Upload coverage reports to Codecov with GitHub Action
71+
uses: codecov/codecov-action@v5
72+
if: ${{ steps.codecov-check.outputs.codecov == 'true' }}
73+
env:
74+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
75+
slug: ${{ github.repository }}
76+
flags: ${{ matrix.platform }} - py${{ matrix.python }}

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## Version 0.8.0
4+
5+
- Implement parsers for compressed list objects.
6+
37
## Version 0.7.0 - 0.7.3
48

59
- All dependencies are now listed under optional, except for numpy and biocutils.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ version_scheme = "no-guess-dev"
1010
[tool.ruff]
1111
line-length = 120
1212
src = ["src"]
13-
exclude = ["tests"]
13+
# exclude = ["tests"]
1414
extend-ignore = ["F821"]
1515

1616
[tool.ruff.pydocstyle]

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ optional =
7070
summarizedexperiment>=0.4.1
7171
singlecellexperiment>=0.4.1
7272
multiassayexperiment
73+
compressed_lists>=0.3.0
7374

7475
# Add here test requirements (semicolon/line-separated)
7576
testing =

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ def run(self):
2828

2929
def build_cmake(self, ext):
3030
build_temp = pathlib.Path(self.build_temp)
31+
build_temp.mkdir(parents=True, exist_ok=True)
3132
build_lib = pathlib.Path(self.build_lib)
3233
outpath = os.path.join(build_lib.absolute(), ext.name)
3334

35+
build_temp = os.path.join(build_temp, "build")
3436
if not os.path.exists(build_temp):
3537
cmd = [
3638
"cmake",

src/rds2py/generics.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@
6060
"ExperimentList": "rds2py.read_dict.read_dict",
6161
# delayed matrices
6262
"H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse",
63+
# compressed lists
64+
"CompressedIntegerList": "rds2py.read_compressed_list.read_compressed_integer_list",
65+
"PartitioningByEnd": "rds2py.read_compressed_list.read_partitioning_by_end",
66+
"CompressedCharacterList": "rds2py.read_compressed_list.read_compressed_string_list",
67+
"CompressedLogicalList": "rds2py.read_compressed_list.read_compressed_boolean_list",
68+
"CompressedNumericList": "rds2py.read_compressed_list.read_compressed_float_list",
69+
"CompressedSplitDataFrameList": "rds2py.read_compressed_list.read_compressed_frame_list",
70+
"CompressedSplitDFrameList": "rds2py.read_compressed_list.read_compressed_frame_list",
6371
}
6472

6573

src/rds2py/read_compressed_list.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
"""Functions and classes for parsing Compressed List data structures."""
2+
3+
import numpy as np
4+
5+
from .generics import _dispatcher
6+
from .rdsutils import get_class
7+
8+
__author__ = "jkanche"
9+
__copyright__ = "jkanche"
10+
__license__ = "MIT"
11+
12+
13+
def read_partitioning_by_end(robject: dict, **kwargs):
    """Read a ``PartitioningByEnd`` object.

    Args:
        robject:
            Dictionary containing the parsed ``PartitioningByEnd`` object.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when the
            ``end`` attribute is parsed.

    Returns:
        A ``Partitioning`` from the 'compressed_lists' package, built from
        the parsed ``end`` attribute.

    Raises:
        RuntimeError: If the class of ``robject`` is not ``PartitioningByEnd``.
    """
    _cls = get_class(robject)

    if _cls not in ("PartitioningByEnd",):
        raise RuntimeError(f"`robject` does not contain a `PartitioningByEnd` object, contains `{_cls}`.")

    ends = _dispatcher(robject["attributes"]["end"], **kwargs)

    # Imported inside the function so that this module can be imported even
    # when 'compressed_lists' is not installed.
    from compressed_lists import Partitioning

    return Partitioning(ends=np.asarray(ends))
36+
37+
38+
def _get_compressed_common_attrs(robject, **kwargs):
    """Parse the attributes shared by all compressed list objects.

    Args:
        robject:
            Dictionary containing the parsed compressed list; its
            ``"attributes"`` entry is inspected.

        **kwargs:
            Additional arguments, forwarded to the dispatcher.

    Returns:
        A tuple ``(unlist_data, element_metadata, metadata, partition)``.
        ``unlist_data`` is always parsed; each of the other three is ``None``
        when the corresponding attribute (``elementMetadata``, ``metadata``,
        ``partitioning``) is absent.

    Raises:
        ValueError: If the ``unlistData`` attribute is missing.
    """
    attrs = robject["attributes"]

    if "unlistData" not in attrs:
        raise ValueError("Object does not contain unlistData, is it really a `CompressedList`?")

    def _parse_if_present(key):
        # Optional attributes are only dispatched when they exist.
        if key in attrs:
            return _dispatcher(attrs[key], **kwargs)
        return None

    # Parsing order matches the order of the returned tuple.
    unlist_data = _dispatcher(attrs["unlistData"], **kwargs)
    element_metadata = _parse_if_present("elementMetadata")
    metadata = _parse_if_present("metadata")
    partition = _parse_if_present("partitioning")

    return unlist_data, element_metadata, metadata, partition
56+
57+
58+
def read_compressed_integer_list(robject: dict, **kwargs):
    """Read an R compressed integer list.

    Args:
        robject:
            Dictionary containing the parsed ``CompressedIntegerList``.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when the
            object's attributes are parsed.

    Returns:
        A ``CompressedIntegerList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError: If the class of ``robject`` is not
            ``CompressedIntegerList``.
        ValueError: If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedIntegerList",):
        raise RuntimeError(f"`robject` does not contain a compressed integer list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported inside the function so that this module can be imported even
    # when 'compressed_lists' is not installed.
    from compressed_lists import CompressedIntegerList

    return CompressedIntegerList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )
83+
84+
85+
def read_compressed_string_list(robject: dict, **kwargs):
    """Read an R compressed string/character list.

    Args:
        robject:
            Dictionary containing the parsed ``CompressedCharacterList``.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when the
            object's attributes are parsed.

    Returns:
        A ``CompressedCharacterList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError: If the class of ``robject`` is not
            ``CompressedCharacterList``.
        ValueError: If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedCharacterList",):
        raise RuntimeError(f"`robject` does not contain a compressed string list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported inside the function so that this module can be imported even
    # when 'compressed_lists' is not installed.
    from compressed_lists import CompressedCharacterList

    return CompressedCharacterList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )
110+
111+
112+
def read_compressed_character_list(robject: dict, **kwargs):
    """Read an R compressed string/character list.

    Alias of :func:`read_compressed_string_list`; every argument is passed
    through unchanged.

    Args:
        robject:
            Dictionary containing parsed compressed string list.

        **kwargs:
            Additional arguments.

    Returns:
        Whatever :func:`read_compressed_string_list` returns for the same
        arguments.
    """
    parsed = read_compressed_string_list(robject, **kwargs)
    return parsed
126+
127+
128+
def read_compressed_boolean_list(robject: dict, **kwargs):
    """Read an R compressed boolean (logical) list.

    Args:
        robject:
            Dictionary containing the parsed ``CompressedLogicalList``.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when the
            object's attributes are parsed.

    Returns:
        A ``CompressedBooleanList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError: If the class of ``robject`` is not
            ``CompressedLogicalList``.
        ValueError: If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedLogicalList",):
        raise RuntimeError(f"`robject` does not contain a compressed boolean list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported inside the function so that this module can be imported even
    # when 'compressed_lists' is not installed.
    from compressed_lists import CompressedBooleanList

    return CompressedBooleanList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )
153+
154+
155+
def read_compressed_float_list(robject: dict, **kwargs):
    """Read an R compressed float (numeric) list.

    Args:
        robject:
            Dictionary containing the parsed ``CompressedNumericList``.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when the
            object's attributes are parsed.

    Returns:
        A ``CompressedFloatList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError: If the class of ``robject`` is not
            ``CompressedNumericList``.
        ValueError: If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedNumericList",):
        raise RuntimeError(f"`robject` does not contain a compressed float list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported inside the function so that this module can be imported even
    # when 'compressed_lists' is not installed.
    from compressed_lists import CompressedFloatList

    return CompressedFloatList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )
180+
181+
182+
def read_compressed_frame_list(robject: dict, **kwargs):
    """Read an R compressed dataframe list.

    Args:
        robject:
            Dictionary containing the parsed ``CompressedSplitDataFrameList``
            or ``CompressedSplitDFrameList``.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when the
            object's attributes are parsed.

    Returns:
        A ``CompressedSplitBiocFrameList`` from the 'compressed_lists' package.

    Raises:
        RuntimeError: If the class of ``robject`` is neither
            ``CompressedSplitDataFrameList`` nor ``CompressedSplitDFrameList``.
        ValueError: If ``robject`` has no ``unlistData`` attribute.
    """
    _cls = get_class(robject)

    if _cls not in ("CompressedSplitDataFrameList", "CompressedSplitDFrameList"):
        raise RuntimeError(f"`robject` does not contain a compressed dataframe list object, contains `{_cls}`.")

    unlist_data, element_metadata, metadata, partition = _get_compressed_common_attrs(robject=robject, **kwargs)

    # Imported inside the function so that this module can be imported even
    # when 'compressed_lists' is not installed.
    from compressed_lists import CompressedSplitBiocFrameList

    return CompressedSplitBiocFrameList(
        unlist_data=unlist_data,
        partitioning=partition,
        element_metadata=element_metadata,
        metadata=metadata,
    )

tests/data/compressedlist_char.rds

284 Bytes
Binary file not shown.

tests/data/compressedlist_int.rds

272 Bytes
Binary file not shown.
260 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)