Skip to content

Mondo Ontology not downloading #503

@kmanpearl

Description

@kmanpearl

Not sure whether this is caused by the earlier problem of Mondo having terms with a missing field, or whether it's a new problem.

code:

from obnb.data import DisGeNET
disease_labels = DisGeNET(root='data')

output:

---------------------------------------------------------------------------
BadGzipFile                               Traceback (most recent call last)
Cell In[9], line 1
----> 1 disease_labels = DisGeNET(root='data')

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/disgenet.py:33, in DisGeNET.__init__(self, root, dsi_min, dsi_max, dpi_min, dpi_max, min_size, max_size, overlap, jaccard, data_sources, gene_id_converter, **kwargs)
     30 self.jaccard = jaccard
     31 self.overlap = overlap
---> 33 super().__init__(
     34     root,
     35     annotation_factory=DisGeNETAnnotation,
     36     ontology_factory=MondoDiseaseOntology,
     37     annotation_kwargs={
     38         "data_sources": data_sources,
     39         "dsi_min": dsi_min,
     40         "dsi_max": dsi_max,
     41         "dpi_min": dpi_min,
     42         "dpi_max": dpi_max,
     43         "gene_id_converter": gene_id_converter,
     44     },
     45     ontology_kwargs={"xref_prefix": "UMLS"},
     46     **kwargs,
     47 )

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:30, in BaseAnnotatedOntologyData.__init__(self, root, annotation_factory, ontology_factory, annotation_kwargs, ontology_kwargs, **kwargs)
     28 self.annotation_kwargs = annotation_kwargs
     29 self.ontology_kwargs = ontology_kwargs
---> 30 super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:96, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
     94     with log_file_context(self.plogger, self.info_log_path):
     95         self._download()
---> 96         self._process()
     97 else:
     98     self._download_archive()

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:274, in BaseData._process(self)
    272 # Process data
    273 self.plogger.info(f"Start processing {self.classname}...")
--> 274 self.process()
    276 # Pre-transform data
    277 if self.pre_transform is not None:

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:53, in BaseAnnotatedOntologyData.process(self)
     45 # NOTE: Reprocess is not a valid option for annotation and ontology
     46 # data objects as we do not save the processed data. Similarly,
     47 # retransform is invalid as there is not transformation for them yet.
     48 opts = {
     49     "redownload": self.redownload,
     50     "version": self.version,
     51     "log_level": self.log_level,
     52 }
---> 53 ann = self.annotation_factory(self.root, **self.annotation_kwargs, **opts)
     54 ont = self.ontology_factory(self.root, **self.ontology_kwargs, **opts)
     56 annot = ann.data

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/disgenet.py:89, in DisGeNETAnnotation.__init__(self, root, data_sources, dsi_min, dsi_max, dpi_min, dpi_max, **kwargs)
     87 self.dpi_min = dpi_min
     88 self.dpi_max = dpi_max
---> 89 super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:14, in BaseAnnotationData.__init__(self, root, **kwargs)
     12 def __init__(self, root: str, **kwargs):
     13     """Initialize BaseAnnotationData."""
---> 14     super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:95, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
     93 if version == "latest":
     94     with log_file_context(self.plogger, self.info_log_path):
---> 95         self._download()
     96         self._process()
     97 else:

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:260, in BaseData._download(self)
    258 if self.redownload or not self.download_completed():
    259     self.plogger.info(f"Start downloading {self.classname}...")
--> 260     self.download()

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:30, in BaseAnnotationData.download(self)
     23 """Download raw annotation table.
     24 
     25 Note:
     26     The raw file is assumed to be gzipped.
     27 
     28 """
     29 self.plogger.info(f"Download annotation from: {self.annotation_url}")
---> 30 download_unzip(
     31     self.annotation_url,
     32     self.raw_dir,
     33     zip_type=self.annotation_file_zip_type,
     34     rename=self.raw_files[0],
     35     logger=self.plogger,
     36 )

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/download.py:115, in download_unzip(url, root, zip_type, rename, logger)
    113 elif zip_type == "gzip":
    114     with open(path := osp.join(root, filename), "wb") as f:
--> 115         f.write(gzip.decompress(content))
    116     logger.info(f"File saved to {path!r}")
    117 elif zip_type == "none":

File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:627, in decompress(data)
    625 while True:
    626     fp = io.BytesIO(data)
--> 627     if _read_gzip_header(fp) is None:
    628         return b"".join(decompressed_members)
    629     # Use a zlib raw deflate compressor

File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:456, in _read_gzip_header(fp)
    453     return None
    455 if magic != b'\037\213':
--> 456     raise BadGzipFile('Not a gzipped file (%r)' % magic)
    458 (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    459 if method != 8:

BadGzipFile: Not a gzipped file (b'<!')

Other potentially helpful info:

contents of data/DisGeNET/info/run.log:

[INFO][2025-02-03 11:53:54,869][base][_process] Start processing DisGeNET...
[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...

contents of data/DisGeNETAnnotation/info:

[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...

The file data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv exists, but data/DisGeNET/raw/ is empty.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions