Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ RUN --mount=type=cache,target=/opt/conda/pkgs,sharing=locked \
&& mamba install -n catapro_env -c conda-forge rdkit=2024.03.6 -y \
&& conda run -n catapro_env pip install -r docker-requirements/catapro_requirements.txt

# ── CatPred ───────────────────────────────────────────────────────────────────
# Build stage that produces the `catpred_env` conda environment.
# The resulting /opt/conda/envs/catpred_env directory is what the final image
# pulls in via `COPY --from=env-catpred`.
FROM base AS env-catpred
# Only the pinned requirements list is needed by the pip step below.
COPY docker-requirements/catpred_requirements.txt ./docker-requirements/
# Single RUN: create the env on Python 3.10.15, add RDKit 2024.03.6 from
# conda-forge, then pip-install the pinned requirements inside the env.
# The conda package directory and the pip cache are BuildKit cache mounts
# (sharing=locked); the pip cache uses the id `webkinpred-pip-py310`.
RUN --mount=type=cache,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=webkinpred-pip-py310,target=/root/.cache/pip,sharing=locked \
mamba create -n catpred_env python=3.10.15 -c conda-forge -y \
&& mamba install -n catpred_env -c conda-forge rdkit=2024.03.6 -y \
&& conda run -n catpred_env pip install -r docker-requirements/catpred_requirements.txt

# ── pseq2sites ────────────────────────────────────────────────────────────────
FROM base AS env-pseq2sites
RUN --mount=type=cache,target=/opt/conda/pkgs,sharing=locked \
Expand Down Expand Up @@ -155,6 +164,7 @@ COPY --from=env-eitlem /opt/conda/envs/eitlem_env /opt/conda/envs/eitlem_en
COPY --from=env-turnup /opt/conda/envs/turnup_env /opt/conda/envs/turnup_env
COPY --from=env-unikp /opt/conda/envs/unikp /opt/conda/envs/unikp
COPY --from=env-catapro /opt/conda/envs/catapro_env /opt/conda/envs/catapro_env
COPY --from=env-catpred /opt/conda/envs/catpred_env /opt/conda/envs/catpred_env
COPY --from=env-pseq2sites /opt/conda/envs/pseq2sites /opt/conda/envs/pseq2sites
COPY --from=env-esm /opt/conda/envs/esm /opt/conda/envs/esm
COPY --from=env-esmc /opt/conda/envs/esmc /opt/conda/envs/esmc
Expand All @@ -174,6 +184,7 @@ RUN find /opt/conda -name "*.pyc" -delete \
COPY . .

RUN mkdir -p /app/models/EITLEM/Weights \
/app/models/CatPred \
/app/models/TurNup/data/saved_models \
/app/models/UniKP-main/models \
/app/media/sequence_info \
Expand Down
6 changes: 4 additions & 2 deletions api/embeddings/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@
"implemented": True,
"conda_env": "esm",
"python_path_key": "esm2", # key in config PYTHON_PATHS
"used_by": ["KinForm-H", "KinForm-L"],
"used_by": ["KinForm-H", "KinForm-L", "CatPred"],
"notes": (
"Invoked as a subprocess by KinForm. The python path is passed via "
"the KINFORM_ESM_PATH environment variable. Multi-layer embeddings "
"are extracted in a single model-load pass."
"are extracted in a single model-load pass. CatPred uses per-residue "
"ESM2 features, so it bridges into a method-specific cache format "
"rather than reusing the shared mean-vector cache directly."
),
},

Expand Down
50 changes: 50 additions & 0 deletions api/methods/catpred.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# api/methods/catpred.py
#
# Method descriptor for CatPred.

from api.methods.base import MethodDescriptor, SubprocessEngineConfig


descriptor = MethodDescriptor(
    # ── Identity and citation metadata ──────────────────────────────────────
    key="CatPred",
    display_name="CatPred",
    authors="Veda Sheersh Boorla, Somtirtha Santra, Costas D. Maranas",
    publication_title=(
        "CatPred: A comprehensive framework for deep learning in vitro enzyme kinetic parameters"
    ),
    citation_url="https://www.nature.com/articles/s41467-025-57215-9",
    repo_url="https://github.com/maranasgroup/CatPred",
    more_info=(
        "CatPred currently integrates kcat and Km through a local CPU inference "
        "adapter. Ki remains outside the current webKinPred target model."
    ),
    # ── Prediction contract ─────────────────────────────────────────────────
    # Two targets are declared; each has its own output column label and its
    # own extra keyword arguments (the CatPred `kinetics_type` selector).
    supports=["kcat", "Km"],
    input_format="single",
    output_cols={
        "kcat": "kcat (1/s)",
        "Km": "KM (mM)",
    },
    # NOTE(review): presumably an upper bound on protein sequence length;
    # the exact enforcement lives in MethodDescriptor's consumers.
    max_seq_len=2048,
    # Input column name -> keyword argument name.
    col_to_kwarg={"Substrate": "substrates"},
    target_kwargs={
        "kcat": {"kinetics_type": "KCAT"},
        "Km": {"kinetics_type": "KM"},
    },
    # ── Subprocess execution ────────────────────────────────────────────────
    subprocess=SubprocessEngineConfig(
        python_path_key="CatPred",
        script_key="CatPred",
        # Environment variable name -> path key.
        # NOTE(review): the values look like DATA_PATHS keys that the engine
        # resolves to absolute paths; confirm in api.methods.base.
        data_path_env={
            "CATPRED_REPO_ROOT": "CatPred",
            "CATPRED_MEDIA_PATH": "media",
            "CATPRED_TOOLS_PATH": "tools",
            "PYTHONPATH": "CatPred",
        },
        # Literal environment values passed alongside the path variables.
        extra_env={
            "PROTEIN_EMBED_USE_CPU": "1",
        },
    ),
    embeddings_used=["esm2"],
)
1 change: 1 addition & 0 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ services:
- ./models/DLKcat/DeeplearningApproach:/app/models/DLKcat/DeeplearningApproach:ro
- ./models/KinForm/results/trained_models:/app/models/KinForm/results/trained_models:ro
- ./models/CataPro:/app/models/CataPro:ro
- ./models/CatPred:/app/models/CatPred:ro
environment:
<<: *common-env
depends_on:
Expand Down
17 changes: 17 additions & 0 deletions docker-requirements/catpred_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
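# Pip requirements for the CatPred conda environment (catpred_env, Python 3.10).
# RDKit is not listed here: the Dockerfile installs it from conda-forge.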
torch==2.4.1
fair-esm==2.0.0
transformers==4.47.1
sentencepiece==0.2.0
numpy==1.26.4
pandas==2.2.3
pandas-flavor==0.6.0
scikit-learn==1.5.2
scipy==1.14.1
tqdm==4.67.1
typed-argument-parser==1.10.1
descriptastorus==2.8.0
rotary-embedding-torch==0.6.5
hyperopt==0.2.7
matplotlib==3.9.2
tensorboardX==2.6.2.2
20 changes: 20 additions & 0 deletions models/CatPred/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# CatPred

This directory is reserved for the CatPred integration.

Expected contents:
- a checkout of the CatPred repository rooted at `models/CatPred/`
- the adapter entrypoint at `models/CatPred/catpred/integration/webkinpred_adapter.py`

Local development can instead point webKinPred at an external CatPred checkout:

```bash
export WEBKINPRED_CATPRED_ROOT="/absolute/path/to/CatPred"
export WEBKINPRED_CATPRED_PYTHON="/absolute/path/to/CatPred/.venv/bin/python"
```

Docker/runtime notes:
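- `webKinPred/config_docker.py` defaults to `/app/models/CatPred`; setting `WEBKINPRED_CATPRED_ROOT` overrides that path there too, while the interpreter stays `/opt/conda/envs/catpred_env/bin/python`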
- the CatPred subprocess descriptor sets `CATPRED_REPO_ROOT`, `CATPRED_MEDIA_PATH`, and `CATPRED_TOOLS_PATH`
- CatPred kcat/Km predictions use per-residue ESM2 features, which are cached under
  `media/sequence_info/esm2_last/per_residue/{seq_id}.pt`
2 changes: 2 additions & 0 deletions webKinPred/config_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@


_DATA_PATH_REL = {
"CatPred": "models/CatPred",
"DLKcat": "models/DLKcat/DeeplearningApproach/Data",
"DLKcat_Results": "models/DLKcat/DeeplearningApproach/Results",
"EITLEM": "models/EITLEM",
Expand All @@ -23,6 +24,7 @@


_PREDICTION_SCRIPT_REL = {
"CatPred": "models/CatPred/catpred/integration/webkinpred_adapter.py",
"DLKcat": "models/DLKcat/DeeplearningApproach/Code/example/prediction_for_input.py",
"EITLEM": "models/EITLEM/Code/eitlem_prediction_script_batch.py",
"TurNup": "models/TurNup/code/kcat_prediction_batch.py",
Expand Down
8 changes: 8 additions & 0 deletions webKinPred/config_docker.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from webKinPred.config_base import (
DEFAULT_ALLOWED_FRONTEND_IPS,
SERVER_LIMIT,
Expand All @@ -13,6 +15,7 @@
FASTAS_DIR = f"{BASE_PATH}/fastas"

PYTHON_PATHS = {
"CatPred": "/opt/conda/envs/catpred_env/bin/python",
"DLKcat": "/opt/conda/envs/dlkcat_env/bin/python",
"EITLEM": "/opt/conda/envs/eitlem_env/bin/python",
"TurNup": "/opt/conda/envs/turnup_env/bin/python",
Expand All @@ -28,6 +31,11 @@
DATA_PATHS = build_data_paths(BASE_PATH)
PREDICTION_SCRIPTS = build_prediction_scripts(BASE_PATH)

# Optional override: when WEBKINPRED_CATPRED_ROOT is set (and non-empty), use
# that checkout for both the CatPred data path and the adapter script instead
# of the paths derived from BASE_PATH. The value is used as given.
CATPRED_ROOT = os.environ.get("WEBKINPRED_CATPRED_ROOT")
if CATPRED_ROOT:
    DATA_PATHS["CatPred"] = CATPRED_ROOT
    PREDICTION_SCRIPTS["CatPred"] = (
        f"{CATPRED_ROOT}/catpred/integration/webkinpred_adapter.py"
    )

SIMILARITY_DATASETS = build_similarity_datasets(FASTAS_DIR)
TARGET_DBS = {label: item["target_db"] for label, item in SIMILARITY_DATASETS.items()}

Expand Down
11 changes: 11 additions & 0 deletions webKinPred/config_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def _env_python(env_name: str) -> str:


PYTHON_PATHS = {
"CatPred": _env_python("catpred_env"),
"DLKcat": _env_python("dlkcat_env"),
"EITLEM": _env_python("eitlem_env"),
"TurNup": _env_python("turnup_env"),
Expand All @@ -41,6 +42,16 @@ def _env_python(env_name: str) -> str:
DATA_PATHS = build_data_paths(BASE_PATH)
PREDICTION_SCRIPTS = build_prediction_scripts(BASE_PATH)

# Optional override for local development: WEBKINPRED_CATPRED_ROOT points
# webKinPred at an external CatPred checkout.
CATPRED_ROOT = os.environ.get("WEBKINPRED_CATPRED_ROOT")
if CATPRED_ROOT:
# Resolve to an absolute path, then reuse it for the data path and for the
# adapter script location inside the checkout.
catpred_root = str(Path(CATPRED_ROOT).resolve())
DATA_PATHS["CatPred"] = catpred_root
PREDICTION_SCRIPTS["CatPred"] = str(Path(catpred_root) / "catpred" / "integration" / "webkinpred_adapter.py")
# Interpreter for CatPred: WEBKINPRED_CATPRED_PYTHON when set, otherwise the
# `catpred_env` interpreter returned by _env_python.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this assignment is nested under the `if` above — confirm in the file.
PYTHON_PATHS["CatPred"] = os.environ.get(
"WEBKINPRED_CATPRED_PYTHON",
_env_python("catpred_env"),
)

SIMILARITY_DATASETS = build_similarity_datasets(FASTAS_DIR)
TARGET_DBS = {label: item["target_db"] for label, item in SIMILARITY_DATASETS.items()}

Expand Down