Source code for ExposoGraph.db_clients.ctd
"""CTD (Comparative Toxicogenomics Database) chemical-gene interaction client.
Queries the CTD public data via their batch query API to retrieve
chemical-gene interactions relevant to carcinogen metabolism.
"""
from __future__ import annotations
import csv
import io
import logging
from dataclasses import dataclass, field
from typing import Optional
logger = logging.getLogger(__name__)
_BASE_URL = "https://ctdbase.org/tools/batchQuery.go"
[docs]
@dataclass
class ChemicalGeneInteraction:
    """One chemical-gene interaction record retrieved from CTD.

    Attributes
    ----------
    chemical_name:
        Name of the chemical in the record.
    chemical_id:
        Identifier of the chemical in the record.
    gene_symbol:
        Symbol of the interacting gene.
    gene_id:
        Identifier of the interacting gene.
    organism:
        Organism the record refers to; empty string when not provided.
    interaction:
        Interaction text from the record; empty string when not provided.
    pubmed_ids:
        PubMed identifiers attached to the record; a fresh empty list per
        instance when not provided.
    """

    # Required fields (positional order is part of the constructor interface).
    chemical_name: str
    chemical_id: str
    gene_symbol: str
    gene_id: str

    # Optional fields.
    organism: str = ""
    interaction: str = ""
    pubmed_ids: list[str] = field(default_factory=list)
[docs]
class CTDClient:
    """Client for querying CTD chemical-gene interactions.

    Parameters
    ----------
    base_url:
        Override the CTD batch query URL (useful for testing).
    timeout:
        HTTP request timeout in seconds.

    Raises
    ------
    RuntimeError
        If the optional ``requests`` package is not installed.
    """

    def __init__(
        self,
        base_url: str = _BASE_URL,
        timeout: int = 60,
    ) -> None:
        # Probe for the optional dependency at construction time so a missing
        # install surfaces immediately instead of on the first query.
        try:
            import requests as _requests  # noqa: F401
        except ModuleNotFoundError as exc:  # pragma: no cover
            raise RuntimeError(
                "The 'requests' package is required for CTD lookups. "
                "Install with: pip install ExposoGraph[db]"
            ) from exc
        self.base_url = base_url
        self.timeout = timeout

    def get_chemical_gene_interactions(
        self,
        chemical_name: str,
        *,
        organism: str = "Homo sapiens",
    ) -> list[ChemicalGeneInteraction]:
        """Fetch chemical-gene interactions for a given chemical.

        Parameters
        ----------
        chemical_name:
            Chemical name to query (e.g. ``"Benzo(a)pyrene"``).
        organism:
            Organism filter. Defaults to ``"Homo sapiens"``. An empty string
            disables filtering.

        Returns
        -------
        list[ChemicalGeneInteraction]
            Parsed interactions that pass the organism filter.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status.
        """
        return self._fetch_interactions("chem", chemical_name, organism=organism)

    def get_gene_interactions(
        self,
        gene_symbol: str,
        *,
        organism: str = "Homo sapiens",
    ) -> list[ChemicalGeneInteraction]:
        """Fetch chemical-gene interactions for a given gene.

        Parameters
        ----------
        gene_symbol:
            Gene symbol to query (e.g. ``"CYP1A1"``).
        organism:
            Organism filter. Defaults to ``"Homo sapiens"``. An empty string
            disables filtering.

        Returns
        -------
        list[ChemicalGeneInteraction]
            Parsed interactions that pass the organism filter.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status.
        """
        return self._fetch_interactions("gene", gene_symbol, organism=organism)

    def _fetch_interactions(
        self,
        input_type: str,
        term: str,
        *,
        organism: str,
    ) -> list[ChemicalGeneInteraction]:
        """Run one ``cgixns`` batch query and parse the TSV response.

        Shared by the public lookup methods, which differ only in the
        ``inputType`` value they send.
        """
        # Imported lazily so that importing this module does not require the
        # optional dependency.
        import requests

        params = {
            "inputType": input_type,
            "inputTerms": term,
            "report": "cgixns",
            "format": "tsv",
        }
        resp = requests.get(self.base_url, params=params, timeout=self.timeout)
        resp.raise_for_status()
        return self._parse_interactions(resp.text, organism=organism)

    def _parse_interactions(
        self,
        tsv_text: str,
        *,
        organism: str = "Homo sapiens",
    ) -> list[ChemicalGeneInteraction]:
        """Parse CTD TSV response into interaction objects.

        Lines starting with ``#`` are ignored, and rows with fewer than six
        columns are skipped. Columns are read by position: 0 chemical name,
        1 chemical id, 2 gene symbol, 3 gene id, 4 organism, 5 interaction,
        and (when present) 7 PubMed ids separated by ``|``.

        Parameters
        ----------
        tsv_text:
            Raw tab-separated response body.
        organism:
            Keep only rows whose organism column equals this value exactly
            (case-sensitive). An empty string keeps every row.

        Returns
        -------
        list[ChemicalGeneInteraction]
            One object per retained row, in input order.
        """
        # NOTE(review): the column positions above are assumed by this parser;
        # confirm them against the header of a live CTD ``cgixns`` TSV export.
        data_lines = [
            line for line in tsv_text.splitlines() if not line.startswith("#")
        ]
        interactions: list[ChemicalGeneInteraction] = []
        reader = csv.reader(io.StringIO("\n".join(data_lines)), delimiter="\t")
        for row in reader:
            # Also discards blank lines, which the reader yields as [].
            if len(row) < 6:
                continue
            row_organism = row[4].strip()
            if organism and row_organism != organism:
                continue
            raw_pmids = row[7] if len(row) > 7 else ""
            # Drop empty tokens so a trailing/doubled "|" or a whitespace-only
            # field does not produce "" entries.
            pmids = [
                token
                for token in (part.strip() for part in raw_pmids.split("|"))
                if token
            ]
            interactions.append(
                ChemicalGeneInteraction(
                    chemical_name=row[0].strip(),
                    chemical_id=row[1].strip(),
                    gene_symbol=row[2].strip(),
                    gene_id=row[3].strip(),
                    organism=row_organism,
                    interaction=row[5].strip(),
                    pubmed_ids=pmids,
                )
            )
        return interactions