# Source code for ExposoGraph.db_clients.ctd

"""CTD (Comparative Toxicogenomics Database) chemical-gene interaction client.

Queries the CTD public data via their batch query API to retrieve
chemical-gene interactions relevant to carcinogen metabolism.
"""

from __future__ import annotations

import csv
import io
import logging
from dataclasses import dataclass, field
from typing import Optional

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Default URL of the CTD batch query tool; used as the default ``base_url`` of CTDClient.
_BASE_URL = "https://ctdbase.org/tools/batchQuery.go"


@dataclass
class ChemicalGeneInteraction:
    """One chemical-gene interaction record taken from CTD.

    The first four fields are required; the remaining ones default to
    empty values so a record can be built from partial data.

    Attributes
    ----------
    chemical_name:
        Name of the chemical.
    chemical_id:
        Identifier of the chemical.
    gene_symbol:
        Symbol of the gene.
    gene_id:
        Identifier of the gene.
    organism:
        Organism the interaction was reported for (empty if unknown).
    interaction:
        Free-text description of the interaction (empty if unknown).
    pubmed_ids:
        PubMed identifiers of supporting publications. Every instance
        gets its own fresh list.
    """

    # Required identifying fields.
    chemical_name: str
    chemical_id: str
    gene_symbol: str
    gene_id: str

    # Optional descriptive fields.
    organism: str = ""
    interaction: str = ""
    pubmed_ids: list[str] = field(default_factory=list)
class CTDClient:
    """Client for querying CTD chemical-gene interactions.

    Parameters
    ----------
    base_url:
        Override the CTD batch query URL (useful for testing).
    timeout:
        HTTP request timeout in seconds.

    Raises
    ------
    RuntimeError
        If the optional ``requests`` package is not installed.
    """

    def __init__(
        self,
        base_url: str = _BASE_URL,
        timeout: int = 60,
    ) -> None:
        # Probe for the optional dependency up front so a missing install
        # is reported at construction time instead of on the first query.
        try:
            import requests as _requests  # noqa: F401
        except ModuleNotFoundError as exc:  # pragma: no cover
            raise RuntimeError(
                "The 'requests' package is required for CTD lookups. "
                "Install with: pip install ExposoGraph[db]"
            ) from exc
        self.base_url = base_url
        self.timeout = timeout

    def get_chemical_gene_interactions(
        self,
        chemical_name: str,
        *,
        organism: str = "Homo sapiens",
    ) -> list[ChemicalGeneInteraction]:
        """Fetch chemical-gene interactions for a given chemical.

        Parameters
        ----------
        chemical_name:
            Chemical name to query (e.g. ``"Benzo(a)pyrene"``).
        organism:
            Organism filter. Defaults to ``"Homo sapiens"``. An empty
            string disables the filter.

        Returns
        -------
        list[ChemicalGeneInteraction]
            Parsed interactions whose organism matches the filter.
        """
        return self._fetch_interactions("chem", chemical_name, organism=organism)

    def get_gene_interactions(
        self,
        gene_symbol: str,
        *,
        organism: str = "Homo sapiens",
    ) -> list[ChemicalGeneInteraction]:
        """Fetch chemical-gene interactions for a given gene.

        Parameters
        ----------
        gene_symbol:
            Gene symbol to query (e.g. ``"CYP1A1"``).
        organism:
            Organism filter. Defaults to ``"Homo sapiens"``. An empty
            string disables the filter.

        Returns
        -------
        list[ChemicalGeneInteraction]
            Parsed interactions whose organism matches the filter.
        """
        return self._fetch_interactions("gene", gene_symbol, organism=organism)

    def _fetch_interactions(
        self,
        input_type: str,
        input_terms: str,
        *,
        organism: str,
    ) -> list[ChemicalGeneInteraction]:
        """Run one batch query and parse its TSV response.

        Shared implementation behind the public ``get_*`` methods, which
        differ only in the ``inputType`` they send.

        Parameters
        ----------
        input_type:
            Value for the ``inputType`` query parameter (``"chem"`` or
            ``"gene"``).
        input_terms:
            Value for the ``inputTerms`` query parameter.
        organism:
            Organism filter forwarded to the parser.

        Notes
        -----
        ``raise_for_status()`` is called on the response, so an
        unsuccessful HTTP status propagates as an exception from
        ``requests``.
        """
        # Imported lazily: ``requests`` is an optional dependency.
        import requests

        params = {
            "inputType": input_type,
            "inputTerms": input_terms,
            "report": "cgixns",
            "format": "tsv",
        }
        resp = requests.get(self.base_url, params=params, timeout=self.timeout)
        resp.raise_for_status()
        return self._parse_interactions(resp.text, organism=organism)

    def _parse_interactions(
        self,
        tsv_text: str,
        *,
        organism: str = "Homo sapiens",
    ) -> list[ChemicalGeneInteraction]:
        """Parse CTD TSV response into interaction objects.

        Lines starting with ``#`` are ignored, and rows with fewer than
        six tab-separated columns are skipped. Columns are read by
        position: 0 chemical name, 1 chemical id, 2 gene symbol,
        3 gene id, 4 organism, 5 interaction, and (when present)
        7 ``|``-separated PubMed ids. Column 6 is not used.

        Parameters
        ----------
        tsv_text:
            Raw tab-separated response body.
        organism:
            Keep only rows whose organism column equals this value
            exactly. An empty string keeps every row.

        Returns
        -------
        list[ChemicalGeneInteraction]
            One object per retained row, in input order.
        """
        interactions: list[ChemicalGeneInteraction] = []

        # Skip comment lines starting with #
        data_lines = [
            line for line in tsv_text.splitlines() if not line.startswith("#")
        ]
        reader = csv.reader(io.StringIO("\n".join(data_lines)), delimiter="\t")

        for row in reader:
            # Also drops blank lines, which the reader yields as [].
            if len(row) < 6:
                continue

            row_organism = row[4].strip()
            if organism and row_organism != organism:
                continue

            # The PubMed column is optional. Empty tokens (from a blank
            # cell or a trailing/doubled "|") are dropped so the result
            # never contains "" entries.
            raw_pmids = row[7] if len(row) > 7 else ""
            pubmed_ids = [
                pmid
                for pmid in (token.strip() for token in raw_pmids.split("|"))
                if pmid
            ]

            interactions.append(
                ChemicalGeneInteraction(
                    chemical_name=row[0].strip(),
                    chemical_id=row[1].strip(),
                    gene_symbol=row[2].strip(),
                    gene_id=row[3].strip(),
                    organism=row_organism,
                    interaction=row[5].strip(),
                    pubmed_ids=pubmed_ids,
                )
            )
        return interactions