"""Reusable seeded example graphs for ExposoGraph."""
from __future__ import annotations
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
from .config import GraphMode, GraphVisibility
from .db_clients.iarc import IARCClassifier
from .engine import GraphEngine
from .exporter import (
parse_graph_data_js,
to_graph_data_js,
to_interactive_html,
to_json,
to_plotly_html,
)
from .grounding import ground_node
from .models import (
Edge,
EdgeType,
KnowledgeGraph,
MatchStatus,
Node,
NodeType,
ProvenanceRecord,
)
from .reference_data import ACTIVITY_SCORES, REFERENCE_KEGG_PATHWAYS, build_full_panel
Spec = dict[str, str]
[docs]
@dataclass(frozen=True)
class ArchitectureInventoryGroup:
name: str
members: tuple[str, ...]
count: int
[docs]
@dataclass(frozen=True)
class ArchitectureSummary:
node_count: int
edge_count: int
node_type_count: int
edge_type_count: int
node_type_counts: dict[str, int]
edge_type_counts: dict[str, int]
carcinogens: tuple[str, ...]
enzymes: tuple[str, ...]
metabolites: tuple[str, ...]
dna_adducts: tuple[str, ...]
pathway_labels: tuple[str, ...]
carcinogen_classes: tuple[ArchitectureInventoryGroup, ...]
enzyme_categories: tuple[ArchitectureInventoryGroup, ...]
_SHOWCASE_NAMESPACE = "curated_showcase"
_CURATED_SOURCE = "Curated showcase"
_DEFAULT_BUNDLE_PATH = Path("ExposoGraph/temp_kg/graph-data.js")
_FULL_LEGENDS_REFERENCE_GRAPH = Path(__file__).resolve().parent / "reference_graphs" / "full_legends_graph.js"
_IARC = IARCClassifier()
_FULL_LEGENDS_ID_REMAP: dict[str, str] = {
"8oxodG": "Oxo_dG",
"AFB1_epox": "AFB1_epoxide",
"AFB1_N7G": "AFB1_Gua",
"BENZ": "Benzene",
"BNZ": "Benzidine",
"BaP_78diol": "BaP_diol",
"BaP_78epox": "BaP_epoxide",
"BenzO": "Benzene_oxide",
"BQ": "Benzoquinone",
"CEO": "Chloroethylene_oxide",
"EO": "EthyleneOxide",
"TESTO": "Testosterone",
"VCM": "VinylChloride",
"NOHPHIP": "NOH_PhIP",
"NOH4ABP": "NOH_4ABP",
"NOHBNZ": "NOH_Benzidine",
"4OHE2": "HydroxyE2",
"A4": "Androstenedione",
"6bOHT": "HydroxyTestosterone",
"T_gluc": "Testosterone_gluc",
"NDMA_alpha": "NDMA_hydroxyl",
"NNK_alpha": "NNK_hydroxyl",
"O6MeG": "O6_methyl_dG",
"KEGG980": "hsa00980",
"KEGG5208": "hsa05208",
"KEGG5204": "hsa05204",
"KEGG_ANDRO": "hsa00140",
}
_PHASE_I_HORMONE_GENES = ("CYP17A1", "SRD5A1", "SRD5A2", "CYP19A1", "AKR1C3")
_PHASE_II_HORMONE_GENES = ("COMT", "SULT1E1", "UGT2B17", "UGT2B7")
_CARCINOGEN_CLASS_TITLES: tuple[tuple[str, str], ...] = (
("PAH", "PAH"),
("HCA", "HCA"),
("Aromatic_Amine", "Aromatic Amines"),
("Nitrosamine", "Nitrosamines"),
("Mycotoxin", "Mycotoxins"),
("Estrogen", "Estrogens"),
("Androgen", "Androgens"),
("Solvent", "Solvents"),
("Alkylating", "Alkylating Agents"),
)
def _entity_spec(
node_id: str,
label: str,
detail: str,
*,
group: str | None = None,
reactivity: str | None = None,
) -> Spec:
spec = {
"id": node_id,
"label": label,
"detail": detail,
}
if group is not None:
spec["group"] = group
if reactivity is not None:
spec["reactivity"] = reactivity
return spec
def _relation_spec(
source: str,
target: str,
edge_type: str,
carcinogen: str | None,
label: str,
*,
custom_predicate: str | None = None,
) -> Spec:
spec = {
"source": source,
"target": target,
"type": edge_type,
"label": label,
}
if carcinogen is not None:
spec["carcinogen"] = carcinogen
if custom_predicate is not None:
spec["custom_predicate"] = custom_predicate
return spec
_CARCINOGEN_SPECS: tuple[Spec, ...] = (
_entity_spec(
"BaP",
"Benzo[a]pyrene",
"Reference PAH carcinogen used for the canonical activation-to-adduct chain.",
group="PAH",
),
_entity_spec(
"DMBA",
"DMBA",
(
"Reference PAH-like polycyclic carcinogen included to mirror the "
"manuscript class example."
),
group="PAH",
),
_entity_spec(
"PhIP",
"PhIP",
"Representative heterocyclic amine produced during high-temperature cooking.",
group="HCA",
),
_entity_spec(
"MeIQx",
"MeIQx",
(
"Representative heterocyclic amine activated through "
"N-hydroxylation and esterification."
),
group="HCA",
),
_entity_spec(
"4ABP",
"4-Aminobiphenyl",
(
"Representative aromatic amine requiring N-hydroxylation and "
"acetyl-transfer activation."
),
group="Aromatic_Amine",
),
_entity_spec(
"Benzidine",
"Benzidine",
"Representative aromatic amine included for class-level alignment.",
group="Aromatic_Amine",
),
_entity_spec(
"NNK",
"NNK",
(
"Tobacco-specific nitrosamine represented as a "
"pyridyloxobutylating carcinogen context."
),
group="Nitrosamine",
),
_entity_spec(
"NDMA",
"NDMA",
"Small-molecule nitrosamine represented via methylating chemistry.",
group="Nitrosamine",
),
_entity_spec(
"AFB1",
"Aflatoxin B1",
(
"Mycotoxin represented through CYP-mediated epoxidation and "
"guanine adduct formation."
),
group="Mycotoxin",
),
_entity_spec(
"E2",
"17beta-Estradiol",
"Endogenous estrogen represented in the hormone-metabolism reference layer.",
group="Estrogen",
),
_entity_spec(
"Testosterone",
"Testosterone",
(
"Androgen precursor node used to anchor the steroid-hormone "
"metabolism branch."
),
group="Androgen",
),
_entity_spec(
"DHT",
"5a-DHT",
"Dihydrotestosterone node included to mirror the manuscript example.",
group="Androgen",
),
_entity_spec(
"Benzene",
"Benzene",
(
"Volatile solvent carcinogen represented through CYP2E1 oxidation "
"and ROS-linked damage."
),
group="Solvent",
),
_entity_spec(
"VinylChloride",
"Vinyl Chloride",
"Solvent carcinogen represented through etheno-adduct chemistry.",
group="Solvent",
),
_entity_spec(
"EthyleneOxide",
"Ethylene Oxide",
"Alkylating carcinogen represented through hydroxyethyl adduct formation.",
group="Alkylating",
),
)
_METABOLITE_SPECS: tuple[Spec, ...] = (
_entity_spec(
"BaP_epoxide",
"BaP-7,8-epoxide",
"Initial PAH epoxide intermediate formed during BaP activation.",
reactivity="Intermediate",
),
_entity_spec(
"BaP_diol",
"BaP-7,8-diol",
"Hydrolysis product of the BaP epoxide intermediate.",
reactivity="Intermediate",
),
_entity_spec(
"BPDE",
"BPDE",
"Ultimate diol-epoxide carcinogen formed from BaP.",
reactivity="High",
),
_entity_spec(
"BPDE_GSH",
"BPDE-GSH conjugate",
"Glutathione-conjugated detoxification product of BPDE.",
reactivity="Low",
),
_entity_spec(
"DMBA_epoxide",
"DMBA-3,4-epoxide",
"Initial epoxide intermediate formed during DMBA activation.",
reactivity="Intermediate",
),
_entity_spec(
"DMBA_diol",
"DMBA-3,4-diol",
"Hydrolyzed DMBA intermediate prior to diol-epoxide formation.",
reactivity="Intermediate",
),
_entity_spec(
"DMBA_diol_epoxide",
"DMBA-diol-epoxide",
"Ultimate DMBA metabolite used to anchor the PAH class expansion.",
reactivity="High",
),
_entity_spec(
"NOH_PhIP",
"N-hydroxy-PhIP",
"Phase I activated PhIP intermediate prior to sulfate ester formation.",
reactivity="Intermediate",
),
_entity_spec(
"PhIP_sulfate",
"PhIP-N3-sulfate",
"Reactive sulfate ester representing HCA bioactivation.",
reactivity="High",
),
_entity_spec(
"NOH_MeIQx",
"N-hydroxy-MeIQx",
"N-hydroxylated MeIQx activation intermediate.",
reactivity="Intermediate",
),
_entity_spec(
"MeIQx_acetoxy",
"MeIQx-N-acetoxy ester",
"Reactive ester representing MeIQx activation.",
reactivity="High",
),
_entity_spec(
"NOH_4ABP",
"N-hydroxy-4-ABP",
"N-hydroxylated 4-aminobiphenyl activation intermediate.",
reactivity="Intermediate",
),
_entity_spec(
"ABP_acetoxy",
"4-ABP-N-acetoxy ester",
"Reactive aromatic-amine ester leading to guanine adduct formation.",
reactivity="High",
),
_entity_spec(
"NOH_Benzidine",
"N-hydroxy-benzidine",
"Benzidine activation intermediate represented for class completeness.",
reactivity="Intermediate",
),
_entity_spec(
"Benzidine_ester",
"Benzidine-diacetoxy ester",
"Reactive benzidine ester included as an aromatic-amine endpoint.",
reactivity="High",
),
_entity_spec(
"NNK_pob",
"NNK pyridyloxobutylating intermediate",
"Representative NNK activation product leading to POB adducts.",
reactivity="High",
),
_entity_spec(
"NNK_hydroxyl",
"NNK alpha-hydroxylated intermediate",
"Primary NNK activation intermediate for the nitrosamine branch.",
reactivity="Intermediate",
),
_entity_spec(
"NDMA_hydroxyl",
"NDMA alpha-hydroxylated intermediate",
"Primary NDMA activation intermediate.",
reactivity="Intermediate",
),
_entity_spec(
"Methyldiazonium",
"Methyldiazonium ion",
"Reactive methylating species used to represent NDMA alkylation.",
reactivity="High",
),
_entity_spec(
"AFB1_epoxide",
"AFB1-8,9-epoxide",
"Canonical aflatoxin epoxide intermediate.",
reactivity="High",
),
_entity_spec(
"HydroxyE2",
"4-Hydroxyestradiol",
(
"Catechol estrogen intermediate formed during CYP1B1-mediated "
"estrogen activation."
),
reactivity="Intermediate",
),
_entity_spec(
"E2_quinone",
"Estradiol-3,4-quinone",
"Reactive estrogen quinone that yields depurinating DNA adducts.",
reactivity="High",
),
_entity_spec(
"Testosterone_gluc",
"Testosterone-17-glucuronide",
"Representative androgen detoxification product.",
reactivity="Low",
),
_entity_spec(
"DHT_gluc",
"DHT-17-glucuronide",
"Representative DHT conjugate used for androgen detoxification.",
reactivity="Low",
),
_entity_spec(
"Benzene_oxide",
"Benzene oxide",
"Early benzene oxidation product linked to downstream quinone chemistry.",
reactivity="Intermediate",
),
_entity_spec(
"Benzoquinone",
"Benzoquinone",
"Representative ROS-linked benzene metabolite.",
reactivity="High",
),
_entity_spec(
"Chloroethylene_oxide",
"Chloroethylene oxide",
"Reactive vinyl chloride intermediate that yields etheno adducts.",
reactivity="High",
),
_entity_spec(
"Hydroxyethylating_species",
"2-hydroxyethylating species",
"Reactive ethylene oxide-derived alkylating intermediate.",
reactivity="High",
),
)
_DNA_ADDUCT_SPECS: tuple[Spec, ...] = (
_entity_spec(
"BPDE_dG",
"BPDE-N2-dG",
"Canonical bulky PAH DNA adduct.",
),
_entity_spec(
"DMBA_dA",
"DMBA-dA adduct",
"Representative DMBA-derived DNA adduct.",
),
_entity_spec(
"PhIP_dG",
"PhIP-C8-dG",
"Representative HCA DNA adduct formed from activated PhIP.",
),
_entity_spec(
"ABP_dG",
"dG-C8-4-ABP",
"Representative aromatic-amine guanine adduct.",
),
_entity_spec(
"POB_dG",
"O6-POB-dG",
"Representative NNK-derived pyridyloxobutyl DNA adduct.",
),
_entity_spec(
"O6_methyl_dG",
"O6-methyl-dG",
"Representative NDMA-derived methyl DNA adduct.",
),
_entity_spec(
"AFB1_Gua",
"AFB1-N7-Gua",
"Canonical aflatoxin-derived guanine adduct.",
),
_entity_spec(
"E2_Ade",
"4-OHE2-1-N3Ade",
"Representative depurinating estrogen-DNA adduct.",
),
_entity_spec(
"Oxo_dG",
"8-oxo-dG",
"Representative oxidative DNA lesion linked to ROS pathway activity.",
),
_entity_spec(
"EthenoG",
"N2,3-ethenoguanine",
"Representative etheno adduct associated with vinyl chloride exposure.",
),
_entity_spec(
"HEG",
"N7-(2-hydroxyethyl)guanine",
"Representative hydroxyethyl DNA adduct associated with ethylene oxide.",
),
)
_BASE_CARCINOGEN_SPECS = {spec["id"]: spec for spec in _CARCINOGEN_SPECS}
_BASE_METABOLITE_SPECS = {spec["id"]: spec for spec in _METABOLITE_SPECS}
_BASE_DNA_ADDUCT_SPECS = {spec["id"]: spec for spec in _DNA_ADDUCT_SPECS}
_ANDROGEN_TISSUE_SPECS: tuple[Spec, ...] = (
_entity_spec(
"Prostate",
"Prostate tissue",
"Primary androgen-responsive tissue represented for the prostate-focused module.",
),
_entity_spec(
"PeripheralHormoneTissues",
"Peripheral hormone-metabolism tissues",
(
"Peripheral tissues where local androgen conversion contributes "
"to hormone exposure context."
),
),
_entity_spec(
"HormoneResponsiveTissues",
"Hormone-responsive tissues",
(
"Breast, adipose, and related tissues where aromatase-linked "
"estrogen formation is relevant."
),
),
)
_ANDROGEN_ENZYME_SPECS: tuple[Spec, ...] = (
_entity_spec(
"CYP3A5",
"CYP3A5",
(
"Testosterone 6beta-hydroxylation enzyme with prostate-relevant "
"androgen-clearance and androgen-receptor modulation context."
),
),
)
_ANDROGEN_GENE_SPECS: tuple[Spec, ...] = (
{
"id": "AR",
"label": "Androgen receptor (AR)",
"detail": (
"Nuclear androgen receptor represented as the signaling endpoint "
"for testosterone and DHT binding in prostate-linked proliferation."
),
"tissue": "prostate, hormone-responsive tissues",
"source_db": _CURATED_SOURCE,
},
{
"id": "SRD5A2_V89L",
"label": "SRD5A2 V89L",
"detail": (
"Variant annotation for the common SRD5A2 V89L polymorphism used to "
"track reduced DHT-forming activity in tooltip and detail views."
),
"variant": "V89L",
"phenotype": "Reduced 5alpha-reductase activity",
"tissue": "prostate",
"source_db": "PubMed",
},
{
"id": "SRD5A2_A49T",
"label": "SRD5A2 A49T",
"detail": (
"Variant annotation for the SRD5A2 A49T polymorphism used to track "
"increased DHT-forming activity in the androgen module."
),
"variant": "A49T",
"phenotype": "Increased 5alpha-reductase activity",
"tissue": "prostate",
"source_db": "PubMed",
},
{
"id": "CYP19A1_repeat_length",
"label": "CYP19A1 repeat-length variant",
"detail": (
"Curated placeholder for CYP19A1 repeat-length polymorphisms "
"associated with altered aromatase expression."
),
"variant": "Repeat-length polymorphism",
"phenotype": "Expression-dependent aromatase activity shift",
"tissue": "hormone-responsive tissues",
"source_db": "PubMed",
},
{
"id": "UGT2B17_copy_number_deletion",
"label": "UGT2B17 copy number deletion",
"detail": (
"Copy-number deletion annotation used to capture loss of UGT2B17-"
"mediated androgen glucuronidation."
),
"variant": "Copy number deletion",
"phenotype": "Absent or markedly reduced androgen glucuronidation",
"tissue": "prostate, liver",
"source_db": "PubMed",
},
)
_ANDROGEN_METABOLITE_SPECS: tuple[Spec, ...] = (
_entity_spec(
"Androstenedione",
"Androstenedione",
(
"Steroid intermediate represented as the immediate AKR1C3 "
"substrate for testosterone formation."
),
reactivity="Low",
),
_entity_spec(
"HydroxyTestosterone",
"6beta-Hydroxytestosterone",
"Representative CYP3A5-linked hydroxylated androgen metabolite.",
reactivity="Low",
),
_BASE_METABOLITE_SPECS["HydroxyE2"],
_BASE_METABOLITE_SPECS["E2_quinone"],
_BASE_METABOLITE_SPECS["Testosterone_gluc"],
_BASE_METABOLITE_SPECS["DHT_gluc"],
)
_ANDROGEN_DNA_ADDUCT_SPECS: tuple[Spec, ...] = (
_BASE_DNA_ADDUCT_SPECS["E2_Ade"],
_entity_spec(
"E2_Gua",
"4-OHE2-guanine depurinating adduct",
"Representative depurinating estrogen-DNA adduct at guanine residues.",
),
)
_ANDROGEN_PATHWAY_SPECS: tuple[Spec, ...] = (
{
"pathway_id": "hsa00140",
"label": "Steroid hormone biosynthesis",
"focus": (
"Hormone-metabolism context for androgen synthesis, DHT "
"conversion, and aromatase bridging."
),
},
{
"pathway_id": "hsa05204",
"label": "Chemical carcinogenesis - DNA adducts",
"focus": (
"Estrogen-quinone DNA-adduct context used to bridge the androgen "
"and estrogen modules."
),
},
{
"pathway_id": "AR_signal_program",
"label": "AR proliferative transcriptional program",
"focus": "Curated signaling endpoint for androgen receptor-mediated proliferative drive.",
},
)
_ANDROGEN_CARCINOGEN_IDS: tuple[str, ...] = ("Testosterone", "DHT", "E2")
_ANDROGEN_PANEL_ENZYME_IDS: tuple[str, ...] = (
"CYP17A1",
"SRD5A1",
"SRD5A2",
"CYP19A1",
"AKR1C3",
"CYP1B1",
"UGT2B7",
"UGT2B17",
)
_PATHWAY_MEMBERSHIP: dict[str, tuple[str, ...]] = {
"hsa00980": (
"BaP",
"BPDE",
"BPDE_GSH",
"PhIP",
"PhIP_sulfate",
"AFB1",
"AFB1_epoxide",
"Benzene",
"CYP1A1",
"GSTM1",
),
"hsa00140": (
"E2",
"Testosterone",
"DHT",
"HydroxyE2",
"E2_quinone",
"Testosterone_gluc",
"DHT_gluc",
"CYP17A1",
"SRD5A2",
"CYP19A1",
),
"hsa05204": (
"BaP",
"BPDE",
"BPDE_dG",
"PhIP_dG",
"O6_methyl_dG",
"AFB1_Gua",
"EthenoG",
"MGMT",
),
"hsa05208": (
"Benzene",
"Benzoquinone",
"Oxo_dG",
"E2",
"COMT",
"OGG1",
),
}
_EDGE_SPECS: tuple[Spec, ...] = (
_relation_spec("CYP1A1", "BaP_epoxide", "ACTIVATES", "BaP", "BaP epoxidation"),
_relation_spec(
"CYP1A2",
"BaP_epoxide",
"ACTIVATES",
"BaP",
"Secondary BaP epoxidation",
),
_relation_spec(
"CYP1B1",
"BaP_epoxide",
"ACTIVATES",
"BaP",
"Extrahepatic BaP epoxidation",
),
_relation_spec(
"EPHX1",
"BaP_diol",
"DETOXIFIES",
"BaP",
"Epoxide hydrolysis",
),
_relation_spec(
"CYP1A1",
"BPDE",
"ACTIVATES",
"BaP",
"Second epoxidation to BPDE",
),
_relation_spec(
"CYP1B1",
"BPDE",
"ACTIVATES",
"BaP",
"Second epoxidation to BPDE",
),
_relation_spec(
"GSTM1",
"BPDE_GSH",
"DETOXIFIES",
"BaP",
"Glutathione conjugation",
),
_relation_spec(
"GSTP1",
"BPDE_GSH",
"DETOXIFIES",
"BaP",
"Glutathione conjugation",
),
_relation_spec(
"ABCB1",
"BPDE_GSH",
"TRANSPORTS",
"BaP",
"Conjugate efflux",
),
_relation_spec(
"BPDE",
"BPDE_dG",
"FORMS_ADDUCT",
"BaP",
"Bulky guanine adduct formation",
),
_relation_spec(
"XPC",
"BPDE_dG",
"REPAIRS",
"BaP",
"Damage recognition",
),
_relation_spec(
"ERCC2",
"BPDE_dG",
"REPAIRS",
"BaP",
"NER helicase repair",
),
_relation_spec(
"CYP1A1",
"DMBA_epoxide",
"ACTIVATES",
"DMBA",
"DMBA epoxidation",
),
_relation_spec(
"CYP1B1",
"DMBA_epoxide",
"ACTIVATES",
"DMBA",
"DMBA epoxidation",
),
_relation_spec(
"EPHX1",
"DMBA_diol",
"DETOXIFIES",
"DMBA",
"Epoxide hydrolysis",
),
_relation_spec(
"CYP1A1",
"DMBA_diol_epoxide",
"ACTIVATES",
"DMBA",
"Diol-epoxide formation",
),
_relation_spec(
"CYP1B1",
"DMBA_diol_epoxide",
"ACTIVATES",
"DMBA",
"Diol-epoxide formation",
),
_relation_spec(
"DMBA_diol_epoxide",
"DMBA_dA",
"FORMS_ADDUCT",
"DMBA",
"DNA adduct formation",
),
_relation_spec(
"XPC",
"DMBA_dA",
"REPAIRS",
"DMBA",
"Bulky adduct recognition",
),
_relation_spec(
"ERCC2",
"DMBA_dA",
"REPAIRS",
"DMBA",
"Bulky adduct excision",
),
_relation_spec(
"CYP1A2",
"NOH_PhIP",
"ACTIVATES",
"PhIP",
"N-hydroxylation",
),
_relation_spec(
"SULT1A1",
"PhIP_sulfate",
"ACTIVATES",
"PhIP",
"Reactive sulfate ester formation",
),
_relation_spec(
"ABCG2",
"PhIP_sulfate",
"TRANSPORTS",
"PhIP",
"Efflux of conjugated HCA metabolite",
),
_relation_spec(
"PhIP_sulfate",
"PhIP_dG",
"FORMS_ADDUCT",
"PhIP",
"C8-dG adduct formation",
),
_relation_spec(
"XPC",
"PhIP_dG",
"REPAIRS",
"PhIP",
"Damage recognition",
),
_relation_spec("ERCC2", "PhIP_dG", "REPAIRS", "PhIP", "NER excision"),
_relation_spec(
"CYP1A2",
"NOH_MeIQx",
"ACTIVATES",
"MeIQx",
"N-hydroxylation",
),
_relation_spec(
"NAT1",
"MeIQx_acetoxy",
"ACTIVATES",
"MeIQx",
"Reactive acetoxy ester formation",
),
_relation_spec(
"MeIQx_acetoxy",
"PhIP_dG",
"FORMS_ADDUCT",
"MeIQx",
"Representative HCA adduct formation",
),
_relation_spec(
"CYP1A2",
"NOH_4ABP",
"ACTIVATES",
"4ABP",
"N-hydroxylation",
),
_relation_spec(
"NAT2",
"ABP_acetoxy",
"ACTIVATES",
"4ABP",
"Reactive acetoxy ester formation",
),
_relation_spec(
"ABP_acetoxy",
"ABP_dG",
"FORMS_ADDUCT",
"4ABP",
"Guanine adduct formation",
),
_relation_spec(
"XPC",
"ABP_dG",
"REPAIRS",
"4ABP",
"Damage recognition",
),
_relation_spec("ERCC2", "ABP_dG", "REPAIRS", "4ABP", "NER excision"),
_relation_spec(
"CYP1A2",
"NOH_Benzidine",
"ACTIVATES",
"Benzidine",
"Benzidine N-hydroxylation",
),
_relation_spec(
"NAT1",
"Benzidine_ester",
"ACTIVATES",
"Benzidine",
"Reactive ester formation",
),
_relation_spec(
"CYP2A6",
"NNK_hydroxyl",
"ACTIVATES",
"NNK",
"Alpha-hydroxylation",
),
_relation_spec(
"CYP2A13",
"NNK_hydroxyl",
"ACTIVATES",
"NNK",
"Pulmonary alpha-hydroxylation",
),
_relation_spec(
"CYP2D6",
"NNK_pob",
"ACTIVATES",
"NNK",
"Pyridyloxobutylating intermediate formation",
),
_relation_spec(
"NNK_pob",
"POB_dG",
"FORMS_ADDUCT",
"NNK",
"POB adduct formation",
),
_relation_spec(
"MGMT",
"POB_dG",
"REPAIRS",
"NNK",
"Direct reversal",
),
_relation_spec(
"CYP2E1",
"NDMA_hydroxyl",
"ACTIVATES",
"NDMA",
"Alpha-hydroxylation",
),
_relation_spec(
"CYP2A6",
"Methyldiazonium",
"ACTIVATES",
"NDMA",
"Secondary methylating route",
),
_relation_spec(
"Methyldiazonium",
"O6_methyl_dG",
"FORMS_ADDUCT",
"NDMA",
"Methyl adduct formation",
),
_relation_spec(
"MGMT",
"O6_methyl_dG",
"REPAIRS",
"NDMA",
"Direct reversal",
),
_relation_spec(
"CYP3A4",
"AFB1_epoxide",
"ACTIVATES",
"AFB1",
"Aflatoxin epoxidation",
),
_relation_spec(
"CYP1A2",
"AFB1_epoxide",
"ACTIVATES",
"AFB1",
"Secondary epoxidation",
),
_relation_spec(
"AFB1_epoxide",
"AFB1_Gua",
"FORMS_ADDUCT",
"AFB1",
"AFB1-N7-Gua formation",
),
_relation_spec(
"XPC",
"AFB1_Gua",
"REPAIRS",
"AFB1",
"Damage recognition",
),
_relation_spec("ERCC2", "AFB1_Gua", "REPAIRS", "AFB1", "NER excision"),
_relation_spec(
"CYP1B1",
"HydroxyE2",
"ACTIVATES",
"E2",
"Catechol estrogen formation",
),
_relation_spec(
"CYP1B1",
"E2_quinone",
"ACTIVATES",
"E2",
"Estrogen quinone formation",
),
_relation_spec(
"UGT2B7",
"Testosterone_gluc",
"DETOXIFIES",
"Testosterone",
"Glucuronidation",
),
_relation_spec(
"UGT2B17",
"DHT_gluc",
"DETOXIFIES",
"DHT",
"Glucuronidation",
),
_relation_spec(
"ABCC2",
"Testosterone_gluc",
"TRANSPORTS",
"Testosterone",
"Conjugate efflux",
),
_relation_spec(
"ABCC2",
"DHT_gluc",
"TRANSPORTS",
"DHT",
"Conjugate efflux",
),
_relation_spec(
"E2_quinone",
"E2_Ade",
"FORMS_ADDUCT",
"E2",
"Depurinating adduct formation",
),
_relation_spec(
"XRCC1",
"E2_Ade",
"REPAIRS",
"E2",
"BER scaffold response",
),
_relation_spec(
"CYP2E1",
"Benzene_oxide",
"ACTIVATES",
"Benzene",
"Benzene oxidation",
),
_relation_spec(
"CYP2E1",
"Benzoquinone",
"ACTIVATES",
"Benzene",
"Quinone-forming oxidation",
),
_relation_spec(
"Benzoquinone",
"Oxo_dG",
"FORMS_ADDUCT",
"Benzene",
"ROS-linked oxidative lesion",
),
_relation_spec(
"OGG1",
"Oxo_dG",
"REPAIRS",
"Benzene",
"Base excision repair",
),
_relation_spec(
"XRCC1",
"Oxo_dG",
"REPAIRS",
"Benzene",
"BER scaffold response",
),
_relation_spec(
"CYP2E1",
"Chloroethylene_oxide",
"ACTIVATES",
"VinylChloride",
"Vinyl chloride oxidation",
),
_relation_spec(
"Chloroethylene_oxide",
"EthenoG",
"FORMS_ADDUCT",
"VinylChloride",
"Etheno adduct formation",
),
_relation_spec(
"ERCC2",
"EthenoG",
"REPAIRS",
"VinylChloride",
"Damage response",
),
_relation_spec(
"Hydroxyethylating_species",
"HEG",
"FORMS_ADDUCT",
"EthyleneOxide",
"Hydroxyethyl adduct formation",
),
_relation_spec(
"XRCC1",
"HEG",
"REPAIRS",
"EthyleneOxide",
"BER scaffold response",
),
)
_ANDROGEN_EDGE_SPECS: tuple[Spec, ...] = (
_relation_spec(
"CYP17A1",
"Androstenedione",
"ACTIVATES",
"Testosterone",
"Androgen precursor synthesis",
),
_relation_spec(
"AKR1C3",
"Testosterone",
"CUSTOM",
"Testosterone",
"Androstenedione reduction to testosterone",
custom_predicate="REDUCES_TO_TESTOSTERONE",
),
_relation_spec(
"SRD5A1",
"DHT",
"CUSTOM",
"Testosterone",
"Peripheral testosterone reduction to DHT",
custom_predicate="CONVERTS_TO_DHT",
),
_relation_spec(
"SRD5A2",
"DHT",
"CUSTOM",
"Testosterone",
"Prostate testosterone reduction to DHT",
custom_predicate="CONVERTS_TO_DHT",
),
_relation_spec(
"CYP19A1",
"E2",
"CUSTOM",
"Testosterone",
"Aromatization to estradiol",
custom_predicate="AROMATIZES_TO_ESTRADIOL",
),
_relation_spec(
"CYP1B1",
"HydroxyE2",
"ACTIVATES",
"E2",
"Catechol estrogen formation",
),
_relation_spec(
"CYP1B1",
"E2_quinone",
"ACTIVATES",
"E2",
"Estrogen quinone formation",
),
_relation_spec(
"E2_quinone",
"E2_Ade",
"FORMS_ADDUCT",
"E2",
"Depurinating adenine adduct formation",
),
_relation_spec(
"E2_quinone",
"E2_Gua",
"FORMS_ADDUCT",
"E2",
"Depurinating guanine adduct formation",
),
_relation_spec(
"CYP3A5",
"HydroxyTestosterone",
"DETOXIFIES",
"Testosterone",
"6beta-hydroxylation",
),
_relation_spec(
"UGT2B7",
"Testosterone_gluc",
"DETOXIFIES",
"Testosterone",
"Glucuronidation",
),
_relation_spec(
"UGT2B17",
"DHT_gluc",
"DETOXIFIES",
"DHT",
"Glucuronidation",
),
_relation_spec(
"Testosterone",
"AR",
"CUSTOM",
"Testosterone",
"Lower-affinity androgen receptor binding",
custom_predicate="BINDS_RECEPTOR",
),
_relation_spec(
"DHT",
"AR",
"CUSTOM",
"DHT",
"High-affinity androgen receptor binding",
custom_predicate="BINDS_RECEPTOR",
),
_relation_spec(
"AR",
"AR_signal_program",
"CUSTOM",
None,
"AR-driven proliferative transcription program",
custom_predicate="ACTIVATES_TRANSCRIPTION",
),
_relation_spec(
"SRD5A2",
"Prostate",
"EXPRESSED_IN",
None,
"Prostate-enriched expression context",
),
_relation_spec(
"AKR1C3",
"Prostate",
"EXPRESSED_IN",
None,
"Prostate-local androgen activation context",
),
_relation_spec(
"UGT2B17",
"Prostate",
"EXPRESSED_IN",
None,
"Prostate androgen-clearance context",
),
_relation_spec(
"CYP3A5",
"Prostate",
"EXPRESSED_IN",
None,
"Prostate androgen-clearance context",
),
_relation_spec(
"SRD5A1",
"PeripheralHormoneTissues",
"EXPRESSED_IN",
None,
"Peripheral testosterone-conversion context",
),
_relation_spec(
"CYP19A1",
"HormoneResponsiveTissues",
"EXPRESSED_IN",
None,
"Hormone-responsive aromatase context",
),
_relation_spec(
"AR",
"Prostate",
"EXPRESSED_IN",
None,
"Prostate androgen-receptor signaling context",
),
_relation_spec(
"SRD5A2_V89L",
"SRD5A2",
"ENCODES",
None,
"Variant-bearing SRD5A2 locus",
),
_relation_spec(
"SRD5A2_A49T",
"SRD5A2",
"ENCODES",
None,
"Variant-bearing SRD5A2 locus",
),
_relation_spec(
"CYP19A1_repeat_length",
"CYP19A1",
"ENCODES",
None,
"Variant-bearing CYP19A1 locus",
),
_relation_spec(
"UGT2B17_copy_number_deletion",
"UGT2B17",
"ENCODES",
None,
"Variant-bearing UGT2B17 locus",
),
_relation_spec(
"Testosterone",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"DHT",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"E2",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"CYP17A1",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"SRD5A1",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"SRD5A2",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"CYP19A1",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"AKR1C3",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"CYP3A5",
"hsa00140",
"PATHWAY",
None,
"Pathway membership: Steroid hormone biosynthesis",
),
_relation_spec(
"HydroxyE2",
"hsa05204",
"PATHWAY",
None,
"Pathway membership: Chemical carcinogenesis - DNA adducts",
),
_relation_spec(
"E2_quinone",
"hsa05204",
"PATHWAY",
None,
"Pathway membership: Chemical carcinogenesis - DNA adducts",
),
_relation_spec(
"E2_Ade",
"hsa05204",
"PATHWAY",
None,
"Pathway membership: Chemical carcinogenesis - DNA adducts",
),
_relation_spec(
"E2_Gua",
"hsa05204",
"PATHWAY",
None,
"Pathway membership: Chemical carcinogenesis - DNA adducts",
),
_relation_spec(
"DHT",
"AR_signal_program",
"PATHWAY",
None,
"Pathway membership: AR proliferative transcriptional program",
),
_relation_spec(
"AR",
"AR_signal_program",
"PATHWAY",
None,
"Pathway membership: AR proliferative transcriptional program",
),
)
def _curated_ref(
record_id: str,
citation: str,
evidence: str,
*,
source_db: str = _CURATED_SOURCE,
) -> ProvenanceRecord:
return ProvenanceRecord(
source_db=source_db,
record_id=record_id,
citation=citation,
evidence=evidence,
)
def _kegg_ref(pathway_id: str, label: str) -> ProvenanceRecord:
return ProvenanceRecord(
source_db="KEGG",
record_id=pathway_id,
citation=f"KEGG pathway record for {label}",
url=f"https://www.kegg.jp/entry/{pathway_id}",
evidence="Curated KEGG reference pathway tracked by ExposoGraph.",
)
def _representative_activity_score(gene_id: str) -> float | None:
rows = ACTIVITY_SCORES.get(gene_id) or []
numeric_values = [row["value"] for row in rows if isinstance(row.get("value"), (int, float))]
if not numeric_values:
return None
return float(max(numeric_values))
def _annotated_panel_nodes() -> list[Node]:
nodes: list[Node] = []
for node in build_full_panel().nodes:
score = _representative_activity_score(node.id)
if score is None:
nodes.append(node)
continue
nodes.append(node.model_copy(update={"activity_score": score}))
return nodes
def _canonical_curated_node(
*,
node_id: str,
label: str,
node_type: NodeType,
detail: str,
provenance: Iterable[ProvenanceRecord],
group: str | None = None,
reactivity: str | None = None,
**extra_fields: object,
) -> Node:
node = Node(
id=node_id,
label=label,
type=node_type,
detail=detail,
group=group,
reactivity=reactivity,
match_status=MatchStatus.CANONICAL,
canonical_id=node_id,
canonical_label=label,
canonical_namespace=_SHOWCASE_NAMESPACE,
provenance=list(provenance),
)
if extra_fields:
return node.model_copy(update=extra_fields)
return node
def _carcinogen_node(spec: Spec) -> Node:
citation = f"Curated carcinogen entry for {spec['label']}"
base = Node(
id=spec["id"],
label=spec["label"],
type=NodeType.CARCINOGEN,
group=spec["group"],
detail=spec["detail"],
provenance=[_curated_ref(spec["id"], citation, spec["detail"])],
)
grounded = ground_node(base, classifier=_IARC)
if grounded.match_status == MatchStatus.UNMATCHED:
return grounded.model_copy(
update={
"match_status": MatchStatus.CANONICAL,
"canonical_id": grounded.id,
"canonical_label": grounded.label,
"canonical_namespace": _SHOWCASE_NAMESPACE,
},
)
return grounded
def _metabolite_node(spec: Spec) -> Node:
citation = f"Curated metabolite entry for {spec['label']}"
return _canonical_curated_node(
node_id=spec["id"],
label=spec["label"],
node_type=NodeType.METABOLITE,
detail=spec["detail"],
reactivity=spec["reactivity"],
provenance=[_curated_ref(spec["id"], citation, spec["detail"])],
)
def _adduct_node(spec: Spec) -> Node:
citation = f"Curated DNA adduct entry for {spec['label']}"
return _canonical_curated_node(
node_id=spec["id"],
label=spec["label"],
node_type=NodeType.DNA_ADDUCT,
detail=spec["detail"],
provenance=[_curated_ref(spec["id"], citation, spec["detail"])],
)
def _pathway_nodes() -> list[Node]:
return [
_canonical_curated_node(
node_id=pathway["pathway_id"],
label=pathway["label"],
node_type=NodeType.PATHWAY,
detail=pathway["focus"],
provenance=[_kegg_ref(pathway["pathway_id"], pathway["label"])],
)
for pathway in REFERENCE_KEGG_PATHWAYS
]
def _gene_node(spec: Spec) -> Node:
citation = f"Curated gene entry for {spec['label']}"
source_db = spec.get("source_db", _CURATED_SOURCE)
return _canonical_curated_node(
node_id=spec["id"],
label=spec["label"],
node_type=NodeType.GENE,
detail=spec["detail"],
provenance=[
_curated_ref(spec["id"], citation, spec["detail"], source_db=source_db),
],
source_db=source_db,
tissue=spec.get("tissue"),
variant=spec.get("variant"),
phenotype=spec.get("phenotype"),
)
def _tissue_node(spec: Spec) -> Node:
citation = f"Curated tissue-context entry for {spec['label']}"
return _canonical_curated_node(
node_id=spec["id"],
label=spec["label"],
node_type=NodeType.TISSUE,
detail=spec["detail"],
provenance=[_curated_ref(spec["id"], citation, spec["detail"])],
tissue=spec["label"],
)
def _module_enzyme_node(spec: Spec) -> Node:
citation = f"Curated enzyme entry for {spec['label']}"
return _canonical_curated_node(
node_id=spec["id"],
label=spec["label"],
node_type=NodeType.ENZYME,
detail=spec["detail"],
provenance=[_curated_ref(spec["id"], citation, spec["detail"])],
phase="I",
role="Mixed",
tissue="prostate, liver, intestine",
)
def _module_pathway_node(spec: Spec) -> Node:
pathway_id = spec["pathway_id"]
if pathway_id.startswith("hsa"):
provenance = [_kegg_ref(pathway_id, spec["label"])]
source_db = "KEGG"
else:
provenance = [
_curated_ref(pathway_id, f"Curated pathway entry for {spec['label']}", spec["focus"])
]
source_db = _CURATED_SOURCE
return _canonical_curated_node(
node_id=pathway_id,
label=spec["label"],
node_type=NodeType.PATHWAY,
detail=spec["focus"],
provenance=provenance,
source_db=source_db,
)
def _relation(spec: Spec) -> Edge:
record_id = f"{spec['source']}->{spec['target']}"
edge_type = EdgeType(spec["type"])
if edge_type == EdgeType.PATHWAY and str(spec["target"]).startswith("hsa"):
pathway_label = next(
(
entry["label"]
for entry in REFERENCE_KEGG_PATHWAYS
if entry["pathway_id"] == spec["target"]
),
spec["target"],
)
provenance = [_kegg_ref(spec["target"], pathway_label)]
else:
provenance = [_curated_ref(record_id, spec["label"], spec["label"])]
if edge_type == EdgeType.CUSTOM:
return Edge(
source=spec["source"],
target=spec["target"],
type=edge_type,
label=spec["label"],
carcinogen=spec.get("carcinogen"),
provenance=provenance,
custom_predicate=spec["custom_predicate"],
match_status=MatchStatus.CUSTOM,
)
return Edge(
source=spec["source"],
target=spec["target"],
type=edge_type,
label=spec["label"],
carcinogen=spec.get("carcinogen"),
provenance=provenance,
)
def _sorted_labels(nodes: Iterable[Node], node_type: NodeType) -> tuple[str, ...]:
return tuple(sorted(node.label for node in nodes if node.type == node_type))
def _inventory_group(name: str, members: Iterable[str]) -> ArchitectureInventoryGroup:
ordered_members = tuple(sorted(members))
return ArchitectureInventoryGroup(
name=name,
members=ordered_members,
count=len(ordered_members),
)
def _carcinogen_class_groups(nodes: Iterable[Node]) -> tuple[ArchitectureInventoryGroup, ...]:
labels_by_group: dict[str, list[str]] = {}
for node in nodes:
if node.type != NodeType.CARCINOGEN:
continue
group = str(node.group or "").strip()
labels_by_group.setdefault(group, []).append(node.label)
return tuple(
_inventory_group(title, labels_by_group.get(group, ()))
for group, title in _CARCINOGEN_CLASS_TITLES
)
def _enzyme_category_groups(panel_nodes: Iterable[Node]) -> tuple[ArchitectureInventoryGroup, ...]:
panel = list(panel_nodes)
return (
_inventory_group("Phase I", (node.label for node in panel if node.phase == "I")),
_inventory_group("Phase II", (node.label for node in panel if node.phase == "II")),
_inventory_group("Phase III", (node.label for node in panel if node.phase == "III")),
_inventory_group(
"DNA Repair",
(
node.label
for node in panel
if str(node.group or "").startswith("DNA Repair")
),
),
)
def _build_legacy_full_legends_graph() -> KnowledgeGraph:
panel_nodes = _annotated_panel_nodes()
carcinogen_nodes = [_carcinogen_node(spec) for spec in _CARCINOGEN_SPECS]
metabolite_nodes = [_metabolite_node(spec) for spec in _METABOLITE_SPECS]
adduct_nodes = [_adduct_node(spec) for spec in _DNA_ADDUCT_SPECS]
pathway_nodes = _pathway_nodes()
pathway_label_by_id = {
entry["pathway_id"]: entry["label"] for entry in REFERENCE_KEGG_PATHWAYS
}
pathway_edges = [
Edge(
source=node_id,
target=pathway_id,
type=EdgeType.PATHWAY,
label=f"Pathway membership: {pathway_label_by_id[pathway_id]}",
source_db="KEGG",
provenance=[_kegg_ref(pathway_id, pathway_label_by_id[pathway_id])],
)
for pathway_id, node_ids in _PATHWAY_MEMBERSHIP.items()
for node_id in node_ids
]
return KnowledgeGraph(
nodes=[
*panel_nodes,
*carcinogen_nodes,
*metabolite_nodes,
*adduct_nodes,
*pathway_nodes,
],
edges=[*[_relation(spec) for spec in _EDGE_SPECS], *pathway_edges],
)
def _edge_identity(edge: Edge) -> tuple[str, str, str, str | None, str | None]:
return (
edge.source,
edge.target,
edge.type.value,
edge.custom_predicate,
edge.carcinogen,
)
def _canonical_showcase_id(node_id: str) -> str:
return _FULL_LEGENDS_ID_REMAP.get(node_id, node_id)
def _normalize_reference_graph_ids(graph: KnowledgeGraph) -> KnowledgeGraph:
node_by_id: dict[str, Node] = {}
for node in graph.nodes:
payload = node.model_dump()
payload["id"] = _canonical_showcase_id(node.id)
if payload.get("canonical_id"):
payload["canonical_id"] = _canonical_showcase_id(str(payload["canonical_id"]))
normalized = Node(**payload)
node_by_id.setdefault(normalized.id, normalized)
edge_by_identity: dict[tuple[str, str, str, str | None, str | None], Edge] = {}
for edge in graph.edges:
payload = edge.model_dump()
payload["source"] = _canonical_showcase_id(edge.source)
payload["target"] = _canonical_showcase_id(edge.target)
if edge.carcinogen:
payload["carcinogen"] = _canonical_showcase_id(edge.carcinogen)
normalized_edge = Edge(**payload)
edge_by_identity.setdefault(_edge_identity(normalized_edge), normalized_edge)
return KnowledgeGraph(
nodes=list(node_by_id.values()),
edges=list(edge_by_identity.values()),
)
def _overlay_legacy_showcase_metadata(
graph: KnowledgeGraph,
legacy_graph: KnowledgeGraph,
) -> KnowledgeGraph:
legacy_node_by_id = {node.id: node for node in legacy_graph.nodes}
legacy_edge_by_identity = {
_edge_identity(edge): edge for edge in legacy_graph.edges
}
return KnowledgeGraph(
nodes=[legacy_node_by_id.get(node.id, node) for node in graph.nodes],
edges=[legacy_edge_by_identity.get(_edge_identity(edge), edge) for edge in graph.edges],
)
def _build_base_full_legends_graph() -> KnowledgeGraph:
legacy_graph = _build_legacy_full_legends_graph()
if not _FULL_LEGENDS_REFERENCE_GRAPH.exists():
return legacy_graph
reference_graph = parse_graph_data_js(_FULL_LEGENDS_REFERENCE_GRAPH)
normalized_graph = _normalize_reference_graph_ids(reference_graph)
return _overlay_legacy_showcase_metadata(normalized_graph, legacy_graph)
def _merge_graphs(*graphs: KnowledgeGraph) -> KnowledgeGraph:
node_by_id: dict[str, Node] = {}
for graph in graphs:
for node in graph.nodes:
node_by_id.setdefault(node.id, node)
edge_by_identity: dict[tuple[str, str, str, str | None, str | None], Edge] = {}
for graph in graphs:
for edge in graph.edges:
edge_by_identity.setdefault(_edge_identity(edge), edge)
return KnowledgeGraph(
nodes=list(node_by_id.values()),
edges=list(edge_by_identity.values()),
)
[docs]
def build_androgen_module_graph() -> KnowledgeGraph:
"""Return an optional androgen-metabolism module with receptor and variant context."""
panel_lookup = {node.id: node for node in _annotated_panel_nodes()}
shared_panel_nodes = [panel_lookup[node_id] for node_id in _ANDROGEN_PANEL_ENZYME_IDS]
carcinogen_nodes = [
_carcinogen_node(_BASE_CARCINOGEN_SPECS[node_id])
for node_id in _ANDROGEN_CARCINOGEN_IDS
]
metabolite_nodes = [_metabolite_node(spec) for spec in _ANDROGEN_METABOLITE_SPECS]
adduct_nodes = [_adduct_node(spec) for spec in _ANDROGEN_DNA_ADDUCT_SPECS]
pathway_nodes = [_module_pathway_node(spec) for spec in _ANDROGEN_PATHWAY_SPECS]
gene_nodes = [_gene_node(spec) for spec in _ANDROGEN_GENE_SPECS]
tissue_nodes = [_tissue_node(spec) for spec in _ANDROGEN_TISSUE_SPECS]
extra_enzyme_nodes = [_module_enzyme_node(spec) for spec in _ANDROGEN_ENZYME_SPECS]
return KnowledgeGraph(
nodes=[
*shared_panel_nodes,
*extra_enzyme_nodes,
*carcinogen_nodes,
*metabolite_nodes,
*adduct_nodes,
*pathway_nodes,
*gene_nodes,
*tissue_nodes,
],
edges=[_relation(spec) for spec in _ANDROGEN_EDGE_SPECS],
)
[docs]
def build_androgen_module_engine(
*,
mode: GraphMode | str = GraphMode.EXPLORATORY,
) -> GraphEngine:
"""Load :func:`build_androgen_module_graph` into a :class:`GraphEngine`."""
engine = GraphEngine()
engine.load(build_androgen_module_graph(), mode=mode)
return engine
[docs]
def build_full_legends_graph(
*,
include_androgen_module: bool = False,
) -> KnowledgeGraph:
"""Return a curated showcase graph aligned with the manuscript architecture."""
base_graph = _build_base_full_legends_graph()
if not include_androgen_module:
return base_graph
return _merge_graphs(base_graph, build_androgen_module_graph())
[docs]
def build_full_legends_architecture_summary(
*,
include_androgen_module: bool = False,
) -> ArchitectureSummary:
"""Return a typed architecture summary derived from the seeded showcase graph."""
graph = build_full_legends_graph(include_androgen_module=include_androgen_module)
enzyme_nodes = [node for node in graph.nodes if node.type == NodeType.ENZYME]
node_type_counter = Counter(node.type.value for node in graph.nodes)
edge_type_counter = Counter(edge.type.value for edge in graph.edges)
node_type_counts = {
node_type.value: node_type_counter[node_type.value]
for node_type in NodeType
if node_type_counter.get(node_type.value)
}
edge_type_counts = {
edge_type.value: edge_type_counter[edge_type.value]
for edge_type in EdgeType
if edge_type_counter.get(edge_type.value)
}
return ArchitectureSummary(
node_count=len(graph.nodes),
edge_count=len(graph.edges),
node_type_count=len(node_type_counter),
edge_type_count=len(edge_type_counter),
node_type_counts=node_type_counts,
edge_type_counts=edge_type_counts,
carcinogens=_sorted_labels(graph.nodes, NodeType.CARCINOGEN),
enzymes=_sorted_labels(graph.nodes, NodeType.ENZYME),
metabolites=_sorted_labels(graph.nodes, NodeType.METABOLITE),
dna_adducts=_sorted_labels(graph.nodes, NodeType.DNA_ADDUCT),
pathway_labels=_sorted_labels(graph.nodes, NodeType.PATHWAY),
carcinogen_classes=_carcinogen_class_groups(graph.nodes),
enzyme_categories=_enzyme_category_groups(enzyme_nodes),
)
[docs]
def build_full_legends_engine(
*,
mode: GraphMode | str = GraphMode.EXPLORATORY,
include_androgen_module: bool = False,
) -> GraphEngine:
"""Load :func:`build_full_legends_graph` into a :class:`GraphEngine`."""
engine = GraphEngine()
engine.load(
build_full_legends_graph(include_androgen_module=include_androgen_module),
mode=mode,
)
return engine
[docs]
def write_full_legends_exports(
output_dir: str | Path = "exports",
*,
visibility: GraphVisibility | str = GraphVisibility.ALL,
include_androgen_module: bool = False,
bundle_dir: str | Path | None = None,
) -> dict[str, Path]:
"""Write HTML, JSON, Plotly, and bundled viewer artifacts."""
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
engine = build_full_legends_engine(include_androgen_module=include_androgen_module)
artifact_stem = (
"run_kg_full_legends_androgen"
if include_androgen_module
else "run_kg_full_legends"
)
html_path = output_dir / f"{artifact_stem}.html"
json_path = output_dir / f"{artifact_stem}.json"
plotly_path = output_dir / f"{artifact_stem}_plotly.html"
to_interactive_html(engine, html_path, visibility=visibility)
to_json(engine, json_path, visibility=visibility)
to_plotly_html(engine, plotly_path, visibility=visibility)
bundle_target = (
Path(bundle_dir)
if bundle_dir is not None
else (
output_dir / f"{artifact_stem}.js"
if include_androgen_module
else _DEFAULT_BUNDLE_PATH
)
)
if bundle_target.suffix == ".js":
bundle_target.parent.mkdir(parents=True, exist_ok=True)
to_graph_data_js(engine, bundle_target, visibility=visibility)
return {
"html": html_path,
"json": json_path,
"plotly_html": plotly_path,
"graph_data_js": bundle_target,
}