Source code for pyctd.manager.query

# -*- coding: utf-8 -*-

from pandas import read_sql
from sqlalchemy import distinct

from . import models
from .database import BaseDbManager


[docs]class QueryManager(BaseDbManager): """Query interface to database.""" def _limit_and_df(self, query, limit, as_df=False): """adds a limit (limit==None := no limit) to any query and allow a return as pandas.DataFrame :param bool as_df: if is set to True results return as pandas.DataFrame :param `sqlalchemy.orm.query.Query` query: SQL Alchemy query :param int limit: maximum number of results :return: query result of pyctd.manager.models.XY objects """ if limit: query = query.limit(limit) if as_df: results = read_sql(query.statement, self.engine) else: results = query.all() return results @staticmethod def _join_gene(query, gene_name, gene_symbol, gene_id): """helper function to add a query join to Gene model :param `sqlalchemy.orm.query.Query` query: SQL Alchemy query :param str gene_name: gene name :param str gene_symbol: gene symbol :param int gene_id: NCBI Gene identifier :return: `sqlalchemy.orm.query.Query` object """ if gene_name or gene_symbol: query = query.join(models.Gene) if gene_symbol: query = query.filter(models.Gene.gene_symbol.like(gene_symbol)) if gene_name: query = query.filter(models.Gene.gene_name.like(gene_name)) if gene_id: query = query.filter(models.Gene.gene_id.like(gene_id)) return query @staticmethod def _join_chemical(query, cas_rn, chemical_id, chemical_name, chemical_definition): """helper function to add a query join to Chemical model :param `sqlalchemy.orm.query.Query` query: SQL Alchemy query :param cas_rn: :param chemical_id: :param chemical_name: :param chemical_definition: :return: `sqlalchemy.orm.query.Query` object """ if cas_rn or chemical_id or chemical_name or chemical_definition: query = query.join(models.Chemical) if cas_rn: query = query.filter(models.Chemical.cas_rn.like(cas_rn)) if chemical_id: query = query.filter(models.Chemical.chemical_id == chemical_id) if chemical_name: query = query.filter(models.Chemical.chemical_name.like(chemical_name)) if chemical_definition: query = query.filter(models.Chemical.definition.like(chemical_definition)) return query @staticmethod def _join_disease(query, disease_definition, disease_id, disease_name): """helper function to add a query join to Disease model :param sqlalchemy.orm.query.Query query: SQL Alchemy query :param disease_definition: :param str disease_id: see :attr:`models.Disease.disease_id` :param disease_name: :rtype: sqlalchemy.orm.query.Query """ if disease_definition or disease_id or disease_name: query = query.join(models.Disease) if disease_definition: query = query.filter(models.Disease.definition.like(disease_definition)) if disease_id: query = query.filter(models.Disease.disease_id == disease_id) if disease_name: query = query.filter(models.Disease.disease_name.like(disease_name)) return query @staticmethod def _join_pathway(query, pathway_id, pathway_name): """helper function to add a query join to Pathway model :param `sqlalchemy.orm.query.Query` query: SQL Alchemy query :param str pathway_id: pathway identifier :param str pathway_name: pathway name :return: `sqlalchemy.orm.query.Query` object """ if pathway_id or pathway_name: if pathway_id: query = query.filter(models.Pathway.pathway_id.like(pathway_id)) if pathway_name: query = query.filter(models.Pathway.pathway_name.like(pathway_name)) return query
[docs] def get_disease(self, disease_name=None, disease_id=None, definition=None, parent_ids=None, tree_numbers=None, parent_tree_numbers=None, slim_mapping=None, synonym=None, alt_disease_id=None, limit=None, as_df=False): """ Get diseases :param bool as_df: if set to True result returns as `pandas.DataFrame` :param int limit: maximum number of results :param str disease_name: disease name :param str disease_id: disease identifier :param str definition: definition of disease :param str parent_ids: parent identifiers, delimiter | :param str tree_numbers: tree numbers, delimiter | :param str parent_tree_numbers: parent tree numbers, delimiter :param str slim_mapping: term derived from the MeSH tree structure for the “Diseases” [C] branch, \ that classifies MEDIC diseases into high-level categories :param str synonym: disease synonyms :param str alt_disease_id: alternative disease identifiers :return: list of :class:`pyctd.manager.models.Disease` object .. seealso:: :class:`pyctd.manager.models.Disease` .. todo:: normalize parent_ids, tree_numbers and parent_tree_numbers in :class:`pyctd.manager.models.Disease` """ q = self.session.query(models.Disease) if disease_name: q = q.filter(models.Disease.disease_name.like(disease_name)) if disease_id: q = q.filter(models.Disease.disease_id == disease_id) if definition: q = q.filter(models.Disease.definition.like(definition)) if parent_ids: q = q.filter(models.Disease.parent_ids.like(parent_ids)) if tree_numbers: q = q.filter(models.Disease.tree_numbers.like(tree_numbers)) if parent_tree_numbers: q = q.filter(models.Disease.parent_tree_numbers.like(parent_tree_numbers)) if slim_mapping: q = q.join(models.DiseaseSlimmapping).filter(models.DiseaseSlimmapping.slim_mapping.like(slim_mapping)) if synonym: q = q.join(models.DiseaseSynonym).filter(models.DiseaseSynonym.synonym.like(synonym)) if alt_disease_id: q = q.join(models.DiseaseAltdiseaseid).filter(models.DiseaseAltdiseaseid.alt_disease_id == alt_disease_id) return self._limit_and_df(q, limit, as_df)
[docs] def get_gene(self, gene_name=None, gene_symbol=None, gene_id=None, synonym=None, uniprot_id=None, pharmgkb_id=None, biogrid_id=None, alt_gene_id=None, limit=None, as_df=False): """Get genes :param bool as_df: if set to True result returns as `pandas.DataFrame` :param alt_gene_id: :param str gene_name: gene name :param str gene_symbol: HGNC gene symbol :param int gene_id: NCBI Entrez Gene identifier :param str synonym: Synonym :param str uniprot_id: UniProt primary accession number :param str pharmgkb_id: PharmGKB identifier :param int biogrid_id: BioGRID identifier :param int limit: maximum of results :rtype: list[models.Gene] """ q = self.session.query(models.Gene) if gene_symbol: q = q.filter(models.Gene.gene_symbol.like(gene_symbol)) if gene_name: q = q.filter(models.Gene.gene_name.like(gene_name)) if gene_id: q = q.filter(models.Gene.gene_id.like(gene_id)) if synonym: q = q.join(models.GeneSynonym).filter(models.GeneSynonym.synonym == synonym) if uniprot_id: q = q.join(models.GeneUniprot).filter(models.GeneUniprot.uniprot_id == uniprot_id) if pharmgkb_id: q = q.join(models.GenePharmgkb).filter(models.GenePharmgkb.pharmgkb_id == pharmgkb_id) if biogrid_id: q = q.join(models.GeneBiogrid).filter(models.GeneBiogrid.biogrid_id == biogrid_id) if alt_gene_id: q = q.join(models.GeneAltGeneId.alt_gene_id == alt_gene_id) return self._limit_and_df(q, limit, as_df)
[docs] def get_pathway(self, pathway_name=None, pathway_id=None, limit=None, as_df=False): """Get pathway .. note:: Format of pathway_id is KEGG:X* or REACTOME:X* . X* stands for a sequence of digits :param bool as_df: if set to True result returns as `pandas.DataFrame` :param str pathway_name: pathway name :param str pathway_id: KEGG or REACTOME identifier :param int limit: maximum number of results :return: list of :class:`pyctd.manager.models.Pathway` objects .. seealso:: :class:`pyctd.manager.models.Pathway` """ q = self.session.query(models.Pathway) if pathway_name: q = q.filter(models.Pathway.pathway_name.like(pathway_name)) if pathway_id: q = q.filter(models.Pathway.pathway_id.like(pathway_id)) return self._limit_and_df(q, limit, as_df)
[docs] def get_chemical(self, chemical_name=None, chemical_id=None, cas_rn=None, drugbank_id=None, parent_id=None, parent_tree_number=None, tree_number=None, synonym=None, limit=None, as_df=False): """Get chemical :param bool as_df: if set to True result returns as `pandas.DataFrame` :param str chemical_name: chemical name :param str chemical_id: cehmical identifier :param str cas_rn: CAS registry number :param str drugbank_id: DrugBank identifier :param str parent_id: identifiers of the parent terms :param str parent_tree_number: identifiers of the parent nodes :param str tree_number: identifiers of the chemical's nodes :param str synonym: chemical synonym :param int limit: maximum number of results :return: list of :class:`pyctd.manager.models.Chemical` objects .. seealso:: :class:`pyctd.manager.models.Chemical` """ q = self.session.query(models.Chemical) if chemical_name: q = q.filter(models.Chemical.chemical_name.like(chemical_name)) if chemical_id: q = q.filter(models.Chemical.chemical_id == chemical_id) if cas_rn: q = q.filter(models.Chemical.cas_rn == cas_rn) if drugbank_id: q = q.join(models.ChemicalDrugbank).filter(models.ChemicalDrugbank.drugbank_id == drugbank_id) if parent_id: q = q.join(models.ChemicalParentid).filter(models.ChemicalParentid.parent_id == parent_id) if tree_number: q = q.join(models.ChemicalTreenumber) \ .filter(models.ChemicalTreenumber.tree_number == tree_number) if parent_tree_number: q = q.join(models.ChemicalParenttreenumber) \ .filter(models.ChemicalParenttreenumber.parent_tree_number == parent_tree_number) if synonym: q = q.join(models.ChemicalSynonym).filter(models.ChemicalSynonym.synonym.like(synonym)) return self._limit_and_df(q, limit, as_df)
[docs] def get_chem_gene_interaction_actions(self, gene_name=None, gene_symbol=None, gene_id=None, limit=None, cas_rn=None, chemical_id=None, chemical_name=None, organism_id=None, interaction_sentence=None, chemical_definition=None, gene_form=None, interaction_action=None, as_df=False): """Get all interactions for chemicals on a gene or biological entity (linked to this gene). Chemicals can interact on different types of biological entities linked to a gene. A list of allowed entities linked to a gene can be retrieved via the attribute :attr:`~.gene_forms`. Interactions are classified by a combination of interaction ('affects', 'decreases', 'increases') and actions ('activity', 'expression', ... ). A complete list of all allowed interaction_actions can be retrieved via the attribute :attr:`~.interaction_actions`. :param bool as_df: if set to True result returns as `pandas.DataFrame` :param str interaction_sentence: sentence describing the interactions :param int organism_id: NCBI TaxTree identifier. Example: 9606 for Human. :param str chemical_name: chemical name :param str chemical_id: chemical identifier :param str cas_rn: CAS registry number :param str chemical_definition: :param str gene_symbol: HGNC gene symbol :param str gene_name: gene name :param int gene_id: NCBI Entrez Gene identifier :param str gene_form: gene form :param str interaction_action: combination of interaction and actions :param int limit: maximum number of results :rtype: list[models.ChemGeneIxn] .. seealso:: :class:`pyctd.manager.models.ChemGeneIxn` which is linked to: :class:`pyctd.manager.models.Chemical` :class:`pyctd.manager.models.Gene` :class:`pyctd.manager.models.ChemGeneIxnPubmed` Available interaction_actions and gene_forms :func:`pyctd.manager.database.Query.interaction_actions` :func:`pyctd.manager.database.Query.gene_forms` """ q = self.session.query(models.ChemGeneIxn) if organism_id: q = q.filter(models.ChemGeneIxn.organism_id == organism_id) if interaction_sentence: q = q.filter(models.ChemGeneIxn.interaction == interaction_sentence) if gene_form: q = q.join(models.ChemGeneIxnGeneForm).filter(models.ChemGeneIxnGeneForm.gene_form == gene_form) if interaction_action: q = q.join(models.ChemGeneIxnInteractionAction) \ .filter(models.ChemGeneIxnInteractionAction.interaction_action.like(interaction_action)) q = self._join_gene(query=q, gene_name=gene_name, gene_symbol=gene_symbol, gene_id=gene_id) q = self._join_chemical(query=q, cas_rn=cas_rn, chemical_id=chemical_id, chemical_name=chemical_name, chemical_definition=chemical_definition) return self._limit_and_df(q, limit, as_df)
@property def gene_forms(self): """ :return: List of strings for all available gene forms :rtype: list[str] """ q = self.session.query(distinct(models.ChemGeneIxnGeneForm.gene_form)) return [x[0] for x in q.all()] @property def interaction_actions(self): """ :return: List of strings for allowed interaction/actions combinations :rtype: list[str] """ r = self.session.query(distinct(models.ChemGeneIxnInteractionAction.interaction_action)).all() return [x[0] for x in r] @property def actions(self): """Gets the list of allowed actions :rtype: list[str] """ r = self.session.query(models.Action).all() return [x.type_name for x in r] @property def pathways(self): """Get all pathways :rtype: list[models.Pathway] """ return self.session.query(models.Pathway).all()
[docs] def get_gene_disease(self, direct_evidence=None, inference_chemical_name=None, inference_score=None, gene_name=None, gene_symbol=None, gene_id=None, disease_name=None, disease_id=None, disease_definition=None, limit=None, as_df=False): """Get gene–disease associations :param bool as_df: if set to True result returns as `pandas.DataFrame` :param int gene_id: gene identifier :param str gene_symbol: gene symbol :param str gene_name: gene name :param str direct_evidence: direct evidence :param str inference_chemical_name: inference_chemical_name :param float inference_score: inference score :param str inference_chemical_name: chemical name :param disease_name: disease name :param disease_id: disease identifier :param disease_definition: disease definition :param int limit: maximum number of results :return: list of :class:`pyctd.manager.database.models.GeneDisease` objects .. seealso:: :class:`pyctd.manager.models.GeneDisease` which is linked to: :class:`pyctd.manager.models.Chemical` :class:`pyctd.manager.models.Gene` """ q = self.session.query(models.GeneDisease) if direct_evidence: q = q.filter(models.GeneDisease.direct_evidence == direct_evidence) if inference_chemical_name: q = q.filter(models.GeneDisease.inference_chemical_name == inference_chemical_name) if inference_score: q = q.filter(models.GeneDisease.inference_score == inference_score) q = self._join_disease(query=q, disease_definition=disease_definition, disease_id=disease_id, disease_name=disease_name) q = self._join_gene(q, gene_name=gene_name, gene_symbol=gene_symbol, gene_id=gene_id) return self._limit_and_df(q, limit, as_df)
@property def direct_evidences(self): """ :return: All available direct evidences for gene disease correlations :rtype: list """ q = self.session.query(distinct(models.GeneDisease.direct_evidence)) return q.all()
[docs] def get_disease_pathways(self, disease_id=None, disease_name=None, pathway_id=None, pathway_name=None, disease_definition=None, limit=None, as_df=False): """Get disease pathway link :param bool as_df: if set to True result returns as `pandas.DataFrame` :param disease_id: :param disease_name: :param pathway_id: :param pathway_name: :param disease_definition: :param int limit: maximum number of results :return: list of :class:`pyctd.manager.database.models.DiseasePathway` objects .. seealso:: :class:`pyctd.manager.models.DiseasePathway` which is linked to: :class:`pyctd.manager.models.Disease` :class:`pyctd.manager.models.Pathway` """ q = self.session.query(models.DiseasePathway) q = self._join_disease(query=q, disease_id=disease_id, disease_name=disease_name, disease_definition=disease_definition) q = self._join_pathway(query=q, pathway_id=pathway_id, pathway_name=pathway_name) return self._limit_and_df(q, limit, as_df)
[docs] def get_chemical_diseases(self, direct_evidence=None, inference_gene_symbol=None, inference_score=None, inference_score_operator=None, cas_rn=None, chemical_name=None, chemical_id=None, chemical_definition=None, disease_definition=None, disease_id=None, disease_name=None, limit=None, as_df=False): """Get chemical–disease associations with inference gene :param direct_evidence: direct evidence :param inference_gene_symbol: inference gene symbol :param inference_score: inference score :param inference_score_operator: inference score operator :param cas_rn: :param chemical_name: chemical name :param chemical_id: :param chemical_definition: :param disease_definition: :param disease_id: :param disease_name: disease name :param int limit: maximum number of results :param bool as_df: if set to True result returns as `pandas.DataFrame` :return: list of :class:`pyctd.manager.database.models.ChemicalDisease` objects .. seealso:: :class:`pyctd.manager.models.ChemicalDisease` which is linked to: :class:`pyctd.manager.models.Disease` :class:`pyctd.manager.models.Chemical` """ q = self.session.query(models.ChemicalDisease) if direct_evidence: q = q.filter(models.ChemicalDisease.direct_evidence.like(direct_evidence)) if inference_gene_symbol: q = q.filter(models.ChemicalDisease.inference_gene_symbol.like(inference_gene_symbol)) if inference_score: if inference_score_operator == ">": q = q.filter_by(models.ChemicalDisease.inference_score > inference_score) elif inference_score_operator == "<": q = q.filter_by(models.ChemicalDisease.inference_score > inference_score) q = self._join_chemical(q, cas_rn=cas_rn, chemical_id=chemical_id, chemical_name=chemical_name, chemical_definition=chemical_definition) q = self._join_disease(q, disease_definition=disease_definition, disease_id=disease_id, disease_name=disease_name) return self._limit_and_df(q, limit, as_df)
[docs] def get_gene_pathways(self, gene_name=None, gene_symbol=None, gene_id=None, pathway_id=None, pathway_name=None, limit=None, as_df=False): """Get gene pathway link :param bool as_df: if set to True result returns as `pandas.DataFrame` :param str gene_name: gene name :param str gene_symbol: gene symbol :param int gene_id: NCBI Gene identifier :param pathway_id: :param str pathway_name: pathway name :param int limit: maximum number of results :return: list of :class:`pyctd.manager.database.models.GenePathway` objects .. seealso:: :class:`pyctd.manager.models.GenePathway` which is linked to: :class:`pyctd.manager.models.Gene` :class:`pyctd.manager.models.Pathway` """ q = self.session.query(models.GenePathway) q = self._join_gene(q, gene_name=gene_name, gene_symbol=gene_symbol, gene_id=gene_id) q = self._join_pathway(q, pathway_id=pathway_id, pathway_name=pathway_name) return self._limit_and_df(q, limit, as_df)
# TODO documentation of get_go_enriched__by__chemical_name
[docs] def get_go_enriched__by__chemical_name(self, chemical_name, limit=None, as_df=False): """ :param chemical_name: :param limit: :param as_df: :return: """ q = self.session.query(models.ChemGoEnriched) \ .join(models.Chemical) \ .filter(models.Chemical.chemical_name == chemical_name) \ .order_by(models.ChemGoEnriched.highest_go_level.desc(), models.ChemGoEnriched.corrected_p_value) return self._limit_and_df(q, limit, as_df)
# TODO documentation of get_pathway_enriched__by__chemical_name
[docs] def get_pathway_enriched__by__chemical_name(self, chemical_name, limit=None, as_df=False): """ :param chemical_name: :param limit: :param as_df: :return: """ q = self.session.query(models.ChemPathwayEnriched) \ .join(models.Chemical) \ .filter(models.Chemical.chemical_name == chemical_name) \ .order_by(models.ChemPathwayEnriched.corrected_p_value) return self._limit_and_df(q, limit, as_df)
[docs] def get_therapeutic_chemical__by__disease_name(self, disease_name, limit=None, as_df=False): """ Get therapeutic chemical by disease name :param bool as_df: if set to True result returns as `pandas.DataFrame` :param int limit: maximum number of results :param str disease_name: disease name :return: therapeutic chemical :rtype: list[models.ChemicalDisease] """ q = self.session.query(models.ChemicalDisease) \ .join(models.Disease) \ .filter(models.Disease.disease_name == disease_name, models.ChemicalDisease.direct_evidence == 'therapeutic') return self._limit_and_df(q, limit, as_df)
# TODO documentation of get_marker_chemical__by__disease_name
[docs] def get_marker_chemical__by__disease_name(self, disease_name, limit=None, as_df=False): """ :param disease_name: :param limit: :param as_df: :return: """ q = self.session.query(models.ChemicalDisease) \ .join(models.Disease) \ .filter(models.Disease.disease_name == disease_name, models.ChemicalDisease.direct_evidence == 'marker/mechanism') return self._limit_and_df(q, limit, as_df)
# TODO documentation of get_chemical__by__disease
[docs] def get_chemical__by__disease(self, disease_name, limit=None, as_df=False): """ :param disease_name: :param limit: :param as_df: :return: """ q = self.session.query(models.ChemicalDisease) \ .join(models.Disease) \ .filter(models.Disease.disease_name == disease_name) \ .order_by(models.ChemicalDisease.inference_score.desc()) return self._limit_and_df(q, limit, as_df)
# TODO documentation of get_action
[docs] def get_action(self, limit=None, as_df=False): """ :param limit: :param as_df: :return: """ q = self.session.query(models.Action) return self._limit_and_df(q, limit, as_df)
# TODO documentation of get_exposure_event def get_exposure_event(self): raise NotImplemented