class NamingConvention():
    """Translate raw SIGNOR entity identifiers into the ids used in the model.

    Both helpers are static, so the class is used purely as a namespace.
    """

    # Identifiers starting with this prefix denote SIGNOR complexes
    # (e.g. "SIGNOR-C87").
    COMPLEX_ID_PREFIX = "SIGNOR-C"
    # Term every complex is mapped to: protein-containing complex.
    COMPLEX_TERM = "GO:0032991"
    # Prefix prepended to every identifier that is not a complex.
    PROTEIN_PREFIX = "UniProtKB:"

    @staticmethod
    def full_id(id):
        """Return the prefixed identifier for a raw SIGNOR entity id.

        Complex ids all collapse to ``COMPLEX_TERM``; anything else is
        treated as a UniProtKB accession and gets ``PROTEIN_PREFIX``.

        The parameter is named ``id`` (shadowing the builtin) only to keep
        the signature compatible with existing keyword callers.
        """
        if NamingConvention.is_complex(id):
            return NamingConvention.COMPLEX_TERM
        return NamingConvention.PROTEIN_PREFIX + id

    @staticmethod
    def is_complex(id):
        """Return True when ``id`` carries the SIGNOR complex prefix."""
        return id.startswith(NamingConvention.COMPLEX_ID_PREFIX)
class PathwayConnection():
    """One SIGNOR pathway row: entity A regulates entity B.

    The row is modelled as
    ``[A] <-enabled_by- [mechanism] -relation-> [regulated activity] -enabled_by-> [B]``.
    ``mechanism`` and ``regulated_activity`` are dicts with the keys
    ``name``, ``uri`` and ``term``; ``individuals`` maps ids/terms to the
    URIs declared for this connection.
    """

    # Term used whenever no specific activity is known (indirect effects).
    # NOTE(review): GO:0003674 is presumably the molecular_function root.
    UNSPECIFIED_TERM = "GO:0003674"

    def __init__(self, id_a, id_b, mechanism, effect, direct, relation, pmid, linenum):
        """Build a connection from the fields of one pathway row.

        Raises:
            ValueError: if ``direct`` is neither "YES" nor "NO" (previously
                this surfaced as an UnboundLocalError).
            KeyError: if ``direct`` is "YES" and ``mechanism`` is not a key
                of ``MECHANISM_GO_MAPPING``.
        """
        self.id_a = id_a
        self.id_b = id_b
        self.effect = effect
        self.direct = direct
        self.relation = relation
        self.pmid = pmid
        self.linenum = linenum
        # Complex definitions, or None for proteins / unknown complexes.
        self.complex_a = self._lookup_complex(id_a)
        self.complex_b = self._lookup_complex(id_b)

        if direct == "NO":
            mechanism_term = self.UNSPECIFIED_TERM
        elif direct == "YES":
            mechanism_term = MECHANISM_GO_MAPPING[mechanism]
        else:
            raise ValueError(
                "Unsupported DIRECT value {!r} for {} -> {} (row {})".format(
                    direct, id_a, id_b, linenum))

        self.mechanism = {"name": mechanism, "uri": None, "term": mechanism_term}
        self.regulated_activity = {"name": None, "uri": None, "term": None}
        self.individuals = {}

    @staticmethod
    def _lookup_complex(entity_id):
        """Return the known complex definition for ``entity_id``, else None."""
        if NamingConvention.is_complex(entity_id):
            return COMPLEXES.get(entity_id)
        return None

    @staticmethod
    def _declare_entity(model, full_id, complex_definition):
        """Declare the individual for one participant and return its URI.

        A complex with a known definition declares itself (and its parts);
        anything else, including a complex id missing from the complexes
        table, is declared as a plain individual of ``full_id``.
        """
        if complex_definition is not None:
            return complex_definition.declare_entities(model)
        return model.declare_individual(full_id)

    def print(self):
        """Print a one-line rendering of the connection for debugging."""
        # Use the full ids so complexes are not mislabelled as UniProtKB.
        print("[{ida}] <- enabled_by – [{mechanism}] – [{relation}]-> [{regulated_activity}] – enabled_by-> [{idb}]".format(
            ida=self.full_id_a(),
            mechanism=self.mechanism["term"],
            relation=self.relation,
            regulated_activity=self.regulated_activity["term"],
            idb=self.full_id_b()))

    def declare_entities(self, model):
        """Declare classes and individuals for this connection in ``model``.

        Fills ``self.individuals``, ``self.mechanism["uri"]`` and, when not
        already set, ``self.regulated_activity["uri"]``. Returns ``model``.
        """
        full_a = self.full_id_a()
        full_b = self.full_id_b()

        # Classes
        for class_id in (full_a, full_b):
            if class_id not in model.classes:
                model.declare_class(class_id)

        # Individuals
        # NOTE(review): every complex shares the same full id, so when A and
        # B are both complexes they collide on one key in self.individuals.
        if full_a not in self.individuals:
            self.individuals[full_a] = self._declare_entity(model, full_a, self.complex_a)
        if full_b not in self.individuals and self.regulated_activity["uri"] is None:
            self.individuals[full_b] = self._declare_entity(model, full_b, self.complex_b)
            self.regulated_activity["uri"] = model.declare_individual(self.regulated_activity["term"])
        else:
            # Reuse whatever already enables the regulated activity.
            pattern = (self.regulated_activity["uri"], ENABLED_BY, None)
            for triple in model.writer.writer.graph.triples(pattern):
                self.individuals[full_b] = triple[2]

        self.mechanism["uri"] = model.declare_individual(self.mechanism["term"])
        self.individuals[self.mechanism["term"]] = self.mechanism["uri"]
        self.individuals[self.regulated_activity["term"]] = self.regulated_activity["uri"]

        return model

    def full_id_a(self):
        """Return the prefixed id of entity A."""
        return NamingConvention.full_id(self.id_a)

    def full_id_b(self):
        """Return the prefixed id of entity B."""
        return NamingConvention.full_id(self.id_b)

    def a_is_complex(self):
        """Return True when entity A is a SIGNOR complex id."""
        return NamingConvention.is_complex(self.id_a)

    def b_is_complex(self):
        """Return True when entity B is a SIGNOR complex id."""
        return NamingConvention.is_complex(self.id_b)

    def clone(self):
        """Return a new connection built from the same row fields.

        The clone gets a fresh ``regulated_activity`` and ``individuals``
        but deliberately shares the ``mechanism`` dict (same object) with
        the original, as the original implementation did.
        """
        new_connection = PathwayConnection(
            self.id_a, self.id_b, self.mechanism["name"], self.effect,
            self.direct, self.relation, self.pmid, self.linenum)
        new_connection.mechanism = self.mechanism
        return new_connection

    def equals(self, pathway_connection, check_ref=False):
        """Compare two connections by ids, mechanism, relation and regulated activity.

        With ``check_ref=True`` the PMID collections must also match as sets.
        """
        same_statement = (
            self.id_a == pathway_connection.id_a
            and self.id_b == pathway_connection.id_b
            and self.mechanism == pathway_connection.mechanism
            and self.relation == pathway_connection.relation
            and self.regulated_activity == pathway_connection.regulated_activity
        )
        if not same_statement:
            return False
        if check_ref:
            return set(self.pmid) == set(pathway_connection.pmid)
        return True

    def full_statement_bnode_in_model(self, model):
        """Return the existing regulation triple for this connection, or None.

        Looks for URIs such that
        ``mechanism ENABLED_BY A``, ``regulated_activity ENABLED_BY B`` and
        ``mechanism <relation> regulated_activity`` are all in the graph.
        """
        graph = model.writer.writer.graph

        mechanism_uris = [
            mech_uri
            for a_uri in model.uri_list_for_individual(self.full_id_a())
            for mech_uri in model.uri_list_for_individual(self.mechanism["term"])
            if (mech_uri, ENABLED_BY, a_uri) in graph
        ]
        regulated_uris = [
            reg_uri
            for b_uri in model.uri_list_for_individual(self.full_id_b())
            for reg_uri in model.uri_list_for_individual(self.regulated_activity["term"])
            if (reg_uri, ENABLED_BY, b_uri) in graph
        ]
        if not mechanism_uris or not regulated_uris:
            return None

        # Only expanded once candidates exist, as in the original.
        relation_uri = URIRef(expand_uri(self.relation))
        for mech_uri in mechanism_uris:
            for reg_uri in regulated_uris:
                candidate = (mech_uri, relation_uri, reg_uri)
                if candidate in graph:
                    return candidate
        return None


class PathwayConnectionSet():
    """The pathway connections loaded from one tab-separated SIGNOR file."""

    # Only rows whose TYPEA and TYPEB are both listed here are kept.
    ACCEPTABLE_TYPES = ("protein", "complex")

    def __init__(self, filename):
        """Load connections from the tab-separated file ``filename``.

        Rows with EFFECT "form_complex" and rows whose TYPEA/TYPEB are not
        in ``ACCEPTABLE_TYPES`` are skipped *before* a PathwayConnection is
        built, so malformed rows that would be discarded anyway cannot
        abort the load. ``linenum`` is the 1-based data-row index (header
        excluded).
        """
        self.connections = []

        with open(filename, "r", newline="") as f:
            for linenum, line in enumerate(csv.DictReader(f, delimiter="\t"), start=1):
                effect = line["EFFECT"]
                # form_complex rows are ignored for now
                if effect == "form_complex":
                    continue
                if (line["TYPEA"] not in self.ACCEPTABLE_TYPES
                        or line["TYPEB"] not in self.ACCEPTABLE_TYPES):
                    continue

                self.append(PathwayConnection(
                    line["IDA"],
                    line["IDB"],
                    line["MECHANISM"],
                    effect,
                    line["DIRECT"],
                    self._relation_for(effect, line["DIRECT"]),
                    [line["PMID"]],
                    linenum
                ))

    @staticmethod
    def _relation_for(effect, direct):
        """Map an EFFECT/DIRECT pair to its relation CURIE (None if unmapped).

        up-regulates*:   RO:0002629 if direct, RO:0002213 if not.
        down-regulates*: RO:0002630 if direct, RO:0002212 if not.
        unknown:         RO:0002211.
        """
        if effect.startswith("up-regulates"):
            return {"YES": "RO:0002629", "NO": "RO:0002213"}.get(direct)
        if effect.startswith("down-regulates"):
            return {"YES": "RO:0002630", "NO": "RO:0002212"}.get(direct)
        if effect == "unknown":
            return "RO:0002211"
        return None

    def append(self, pathway_connection):
        """Add ``pathway_connection`` to the set."""
        self.connections.append(pathway_connection)

    def append_reference(self, pathway_connection):
        """Merge the PMIDs of ``pathway_connection`` into its stored equal.

        The stored connection's ``pmid`` becomes a set.

        Raises:
            ValueError: if no stored connection equals ``pathway_connection``
                (previously an AttributeError on None).
        """
        connection = self.find(pathway_connection)
        if connection is None:
            raise ValueError("No matching connection to append references to")
        connection.pmid = set(connection.pmid) | set(pathway_connection.pmid)

    def contains(self, pathway_connection, check_ref=False):
        """Return True when a stored connection equals ``pathway_connection``."""
        return self.find(pathway_connection, check_ref=check_ref) is not None

    def find(self, pathway_connection, check_ref=False):
        """Return the first stored connection equal to the argument, or None."""
        return next(
            (connection for connection in self.connections
             if connection.equals(pathway_connection, check_ref=check_ref)),
            None)

    def find_by_id_a(self, id):
        """Return every stored connection whose ``id_a`` equals ``id``."""
        return [pc for pc in self.connections if pc.id_a == id]

    def find_other_regulated_activity(self, id_b):
        """Return the first connection where ``id_b`` acts as entity A with a
        specific (non-placeholder) mechanism term, or None."""
        return next(
            (pc for pc in self.find_by_id_a(id_b)
             if pc.mechanism["term"] != PathwayConnection.UNSPECIFIED_TERM),
            None)
def _find_or_emit(model, triple):
    """Return ``triple`` as found in the model graph, emitting it if absent."""
    graph = model.writer.writer.graph
    if triple in graph:
        return next(graph.triples(triple))
    return model.writer.emit(triple[0], triple[1], triple[2])


def main():
    """Build a GO-CAM model from the SIGNOR pathway file given on the CLI.

    Reads ``--filename`` via the module-level ``parser``, fills in the
    regulated activity of every connection, emits the enabled_by and
    regulation statements (with PMID evidence) and serializes the model to
    ``model.filepath``. Prints the number of loaded connections first and
    the number of skipped connections last.
    """
    args = parser.parse_args()

    model = GoCamModel("superfamily_test.ttl")
    p_connections = PathwayConnectionSet(args.filename)

    print(len(p_connections.connections))
    skipped_count = 0

    # fill in regulated activities
    for pc in p_connections.connections:
        # The activity regulated in B is the mechanism of some connection
        # in which B itself is the regulator, when one exists.
        regulated_activity_pc = p_connections.find_other_regulated_activity(pc.id_b)
        if regulated_activity_pc is not None:
            regulated_activity_term = regulated_activity_pc.mechanism["term"]
            regulated_activity_term_uri = regulated_activity_pc.mechanism["uri"]
        else:
            regulated_activity_term = "GO:0003674"
            regulated_activity_term_uri = None

        # Probe with a clone so the stored connection is only updated when
        # it is neither term-less nor a duplicate (same statement and PMIDs).
        connection_clone = pc.clone()
        connection_clone.regulated_activity["term"] = regulated_activity_term
        if regulated_activity_term is None or p_connections.contains(connection_clone, check_ref=True):
            skipped_count += 1
            continue

        pc.regulated_activity["term"] = regulated_activity_term
        pc.regulated_activity["uri"] = regulated_activity_term_uri

        if pc.full_statement_bnode_in_model(model) is not None:
            # The full statement already exists in the model.
            print("2")
            continue

        print("Hey " + pc.full_id_a())
        model = pc.declare_entities(model)

        enabled_by_stmt_a = _find_or_emit(
            model, (pc.mechanism["uri"], ENABLED_BY, pc.individuals[pc.full_id_a()]))
        model.add_axiom(enabled_by_stmt_a)
        enabled_by_stmt_b = _find_or_emit(
            model, (pc.regulated_activity["uri"], ENABLED_BY, pc.individuals[pc.full_id_b()]))
        model.add_axiom(enabled_by_stmt_b)

        # Connect the two activities
        source_id = pc.mechanism["uri"]
        # NOTE(review): pc.relation is None for EFFECT/DIRECT combinations
        # the loader does not map; expand_uri(None) presumably fails here.
        property_id = URIRef(expand_uri(pc.relation))
        target_id = pc.regulated_activity["uri"]
        if not model_contains_statement(model, source_id, property_id, pc.regulated_activity["term"]):
            # Annotate source MF GO term NamedIndividual with relation code-target MF term URI
            model.writer.emit(source_id, property_id, target_id)
            # Add axiom (Source=MF term URI, Property=relation code, Target=MF term URI)
            relation_axiom = model.writer.emit_axiom(source_id, property_id, target_id)
            model.add_evidence(relation_axiom, "EXP", ["PMID:" + pmid for pmid in pc.pmid])

    with open(model.filepath, 'wb') as f:
        model.writer.writer.serialize(destination=f)

    print(skipped_count)
class SignorGrouping():
    """A named SIGNOR group of entities (a complex or a protein family)."""

    # Term instantiated for the grouping itself.
    # NOTE(review): protein families use the protein-containing-complex
    # term too, exactly as the original code did — confirm this is intended.
    GROUPING_TERM = "GO:0032991"
    # Relation emitted from the grouping individual to each member
    # (has_part, per the original main()'s "complex_cc has_part" wording).
    MEMBER_RELATION = "BFO:0000051"

    def __init__(self, signor_id, name, entities):
        self.id = signor_id
        self.name = name
        self.entities = entities

    def declare_entities(self, model):
        """Declare the grouping and its members in ``model``; return its URI.

        Creates an individual of ``GROUPING_TERM`` labelled with the
        grouping's name, declares one individual per member entity and
        emits (plus axiomatizes) ``grouping MEMBER_RELATION member``.
        """
        uri = model.declare_individual(self.GROUPING_TERM)
        model.writer.writer.graph.add((uri, RDFS.label, Literal(str(self.name))))
        relation_uri = URIRef(expand_uri(self.MEMBER_RELATION))
        for entity in self.entities:
            entity_uri = model.declare_individual(NamingConvention.full_id(entity))
            # uri BFO:0000051 entity_uri
            has_part_stmt = model.writer.emit(uri, relation_uri, entity_uri)
            model.add_axiom(has_part_stmt)
        return uri


class SignorComplex(SignorGrouping):
    """A SIGNOR complex; declared via SignorGrouping.declare_entities."""


class SignorProteinFamily(SignorGrouping):
    """A SIGNOR protein family; declared via SignorGrouping.declare_entities."""


class SignorGroupingFactory():
    """Load SIGNOR groupings from a ';'-delimited CSV file.

    Subclasses set ``NAME_FIELD`` (the CSV column holding the grouping
    name) and ``GROUPING_CLASS`` (the class to instantiate per row). After
    construction ``self.grouping`` maps SIGNOR id -> grouping instance.
    """

    NAME_FIELD = None
    GROUPING_CLASS = None

    def __init__(self, filename):
        """Parse ``filename`` and populate ``self.grouping``.

        Raises:
            TypeError: if ``GROUPING_CLASS`` has not been set.
        """
        self.grouping = {}
        grouping_class = self._resolve_grouping_class()

        with open(filename, "r", newline="") as f:
            for line in csv.DictReader(f, delimiter=";"):
                # Tolerates "A, B" as well as "A,B"; empty cells yield [].
                entities = [
                    entity.strip()
                    for entity in line['LIST OF ENTITIES'].split(",")
                    if entity.strip()
                ]
                sig_grouping = grouping_class(
                    signor_id=line['SIGNOR ID'],
                    name=line[self.NAME_FIELD],
                    entities=entities)
                self.grouping[sig_grouping.id] = sig_grouping

    def _resolve_grouping_class(self):
        """Return the class to instantiate for each row.

        A class object is used as-is. A string (the legacy convention) is
        looked up among this module's globals instead of being passed to
        eval(), so arbitrary expressions are never executed.
        """
        grouping_class = self.GROUPING_CLASS
        if grouping_class is None:
            raise TypeError("GROUPING_CLASS must be set by the factory subclass")
        if isinstance(grouping_class, str):
            grouping_class = globals()[grouping_class]
        return grouping_class


class SignorComplexFactory(SignorGroupingFactory):
    """Factory for SIGNOR complexes; results are exposed as ``complexes``."""

    NAME_FIELD = "COMPLEX NAME"
    GROUPING_CLASS = SignorComplex

    def __init__(self, filename):
        SignorGroupingFactory.__init__(self, filename)
        self.complexes = self.grouping


class SignorProteinFamilyFactory(SignorGroupingFactory):
    """Factory for SIGNOR protein families; results are exposed as ``families``."""

    NAME_FIELD = "PROT. FAMILY NAME"
    GROUPING_CLASS = SignorProteinFamily

    def __init__(self, filename):
        SignorGroupingFactory.__init__(self, filename)
        self.families = self.grouping


def main():
    """Smoke test: print the members of one known complex and one family."""
    complex_list = SignorComplexFactory("SIGNOR_complexes.csv")
    signor_complex = complex_list.complexes["SIGNOR-C87"]
    for entity in signor_complex.entities:
        print("complex_cc has_part " + entity)

    pf_list = SignorProteinFamilyFactory("SIGNOR_PF.csv")
    pf = pf_list.families["SIGNOR-PF11"]
    print("PF list for " + pf.name + ":")
    for entity in pf.entities:
        print(entity)