# -*- coding: utf-8 -*- """ @author: Victor Barres Define knowledge representations for TCG. For now only implements semantic net. Uses NetworkX module to represent the semantic net. """ from __future__ import division import matplotlib.pyplot as plt import networkx as nx #################### ### SEMANTIC NET ### #################### class K_ENT(object): """ Knowledge entity Data: - id (INT): Unique identifier of the knowlege entity. - meaning: meaning associated with the knowledge entity. Note: - '==' has to be defined for meaning. """ ID_NEXT = 1 # Global knowledge entity counter def __init__(self, name='', meaning=''): self.id = K_ENT.ID_NEXT K_ENT.ID_NEXT += 1 self.name = name self.meaning = meaning def __eq__(self, other): is_equal = (isinstance(other, self.__class__) and (self.id == other.id) and (self.meaning == other.meaning)) return is_equal class K_REL(object): """ Knowledge relation. Only 2 place relations are allowed. Relations are defined as directed. Data: - type (STR): Type of relation. - pFrom (K_ENT): Source knowledge entity. - pTo (K_ENT): Target knowledge entity. """ def __init__(self, aType = 'undefined', from_ent = None, to_ent = None): self.type = aType self.pFrom = from_ent self.pTo= to_ent def __eq__(self, other): is_equal = (isinstance(other, self.__class__) and (self.type == other.type) and (self.pFrom == other.pFrom) and (self.pTo == other.pTo)) return is_equal def __str__(self): p = "%s %s %s" % (self.pFrom.name, self.type, self.pTo.name) return p class K_NET(object): """ Semantic network. Data: - nodes ([K_ENT]): List of knowledge entity. - edges ([K_REL]): List of knowledge relations. - graph (networkx.DiGraph): A NetworkX implementation of the semantic net. Each node has an additional attribute meaning = k_ent.meaning Each edge has an additional attribute type = k_rel.type """ def __init__(self, nodes=[], edges=[]): self.nodes = nodes[:] self.edges = edges[:] self.graph = None def clear(self): """ Clear all. """ self.nodes = [] self.edges = [] self._create_NX_graph() def add_ent(self, k_ent): """ Add a knowledge entity to the semantic network Args: k_ent (K_ENT): A knowlege entity """ # Check validity if(not(isinstance(k_ent, K_ENT)) or not(k_ent.meaning)): return False # Check duplication if self.find_meaning(k_ent.meaning): return False # Add new semantic entity self.nodes.append(k_ent) self._create_NX_graph() return True def add_relation(self, k_rel): """ Add a relation to the semantic network Args: k_rel (K_REL): A knowledge relation """ # Check validity if (not(isinstance(k_rel, K_REL)) or (k_rel.type == 'undefined') or not(k_rel.pFrom) or not(k_rel.pTo)): return False # Check duplication for r in self.edges: if r == k_rel: return False # Check that source and target of relation are defined. if not(self.find_meaning(k_rel.pFrom.meaning)) or not(self.find_meaning(k_rel.pTo.meaning)): return False # Add new relation self.edges.append(k_rel) self._create_NX_graph() return True def shortest_path(self, from_ent, to_ent, rel_types=['is_a']): """ Returns the length of the shortest path, if it exists, between from_cpt and to_cpt in the k_net graph. Only considers the edges of type belonging to rel_types. If no path exists, returns -1 Relies on NetworkX implementation of path length Args: - from_ent (K_ENT): Origin - to_ent (K_ENT): Target """ path_len = -1 graph = self.graph.copy() # Not efficient... #Only keep the relevant edges to_remove =[] for u,v,d in graph.edges_iter(data=True): if not(d['type'] in rel_types): to_remove.append((u,v)) graph.remove_edges_from(to_remove) try: path_len = nx.shortest_path_length(graph, source=from_ent.id, target=to_ent.id, weight=None) except nx.NetworkXNoPath: return path_len return path_len def find_meaning(self, meaning): """ Find k_ent with meaning "meaning". Returns the entity if found, else returns None. Args: - meaning (): Meaning of a knowledge entity. """ for n in self.nodes: if n.meaning == meaning: return n return None def satisfy_rel(self, ent1, rel_type, ent2): """ """ if ent1 and ent2: if not(self.graph.has_edge(ent1.id, ent2.id)): return [] else: edge_data = self.graph.get_edge_data(ent1.id, ent2.id) if not(rel_type): return [(ent1, edge_data['type'], ent2)] elif edge_data['type'] == rel_type: return [(ent1, rel_type, ent2)] else: return [] elif ent1 and not(ent2): successors = self.graph.successors(ent1.id) res = [] for s in successors: node_data = self.graph.node[s] ent2 = self.find_meaning(node_data['meaning']) res.extend(self.satisfy_rel(ent1, rel_type, ent2)) return res elif not(ent1) and ent2: predecessors = self.graph.predecessors(ent2.id) res = [] for p in predecessors: node_data = self.graph.node[p] ent1 = self.find_meaning(node_data['meaning']) res.extend(self.satisfy_rel(ent1, rel_type, ent2)) return res else: res = [] for ent1 in self.nodes: for ent2 in self.nodes: res.extend(self.satisfy_rel(ent1, rel_type, ent2)) return res def similarity(self, ent1, ent2): """ Returns a similarity score between ent1 and ent2. Uses path similarity. Args: - ent1 (K_ENT): - ent2 (K_ENT): Examples for is-a taxonomy (e.g. Wordnet): Path similarity: 1/(L+1), L=shortest path distance Leackock-Chodrow Similarity: -log(L/2*D) where L=shortest path length, D=taxonomy depth Wu-Palmer Similarlity: 2*depth(lcs)/(depth(s1) + depth(s2)), lcs = Least Common Subsumer. Resnik Similarity (Corpus dependent): IC(lcs) Lin Similarity (Corpus dependent): 2*IC(lcs)/(IC(s1) + IC(s2)) Jiang & Conrath Similarity: 1/jcn_distance, jcn_distance = IC(s1) + IC(s2) - 2 * IC(lcs). See: http://maraca.d.umn.edu/umls_similarity/similarity_measures.html Note: - ONLY PATH SIMILARITY IMPLEMENTED - Question: What does it mean how similar is DOG to ANIMAL? Using path lengths, DALMATIAN being an hyponym of DOG, is necessarily less similar to ANIMAL than DOG... """ L = self.shortest_path(ent1, ent2) if L == -1: # Case no path found sim = 0 else: sim = 1.0/(1.0 + L) return sim def match(self, ent1, ent2, match_type = "is_a"): """ Check if ent1 matches ent2. Type = "is_a": ent1 matches ent2 if ent2 is a hyponym of ent2 (or equal to ent2) Type = "equal": ent1 matches ent2 iff ent1 is equal to ent2. Args: - ent1 (K_ENT) - ent2 (K_ENT) - match_type (STR): "is_a" or "equal" Notes: - In the currrent version matching is boolean. No impact of distance on similarity. See similarity() """ dist = self.shortest_path(ent1, ent2) if (match_type == "is_a" and dist >= 0): return True elif (match_type == "equal" and dist == 0): return True return False def show(self): """ """ plt.figure() node_labels = dict((n, d) for n,d in self.graph.nodes(data=True)) pos = nx.spring_layout(self.graph) nx.draw_networkx(self.graph, pos=pos, with_labels= False, node_color='g') nx.draw_networkx_labels(self.graph, pos=pos, labels= node_labels) def _create_NX_graph(self): graph = nx.DiGraph() for node in self.nodes: graph.add_node(node.id, meaning=node.meaning) for edge in self.edges: graph.add_edge(edge.pFrom.id, edge.pTo.id, type= edge.type) self.graph = graph def _has_entity(self, ent_name): """ Returns entity iff there is a entity with name "name". Args: - entt_name (STR): """ for n in self.nodes: if n.name == ent_name: return n return None