Skip to content

Commit

Permalink
Amending docker container further to account for Graql changes. Lots …
Browse files Browse the repository at this point in the history
…of graql related enhancements for the graql client and pattern matching logic
  • Loading branch information
neomatrix369 committed Feb 6, 2020
1 parent d410ff4 commit 6e322e2
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 25 deletions.
9 changes: 5 additions & 4 deletions examples/data/databases/graph/grakn/graql/English-to-Graql.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,29 @@ Since September 10th, which customers called the person with phone number +86 92

### Graql

match",
match
$customer isa person, has phone-number $phone-number;
$company isa company, has name "Telecom";
(customer: $customer, provider: $company) isa contract;
$target isa person, has phone-number "+86 921 547 9004";
(caller: $customer, callee: $target) isa call, has started-at $started-at;
$min-date == 2018-09-10T00:00:00; $started-at > $min-date;
get $phone-number;"
get $phone-number;

### English

Get me the customers of company “Telecom” who called the target person with phone number +86 921 547 9004 from September 14th onwards.

### Graql

match",
match
$customer isa person, has phone-number $phone-number;
$company isa company, has name "Telecom";
(customer: $customer, provider: $company) isa contract;
$target isa person, has phone-number "+86 921 547 9004";
(caller: $customer, callee: $target) isa call, has started-at $started-at;
$min-date == 2018-09-14T00:00:00; $started-at > $min-date;
get $phone-number;"
get $phone-number;

## Query 3

Expand Down Expand Up @@ -115,6 +115,7 @@ Who are the common contacts of customers with phone numbers +7 171 898 0853 and
## Query 6

### English

Who are the customers who 1) have all called each other and 2) have all called person with phone number +48 894 777 5173 at least once?

or
Expand Down
159 changes: 159 additions & 0 deletions examples/data/databases/graph/grakn/graql/grakn_console_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#
# Credits to GraknLabs for creating the original version
# Original version can be found at https://github.com/graknlabs/examples/tree/master/phone_calls/python/queries.py
#
# coding=utf-8
from grakn.client import GraknClient

def print_to_log(title, content):
    """Print ``title`` and ``content`` framed between two rows of tildes.

    The rule string already ends in a newline and ``print`` appends another,
    so each rule is followed by one empty line.
    """
    rule = "~~~~~~~~~~~~~~~~~~\n"
    for line in (rule, title, content, rule):
        print(line)

def execute_user_query(user_input, transaction):
    """Run one query entry through ``transaction`` and log the values found.

    ``user_input`` is read positionally: element 1 is handed to
    ``transaction.query`` and element 2 becomes the title of the log output.

    Returns the list obtained by calling ``value()`` on every concept that
    ``collect_concepts()`` yields for the query.
    """
    concepts = transaction.query(user_input[1]).collect_concepts()

    values = []
    for concept in concepts:
        values.append(concept.value())

    print_to_log(user_input[2], values)
    return values

def random_error_decorator_messages():
    """Return one light-hearted error banner, picked uniformly at random.

    The banner is meant to be printed ahead of the real exception text so a
    failed query reads a little less harshly.
    """
    from random import choice

    messages = [
        "Argh! We have an issue, but don't fret! It end ups up well",
        "Oh no! Houton, we are not in Texas anymore!",
        "Cow patter! Now this is not so cool. But I will get through fine!",
        "Hallo, Hallo! Fawlty towers again!",
        "Not again! I just seal the problems, Oh well it's just an exception!",
    ]

    # choice() can only return an existing element. Indexing with
    # randint(0, len(messages)) is unsafe because randint's upper bound is
    # inclusive, so it can yield an index one past the end of the list.
    return choice(messages)

# Catalogue of canned Graql queries. Every entry is a three-element list:
#   [0] a short label,
#   [1] the Graql text that is sent to the server,
#   [2] the human-readable title printed above the results.
# Every Graql statement, including the final `get`, is terminated with ';'.
graql_queries = [
    [
        "SCHEMA",
        "match $x sub thing; get;",
        "The schema of the keyspace is as shown"
    ],
    [
        "CUSTOMERS_CALLED_SINCE",
        """
match
$customer isa person, has phone-number $phone-number;
$company isa company, has name "Telecom";
(customer: $customer, provider: $company) isa contract;
$target isa person, has phone-number "+86 921 547 9004";
(caller: $customer, callee: $target) isa call, has started-at $started-at;
$min-date == 2018-09-10T00:00:00; $started-at > $min-date;
get $phone-number;
""",
        "These are numbers of the customers who called +86 921 547 9004 since 2018-09-10T00:00:00"
    ],
    # NOTE(review): this label says CAMBRIDGE but the query and its description
    # target London, and the next entry has the opposite mismatch. The labels
    # are left untouched in case they are referenced elsewhere -- confirm and
    # swap them if they are not.
    [
        "OVER_50_PHONE_CALLS_CAMBRIDGE",
        """
match
$potential_caller isa person, has city "London", has age > 50;
$company isa company, has name "Telecom";
(customer: $potential_caller, provider: $company) isa contract;
$pattern-callee isa person, has age < 20;
(caller: $potential_caller, callee: $pattern-callee) isa call, has started-at $pattern-call-date;
$target isa person, has phone-number $phone-number;
not { (customer: $target, provider: $company) isa contract; };
(caller: $potential_caller, callee: $target) isa call, has started-at $target-call-date;
$target-call-date > $pattern-call-date;
get $phone-number;
""",
        "Here are the phone numbers of the people (London calls)"
    ],
    [
        "UNDER_20_PHONE_CALLS_LONDON",
        """
match
$potential_caller isa person, has city "Cambridge", has age > 50;
$company isa company, has name "Telecom";
(customer: $potential_caller, provider: $company) isa contract;
$pattern-callee isa person, has age < 20;
(caller: $potential_caller, callee: $pattern-callee) isa call, has started-at $pattern-call-date;
$target isa person, has phone-number $phone-number;
not { (customer: $target, provider: $company) isa contract; };
(caller: $potential_caller, callee: $target) isa call, has started-at $target-call-date;
$target-call-date > $pattern-call-date;
get $phone-number;
""",
        "Here are the phone numbers of the people (Cambridge calls)"
    ],
    [
        "COMMON_CUSTOMERS_MULTIPLE_NUMBERS",
        """
match
$common-contact isa person, has phone-number $phone-number;
$customer-a isa person, has phone-number "+7 171 898 0853";
$customer-b isa person, has phone-number "+370 351 224 5176";
(caller: $customer-a, callee: $common-contact) isa call;
(caller: $customer-b, callee: $common-contact) isa call;
get $phone-number;
""",
        "Here are the numbers of the common customers"
    ],
    [
        "COMMON_CUSTOMERS_SINGLE_NUMBER",
        """
match
$target isa person, has phone-number "+48 894 777 5173";
$company isa company, has name "Telecom";
$customer-a isa person, has phone-number $phone-number-a;
(customer: $customer-a, provider: $company) isa contract;
(caller: $customer-a, callee: $target) isa call;
$customer-b isa person, has phone-number $phone-number-b;
(customer: $customer-b, provider: $company) isa contract;
(caller: $customer-b, callee: $target) isa call;
(caller: $customer-a, callee: $customer-b) isa call;
get $phone-number-a, $phone-number-b;
""",
        "The customers who have called the single number are"
    ],
]

def process_user_input(user_input):
    """Run the catalogue query selected by ``user_input`` and report failures.

    ``user_input`` is compared, ignoring case and surrounding whitespace,
    with the label (element 0) of every ``graql_queries`` entry. When no
    label matches, the first catalogue entry is executed, so free-form text
    still produces a result.

    The query runs on the module-level ``transaction``, which must exist
    before this function is called. Any error is printed rather than raised
    so that the interactive prompt loop keeps running.
    """
    print("Let me think, will take a moment, please be patient...")
    try:
        wanted = str(user_input).strip().upper()
        # Fall back to the first entry when the text is not a known label.
        selected = graql_queries[0]
        for entry in graql_queries:
            if entry[0].upper() == wanted:
                selected = entry
                break
        execute_user_query(selected, transaction)
    except Exception as ex:
        # Top-level boundary of the prompt loop: show the problem and carry on.
        print("")
        print(random_error_decorator_messages())
        print("Execution halted, due to an error:")
        print(ex)
        print("")


if __name__ == "__main__":
    # The code below:
    # - creates a Grakn client > session > read transaction connected to the
    #   phone_calls keyspace
    # - runs a query for every line the user enters
    # - closes the transaction, session and client when the loop ends,
    #   including when it ends because of an error

    keyspace_name = "phone_calls"
    cache = {}

    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace=keyspace_name) as session:
            # `transaction` is bound at module level on purpose:
            # process_user_input() reads it as a global.
            with session.transaction().read() as transaction:
                while True:
                    print("")
                    print("Enter/paste your query in English or Graql, Ctrl-D or Ctrl-Z ( windows ) to save it! (Let the force be with us!)")
                    try:
                        user_input = input()
                    except EOFError:
                        # Ctrl-D / Ctrl-Z closes stdin; treat it as a request to leave.
                        user_input = "exit"
                    user_input = user_input.replace("\t", " ")
                    if user_input.lower().strip() == "exit":
                        print("Hastla vista! See you soon!")
                        break
                    process_user_input(user_input)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from fuzzywuzzy import fuzz
import pandas as pd

schema_queries = {
schema_queries_in_english = {
'List the schema in this keyspace': [
'Show me the schema',
'List the schema',
Expand Down Expand Up @@ -60,9 +60,9 @@

print('Iterating through schema queries')
comparison_results = []
for each_query in schema_queries:
for each_query in schema_queries_in_english:
print(f'Question/command: {each_query}')
for each_similarity in schema_queries[each_query]:
for each_similarity in schema_queries_in_english[each_query]:
ratio = fuzz.ratio(each_query, each_similarity)
partial_ratio = fuzz.partial_ratio(each_query, each_similarity)
token_sort_ratio = fuzz.token_sort_ratio(each_query, each_similarity)
Expand Down
63 changes: 45 additions & 18 deletions examples/data/databases/graph/grakn/graql/pattern-matching.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
from fuzzywuzzy import fuzz
import pandas as pd
import sys
from pytictoc import TicToc

# Flip to True to print a timing line around each instrumented step.
DEBUG_PERF = False
if DEBUG_PERF:
    # The shared timer only exists while performance debugging is enabled.
    timer = TicToc()


def _tic(name=""):
    """Print the ``--- <name>: `` prefix and call ``timer.tic()``.

    Does nothing unless ``DEBUG_PERF`` is true.
    """
    if not DEBUG_PERF:
        return
    print(f'--- {name}: ', end='')
    timer.tic()


def _toc(name=""):
    """Print the ``--- <name>: `` prefix and call ``timer.toc()``.

    Does nothing unless ``DEBUG_PERF`` is true.
    """
    if not DEBUG_PERF:
        return
    print(f'--- {name}: ', end='')
    timer.toc()

### See https://en.wikipedia.org/wiki/Words_of_estimative_probability
words_of_probability_estimation = [
Expand All @@ -14,7 +29,7 @@
["Very unlikely", 0, 2] # Impossible 0%: Give or take 0%
]

schema_queries = {
schema_queries_in_english = {
'List the schema in this keyspace': [
'Show me the schema',
'List the schema',
Expand Down Expand Up @@ -59,55 +74,67 @@
],

'Who are the customers who 1) have all called each other and 2) have all called person with phone number +48 894 777 5173 at least once?': [
'Who are customers called other persons phone number least'
'Who are customers called other persons phone number least once',
'Who are customers one another phone',
"called atleast once"
],

'Get me the phone number of people who have received calls from both customer with phone number +7 171 898 0853 and customer with phone number +370 351 224 5176.': [
'Get phone number of people received calls from customer of certain age'
],

'How does the average call duration among customers aged under 20 compare those aged over 40?': [
'How average call duration among customers aged compare aged'
'How average call duration among customers aged compare aged',
"how long did the call last"
],
}

def get_potential_queries(query_asked):
    """Score every known question and paraphrase against ``query_asked``.

    Walks ``schema_queries`` in order; each key is scored first, followed by
    the paraphrases listed under it. Returns a list of
    ``[text, partial_ratio]`` pairs in that order.
    """
    scored = []
    for canonical, paraphrases in schema_queries.items():
        for candidate in (canonical, *paraphrases):
            scored.append([candidate, fuzz.partial_ratio(candidate, query_asked)])
    return scored

def get_confidence_in_words(value):
    """Translate a numeric score into its estimative-probability phrase.

    Scans ``words_of_probability_estimation`` in order and returns the label
    (element 0) of the first entry whose inclusive bounds (elements 1 and 2)
    contain ``value``. Returns ``None`` when no entry matches.
    """
    labels = (
        slab[0]
        for slab in words_of_probability_estimation
        if slab[1] <= value <= slab[2]
    )
    return next(labels, None)

def add_result_to(results, query, query_asked):
    """Append ``[query, ratio, confidence]`` to ``results`` and return the list.

    ``ratio`` is ``fuzz.partial_ratio(query, query_asked)``; ``confidence`` is
    that ratio expressed in words by ``get_confidence_in_words``. ``results``
    is modified in place and the same list object is returned.
    """
    score = fuzz.partial_ratio(query, query_asked)
    results.append([query, score, get_confidence_in_words(score)])
    return results

def get_potential_queries(query_asked):
    """Score every known English question and paraphrase against ``query_asked``.

    Walks ``schema_queries_in_english`` in order; each key is scored first,
    followed by the paraphrases listed under it. Returns the rows accumulated
    through ``add_result_to``.
    """
    matches = []
    for canonical, paraphrases in schema_queries_in_english.items():
        for candidate in (canonical, *paraphrases):
            matches = add_result_to(matches, candidate, query_asked)
    return matches

def print_formatted_results(dataframe):
    """Print one line per row as ``<query> (Confidence: <words>, <ratio>%)``.

    ``dataframe`` must provide the columns ``query_in_english``, ``ratio``
    and ``confidence``. Columns are read by label, not by position, so the
    output does not depend on column order.
    """
    # The row index is not part of the output.
    for _, row in dataframe.iterrows():
        print(f"{row['query_in_english']} (Confidence: {row['confidence']}, {row['ratio']}%)")

if (len(sys.argv) > 1):
print(f"Query: {sys.argv[1]}")
_tic("get_potential_queries()")
potential_queries = get_potential_queries(sys.argv[1])
results = pd.DataFrame(potential_queries, columns = ['each_query', 'ratio'])
results['ratio'] = results['ratio'].apply(int)
_toc("get_potential_queries()")

_tic("creating dataframe")
results = pd.DataFrame(potential_queries, columns = ['query_in_english', 'ratio', 'confidence'])
results = results.sort_values(by=['ratio'], ascending=False)
results['Confidence'] = results['ratio'].apply(get_confidence_in_words)
filter_greater_or_equal_to_70 = results['ratio'] > 70
results_with_70_or_more_accuracy = results[filter_greater_or_equal_to_70]
_toc("creating dataframe")
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 3)
SHOW_COUNT = 5
_tic("filtering and printing dataframe")
if len(results_with_70_or_more_accuracy) == 0:
filter_between_40_and_70 = (results['ratio'] >= 40) & (results['ratio'] <= 70)
results_between_40_and_70 = results[filter_between_40_and_70]
print_formatted_results(results_between_40_and_70[:SHOW_COUNT])
else:
print_formatted_results(results_with_70_or_more_accuracy[:SHOW_COUNT])
_toc("filtering and printing dataframe")
else:
print("")
print("Usage:")
Expand Down
26 changes: 26 additions & 0 deletions examples/data/databases/graph/grakn/graql/run-python-in-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Start an interactive bash shell inside the Grakn container image, with the
# current directory mounted at ${WORKDIR} and the host network shared.

set -e
set -u
set -o pipefail

echo "Running python contain, mapping to current folder"
WORKDIR=/home/python

echo ""; echo "Run the below command once you are in the container"
echo " $ pip3 install setuptools ijson==2.3 grakn-client"; echo ""
echo ""
echo "Use python3 or pip3 to run any pythong or pip commands"

# Absolute path of the directory two levels up (not used further down in this script).
PREVIOUS_TO_PREVIOUS_DIR="$(cd ../.. && pwd)"

set -x
# The volume and workdir arguments are quoted so that a checkout path
# containing spaces is still passed to docker as a single argument.
docker run --rm \
    -it \
    --volume "$(pwd):${WORKDIR}" \
    --workdir "${WORKDIR}" \
    --network="host" \
    --entrypoint="/bin/bash" \
    neomatrix369/grakn:1.6.2-GRAALVM-CE-19.2.1

set +x
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 6e322e2

Please sign in to comment.