forked from neomatrix369/awesome-ai-ml-dl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding the graql aspect of the Grakn example, with pattern matching
- Loading branch information
1 parent
8b39d55
commit d410ff4
Showing
7 changed files
with
509 additions
and
0 deletions.
There are no files selected for viewing
168 changes: 168 additions & 0 deletions
168
examples/data/databases/graph/grakn/graql/English-to-Graql.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
# English-to-Graql | ||
|
||
A list of English phrases or questions mapped to respective Graql queries. | ||
|
||
## Query 1 | ||
|
||
### English | ||
|
||
Can I see the schema? | ||
List the schema in this keyspace | ||
Show me the schema | ||
List the schema | ||
What is the schema here | ||
Schema? | ||
Schema please | ||
|
||
### Graql | ||
|
||
match $x sub thing; get; offset 0; limit 30; | ||
|
||
## Query 2 | ||
|
||
### English | ||
|
||
From 2018-09-10 onwards, which customers called the person with phone number +86 921 547 9004? | ||
|
||
or | ||
|
||
Since September 10th, which customers called the person with phone number +86 921 547 9004? | ||
|
||
### Graql | ||
|
||
match | ||
$customer isa person, has phone-number $phone-number; | ||
$company isa company, has name "Telecom"; | ||
(customer: $customer, provider: $company) isa contract; | ||
$target isa person, has phone-number "+86 921 547 9004"; | ||
(caller: $customer, callee: $target) isa call, has started-at $started-at; | ||
$min-date == 2018-09-10T00:00:00; $started-at > $min-date; | ||
get $phone-number; | ||
|
||
### English | ||
|
||
Get me the customers of company “Telecom” who called the target person with phone number +86 921 547 9004 from September 14th onwards. | ||
|
||
### Graql | ||
|
||
match | ||
$customer isa person, has phone-number $phone-number; | ||
$company isa company, has name "Telecom"; | ||
(customer: $customer, provider: $company) isa contract; | ||
$target isa person, has phone-number "+86 921 547 9004"; | ||
(caller: $customer, callee: $target) isa call, has started-at $started-at; | ||
$min-date == 2018-09-14T00:00:00; $started-at > $min-date; | ||
get $phone-number; | ||
|
||
## Query 3 | ||
|
||
### English | ||
|
||
Who are the people aged under 20 who have received at least one phone call from a Cambridge customer aged over 50? | ||
|
||
### Graql | ||
|
||
match | ||
$potential_caller isa person, has city "Cambridge", has age > 50; | ||
$company isa company, has name "Telecom"; | ||
(customer: $potential_caller, provider: $company) isa contract; | ||
$pattern-callee isa person, has age < 20; | ||
(caller: $potential_caller, callee: $pattern-callee) isa call, has started-at $pattern-call-date; | ||
$target isa person, has phone-number $phone-number; | ||
not { (customer: $target, provider: $company) isa contract; }; | ||
(caller: $potential_caller, callee: $target) isa call, has started-at $target-call-date; | ||
$target-call-date > $pattern-call-date; | ||
get $phone-number; | ||
|
||
## Query 4 | ||
|
||
### English | ||
|
||
Who are the people who have received a call from a London customer aged over 50 who has previously called someone aged under 20? | ||
|
||
or | ||
|
||
Get me the phone number of people who have received a call from a customer aged over 50 after this customer (potential person) made a call to another customer aged under 20. | ||
|
||
### Graql | ||
|
||
match | ||
$potential_caller isa person, has city "London", has age > 50; | ||
$company isa company, has name "Telecom"; | ||
(customer: $potential_caller, provider: $company) isa contract; | ||
$pattern-callee isa person, has age < 20; | ||
(caller: $potential_caller, callee: $pattern-callee) isa call, has started-at $pattern-call-date; | ||
$target isa person, has phone-number $phone-number; | ||
not { (customer: $target, provider: $company) isa contract; }; | ||
(caller: $potential_caller, callee: $target) isa call, has started-at $target-call-date; | ||
$target-call-date > $pattern-call-date; | ||
get $phone-number; | ||
|
||
## Query 5 | ||
|
||
### English | ||
|
||
Who are the common contacts of customers with phone numbers +7 171 898 0853 and +370 351 224 5176? | ||
|
||
### Graql | ||
|
||
match | ||
$common-contact isa person, has phone-number $phone-number; | ||
$customer-a isa person, has phone-number "+7 171 898 0853"; | ||
$customer-b isa person, has phone-number "+370 351 224 5176"; | ||
(caller: $customer-a, callee: $common-contact) isa call; | ||
(caller: $customer-b, callee: $common-contact) isa call; | ||
get $phone-number; | ||
|
||
## Query 6 | ||
|
||
### English | ||
Who are the customers who 1) have all called each other and 2) have all called person with phone number +48 894 777 5173 at least once? | ||
|
||
or | ||
|
||
Get me the phone number of people who have received calls from both customer with phone number +7 171 898 0853 and customer with phone number +370 351 224 5176. | ||
|
||
### Graql | ||
|
||
match | ||
$target isa person, has phone-number "+48 894 777 5173"; | ||
$company isa company, has name "Telecom"; | ||
$customer-a isa person, has phone-number $phone-number-a; | ||
(customer: $customer-a, provider: $company) isa contract; | ||
(caller: $customer-a, callee: $target) isa call; | ||
$customer-b isa person, has phone-number $phone-number-b; | ||
(customer: $customer-b, provider: $company) isa contract; | ||
(caller: $customer-b, callee: $target) isa call; | ||
(caller: $customer-a, callee: $customer-b) isa call; | ||
get $phone-number-a, $phone-number-b; | ||
|
||
## Query 7 | ||
|
||
### English | ||
|
||
How does the average call duration among customers aged under 20 compare with those aged over 40? (Not for graphing, only aggregate calls.) This splits into queries a) and b). | ||
|
||
a) Get me the average call duration among customers who have a contract with company “Telecom” and are aged under 20. | ||
|
||
### Graql | ||
|
||
match | ||
$customer isa person, has age < 20; | ||
$company isa company, has name "Telecom"; | ||
(customer: $customer, provider: $company) isa contract; | ||
(caller: $customer, callee: $anyone) isa call, has duration $duration; | ||
get $duration; mean $duration; | ||
|
||
b) Get me the average call duration among customers who have a contract with company “Telecom” and are aged over 40. | ||
|
||
### Graql | ||
|
||
match | ||
$customer isa person, has age > 40; | ||
$company isa company, has name "Telecom"; | ||
(customer: $customer, provider: $company) isa contract; | ||
(caller: $customer, callee: $anyone) isa call, has duration $duration; | ||
get $duration; mean $duration; | ||
|
||
--- | ||
|
||
[back to README](../README.md) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Graql | ||
|
||
Speaking Graql! | ||
|
||
![Workbase front screen](workbase-front-screen.png) | ||
|
||
## Schema | ||
|
||
For simplicity let's pick the [Phone calls](https://github.com/graknlabs/examples/blob/master/schemas/phone-calls-schema.gql) schema. | ||
|
||
## Data | ||
|
||
Data for the [Phone calls](https://github.com/graknlabs/examples/blob/master/schemas/phone-calls-schema.gql) schema can be found **[here](https://github.com/graknlabs/examples/tree/master/datasets/phone-calls)**, in both `xml` and `json` formats. | ||
|
||
## Getting started | ||
|
||
To get quickly acquainted with Grakn, Graql and Workbase, have a look at the [QuickStart guide](https://dev.grakn.ai/docs/general/quickstart) from [GraknLabs](https://grakn.ai). | ||
|
||
Have a look especially around [Graql](https://dev.grakn.ai/docs/schema/overview) and [Workbase](https://dev.grakn.ai/docs/workbase/overview). | ||
|
||
You may not need to do all the setup, as the docker container provided (see [README](./README.md)) should help you get started with Grakn and Graql. See the Workbase docs for how to install and run it - this part can be trivial for many users (just download, extract and run the app). | ||
|
||
Also, see the [Examples overview](https://dev.grakn.ai/docs/examples/phone-calls-overview) resource. | ||
|
||
### High-level | ||
|
||
#### Potential Questions to ask about data | ||
|
||
- Since September 10th, which customers called the person X? | ||
- Who are the people who have received a call from a London customer aged over 50 who has previously called someone aged under 20? | ||
- Who are the common contacts of customers X and Y? | ||
- Who are the customers who | ||
- 1) have all called each other and | ||
- 2) have all called person X at least once? | ||
- How does the average call duration among customers aged under 20 compare with those aged over 40? | ||
|
||
#### Domain concepts | ||
|
||
- A **company** has a **name**, | ||
- and can be the **provider** of a **contract** to a **person**, | ||
- who then becomes a **customer** | ||
- A **person** has a | ||
- **first** and **last name**, | ||
- an **age**, | ||
- a **city** they live in, | ||
- and a **phone number** | ||
- A **person** who doesn’t have | ||
- a registered **contract** (not a **customer**) | ||
- has only a **phone number** | ||
- A **call**, | ||
- made from a **person** (**caller**) to another **person** (**callee**), | ||
- has a **duration** as well as | ||
- the **date** | ||
- and **time** when the **call** has been made | ||
|
||
#### Reorganising, classifying and naming the concepts | ||
|
||
**Relations** | ||
- call is of _type relation_ that has two role players | ||
- person who plays the role of a caller | ||
- and (another) person who plays the role of a callee | ||
- contract is also of _type relation_ that has two role players | ||
- company who plays the role of a provider | ||
- and person who plays the role of a customer | ||
|
||
**Entities** | ||
- company and person are of _type entity_ | ||
|
||
**Attributes** | ||
- first-name, last-name, phone-number, city, age, started-at and duration are of _type attribute_ | ||
|
||
![Schema relationship graph](schema-relationship-graph.png) | ||
|
||
## English-to-Graql | ||
|
||
See [English to Graql](./graql/English-to-Graql.md) | ||
|
||
## Graql-to-English | ||
|
||
See [Graql to English](./graql/Graql-to-English.md) | ||
|
||
### Graql query | ||
|
||
## Resources | ||
|
||
- [Workbase overview](https://dev.grakn.ai/docs/workbase/overview) | ||
- [Download Workbase](https://grakn.ai/download#workbase) | ||
- [Query pattern](https://dev.grakn.ai/docs/pattern/overview) | ||
- [Natural Language Processing for Fuzzy String Matching with Python](https://towardsdatascience.com/natural-language-processing-for-fuzzy-string-matching-with-python-6632b7824c49) | ||
- [Words of estimative probability](https://en.wikipedia.org/wiki/Words_of_estimative_probability) | ||
--- | ||
|
||
[back to README](../README.md) |
42 changes: 42 additions & 0 deletions
42
examples/data/databases/graph/grakn/graql/extract_keywords.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# The nltk import is bracketed by progress messages so the user knows the
# script is still working while the library loads.
print('Loading nltk libraries, please wait...')
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
print('Finished loading nltk libraries.')

# Fetch the English stop-word list once and hold it as a set, which is what
# the token filter further down tests membership against.
english_stop_words = stopwords.words('english')
stop_words = set(english_stop_words)
|
||
# English questions/commands to extract keywords from.  This is a list rather
# than a set so the queries are processed (and printed) in the same order on
# every run; a set of strings iterates in an order that changes between runs.
schema_queries = [
    'List the schema in this keyspace',
    'From 2018-09-10 onwards, which customers called the person with phone number +86 921 547 9004?',
    'Since September 10th, which customers called the person with phone number +86 921 547 9004?',
    'Get me the customers of company “Telecom” who called the target person with phone number +86 921 547 9004 from September 14th onwards.',
    'Get me the customers of company “Telecom” who called the target person with phone number +86 921 547 9004 from September 10th onwards.',
    'Who are the people aged under 20 who have received at least one phone call from a Cambridge customer aged over 50?',
    'Who are the people who have received a call from a London customer aged over 50 who has previously called someone aged under 20?',
    'Get me the phone number of people who have received a call from a customer aged over 50 after this customer (potential person) made a call to another customer aged under 20.',
    'Who are the common contacts of customers with phone numbers +7 171 898 0853 and +370 351 224 5176?',
    'Who are the customers who 1) have all called each other and 2) have all called person with phone number +48 894 777 5173 at least once?',
    'Get me the phone number of people who have received calls from both customer with phone number +7 171 898 0853 and customer with phone number +370 351 224 5176.',
    'How does the average call duration among customers aged under 20 compare those aged over 40?',
]

# Characters removed from every query before tokenising.  The hyphen/dash is
# deliberately excluded (presumably so that dates such as 2018-09-10 survive
# as a single token).  The backslash is written as an explicit `\\` escape;
# a bare `\,` is an invalid escape sequence that newer Pythons warn about.
punctuations = '''!()[]{};:'"\\,<>./?@#$%^&*_~+“”'''

# Build the deletion table once; str.translate then drops every listed
# character from a query in a single pass.
_punctuation_table = str.maketrans('', '', punctuations)
schema_queries = [each_query.translate(_punctuation_table) for each_query in schema_queries]
|
||
def remove_stop_words(query_tokens, stop_words):
    """Return the tokens of one query that are worth keeping as keywords.

    A token is dropped when it is a stop word or when it is purely numeric.
    The stop-word test is done on the lower-cased token: the stop-word set is
    expected to be lower-case (as NLTK's English list is), so a
    case-sensitive test would let sentence-initial words such as "Who" or
    "From" slip through.  Kept tokens retain their original casing.

    Args:
        query_tokens: iterable of string tokens from a single query.
        stop_words: collection of lower-case stop words to filter out.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    return [
        query_token
        for query_token in query_tokens
        if query_token.lower() not in stop_words and not query_token.isnumeric()
    ]


if __name__ == '__main__':
    print(f'~~~ Tokenising schema queries (queries: {len(schema_queries)})')
    for each_query in schema_queries:
        query_without_stop_words = remove_stop_words(word_tokenize(each_query), stop_words)
        print(f'{each_query}: {query_without_stop_words}')
88 changes: 88 additions & 0 deletions
88
examples/data/databases/graph/grakn/graql/pattern-matching-analysis.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
from fuzzywuzzy import fuzz | ||
import pandas as pd | ||
|
||
# Maps each reference question/command to the list of paraphrases that are
# fuzzy-matched against it further down in this script.
schema_queries = {
    # --- schema listing ---
    'List the schema in this keyspace': [
        'Show me the schema',
        'List the schema',
        'List schema keyspace',
        'What is the schema here',
        'What is the schema',
        'What is the schema here?',
        'What is the schema?',
        'Schema?',
        'Schema please',
    ],

    # --- calls to a given phone number since a date ---
    'From 2018-09-10 onwards, which customers called the person with phone number +86 921 547 9004?': [
        'From a date onwards which customers called another person with phone number',
    ],
    'Since September 10th, which customers called the person with phone number +86 921 547 9004?': [
        'Since a date which customers called a person with phone number',
    ],
    'Get me the customers of company “Telecom” who called the target person with phone number +86 921 547 9004 from September 14th onwards.': [
        'Get customers of company Telecom who called target person with phone number from a date onwards',
    ],
    'Get me the customers of company “Telecom” who called the target person with phone number +86 921 547 9004 from September 10th onwards.': [
        'Get customers of company Telecom who called target person with phone number from a date onwards',
    ],

    # --- age / place based call patterns ---
    'Who are the people aged under 20 who have received at least one phone call from a Cambridge customer aged over 50?': [
        'People aged under certain age received at least one phone call from a place customer from customer aged over certain age',
    ],
    'Who are the people who have received a call from a London customer aged over 50 who has previously called someone aged under 20?': [
        'Who people received call from customer of certain place aged over certain age also called by someone aged under certain age',
    ],
    'Get me the phone number of people who have received a call from a customer aged over 50 after this customer (potential person) made a call to another customer aged under 20.': [
        'Get phone number of people received calls from customer aged customer potential person who made calls to another customer aged under certain age',
    ],

    # --- common contacts / mutual callers ---
    'Who are the common contacts of customers with phone numbers +7 171 898 0853 and +370 351 224 5176?': [
        'Who are common contacts of customers with certain phone numbers',
    ],
    'Who are the customers who 1) have all called each other and 2) have all called person with phone number +48 894 777 5173 at least once?': [
        'Who are customers called other persons phone number least',
    ],
    'Get me the phone number of people who have received calls from both customer with phone number +7 171 898 0853 and customer with phone number +370 351 224 5176.': [
        'Get phone number of people received calls from customer of certain age',
    ],

    # --- aggregates ---
    'How does the average call duration among customers aged under 20 compare those aged over 40?': [
        'How average call duration among customers aged compare aged',
    ],
}
|
||
# Column names of the results table, in the order each result row is built.
RESULT_COLUMNS = ['each_query', 'each_similarity', 'ratio', 'partial_ratio', 'token_sort_ratio']


def compare_queries(queries):
    """Score every paraphrase against the question/command it belongs to.

    Args:
        queries: mapping of a reference question to a list of paraphrases.

    Returns:
        A list of rows ``[query, paraphrase, ratio, partial_ratio,
        token_sort_ratio]`` holding the three fuzzywuzzy scores per pair.
        Each reference question is printed as it is processed.
    """
    comparison_results = []
    for each_query, similar_phrases in queries.items():
        print(f'Question/command: {each_query}')
        for each_similarity in similar_phrases:
            comparison_results.append([
                each_query,
                each_similarity,
                fuzz.ratio(each_query, each_similarity),
                fuzz.partial_ratio(each_query, each_similarity),
                fuzz.token_sort_ratio(each_query, each_similarity),
            ])
    return comparison_results


def build_results_frame(comparison_results):
    """Wrap the raw result rows in a DataFrame with named columns."""
    return pd.DataFrame(comparison_results, columns=RESULT_COLUMNS)


def transposed_by(results, column):
    """Return `results` sorted descending by `column`, without the
    'each_query' column, transposed so each paraphrase becomes a column."""
    ranked = results.sort_values(by=column, ascending=False)
    return ranked.drop('each_query', axis=1).transpose()


def publish_results(comparison_results):
    """Print the results table, its summary statistics and one ranking per score."""
    results = build_results_frame(comparison_results)
    print(results)
    print()
    print(results.describe())
    print()
    ratio_results = results.sort_values(by='ratio', ascending=False)
    print(ratio_results)
    print()
    print(ratio_results.drop('each_query', axis=1).transpose())
    # The remaining rankings re-sort the ratio-ordered table, as before.  A
    # blank line now separates every table (one separator used to be missing).
    for score_column in ('partial_ratio', 'token_sort_ratio'):
        print()
        print(transposed_by(ratio_results, score_column))


if __name__ == '__main__':
    print('Iterating through schema queries')
    comparison_results = compare_queries(schema_queries)

    print('Publishing results')
    publish_results(comparison_results)
Oops, something went wrong.