Skip to content

Commit

Permalink
NLP Profiler: add a new summarise column for the 3 high-level features
Browse files Browse the repository at this point in the history
  • Loading branch information
neomatrix369 committed Jul 15, 2020
1 parent a34f9c8 commit 04e5793
Showing 1 changed file with 50 additions and 8 deletions.
58 changes: 50 additions & 8 deletions examples/better-nlp/library/org/neomatrix369/nlp_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,16 @@ def apply_text_profiling(dataframe, text_column, params={}):
if high_level_analysis:
new_dataframe['sentiment_polarity_score'] = new_dataframe[text_column].apply(sentiment_polarity_score)
new_dataframe['sentiment_polarity'] = new_dataframe['sentiment_polarity_score'].apply(sentiment_polarity)
new_dataframe['sentiment_polarity_summarised'] = new_dataframe['sentiment_polarity'].apply(sentiment_polarity_summarised)

new_dataframe['sentiment_subjectivity_score'] = new_dataframe[text_column].apply(sentiment_subjectivity_score)
new_dataframe['sentiment_subjectivity'] = new_dataframe['sentiment_subjectivity_score'].apply(sentiment_subjectivity)
new_dataframe['spellcheck_score'] = new_dataframe[text_column].apply(spellcheck_score)
new_dataframe['spelling_quality'] = new_dataframe['spellcheck_score'].apply(spelling_quality)
new_dataframe['sentiment_subjectivity_summarised'] = new_dataframe['sentiment_subjectivity'].apply(sentiment_subjectivity_summarised)

new_dataframe['spelling_quality_score'] = new_dataframe[text_column].apply(spelling_quality_score)
new_dataframe['spelling_quality'] = new_dataframe['spelling_quality_score'].apply(spelling_quality)
new_dataframe['spelling_quality_summarised'] = new_dataframe['spelling_quality'].apply(spelling_quality_summarised)

if do_grammar_check:
new_dataframe['grammar_check_score'] = new_dataframe[text_column].apply(grammar_check_score)
new_dataframe['grammar_check'] = new_dataframe['grammar_check_score'].apply(grammar_quality)
Expand All @@ -88,7 +94,18 @@ def apply_text_profiling(dataframe, text_column, params={}):
return new_dataframe

### Sentiment analysis

def sentiment_polarity_summarised(sentiment_polarity):
    """Collapse a fine-grained polarity label into a coarse one.

    Any label containing the word "negative" (case-insensitive) becomes
    'Negative', any label containing "positive" becomes 'Positive', and
    every other value is returned unchanged.

    Non-string values (for example None or NaN cells in a dataframe
    column) are returned as-is instead of raising AttributeError.
    """
    if not isinstance(sentiment_polarity, str):
        return sentiment_polarity

    # Lower-case once; both membership checks share the result.
    label = sentiment_polarity.lower()
    if 'negative' in label:
        return 'Negative'
    if 'positive' in label:
        return 'Positive'

    return sentiment_polarity


# Docs: https://textblob.readthedocs.io/en/dev/quickstart.html
### See https://en.wikipedia.org/wiki/Words_of_estimative_probability
sentiment_polarity_words_of_probability_estimation = [
["Very positive", 99, 100], # Certain: 100%: Give or take 0%
### The General Area of Possibility
Expand Down Expand Up @@ -118,8 +135,25 @@ def sentiment_polarity_score(text):

return TextBlob(text).sentiment.polarity

### See https://en.wikipedia.org/wiki/Words_of_estimative_probability


# NOTE(review): an identical definition of this function appears earlier in
# this file; confirm whether one of the two copies can be removed.
def sentiment_polarity_summarised(sentiment_polarity):
    """Reduce a detailed polarity label to 'Negative' or 'Positive'.

    The match is a case-insensitive substring test: "negative" is checked
    first, then "positive". A value matching neither is returned unchanged,
    as is any non-string value (which would otherwise raise AttributeError
    on ``.lower()``).
    """
    if not isinstance(sentiment_polarity, str):
        return sentiment_polarity

    lowered = sentiment_polarity.lower()  # computed once for both checks
    if 'negative' in lowered:
        return 'Negative'
    if 'positive' in lowered:
        return 'Positive'

    return sentiment_polarity

### Sentiment Subjectivity

def sentiment_subjectivity_summarised(sentiment_subjectivity):
    """Collapse a fine-grained subjectivity label into a coarse one.

    Any label containing "subjective" (case-insensitive) becomes
    'Subjective', any label containing "objective" becomes 'Objective',
    and every other value is returned unchanged.

    Non-string values are returned as-is instead of raising
    AttributeError.
    """
    if not isinstance(sentiment_subjectivity, str):
        return sentiment_subjectivity

    label = sentiment_subjectivity.lower()
    # NOTE(review): "subjective" is tested first, so a label containing both
    # words resolves to 'Subjective' -- confirm that is the intended result
    # for any mixed band in the label table.
    if 'subjective' in label:
        return 'Subjective'
    if 'objective' in label:
        return 'Objective'

    return sentiment_subjectivity

subjectivity_words_of_probability_estimation = [
["Very subjective", 99, 100], # Certain: 100%: Give or take 0%
### The General Area of Possibility
Expand Down Expand Up @@ -149,7 +183,7 @@ def sentiment_subjectivity_score(text):

### Spell check

spellcheck_words_of_probability_estimation = [
spelling_quality_words_of_probability_estimation = [
["Very good", 99, 100], # Certain: 100%: Give or take 0%
### The General Area of Possibility
["Quite good", 87, 99], # Almost Certain: 93%: Give or take 6%
Expand All @@ -160,7 +194,15 @@ def sentiment_subjectivity_score(text):
["Very bad", 0, 2] # Impossible 0%: Give or take 0%
]

def spellcheck_score(text):
def spelling_quality_summarised(spelling_quality):
    """Collapse a fine-grained spelling-quality label into 'Good' or 'Bad'.

    Any label containing "good" (case-insensitive) becomes 'Good', any
    label containing "bad" becomes 'Bad', and every other value is
    returned unchanged.

    Non-string values are returned as-is instead of raising
    AttributeError.
    """
    if not isinstance(spelling_quality, str):
        return spelling_quality

    # Lower-case once; both membership checks share the result.
    label = spelling_quality.lower()
    if 'good' in label:
        return 'Good'
    if 'bad' in label:
        return 'Bad'

    return spelling_quality

def spelling_quality_score(text):
if len(text.strip()) == 0:
return NOT_APPLICABLE

Expand All @@ -182,7 +224,7 @@ def spelling_quality(score):
return NOT_APPLICABLE

score = float(score) * 100
for each_slab in spellcheck_words_of_probability_estimation:
for each_slab in spelling_quality_words_of_probability_estimation:
if (score >= each_slab[1]) and (score <= each_slab[2]):
return each_slab[0]

Expand Down Expand Up @@ -307,4 +349,4 @@ def count_characters_excluding_spaces(text):
return len(text) - count_spaces(text)

def count_sentences(text):
    """Return the number of sentences that gather_sentences() finds in *text*."""
    return len(gather_sentences(text))

0 comments on commit 04e5793

Please sign in to comment.