forked from Uberspace/lab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspelling_tools.py
53 lines (40 loc) · 1.6 KB
/
spelling_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
from collections import Counter
from os import listdir
from os.path import isfile, join
# Sorts the word list in source/dict.txt case-insensitively and removes duplicates.
# Writes the result to source/sorted_dict.txt.
def sort_dict(source_path='source/dict.txt', target_path='source/sorted_dict.txt'):
    """Write a sorted, duplicate-free copy of a word list.

    The input is expected to hold one word per line. Surrounding whitespace
    is stripped from every line, blank lines are dropped, exact duplicates
    are removed, and the remaining words are written one per line in
    case-insensitive order.

    Args:
        source_path: Word list to read.
        target_path: File that receives the sorted word list; it is
            overwritten if it already exists.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(source_path, 'r', encoding='utf-8') as source:
        # strip() rather than slicing off the last character: the final line
        # of a file is not guaranteed to end with a newline, so a blind
        # [:-1] would truncate the last word.
        words = {line.strip() for line in source}
    words.discard('')  # blank lines are not dictionary entries

    # A set removes every duplicate, not only adjacent ones. The word itself
    # is used as a tie-breaker so that entries differing only in case
    # ("Apple" / "apple") always come out in the same order.
    ordered = sorted(words, key=lambda word: (word.lower(), word))

    with open(target_path, 'w', encoding='utf-8') as target:
        for word in ordered:
            print(word, file=target)
# Reads the results of the spell check (make spelling) from build/spelling/, counts how often each
# misspelled word occurs, prints every word with its count to the shell (most frequent first), and
# writes just the words found in the analysis to new_words.txt.
def read_terms_from_errors(build_dir='build/spelling/', output_path='new_words.txt'):
    """Tally the misspelled words found in the spell-check report files.

    Every regular file directly inside *build_dir* is read line by line. The
    word of a line is the text between the first "(" and the following ")";
    lines that contain no such parenthesised word are skipped.

    The distinct words are written to *output_path*, one per line, most
    frequent first. Each word is also printed to stdout together with its
    count, followed by a one-line summary.

    Args:
        build_dir: Directory containing the report files.
        output_path: File that receives the distinct words; it is
            overwritten if it already exists.

    Raises:
        OSError: If *build_dir* cannot be listed or a file cannot be opened.
    """
    word_counts = Counter()

    # os.listdir() yields entries in arbitrary order; sorting them keeps the
    # relative order of equally frequent words reproducible between runs.
    for name in sorted(os.listdir(build_dir)):
        path = join(build_dir, name)
        if not isfile(path):
            continue
        # Explicit UTF-8 so non-ASCII words do not depend on the locale.
        with open(path, 'r', encoding='utf-8') as report:
            for line in report:
                _, opening, rest = line.partition('(')
                word, closing, _ = rest.partition(')')
                # Skip blank or otherwise malformed lines instead of
                # crashing on them.
                if not opening or not closing:
                    continue
                word_counts[word] += 1

    total_words = sum(word_counts.values())
    with open(output_path, 'w', encoding='utf-8') as out:
        for word, count in word_counts.most_common():
            print(word, file=out)
            print(word, count)
    print('\nFound %d unique words in a total of %d misspelled words' % (len(word_counts), total_words))
# Script entry point: first tally the misspelled words from the spell-check
# reports, then write the sorted dictionary.
# NOTE(review): these calls are not behind an `if __name__ == "__main__":`
# guard, so they also run whenever this module is imported.
read_terms_from_errors()
sort_dict()