Skip to content

Commit

Permalink
Added spell checker
Browse files: browse the repository at this point in the history
  • Loading branch information
DeevsDeevs committed Oct 23, 2022
1 parent 352f55f commit f47f954
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ streamlit-app/rec_data.parquet
.DS_Store
streamlit-app/LABSE-5307-epoch-5-lem.feather
streamlit-app/BertCLS_epoch_5-lem.pth
streamlit-app/model_ru_en.bin
streamlit-app/model_ru_en.bin.spell
19 changes: 15 additions & 4 deletions streamlit-app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pymorphy2
import nltk
import string
import jamspell

nltk.download('punkt')

Expand Down Expand Up @@ -55,7 +56,10 @@ def load_all():
punctuation = set(string.punctuation)
morph = pymorphy2.MorphAnalyzer()

return data, names_counts, tokenizer, bert_cls, embeddings, index, kpgz_dict, rec_dict, punctuation, morph
jsp = jamspell.TSpellCorrector()
jsp.LoadLangModel('model_ru_en.bin')

return data, names_counts, tokenizer, bert_cls, embeddings, index, kpgz_dict, rec_dict, punctuation, morph, jsp

def get_fig_price(series: pd.Series):
try:
Expand Down Expand Up @@ -93,7 +97,7 @@ def main():
st.markdown("""
# Tender Search Engine
""")
data, names_counts, tokenizer, bert_cls, embeddings, index, kpgz_dict, rec_dict, punctuation, morph = load_all()
data, names_counts, tokenizer, bert_cls, embeddings, index, kpgz_dict, rec_dict, punctuation, morph, jsp = load_all()
search_request = st.text_input('Введите слова для поиска:').lower().strip()
search_expander = st.expander('Дополнительные настройки')
additional_info = search_expander.text_input('Ключевые характеристики').lower().strip()
Expand All @@ -103,6 +107,13 @@ def main():
if max_price < min_price:
min_price, max_price = max_price, min_price
if search_request:
old_search_request = search_request
search_request = jsp.FixFragment(search_request)
if old_search_request != search_request:
st.markdown(f"""
Автоисправление <br/>
{old_search_request} было заменено на {search_request}
""", unsafe_allow_html=True)
cnt = 0
pos_pop_requests = []
for val in (names_counts.index):
Expand All @@ -113,8 +124,8 @@ def main():
if cnt == 3:
break
st.markdown(f"""
Автодополнение: <br/> {r"<br/>".join(pos_pop_requests)}
""", unsafe_allow_html=True)
Автодополнение: <br/> {r"<br/>".join(pos_pop_requests)}
""", unsafe_allow_html=True)
search_request = utils.clear_text(search_request, punctuation, morph)
additional_info = utils.clear_text(additional_info, punctuation, morph)
search_results = utils.get_search_results(search_request=search_request, additional_info=additional_info, data=data,
Expand Down
1 change: 1 addition & 0 deletions streamlit-app/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ humanfriendly==10.0
idna==3.4
importlib-metadata==4.13.0
importlib-resources==5.9.0
jamspell==0.0.12
Jinja2==3.1.2
joblib==1.2.0
jsonschema==4.16.0
Expand Down

0 comments on commit f47f954

Please sign in to comment.