Add files via upload

kyauy · Jul 12, 2024 · ae03dcd · ae03dcd
1 parent 52d408f
commit ae03dcd
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 7 deletions.
diff --git a/clinfly_app_cli.py b/clinfly_app_cli.py
@@ -18,6 +18,7 @@
     convert_df,
     convert_json,
     convert_list_phenogenius,
+    convert_pdf_to_text,
 )
 from utilities.extract_hpo import add_biometrics, extract_hpo
 from utilities.get_model import get_models, get_nlp_marian
@@ -258,13 +259,25 @@ def main():
     Last_name: str
     First_name: str
     Report: str
-    with open(file_name, "r") as fichier:
-        for ligne in fichier:
-            elements = ligne.strip().split("\t")
-            Report_id, Last_name, First_name, Report = elements
+
+    if os.path.isfile(args.file):
+        with open(file_name, 'r') as fichier:
+          for ligne in fichier:
+            elements = ligne.strip().split('\t')
+            Report_id, Last_name, First_name, text_or_link = elements
             print("Report_id:", Report_id)
             print("Last_name:", Last_name)
             print("First_name:", First_name)
-            print("Report:", Report)
+            if os.path.exists(text_or_link):
+                if text_or_link.lower().endswith('.pdf'):
+                    print(f"Processing PDF file: {text_or_link}")
+                    Report = convert_pdf_to_text(text_or_link)
+                else:
+                    print(f"Unsupported file type. Please provide a link to a PDF files.")
+            else:
+                Report = text_or_link
+                print("Report:", Report)
             main()
             print()
+    else:
+        print("Input is not a file. Please provide a valid input.")
diff --git a/clinfly_app_st.py b/clinfly_app_st.py
@@ -2,7 +2,7 @@
 from utilities.web_utilities import display_page_title, display_sidebar, stack_checker
 from utilities.anonymize import get_cities_list,get_abbreviation_dict_correction, reformat_to_report, anonymize_analyzer, anonymize_engine, add_space_to_comma_endpoint,get_list_not_deidentify, config_deidentify
 from utilities.translate import get_translation_dict_correction, translate_report
-from utilities.convert import convert_df_no_header, convert_df, convert_json, convert_list_phenogenius
+from utilities.convert import convert_df_no_header, convert_df, convert_json, convert_list_phenogenius, convert_pdf_to_text
 from utilities.extract_hpo import add_biometrics, extract_hpo
 from utilities.get_model import get_models, get_nlp_marian
 import streamlit as st
@@ -60,14 +60,19 @@
         with c2:
             prenom = st.text_input("First name", "John", key="surname")
         courrier = st.text_area(
-            "Paste medical letter",
+            "You can paste the medical letter",
             "Chers collegues, j'ai recu en consultation M. John Doe né le 14/07/1789 pour une fièvre récurrente et une maladie de Crohn. Il a pour antécédent des epistaxis recurrents. Parmi les antécédants familiaux, sa maman a présenté un cancer des ovaires. Il mesure 1.90 m (+2.5  DS),  pèse 93 kg (+3.6 DS) et son PC est à 57 cm (+0DS) ...",
             height=200,
             key="letter",
         )
+        uploaded_file = st.file_uploader("Or upload it (only pdf files are supported)")
 
         submit_button = st.form_submit_button(label="Submit report")
 
+    if uploaded_file is not None:
+        # To read file as bytes:
+        bytes_data = uploaded_file.getvalue()
+        courrier = convert_pdf_to_text(bytes_data)
 
     if submit_button or st.session_state.load_report:
         st.session_state.load_report = True

diff --git a/pyproject.toml b/pyproject.toml
@@ -20,6 +20,8 @@ streamlit = "^1.20.0"
 memory-profiler = "^0.61.0"
 Unidecode = "^1.3.6"
 pydantic = "1.10.13"
+pdf2image = "^1.17.0"
+pytesseract = "^0.3.10"
 
 [tool.poetry.dev-dependencies]
 pytest = "^5.2"