Add compatibility between new output type and demo.

Adibvafa · Jul 28, 2024 · 45a23a5 · 45a23a5
1 parent 3526c36
commit 45a23a5
Showing 1 changed file with 12 additions and 298 deletions.
diff --git a/CodonTransformerDemo.ipynb b/CodonTransformerDemo.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -53,98 +53,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e5867c02ed854a2eb07a20cd25d3ecd7",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(HTML(value='<b style=\"font-size:20px;\">Enter Protein Sequence:</b><div style=\"height:18px;\"></d…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "10342006b7274512be6322777316e93c",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HTML(value='\\n        <style>\\n            .widget-textarea > textarea {\\n                font-size: 12px;\\n  …"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "397d496d9b494c80916a3ba933bec09b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(HTML(value='<b style=\"font-size:20px;\">Select Organism:</b><div style=\"height:10px;\"></div>'), …"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "    <style>\n",
-       "        .widget-dropdown > select {\n",
-       "            font-size: 16px;\n",
-       "            font-weight: normal;\n",
-       "            background-color: #f0f0f0;\n",
-       "            border-radius: 5px;\n",
-       "            padding: 5px;\n",
-       "        }\n",
-       "        .widget-label {\n",
-       "            font-size: 18px;\n",
-       "            font-weight: bold;\n",
-       "        }\n",
-       "        .custom-container {\n",
-       "            display: flex;\n",
-       "            flex-direction: column;\n",
-       "            align-items: flex-start;\n",
-       "        }\n",
-       "        .widget-dropdown option[value^=\"\"] {\n",
-       "            font-family: sans-serif;\n",
-       "            font-weight: bold;\n",
-       "            font-size: 18px;\n",
-       "            padding: 510px;\n",
-       "        }\n",
-       "        .widget-dropdown option[value*=\"Selected Organisms\"],\n",
-       "        .widget-dropdown option[value*=\"All Organisms\"] {\n",
-       "            text-align: center;\n",
-       "            font-family: Arial, sans-serif;\n",
-       "            font-weight: bold;\n",
-       "            font-size: 20px;\n",
-       "            color: #6900A1;\n",
-       "            background-color: #00D8A1;\n",
-       "        }\n",
-       "    </style>\n",
-       "    "
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Sample: MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGG, Homo sapiens\n",
     "user = UserContainer()\n",
@@ -154,35 +65,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-----------------------------\n",
-      "|          Organism         |\n",
-      "-----------------------------\n",
-      "Homo sapiens\n",
-      "\n",
-      "-----------------------------\n",
-      "|       Input Protein       |\n",
-      "-----------------------------\n",
-      "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGG\n",
-      "\n",
-      "-----------------------------\n",
-      "|      Processed Input      |\n",
-      "-----------------------------\n",
-      "M_UNK A_UNK L_UNK W_UNK M_UNK R_UNK L_UNK L_UNK P_UNK L_UNK L_UNK A_UNK L_UNK L_UNK A_UNK L_UNK W_UNK G_UNK P_UNK D_UNK P_UNK A_UNK A_UNK A_UNK F_UNK V_UNK N_UNK Q_UNK H_UNK L_UNK C_UNK G_UNK S_UNK H_UNK L_UNK V_UNK E_UNK A_UNK L_UNK Y_UNK L_UNK V_UNK C_UNK G_UNK E_UNK R_UNK G_UNK F_UNK F_UNK Y_UNK T_UNK P_UNK K_UNK T_UNK R_UNK R_UNK E_UNK A_UNK E_UNK D_UNK L_UNK Q_UNK V_UNK G_UNK Q_UNK V_UNK E_UNK L_UNK G_UNK G_UNK __UNK\n",
-      "\n",
-      "-----------------------------\n",
-      "|       Predicted DNA       |\n",
-      "-----------------------------\n",
-      "ATGGCCCTGTGGATGAGGCTGCTGCCCCTGCTGGCCCTGCTGGCCCTGTGGGGGCCTGACCCAGCTGCCGCCTTTGTGAACCAGCACCTGTGTGGCAGCCACCTGGTGGAGGCCCTGTACCTGGTGTGTGGGGAGAGAGGCTTCTTCTACACACCCAAGACCAGAAGAGAGGCCGAGGACCTGCAGGTGGGCCAGGTGGAGCTGGGAGGCTGA\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "output = predict_dna_sequence(\n",
     "    protein=user.protein,\n",
@@ -212,91 +97,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>protein_sequence</th>\n",
-       "      <th>organism</th>\n",
-       "      <th>predicted_dna</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>MSEKYIVTWDMLQIHARKLASRLMPSEQWKGIIAVSRGGLVPGALL...</td>\n",
-       "      <td>Escherichia coli general</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>MKNIIRTPETHPLTWRLRDDKQPVWLDEYRSKNGYEGARKALTGLS...</td>\n",
-       "      <td>Escherichia coli general</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>MDALQIAEDTLQTLVPHCPVPSGPRRIFLDANVKESYCPLVPHTMY...</td>\n",
-       "      <td>Homo sapiens</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>MAFANFRRILRLSTFEKRKSREYEHVRRDLDPNEVWEIVGELGDGA...</td>\n",
-       "      <td>Homo sapiens</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>MTEKDAGGFNMSTFMNRKFQEPIQQIKTFSWMGFSWTCRKRRKHYQ...</td>\n",
-       "      <td>Arabidopsis thaliana</td>\n",
-       "      <td>None</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                    protein_sequence  \\\n",
-       "0  MSEKYIVTWDMLQIHARKLASRLMPSEQWKGIIAVSRGGLVPGALL...   \n",
-       "1  MKNIIRTPETHPLTWRLRDDKQPVWLDEYRSKNGYEGARKALTGLS...   \n",
-       "2  MDALQIAEDTLQTLVPHCPVPSGPRRIFLDANVKESYCPLVPHTMY...   \n",
-       "3  MAFANFRRILRLSTFEKRKSREYEHVRRDLDPNEVWEIVGELGDGA...   \n",
-       "4  MTEKDAGGFNMSTFMNRKFQEPIQQIKTFSWMGFSWTCRKRRKHYQ...   \n",
-       "\n",
-       "                   organism predicted_dna  \n",
-       "0  Escherichia coli general          None  \n",
-       "1  Escherichia coli general          None  \n",
-       "2              Homo sapiens          None  \n",
-       "3              Homo sapiens          None  \n",
-       "4      Arabidopsis thaliana          None  "
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Update with the actual path to your dataset\n",
     "dataset_path = \"demo/sample_dataset.csv\"\n",
@@ -309,98 +112,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "CodonTransformer Predicting: 100%|██████████| 5/5 [00:00<00:00, 17.00 Sequences/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>protein_sequence</th>\n",
-       "      <th>organism</th>\n",
-       "      <th>predicted_dna</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>MSEKYIVTWDMLQIHARKLASRLMPSEQWKGIIAVSRGGLVPGALL...</td>\n",
-       "      <td>Escherichia coli general</td>\n",
-       "      <td>DNASequencePrediction(organism='Escherichia co...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>MKNIIRTPETHPLTWRLRDDKQPVWLDEYRSKNGYEGARKALTGLS...</td>\n",
-       "      <td>Escherichia coli general</td>\n",
-       "      <td>DNASequencePrediction(organism='Escherichia co...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>MDALQIAEDTLQTLVPHCPVPSGPRRIFLDANVKESYCPLVPHTMY...</td>\n",
-       "      <td>Homo sapiens</td>\n",
-       "      <td>DNASequencePrediction(organism='Homo sapiens',...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>MAFANFRRILRLSTFEKRKSREYEHVRRDLDPNEVWEIVGELGDGA...</td>\n",
-       "      <td>Homo sapiens</td>\n",
-       "      <td>DNASequencePrediction(organism='Homo sapiens',...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>MTEKDAGGFNMSTFMNRKFQEPIQQIKTFSWMGFSWTCRKRRKHYQ...</td>\n",
-       "      <td>Arabidopsis thaliana</td>\n",
-       "      <td>DNASequencePrediction(organism='Arabidopsis th...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                    protein_sequence  \\\n",
-       "0  MSEKYIVTWDMLQIHARKLASRLMPSEQWKGIIAVSRGGLVPGALL...   \n",
-       "1  MKNIIRTPETHPLTWRLRDDKQPVWLDEYRSKNGYEGARKALTGLS...   \n",
-       "2  MDALQIAEDTLQTLVPHCPVPSGPRRIFLDANVKESYCPLVPHTMY...   \n",
-       "3  MAFANFRRILRLSTFEKRKSREYEHVRRDLDPNEVWEIVGELGDGA...   \n",
-       "4  MTEKDAGGFNMSTFMNRKFQEPIQQIKTFSWMGFSWTCRKRRKHYQ...   \n",
-       "\n",
-       "                   organism                                      predicted_dna  \n",
-       "0  Escherichia coli general  DNASequencePrediction(organism='Escherichia co...  \n",
-       "1  Escherichia coli general  DNASequencePrediction(organism='Escherichia co...  \n",
-       "2              Homo sapiens  DNASequencePrediction(organism='Homo sapiens',...  \n",
-       "3              Homo sapiens  DNASequencePrediction(organism='Homo sapiens',...  \n",
-       "4      Arabidopsis thaliana  DNASequencePrediction(organism='Arabidopsis th...  "
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "for index, data in tqdm(\n",
     "    dataset.iterrows(),\n",
@@ -409,14 +123,14 @@
     "    total=dataset.shape[0],\n",
     "):\n",
     "\n",
-    "    predicted_dna = predict_dna_sequence(\n",
+    "    outputs = predict_dna_sequence(\n",
     "        protein=data[\"protein_sequence\"],\n",
     "        organism=data[\"organism\"],\n",
     "        device=DEVICE,\n",
     "        tokenizer_object=tokenizer,\n",
     "        model_object=model,\n",
     "    )\n",
-    "    dataset.loc[index, \"predicted_dna\"] = predicted_dna\n",
+    "    dataset.loc[index, \"predicted_dna\"] = outputs.predicted_dna  # keep only the DNA field of this row's result\n",
     "\n",
     "dataset.to_csv(output_path)\n",
     "dataset.head()"