Skip to content

Commit

Permalink
Fix im2latex link (yandexdataschool#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
dniku committed Jun 6, 2019
1 parent 06d3db0 commit 29f7d85
Showing 1 changed file with 33 additions and 113 deletions.
146 changes: 33 additions & 113 deletions week07_seq2seq/practice_torch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"\n",
"Encoder-decoder architectures are about converting anything to anything, including\n",
" * Machine translation and spoken dialogue systems\n",
" * [Image captioning](http://mscoco.org/dataset/#captions-challenge2015) and [image2latex](https://openai.com/requests-for-research/#im2latex) (convolutional encoder, recurrent decoder)\n",
" * [Image captioning](http://mscoco.org/dataset/#captions-challenge2015) and [image2latex](https://htmlpreview.github.io/?https://github.com/openai/requests-for-research/blob/master/_requests_for_research/im2latex.html) (convolutional encoder, recurrent decoder)\n",
" * Generating [images by captions](https://arxiv.org/abs/1511.02793) (recurrent encoder, convolutional decoder)\n",
" * Grapheme2phoneme - convert words to transcripts\n",
" \n",
Expand All @@ -47,9 +47,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# If True, only translates phrases shorter than 20 characters (way easier).\n",
Expand Down Expand Up @@ -84,9 +82,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
Expand Down Expand Up @@ -117,9 +113,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# get all unique lines in source language\n",
Expand All @@ -141,10 +135,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
Expand All @@ -164,9 +155,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from voc import Vocab\n",
Expand All @@ -177,9 +166,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# Here's how you cast lines into ids and backwards.\n",
Expand All @@ -205,9 +192,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
Expand Down Expand Up @@ -245,9 +230,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
Expand All @@ -258,9 +241,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from basic_model_torch import BasicTranslationModel\n",
Expand All @@ -271,10 +252,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"metadata": {},
"outputs": [],
"source": [
"# Play around with symbolic_translate and symbolic_score\n",
Expand All @@ -291,9 +269,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# score logp(out | inp) with untrained input\n",
Expand All @@ -307,9 +283,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"def translate(lines, max_len=MAX_OUTPUT_LENGTH):\n",
Expand All @@ -332,9 +306,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"print(\"Sample inputs:\", all_words[:3])\n",
Expand Down Expand Up @@ -368,9 +340,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import editdistance # !pip install editdistance\n",
Expand Down Expand Up @@ -401,9 +371,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# should be around 5-50 and decrease rapidly after training :)\n",
Expand All @@ -422,9 +390,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import random\n",
Expand All @@ -448,9 +414,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"bx, by = sample_batch(train_words, word_to_translation, batch_size=3)\n",
Expand All @@ -463,9 +427,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from basic_model_torch import infer_length, infer_mask, to_one_hot\n",
Expand Down Expand Up @@ -496,9 +458,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# test it\n",
Expand All @@ -525,9 +485,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import clear_output\n",
Expand All @@ -542,10 +500,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"\n",
Expand Down Expand Up @@ -597,9 +552,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"for word in train_words[:10]:\n",
Expand All @@ -609,9 +562,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"test_scores = []\n",
Expand Down Expand Up @@ -649,9 +600,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"def compute_reward(input_sequence, translations):\n",
Expand All @@ -666,9 +615,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"def scst_objective_on_batch(input_sequence, max_len=MAX_OUTPUT_LENGTH):\n",
Expand Down Expand Up @@ -725,9 +672,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"entropy_history = [np.nan] * len(loss_history)\n",
Expand All @@ -737,10 +682,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"for i in trange(100000):\n",
Expand Down Expand Up @@ -802,9 +744,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"for word in train_words[:10]:\n",
Expand All @@ -814,10 +754,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"metadata": {},
"outputs": [],
"source": [
"test_scores = []\n",
Expand Down Expand Up @@ -905,19 +842,15 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"assert not EASY_MODE, \"make sure you set EASY_MODE = False at the top of the notebook.\""
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"metadata": {},
"source": [
"`[your report/log here or anywhere you please]`"
]
Expand All @@ -934,22 +867,9 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 29f7d85

Please sign in to comment.