Merge pull request huggingface#30 from lvwerra/fix-distilbert-title
fix title for distilbert example
lvwerra authored Jan 1, 2022
2 parents caed471 + 11a96e0 commit 5410be6
Showing 6 changed files with 27 additions and 32 deletions.
12 changes: 3 additions & 9 deletions docs/03-distilbert-imdb-training.html
@@ -1,12 +1,12 @@
 ---
-title: Title
+title: Train IMDb Classifier
 keywords: fastai
 sidebar: home_sidebar
-summary: "summary"
-description: "summary"
+summary: "Train a IMDb classifier with DistilBERT."
+description: "Train a IMDb classifier with DistilBERT."
 ---
<!--
@@ -27,12 +27,6 @@
 </div>
 {% endraw %}

-<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
-<div class="text_cell_render border-box-sizing rendered_html">
-<h1 id="Train-IMDb-Classifier">Train IMDb Classifier<a class="anchor-link" href="#Train-IMDb-Classifier"> </a></h1>
-</div>
-</div>
-</div>
 {% raw %}

 <div class="cell border-box-sizing code_cell rendered">
2 changes: 1 addition & 1 deletion docs/_data/sidebars/home_sidebar.yml
@@ -19,7 +19,7 @@ entries:
   title: PPO for transformer models
   url: /02-ppo
 - output: web,pdf
-  title: Title
+  title: Train IMDb Classifier
   url: /03-distilbert-imdb-training
 - output: web,pdf
   title: Tune GPT2 to generate positive reviews
4 changes: 2 additions & 2 deletions docs/index.html
@@ -139,9 +139,9 @@ <h3 id="Example">Example<a class="anchor-link" href="#Example"> </a></h3><p>This
 <div class="output_area">

 <div class="output_subarea output_stream output_stderr output_text">
-<pre>Some weights of GPT2HeadWithValueModel were not initialized from the model checkpoint at gpt2 and are newly initialized: [&#39;h.3.attn.masked_bias&#39;, &#39;h.11.attn.masked_bias&#39;, &#39;h.1.attn.masked_bias&#39;, &#39;v_head.summary.weight&#39;, &#39;h.9.attn.masked_bias&#39;, &#39;lm_head.weight&#39;, &#39;h.8.attn.masked_bias&#39;, &#39;v_head.summary.bias&#39;, &#39;h.5.attn.masked_bias&#39;, &#39;h.2.attn.masked_bias&#39;, &#39;h.7.attn.masked_bias&#39;, &#39;h.4.attn.masked_bias&#39;, &#39;h.0.attn.masked_bias&#39;, &#39;h.6.attn.masked_bias&#39;, &#39;h.10.attn.masked_bias&#39;]
+<pre>Some weights of GPT2HeadWithValueModel were not initialized from the model checkpoint at gpt2 and are newly initialized: [&#39;h.9.attn.masked_bias&#39;, &#39;h.8.attn.masked_bias&#39;, &#39;lm_head.weight&#39;, &#39;h.4.attn.masked_bias&#39;, &#39;v_head.summary.weight&#39;, &#39;h.10.attn.masked_bias&#39;, &#39;h.0.attn.masked_bias&#39;, &#39;h.5.attn.masked_bias&#39;, &#39;h.7.attn.masked_bias&#39;, &#39;h.6.attn.masked_bias&#39;, &#39;h.1.attn.masked_bias&#39;, &#39;v_head.summary.bias&#39;, &#39;h.11.attn.masked_bias&#39;, &#39;h.2.attn.masked_bias&#39;, &#39;h.3.attn.masked_bias&#39;]
 You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
-Some weights of GPT2HeadWithValueModel were not initialized from the model checkpoint at gpt2 and are newly initialized: [&#39;h.3.attn.masked_bias&#39;, &#39;h.11.attn.masked_bias&#39;, &#39;h.1.attn.masked_bias&#39;, &#39;v_head.summary.weight&#39;, &#39;h.9.attn.masked_bias&#39;, &#39;lm_head.weight&#39;, &#39;h.8.attn.masked_bias&#39;, &#39;v_head.summary.bias&#39;, &#39;h.5.attn.masked_bias&#39;, &#39;h.2.attn.masked_bias&#39;, &#39;h.7.attn.masked_bias&#39;, &#39;h.4.attn.masked_bias&#39;, &#39;h.0.attn.masked_bias&#39;, &#39;h.6.attn.masked_bias&#39;, &#39;h.10.attn.masked_bias&#39;]
+Some weights of GPT2HeadWithValueModel were not initialized from the model checkpoint at gpt2 and are newly initialized: [&#39;h.9.attn.masked_bias&#39;, &#39;h.8.attn.masked_bias&#39;, &#39;lm_head.weight&#39;, &#39;h.4.attn.masked_bias&#39;, &#39;v_head.summary.weight&#39;, &#39;h.10.attn.masked_bias&#39;, &#39;h.0.attn.masked_bias&#39;, &#39;h.5.attn.masked_bias&#39;, &#39;h.7.attn.masked_bias&#39;, &#39;h.6.attn.masked_bias&#39;, &#39;h.1.attn.masked_bias&#39;, &#39;v_head.summary.bias&#39;, &#39;h.11.attn.masked_bias&#39;, &#39;h.2.attn.masked_bias&#39;, &#39;h.3.attn.masked_bias&#39;]
 You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 </pre>
 </div>
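The two warning lines in this hunk list the same newly-initialized parameters; only the ordering changed between runs (the ordering is not stable). A quick order-insensitive check over a subset of the names shown above:

```python
# Subset of the parameter names from the old and new warning lines;
# the full lists contain the same names in a different order.
old = ["h.3.attn.masked_bias", "h.11.attn.masked_bias", "v_head.summary.weight",
       "lm_head.weight", "v_head.summary.bias", "h.9.attn.masked_bias"]
new = ["h.9.attn.masked_bias", "lm_head.weight", "v_head.summary.weight",
       "h.11.attn.masked_bias", "v_head.summary.bias", "h.3.attn.masked_bias"]

# Order-insensitive comparison: same set of names on both sides.
print(sorted(old) == sorted(new))  # → True
```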
2 changes: 1 addition & 1 deletion docs/sidebar.json
@@ -4,7 +4,7 @@
 "Utility functions": "/00-core",
 "GPT2 with value head": "/01-gpt2-with-value-head",
 "PPO for transformer models": "/02-ppo",
-"Title": "/03-distilbert-imdb-training",
+"Train IMDb Classifier": "/03-distilbert-imdb-training",
 "Tune GPT2 to generate positive reviews": "/04-gpt2-sentiment-ppo-training",
 "Tune GPT2 to generate controlled sentiment reviews": "/05-gpt2-sentiment-control"
 }
3 changes: 2 additions & 1 deletion nbs/03-distilbert-imdb-training.ipynb
@@ -4,7 +4,8 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Train IMDb Classifier"
+"# Train IMDb Classifier\n",
+"> Train a IMDb classifier with DistilBERT."
 ]
 },
 {
36 changes: 18 additions & 18 deletions nbs/04-gpt2-sentiment-ppo-training.ipynb
@@ -37,7 +37,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -47,7 +47,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 2,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -79,7 +79,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -148,7 +148,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 7,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -222,7 +222,7 @@
 "4 Oh, brother...after hearing about this ridicul... 0"
 ]
 },
-"execution_count": 7,
+"execution_count": null,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -253,7 +253,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 8,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -270,7 +270,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 9,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -279,7 +279,7 @@
 "SequenceClassifierOutput(loss=None, logits=tensor([[ 2.3350, -2.7266]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)"
 ]
 },
-"execution_count": 9,
+"execution_count": null,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -292,7 +292,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 10,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -301,7 +301,7 @@
 "SequenceClassifierOutput(loss=None, logits=tensor([[-2.2948, 2.5570]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)"
 ]
 },
-"execution_count": 10,
+"execution_count": null,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -321,7 +321,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -330,7 +330,7 @@
 "tensor(2.5570, grad_fn=<SelectBackward0>)"
 ]
 },
-"execution_count": 11,
+"execution_count": null,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -374,7 +374,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -383,7 +383,7 @@
 "[<wandb.wandb_torch.TorchGraph at 0x168e77c10>]"
 ]
 },
-"execution_count": 13,
+"execution_count": null,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -408,7 +408,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -417,7 +417,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 15,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -442,7 +442,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 16,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -459,7 +459,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 17,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
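The hunks in this notebook diff all make the same mechanical change: resetting `execution_count` to `null` in code cells and their `execute_result` outputs. A sketch of how that normalization could be scripted over the raw notebook JSON (hypothetical helper, not part of this repo; `nbformat` would work equally well):

```python
def clear_execution_counts(nb):
    """Reset execution_count to None in code cells and their outputs."""
    for cell in nb.get("cells", []):
        if cell.get("cell_type") != "code":
            continue
        cell["execution_count"] = None
        for out in cell.get("outputs", []):
            if "execution_count" in out:
                out["execution_count"] = None
    return nb

# Minimal notebook fragment shaped like the cells in this diff.
nb = {"cells": [{"cell_type": "code", "execution_count": 17,
                 "metadata": {},
                 "outputs": [{"output_type": "execute_result",
                              "execution_count": 17, "metadata": {}}]}]}
nb = clear_execution_counts(nb)
print(nb["cells"][0]["execution_count"])  # → None
```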
