Rename bert to transformers (#3946)
* rename folder bert to transformers
* rename bert_model_optimization.py to optimizer.py
* update URL links in notebooks
tianleiwu authored May 14, 2020
1 parent 3c4f3d0 commit 782c6c2
Showing 54 changed files with 3,844 additions and 87 deletions.
(6 binary files not shown; 3 files deleted; 1 file renamed without changes.)
@@ -43,14 +43,14 @@ For tf2onnx, please refer to this notebook: https://github.com/onnx/tensorflow-o

## Model Optimization

-Example of using the script bert_model_optimization.py to convert a BERT-large model to run in V100 GPU:
+Example of using the script optimizer.py to convert a BERT-large model to run on a V100 GPU:
```console
-python bert_model_optimization.py --input original_model.onnx --output optimized_model_gpu.onnx --num_heads 16 --hidden_size 1024 --input_int32 --float16
+python optimizer.py --input original_model.onnx --output optimized_model_gpu.onnx --num_heads 16 --hidden_size 1024 --input_int32 --float16
```

### Options

-See below for description of some options of bert_model_optimization.py:
+See below for a description of some options of optimizer.py:

- **input**: input model path
- **output**: output model path
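
For comparison, a minimal sketch of an equivalent CPU optimization for a BERT-base model (not part of this commit; the num_heads/hidden_size values assume the standard BERT-base configuration, and the file names are placeholders):

```console
python optimizer.py --input original_model.onnx --output optimized_model_cpu.onnx --num_heads 12 --hidden_size 768
```

Without --float16 the output stays in float32, which is generally the better choice on CPU.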
@@ -160,22 +160,22 @@ def validate_onnx_model(onnx_model_filename, example_inputs, example_outputs_flatten
def optimize_onnx_model(onnx_model_filename, model_type, num_attention_heads, hidden_size, fp16):
    optimized_model_filename = onnx_model_filename.replace(".onnx", "_fp16.onnx" if fp16 else "_fp32.onnx")
    if not os.path.exists(optimized_model_filename):
-        import bert_model_optimization as bert_opt
+        from optimizer import optimize_model
        # Use onnxruntime to optimize model, which will be saved to *_ort_cpu.onnx
-        opt_model = bert_opt.optimize_model(onnx_model_filename,
-                                            model_type,
-                                            num_heads=num_attention_heads,
-                                            hidden_size=hidden_size,
-                                            opt_level=99,
-                                            only_onnxruntime=True)
+        opt_model = optimize_model(onnx_model_filename,
+                                   model_type,
+                                   num_heads=num_attention_heads,
+                                   hidden_size=hidden_size,
+                                   opt_level=99,
+                                   only_onnxruntime=True)
        optimize_model_statistics[onnx_model_filename] = opt_model.get_fused_operator_statistics()

        # Use script to optimize model.
-        opt_model = bert_opt.optimize_model(onnx_model_filename,
-                                            model_type,
-                                            num_heads=num_attention_heads,
-                                            hidden_size=hidden_size,
-                                            opt_level=0)
+        opt_model = optimize_model(onnx_model_filename,
+                                   model_type,
+                                   num_heads=num_attention_heads,
+                                   hidden_size=hidden_size,
+                                   opt_level=0)
        optimize_model_statistics[optimized_model_filename] = opt_model.get_fused_operator_statistics()

        if fp16:
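
For reference, a minimal sketch (not part of this commit) of calling the renamed module directly from Python. Assumptions: the transformers tools folder of an onnxruntime checkout is on sys.path, the model file is a placeholder BERT-base export, and save_model_to_file is the OnnxModel save helper used elsewhere in these tools:

```python
import sys

# Assumption: path to the renamed tools folder inside an onnxruntime checkout.
sys.path.append("onnxruntime/python/tools/transformers")

from optimizer import optimize_model  # was: import bert_model_optimization

# opt_level=0 lets the script (rather than onnxruntime) perform the fusions.
opt_model = optimize_model("bert-base-cased-squad.onnx",  # placeholder model path
                           model_type="bert",
                           num_heads=12,     # BERT-base: 12 attention heads (assumption)
                           hidden_size=768,  # BERT-base: hidden size 768 (assumption)
                           opt_level=0)
print(opt_model.get_fused_operator_statistics())
opt_model.save_model_to_file("bert-base-cased-squad_opt.onnx")
```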
@@ -210,14 +210,14 @@ def export_onnx_model(model_name, cache_dir, input_names, fp16, optimize_onnx, v
        dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten)

        torch.onnx.export(model=model,
-                        args=tuple(example_inputs.values()),
-                        f=onnx_model_filename,
-                        input_names=list(example_inputs.keys()),
-                        output_names=output_names,
-                        example_outputs=example_outputs,
-                        dynamic_axes=dynamic_axes,
-                        do_constant_folding=True,
-                        opset_version=MODELS[model_name][1])
+                          args=tuple(example_inputs.values()),
+                          f=onnx_model_filename,
+                          input_names=list(example_inputs.keys()),
+                          output_names=output_names,
+                          example_outputs=example_outputs,
+                          dynamic_axes=dynamic_axes,
+                          do_constant_folding=True,
+                          opset_version=MODELS[model_name][1])
    else:
        logger.info(f"Skip export since model existed: {onnx_model_filename}")

@@ -481,7 +481,7 @@ def parse_arguments():
"--optimize_onnx",
required=False,
action="store_true",
help="Use bert_model_optimization.py to optimize onnx model")
help="Use optimizer.py to optimize onnx model")

parser.add_argument("-v", "--validate_onnx", required=False, action="store_true", help="Validate ONNX model")

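
As a usage note, a hypothetical invocation of the benchmark script that exercises the renamed help text above (the script name and the --models flag are assumptions not confirmed by this hunk; only --optimize_onnx and -v/--validate_onnx appear in the diff):

```console
python benchmark.py --models bert-base-cased --optimize_onnx --validate_onnx
```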
@@ -232,7 +232,7 @@ def main():
    remove_past_outputs(export_model_path, onnx_model_path)

    if args.enable_optimization:
-        from bert_model_optimization import optimize_model
+        from optimizer import optimize_model
        m = optimize_model(onnx_model_path,
                           model_type='gpt2',
                           num_heads=12,
@@ -356,7 +356,7 @@
"metadata": {},
"outputs": [],
"source": [
"bert_opt_script = os.path.join(bert_tools_dir, 'bert_model_optimization.py')"
"bert_opt_script = os.path.join(bert_tools_dir, 'optimizer.py')"
]
},
{
@@ -365,7 +365,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Local directory corresponding to https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert/\n",
"# Local directory corresponding to https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/\n",
"%run $bert_opt_script --model_type gpt2 --input $onnx_model_path --output $optimized_model --opt_level 0"
]
},
@@ -457,10 +457,10 @@
"source": [
"## 5. Offline Optimization Script and Test Tools\n",
"\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"\n",
"### Download OnnxRuntime Python Tools for Bert\n",
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
]
},
{
@@ -479,7 +479,7 @@
"100% [..............................................................................] 21565 / 21565Downloaded BertOnnxModelKeras.py\n",
"100% [..............................................................................] 26114 / 26114Downloaded BertOnnxModelTF.py\n",
"100% [..............................................................................] 22773 / 22773Downloaded OnnxModel.py\n",
"100% [................................................................................] 7917 / 7917Downloaded bert_model_optimization.py\n",
"100% [................................................................................] 7917 / 7917Downloaded optimizer.py\n",
"100% [................................................................................] 5478 / 5478Downloaded MachineInfo.py\n"
]
}
@@ -488,8 +488,8 @@
"import os\n",
"import wget\n",
"\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/bert/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'bert_model_optimization.py', 'MachineInfo.py']\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/transformers/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'optimizer.py', 'MachineInfo.py']\n",
"\n",
"script_dir = './bert_scripts'\n",
"if not os.path.exists(script_dir):\n",
@@ -515,7 +515,7 @@
"* The exported model uses dynamic axis and this makes it harder for shape inference of the graph. That blocks some optimization to be applied.\n",
"* Some optimization is better to be done offline. Like change input tensor type from int64 to int32 to avoid extra Cast nodes, or convert model to float16 to achieve better performance in V100 or T4 GPU.\n",
"\n",
"We have python script **bert_model_optimization.py**, which is more flexible in graph pattern matching and model conversion (like float32 to float16). You can also use it to verify whether a Bert model is fully optimized.\n",
"We have python script **optimizer.py**, which is more flexible in graph pattern matching and model conversion (like float32 to float16). You can also use it to verify whether a Bert model is fully optimized.\n",
"\n",
"In this example, we can see that it introduces optimization that is not provided by onnxruntime: SkipLayerNormalization and bias fusion, which is not fused in OnnxRuntime due to shape inference as mentioned.\n",
"\n",
@@ -531,8 +531,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"bert_model_optimization.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
"bert_model_optimization.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
"optimizer.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
"optimizer.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
" BertOnnxModel.py: Fused LayerNormalization count: 0\n",
" BertOnnxModel.py: Fused Reshape count:0\n",
" BertOnnxModel.py: Fused SkipLayerNormalization count: 24\n",
@@ -542,13 +542,13 @@
" BertOnnxModel.py: opset verion: 11\n",
" OnnxModel.py: Output model to ./onnx/bert-base-cased-squad_opt_cpu.onnx\n",
" BertOnnxModel.py: EmbedLayer=1, Attention=12, Gelu=12, LayerNormalization=24, Succesful=True\n",
"bert_model_optimization.py: The output model is fully optimized.\n"
"optimizer.py: The output model is fully optimized.\n"
]
}
],
"source": [
"optimized_model_path = './onnx/bert-base-cased-squad_opt_cpu.onnx'\n",
"%run ./bert_scripts/bert_model_optimization.py --input $export_model_path --output $optimized_model_path"
"%run ./bert_scripts/optimizer.py --input $export_model_path --output $optimized_model_path"
]
},
{
@@ -576,10 +576,10 @@
"source": [
"## 5. Offline Optimization and Test Tools\n",
"\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"\n",
"### Download OnnxRuntime Python Tools for Bert\n",
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
]
},
{
@@ -598,7 +598,7 @@
"100% [..............................................................................] 21565 / 21565Downloaded BertOnnxModelKeras.py\n",
"100% [..............................................................................] 26114 / 26114Downloaded BertOnnxModelTF.py\n",
"100% [..............................................................................] 22773 / 22773Downloaded OnnxModel.py\n",
"100% [................................................................................] 7795 / 7795Downloaded bert_model_optimization.py\n",
"100% [................................................................................] 7795 / 7795Downloaded optimizer.py\n",
"100% [................................................................................] 5885 / 5885Downloaded MachineInfo.py\n"
]
}
@@ -607,8 +607,8 @@
"import os\n",
"import wget\n",
"\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/bert/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'bert_model_optimization.py', 'MachineInfo.py']\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/transformers/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'optimizer.py', 'MachineInfo.py']\n",
"\n",
"script_dir = './bert_scripts'\n",
"if not os.path.exists(script_dir):\n",
@@ -634,7 +634,7 @@
"* The exported model uses dynamic axis. That impacts shape inference. Without enough shape information, some optimization cannot be applied due to the constraint on the input shape.\n",
"* Some optimization are not supported by OnnxRuntime, but it is feasible in offline script. Like changing input tensor type from int64 to int32 to avoid extra Cast nodes, or converting model to float16 to achieve better performance in V100 or T4 GPU.\n",
"\n",
"We have python script **bert_model_optimization.py**, which is flexible in graph pattern matching and model conversions to tackle these problems.\n",
"We have python script **optimizer.py**, which is flexible in graph pattern matching and model conversions to tackle these problems.\n",
"\n",
"In below example, we can see that the tool provide an extra optimization - SkipLayerNormalization and bias (Add) are not fused in OnnxRuntime due to shape inference.\n",
"\n",
@@ -660,8 +660,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"bert_model_optimization.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"bert_model_optimization.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
" BertOnnxModel.py: Fused LayerNormalization count: 0\n",
" BertOnnxModel.py: Fused Reshape count:0\n",
" BertOnnxModel.py: Fused SkipLayerNormalization count: 24\n",
@@ -671,13 +671,13 @@
" BertOnnxModel.py: opset verion: 11\n",
" OnnxModel.py: Output model to ./onnx/bert-base-cased-squad_opt_gpu_fp32.onnx\n",
" BertOnnxModel.py: EmbedLayer=1, Attention=12, Gelu=12, LayerNormalization=24, Succesful=True\n",
"bert_model_optimization.py: The output model is fully optimized.\n"
"optimizer.py: The output model is fully optimized.\n"
]
}
],
"source": [
"optimized_fp32_model_path = './onnx/bert-base-cased-squad_opt_{}_fp32.onnx'.format('gpu' if use_gpu else 'cpu')\n",
"%run ./bert_scripts/bert_model_optimization.py --input $export_model_path --output $optimized_fp32_model_path --input_int32"
"%run ./bert_scripts/optimizer.py --input $export_model_path --output $optimized_fp32_model_path --input_int32"
]
},
{
@@ -1117,7 +1117,7 @@
"source": [
"## 6. Model Optimization with Float16\n",
"\n",
"The bert_model_optimization.py script have an option **--float16** to convert model to use float16 to store weights. After the conversion, it could be faster to run in GPU with tensor cores like V100 or T4.\n",
"The optimizer.py script have an option **--float16** to convert model to use float16 to store weights. After the conversion, it could be faster to run in GPU with tensor cores like V100 or T4.\n",
"\n",
"Let's run tools to measure the performance on V100. The results show significant performance improvement: latency is about 3.4 ms for float32 model, and 1.8 ms for float16 model."
]
@@ -1131,8 +1131,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"bert_model_optimization.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"bert_model_optimization.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
" BertOnnxModel.py: Fused LayerNormalization count: 0\n",
" BertOnnxModel.py: Fused Reshape count:0\n",
" BertOnnxModel.py: Fused SkipLayerNormalization count: 24\n",
@@ -1142,13 +1142,13 @@
" BertOnnxModel.py: opset verion: 11\n",
" OnnxModel.py: Output model to ./onnx/bert-base-cased-squad_opt_gpu_fp16.onnx\n",
" BertOnnxModel.py: EmbedLayer=1, Attention=12, Gelu=12, LayerNormalization=24, Succesful=True\n",
"bert_model_optimization.py: The output model is fully optimized.\n"
"optimizer.py: The output model is fully optimized.\n"
]
}
],
"source": [
"optimized_fp16_model_path = './onnx/bert-base-cased-squad_opt_{}_fp16.onnx'.format('gpu' if use_gpu else 'cpu')\n",
"%run ./bert_scripts/bert_model_optimization.py --input $export_model_path --output $optimized_fp16_model_path --float16 --input_int32"
"%run ./bert_scripts/optimizer.py --input $export_model_path --output $optimized_fp16_model_path --float16 --input_int32"
]
},
{