Rename bert to transformers (#3946)
* rename folder bert to transformers
* rename bert_model_optimization.py to optimizer.py
* update URL links in notebooks
tianleiwu authored May 14, 2020
1 parent 3c4f3d0 commit 782c6c2
Showing 54 changed files with 3,844 additions and 87 deletions.
(6 binary files not shown; 3 files deleted; 1 file renamed without changes.)
@@ -43,14 +43,14 @@ For tf2onnx, please refer to this notebook: https://github.com/onnx/tensorflow-o

## Model Optimization

-Example of using the script bert_model_optimization.py to convert a BERT-large model to run in V100 GPU:
+Example of using the script optimizer.py to convert a BERT-large model to run on a V100 GPU:
```console
-python bert_model_optimization.py --input original_model.onnx --output optimized_model_gpu.onnx --num_heads 16 --hidden_size 1024 --input_int32 --float16
+python optimizer.py --input original_model.onnx --output optimized_model_gpu.onnx --num_heads 16 --hidden_size 1024 --input_int32 --float16
```

### Options

-See below for description of some options of bert_model_optimization.py:
+See below for a description of some options of optimizer.py:

- **input**: input model path
- **output**: output model path
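
For comparison, a minimal sketch of an equivalent CPU optimization for a BERT-base model (not part of this commit; the num_heads/hidden_size values assume the standard BERT-base configuration, and the file names are placeholders):

```console
python optimizer.py --input original_model.onnx --output optimized_model_cpu.onnx --num_heads 12 --hidden_size 768
```

Without --float16 the output stays in float32, which is generally the better choice on CPU.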
@@ -160,22 +160,22 @@ def validate_onnx_model(onnx_model_filename, example_inputs, example_outputs_flatten
def optimize_onnx_model(onnx_model_filename, model_type, num_attention_heads, hidden_size, fp16):
    optimized_model_filename = onnx_model_filename.replace(".onnx", "_fp16.onnx" if fp16 else "_fp32.onnx")
    if not os.path.exists(optimized_model_filename):
-        import bert_model_optimization as bert_opt
+        from optimizer import optimize_model
        # Use onnxruntime to optimize model, which will be saved to *_ort_cpu.onnx
-        opt_model = bert_opt.optimize_model(onnx_model_filename,
-                                            model_type,
-                                            num_heads=num_attention_heads,
-                                            hidden_size=hidden_size,
-                                            opt_level=99,
-                                            only_onnxruntime=True)
+        opt_model = optimize_model(onnx_model_filename,
+                                   model_type,
+                                   num_heads=num_attention_heads,
+                                   hidden_size=hidden_size,
+                                   opt_level=99,
+                                   only_onnxruntime=True)
        optimize_model_statistics[onnx_model_filename] = opt_model.get_fused_operator_statistics()

        # Use script to optimize model.
-        opt_model = bert_opt.optimize_model(onnx_model_filename,
-                                            model_type,
-                                            num_heads=num_attention_heads,
-                                            hidden_size=hidden_size,
-                                            opt_level=0)
+        opt_model = optimize_model(onnx_model_filename,
+                                   model_type,
+                                   num_heads=num_attention_heads,
+                                   hidden_size=hidden_size,
+                                   opt_level=0)
        optimize_model_statistics[optimized_model_filename] = opt_model.get_fused_operator_statistics()

        if fp16:
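
For reference, a minimal sketch (not part of this commit) of calling the renamed module directly from Python. Assumptions: the transformers tools folder of an onnxruntime checkout is on sys.path, the model file is a placeholder BERT-base export, and save_model_to_file is the OnnxModel save helper used elsewhere in these tools:

```python
import sys

# Assumption: path to the renamed tools folder inside an onnxruntime checkout.
sys.path.append("onnxruntime/python/tools/transformers")

from optimizer import optimize_model  # was: import bert_model_optimization

# opt_level=0 lets the script (rather than onnxruntime) perform the fusions.
opt_model = optimize_model("bert-base-cased-squad.onnx",  # placeholder model path
                           model_type="bert",
                           num_heads=12,     # BERT-base: 12 attention heads (assumption)
                           hidden_size=768,  # BERT-base: hidden size 768 (assumption)
                           opt_level=0)
print(opt_model.get_fused_operator_statistics())
opt_model.save_model_to_file("bert-base-cased-squad_opt.onnx")
```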
@@ -210,14 +210,14 @@ def export_onnx_model(model_name, cache_dir, input_names, fp16, optimize_onnx, v
        dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten)

        torch.onnx.export(model=model,
-                        args=tuple(example_inputs.values()),
-                        f=onnx_model_filename,
-                        input_names=list(example_inputs.keys()),
-                        output_names=output_names,
-                        example_outputs=example_outputs,
-                        dynamic_axes=dynamic_axes,
-                        do_constant_folding=True,
-                        opset_version=MODELS[model_name][1])
+                          args=tuple(example_inputs.values()),
+                          f=onnx_model_filename,
+                          input_names=list(example_inputs.keys()),
+                          output_names=output_names,
+                          example_outputs=example_outputs,
+                          dynamic_axes=dynamic_axes,
+                          do_constant_folding=True,
+                          opset_version=MODELS[model_name][1])
    else:
        logger.info(f"Skip export since model existed: {onnx_model_filename}")

@@ -481,7 +481,7 @@ def parse_arguments():
"--optimize_onnx",
required=False,
action="store_true",
help="Use bert_model_optimization.py to optimize onnx model")
help="Use optimizer.py to optimize onnx model")

parser.add_argument("-v", "--validate_onnx", required=False, action="store_true", help="Validate ONNX model")

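
As a usage note, a hypothetical invocation of the benchmark script that exercises the renamed help text above (the script name and the --models flag are assumptions not confirmed by this hunk; only --optimize_onnx and -v/--validate_onnx appear in the diff):

```console
python benchmark.py --models bert-base-cased --optimize_onnx --validate_onnx
```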
@@ -232,7 +232,7 @@ def main():
    remove_past_outputs(export_model_path, onnx_model_path)

    if args.enable_optimization:
-        from bert_model_optimization import optimize_model
+        from optimizer import optimize_model
        m = optimize_model(onnx_model_path,
                           model_type='gpt2',
                           num_heads=12,
@@ -356,7 +356,7 @@
"metadata": {},
"outputs": [],
"source": [
"bert_opt_script = os.path.join(bert_tools_dir, 'bert_model_optimization.py')"
"bert_opt_script = os.path.join(bert_tools_dir, 'optimizer.py')"
]
},
{
@@ -365,7 +365,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Local directory corresponding to https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert/\n",
"# Local directory corresponding to https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/\n",
"%run $bert_opt_script --model_type gpt2 --input $onnx_model_path --output $optimized_model --opt_level 0"
]
},
@@ -457,10 +457,10 @@
"source": [
"## 5. Offline Optimization Script and Test Tools\n",
"\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"\n",
"### Download OnnxRuntime Python Tools for Bert\n",
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
]
},
{
@@ -479,7 +479,7 @@
"100% [..............................................................................] 21565 / 21565Downloaded BertOnnxModelKeras.py\n",
"100% [..............................................................................] 26114 / 26114Downloaded BertOnnxModelTF.py\n",
"100% [..............................................................................] 22773 / 22773Downloaded OnnxModel.py\n",
"100% [................................................................................] 7917 / 7917Downloaded bert_model_optimization.py\n",
"100% [................................................................................] 7917 / 7917Downloaded optimizer.py\n",
"100% [................................................................................] 5478 / 5478Downloaded MachineInfo.py\n"
]
}
@@ -488,8 +488,8 @@
"import os\n",
"import wget\n",
"\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/bert/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'bert_model_optimization.py', 'MachineInfo.py']\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/transformers/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'optimizer.py', 'MachineInfo.py']\n",
"\n",
"script_dir = './bert_scripts'\n",
"if not os.path.exists(script_dir):\n",
@@ -515,7 +515,7 @@
"* The exported model uses dynamic axis and this makes it harder for shape inference of the graph. That blocks some optimization to be applied.\n",
"* Some optimization is better to be done offline. Like change input tensor type from int64 to int32 to avoid extra Cast nodes, or convert model to float16 to achieve better performance in V100 or T4 GPU.\n",
"\n",
"We have python script **bert_model_optimization.py**, which is more flexible in graph pattern matching and model conversion (like float32 to float16). You can also use it to verify whether a Bert model is fully optimized.\n",
"We have python script **optimizer.py**, which is more flexible in graph pattern matching and model conversion (like float32 to float16). You can also use it to verify whether a Bert model is fully optimized.\n",
"\n",
"In this example, we can see that it introduces optimization that is not provided by onnxruntime: SkipLayerNormalization and bias fusion, which is not fused in OnnxRuntime due to shape inference as mentioned.\n",
"\n",
@@ -531,8 +531,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"bert_model_optimization.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
"bert_model_optimization.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
"optimizer.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
"optimizer.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_ort_cpu.onnx\n",
" BertOnnxModel.py: Fused LayerNormalization count: 0\n",
" BertOnnxModel.py: Fused Reshape count:0\n",
" BertOnnxModel.py: Fused SkipLayerNormalization count: 24\n",
@@ -542,13 +542,13 @@
" BertOnnxModel.py: opset verion: 11\n",
" OnnxModel.py: Output model to ./onnx/bert-base-cased-squad_opt_cpu.onnx\n",
" BertOnnxModel.py: EmbedLayer=1, Attention=12, Gelu=12, LayerNormalization=24, Succesful=True\n",
"bert_model_optimization.py: The output model is fully optimized.\n"
"optimizer.py: The output model is fully optimized.\n"
]
}
],
"source": [
"optimized_model_path = './onnx/bert-base-cased-squad_opt_cpu.onnx'\n",
"%run ./bert_scripts/bert_model_optimization.py --input $export_model_path --output $optimized_model_path"
"%run ./bert_scripts/optimizer.py --input $export_model_path --output $optimized_model_path"
]
},
{
@@ -576,10 +576,10 @@
"source": [
"## 5. Offline Optimization and Test Tools\n",
"\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"It is recommended to download the [OnnxRuntime Python Tools for BERT](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers), and try them on the exported ONNX models. It could help verify whether the model is fully optimized, and get performance test results.\n",
"\n",
"### Download OnnxRuntime Python Tools for Bert\n",
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/bert) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
"You may copy the whole [directory](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) to a sub-directory named bert_scripts for this notebook. The list of script files might need update if import error happens when you run some script."
]
},
{
@@ -598,7 +598,7 @@
"100% [..............................................................................] 21565 / 21565Downloaded BertOnnxModelKeras.py\n",
"100% [..............................................................................] 26114 / 26114Downloaded BertOnnxModelTF.py\n",
"100% [..............................................................................] 22773 / 22773Downloaded OnnxModel.py\n",
"100% [................................................................................] 7795 / 7795Downloaded bert_model_optimization.py\n",
"100% [................................................................................] 7795 / 7795Downloaded optimizer.py\n",
"100% [................................................................................] 5885 / 5885Downloaded MachineInfo.py\n"
]
}
@@ -607,8 +607,8 @@
"import os\n",
"import wget\n",
"\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/bert/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'bert_model_optimization.py', 'MachineInfo.py']\n",
"url_prfix = \"https://raw.githubusercontent.com/microsoft/onnxruntime/master/onnxruntime/python/tools/transformers/\"\n",
"script_files = ['bert_perf_test.py', 'bert_test_data.py', 'compare_bert_results.py', 'BertOnnxModel.py', 'BertOnnxModelKeras.py', 'BertOnnxModelTF.py', 'Gpt2OnnxModel.py', 'OnnxModel.py', 'optimizer.py', 'MachineInfo.py']\n",
"\n",
"script_dir = './bert_scripts'\n",
"if not os.path.exists(script_dir):\n",
@@ -634,7 +634,7 @@
"* The exported model uses dynamic axis. That impacts shape inference. Without enough shape information, some optimization cannot be applied due to the constraint on the input shape.\n",
"* Some optimization are not supported by OnnxRuntime, but it is feasible in offline script. Like changing input tensor type from int64 to int32 to avoid extra Cast nodes, or converting model to float16 to achieve better performance in V100 or T4 GPU.\n",
"\n",
"We have python script **bert_model_optimization.py**, which is flexible in graph pattern matching and model conversions to tackle these problems.\n",
"We have python script **optimizer.py**, which is flexible in graph pattern matching and model conversions to tackle these problems.\n",
"\n",
"In below example, we can see that the tool provide an extra optimization - SkipLayerNormalization and bias (Add) are not fused in OnnxRuntime due to shape inference.\n",
"\n",
@@ -660,8 +660,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"bert_model_optimization.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"bert_model_optimization.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
" BertOnnxModel.py: Fused LayerNormalization count: 0\n",
" BertOnnxModel.py: Fused Reshape count:0\n",
" BertOnnxModel.py: Fused SkipLayerNormalization count: 24\n",
@@ -671,13 +671,13 @@
" BertOnnxModel.py: opset verion: 11\n",
" OnnxModel.py: Output model to ./onnx/bert-base-cased-squad_opt_gpu_fp32.onnx\n",
" BertOnnxModel.py: EmbedLayer=1, Attention=12, Gelu=12, LayerNormalization=24, Succesful=True\n",
"bert_model_optimization.py: The output model is fully optimized.\n"
"optimizer.py: The output model is fully optimized.\n"
]
}
],
"source": [
"optimized_fp32_model_path = './onnx/bert-base-cased-squad_opt_{}_fp32.onnx'.format('gpu' if use_gpu else 'cpu')\n",
"%run ./bert_scripts/bert_model_optimization.py --input $export_model_path --output $optimized_fp32_model_path --input_int32"
"%run ./bert_scripts/optimizer.py --input $export_model_path --output $optimized_fp32_model_path --input_int32"
]
},
{
@@ -1117,7 +1117,7 @@
"source": [
"## 6. Model Optimization with Float16\n",
"\n",
"The bert_model_optimization.py script have an option **--float16** to convert model to use float16 to store weights. After the conversion, it could be faster to run in GPU with tensor cores like V100 or T4.\n",
"The optimizer.py script have an option **--float16** to convert model to use float16 to store weights. After the conversion, it could be faster to run in GPU with tensor cores like V100 or T4.\n",
"\n",
"Let's run tools to measure the performance on V100. The results show significant performance improvement: latency is about 3.4 ms for float32 model, and 1.8 ms for float16 model."
]
@@ -1131,8 +1131,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"bert_model_optimization.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"bert_model_optimization.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Save optimized model by onnxruntime to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
"optimizer.py: Use OnnxRuntime to optimize and save the optimized model to ./onnx\\bert-base-cased-squad_opset11_ort_gpu.onnx\n",
" BertOnnxModel.py: Fused LayerNormalization count: 0\n",
" BertOnnxModel.py: Fused Reshape count:0\n",
" BertOnnxModel.py: Fused SkipLayerNormalization count: 24\n",
@@ -1142,13 +1142,13 @@
" BertOnnxModel.py: opset verion: 11\n",
" OnnxModel.py: Output model to ./onnx/bert-base-cased-squad_opt_gpu_fp16.onnx\n",
" BertOnnxModel.py: EmbedLayer=1, Attention=12, Gelu=12, LayerNormalization=24, Succesful=True\n",
"bert_model_optimization.py: The output model is fully optimized.\n"
"optimizer.py: The output model is fully optimized.\n"
]
}
],
"source": [
"optimized_fp16_model_path = './onnx/bert-base-cased-squad_opt_{}_fp16.onnx'.format('gpu' if use_gpu else 'cpu')\n",
"%run ./bert_scripts/bert_model_optimization.py --input $export_model_path --output $optimized_fp16_model_path --float16 --input_int32"
"%run ./bert_scripts/optimizer.py --input $export_model_path --output $optimized_fp16_model_path --float16 --input_int32"
]
},
{