-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
inference_speed_test.py
50 lines (42 loc) · 1.7 KB
/
inference_speed_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
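Benchmark the inference speed of MattingNetwork on a CUDA device.

Example (--resolution takes width first, then height):

    python inference_speed_test.py \
        --model-variant mobilenetv3 \
        --resolution 1920 1080 \
        --downsample-ratio 0.25 \
        --precision float32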
"""
import argparse
import torch
from tqdm import tqdm
from model.model import MattingNetwork
# Enable the cuDNN auto-tuner: it benchmarks the available convolution
# algorithms and reuses the fastest one. This suits this script because the
# same input tensor (and therefore the same shape) is used on every iteration.
torch.backends.cudnn.benchmark = True
class InferenceSpeedTest:
    """Command-line benchmark that times repeated MattingNetwork forward passes.

    Instantiating the class runs the whole benchmark: the command-line
    arguments are parsed, the model is built and compiled with TorchScript,
    and the timing loop is executed on the CUDA device.
    """

    def __init__(self):
        self.parse_args()
        self.init_model()
        self.loop()

    def parse_args(self, argv=None):
        """Parse the benchmark options and store them in ``self.args``.

        Args:
            argv: Optional list of argument strings. ``None`` (the default)
                makes argparse read ``sys.argv[1:]``, which is the original
                behaviour.

        Raises:
            SystemExit: Raised by argparse when a required option is missing
                or an option has an invalid value.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('--model-variant', type=str, required=True)
        # Two integers: width first, then height.
        parser.add_argument('--resolution', type=int, required=True, nargs=2)
        parser.add_argument('--downsample-ratio', type=float, required=True)
        # Restrict to the names init_model() knows how to map to a torch
        # dtype, so a typo is reported as a usage error instead of a KeyError.
        parser.add_argument('--precision', type=str, default='float32',
                            choices=('float32', 'float16'))
        # NOTE(review): this flag is parsed but nothing in this class reads
        # it. Kept so existing command lines that pass it keep working.
        parser.add_argument('--disable-refiner', action='store_true')
        self.args = parser.parse_args(argv)

    def init_model(self):
        """Build the model on the GPU and compile it with TorchScript.

        Sets ``self.device``, ``self.precision`` and ``self.model``.
        """
        self.device = 'cuda'
        self.precision = {'float32': torch.float32, 'float16': torch.float16}[self.args.precision]
        self.model = MattingNetwork(self.args.model_variant)
        self.model = self.model.to(device=self.device, dtype=self.precision).eval()
        # Script first, then freeze; the module is already in eval mode here.
        self.model = torch.jit.script(self.model)
        self.model = torch.jit.freeze(self.model)

    def loop(self):
        """Run 1000 forward passes on one random frame, with a tqdm progress bar."""
        w, h = self.args.resolution
        src = torch.randn((1, 3, h, w), device=self.device, dtype=self.precision)
        with torch.no_grad():
            # The model takes four extra inputs that start out as None; every
            # output after the first two is fed back in on the next call
            # (presumably recurrent state -- confirm against MattingNetwork).
            rec = None, None, None, None
            for _ in tqdm(range(1000)):
                _fgr, _pha, *rec = self.model(src, *rec, self.args.downsample_ratio)
                # CUDA work is queued asynchronously; wait for it to finish so
                # the rate shown by tqdm reflects completed forward passes.
                torch.cuda.synchronize()
if __name__ == "__main__":
    # Constructing the object is enough: its __init__ parses the arguments,
    # builds the model and runs the timing loop.
    InferenceSpeedTest()