mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
The `usort` config in `pyproject.toml` has no effect due to a typo. Fixing the typo makes `usort` do more and generates the changes in this PR. Except for `pyproject.toml`, all changes are generated by `lintrunner -a --take UFMT --all-files`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/127123 Approved by: https://github.com/Skylion007 ghstack dependencies: #127122
373 lines
13 KiB
Python
373 lines
13 KiB
Python
#! /usr/bin/env python3
|
|
|
|
import argparse
|
|
import glob
|
|
import json
|
|
import os
|
|
import shutil
|
|
import tarfile
|
|
import tempfile
|
|
|
|
from urllib.request import urlretrieve
|
|
|
|
import boto3
|
|
import numpy as np
|
|
import onnx
|
|
import onnx.backend
|
|
from onnx import numpy_helper
|
|
|
|
import caffe2.python.onnx.backend
|
|
import caffe2.python.onnx.frontend
|
|
import caffe2.python.workspace as c2_workspace
|
|
from caffe2.proto import caffe2_pb2
|
|
|
|
from caffe2.python.models.download import (
|
|
deleteDirectory,
|
|
downloadFromURLToFile,
|
|
getURLFromName,
|
|
)
|
|
|
|
|
|
"""A script converting Caffe2 models to ONNX, and updating ONNX model zoos.
|
|
|
|
Arguments:
|
|
-v, verbose
|
|
--local-dir, where we store the ONNX and Caffe2 models
|
|
--no-cache, ignore existing models in local-dir
|
|
--clean-test-data, delete all the existing test data when updating ONNX model zoo
|
|
--add-test-data, number of new test data sets to generate for each ONNX model
|
|
--only-local, run locally without uploading anything (for testing purposes)
|
|
|
|
Examples:
|
|
# store the data in /home/username/zoo-dir, delete existing test data, ignore local cache,
|
|
# and generate 3 sets of new test data
|
|
python update-caffe2-models.py --local-dir /home/username/zoo-dir --clean-test-data --no-cache --add-test-data 3
|
|
|
|
"""
|
|
|
|
# TODO: Add GPU support
|
|
|
|
|
|
def upload_onnx_model(model_name, zoo_dir, backup=False, only_local=False):
    """Archive a model directory as a gzipped tarball and upload it to S3.

    The directory ``<zoo_dir>/<model_name>`` is packed into
    ``<zoo_dir>/<model_name>.tar.gz`` (or ``<model_name>-backup.tar.gz`` when
    ``backup`` is true) and uploaded to the ``download.onnx`` bucket under
    ``models/latest/`` with a public-read ACL.

    Args:
        model_name: Name of the model; also the top-level name inside the archive.
        zoo_dir: Directory holding the model directory and receiving the archive.
        backup: When true, the archive name carries a ``-backup`` suffix.
        only_local: When true, nothing is archived or uploaded.
    """
    if only_local:
        print("No uploading in local only mode.")
        return

    if backup:
        print(f"Backing up the previous version of ONNX model {model_name}...")
        archive_name = f"{model_name}-backup.tar.gz"
    else:
        archive_name = f"{model_name}.tar.gz"

    source_dir = os.path.join(zoo_dir, model_name)
    archive_path = os.path.join(zoo_dir, archive_name)

    print(f"Compressing {model_name} model to {archive_path}")
    with tarfile.open(archive_path, "w:gz") as archive:
        archive.add(source_dir, arcname=model_name)

    size_in_mb = float(os.stat(archive_path).st_size) / 1024 / 1024
    print(f"Uploading {archive_path} ({size_in_mb} MB) to s3 cloud...")

    s3_client = boto3.client("s3", "us-east-1")
    boto3.s3.transfer.S3Transfer(s3_client).upload_file(
        archive_path,
        "download.onnx",
        f"models/latest/{archive_name}",
        extra_args={"ACL": "public-read"},
    )

    print(f"Successfully uploaded {archive_name} to s3!")
|
|
|
|
|
|
def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
    """Fetch the latest published ONNX model archive and unpack it into ``zoo_dir``.

    If ``<zoo_dir>/<model_name>`` already exists and ``use_cache`` is true, the
    existing directory is backed up via ``upload_onnx_model`` and nothing is
    downloaded. Otherwise any existing directory is removed, the archive is
    downloaded to a temporary file and extracted, and a backup upload is made
    unless ``only_local`` is set.

    A failed download or extraction is reported but not raised; an empty model
    directory is created instead so later steps have somewhere to write.

    Args:
        model_name: Name of the ONNX model to fetch.
        zoo_dir: Directory the archive is extracted into.
        use_cache: Reuse an existing local model directory when present.
        only_local: Skip every upload.
    """
    target_dir = os.path.join(zoo_dir, model_name)
    if os.path.exists(target_dir):
        if not use_cache:
            shutil.rmtree(target_dir)
        else:
            upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
            return

    url = f"https://s3.amazonaws.com/download.onnx/models/latest/{model_name}.tar.gz"

    # Only the path is needed: the handle is closed right away so that
    # urlretrieve can write to the file, and the file is removed in `finally`.
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    tmp_path = tmp_file.name
    try:
        tmp_file.close()
        print(
            f"Downloading ONNX model {model_name} from {url} and save in {tmp_path} ...\n"
        )
        urlretrieve(url, tmp_path)
        with tarfile.open(tmp_path) as archive:
            print(f"Extracting ONNX model {model_name} to {zoo_dir} ...\n")
            # NOTE(review): extractall() trusts the member paths of a
            # downloaded archive; consider an extraction filter where the
            # minimum supported Python provides one.
            archive.extractall(zoo_dir)
    except Exception as err:
        print(f"Failed to download/backup data for ONNX model {model_name}: {err}")
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
    finally:
        os.remove(tmp_path)

    if only_local:
        return
    upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
|
|
|
|
|
|
def download_caffe2_model(model_name, zoo_dir, use_cache=True):
    """Download the three files of a Caffe2 model into ``<zoo_dir>/<model_name>``.

    An existing model directory is reused untouched when ``use_cache`` is true;
    otherwise it is deleted and recreated before downloading.

    Args:
        model_name: Name of the Caffe2 model.
        zoo_dir: Directory that holds one sub-directory per model.
        use_cache: Return immediately if the model directory already exists.

    Raises:
        Exception: Whatever the download raised, re-raised after the partially
            filled model directory has been deleted.
    """
    model_dir = os.path.join(zoo_dir, model_name)
    if os.path.exists(model_dir):
        if use_cache:
            return
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)

    for file_name in ("predict_net.pb", "init_net.pb", "value_info.json"):
        source_url = getURLFromName(model_name, file_name)
        target_path = os.path.join(model_dir, file_name)
        try:
            try:
                downloadFromURLToFile(source_url, target_path, show_progress=False)
            except TypeError:
                # show_progress not supported prior to
                # Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
                # (Sep 17, 2017)
                downloadFromURLToFile(source_url, target_path)
        except Exception as err:
            print(f"Abort: {err}")
            print("Cleaning up...")
            deleteDirectory(model_dir)
            raise
|
|
|
|
|
|
def caffe2_to_onnx(caffe2_model_name, caffe2_model_dir):
    """Load a Caffe2 model from disk and convert it to an ONNX model.

    Reads ``init_net.pb``, ``predict_net.pb`` and ``value_info.json`` from
    ``caffe2_model_dir``. The init net is renamed to ``<name>_init`` and the
    predict net to ``<name>`` before conversion.

    Args:
        caffe2_model_name: Name given to the loaded nets.
        caffe2_model_dir: Directory containing the three model files.

    Returns:
        A tuple ``(onnx_model, init_net, predict_net)``.
    """

    def _read_net(file_name, net_name):
        # Parse one serialized NetDef and stamp it with the requested name.
        net = caffe2_pb2.NetDef()
        with open(os.path.join(caffe2_model_dir, file_name), "rb") as stream:
            net.ParseFromString(stream.read())
        net.name = net_name
        return net

    init_net = _read_net("init_net.pb", f"{caffe2_model_name}_init")
    predict_net = _read_net("predict_net.pb", caffe2_model_name)

    with open(os.path.join(caffe2_model_dir, "value_info.json"), "rb") as stream:
        value_info = json.load(stream)

    print(
        f"Converting Caffe2 model {caffe2_model_name} in {caffe2_model_dir} to ONNX format"
    )
    converted = caffe2.python.onnx.frontend.caffe2_net_to_onnx_model(
        init_net=init_net,
        predict_net=predict_net,
        value_info=value_info,
    )

    return converted, init_net, predict_net
|
|
|
|
|
|
def tensortype_to_ndarray(tensor_type):
    """Create a random NumPy array matching an ONNX tensor type.

    The shape is taken from ``dim_value`` of every dimension in
    ``tensor_type.shape.dim``. Values are drawn with ``np.random.rand`` and
    then cast to the NumPy dtype that corresponds to ``tensor_type.elem_type``.

    Args:
        tensor_type: An ONNX tensor type with ``elem_type`` and ``shape.dim``.

    Returns:
        A ``float32`` array for ``FLOAT`` tensors or an ``int32`` array for
        ``INT32`` tensors.

    Raises:
        NotImplementedError: If the element type is neither ``FLOAT`` nor
            ``INT32``. This is a ``RuntimeError`` subclass, matching the error
            class the previous bare ``raise`` produced.
    """
    shape = [dim.dim_value for dim in tensor_type.shape.dim]

    elem_type = tensor_type.elem_type
    if elem_type == onnx.TensorProto.FLOAT:
        dtype = np.float32
    elif elem_type == onnx.TensorProto.INT32:
        # The 32-bit integer member of TensorProto.DataType is INT32; there is
        # no member called INT.
        dtype = np.int32
    else:
        raise NotImplementedError(
            f"Unsupported ONNX tensor element type: {elem_type}"
        )

    # NOTE: rand() samples from [0, 1), so the int32 cast truncates every
    # value to 0; callers scale the array afterwards.
    return np.random.rand(*shape).astype(dtype)
|
|
|
|
|
|
def generate_test_input_data(onnx_model, scale):
    """Generate random test inputs for every real input of an ONNX model.

    A "real" input is a graph input that is not also listed as an initializer
    (i.e. not a weight stored in the model itself).

    The inputs are returned in the order they are declared in
    ``onnx_model.graph.input``. Keeping that order matters because callers
    pass the arrays positionally to the backend and save them as
    ``input_<index>.pb``; ordering them through a ``set`` would be arbitrary.

    Args:
        onnx_model: ONNX model whose graph inputs are inspected.
        scale: Factor applied to every generated array.

    Returns:
        A list of ``(input_name, ndarray)`` tuples.
    """
    initializer_names = {init.name for init in onnx_model.graph.initializer}
    return [
        (
            graph_input.name,
            tensortype_to_ndarray(graph_input.type.tensor_type) * scale,
        )
        for graph_input in onnx_model.graph.input
        if graph_input.name not in initializer_names
    ]
|
|
|
|
|
|
def generate_test_output_data(caffe2_init_net, caffe2_predict_net, inputs):
    """Run the Caffe2 model on ``inputs`` and return its outputs.

    Args:
        caffe2_init_net: Init net handed to the Caffe2 ``Predictor``.
        caffe2_predict_net: Predict net handed to the Caffe2 ``Predictor``.
        inputs: Sequence of ``(name, array)`` pairs.

    Returns:
        Whatever ``Predictor.run`` returns for the named inputs.
    """
    predictor = c2_workspace.Predictor(caffe2_init_net, caffe2_predict_net)
    named_inputs = {entry[0]: entry[1] for entry in inputs}

    result = predictor.run(named_inputs)
    # Reset the workspace once the outputs have been produced.
    c2_workspace.ResetWorkspace()
    return result
|
|
|
|
|
|
def onnx_verify(onnx_model, inputs, ref_outputs):
    """Check that the ONNX model reproduces the reference outputs.

    The model is prepared with the Caffe2 ONNX backend and run on ``inputs``.
    Its outputs must match ``ref_outputs`` to three decimal places.

    Args:
        onnx_model: ONNX model to run.
        inputs: Sequence whose items are either arrays or ``(name, array)``
            tuples; for tuples only the array is passed to the backend.
        ref_outputs: Expected outputs.

    Raises:
        AssertionError: If the outputs differ from ``ref_outputs``.
    """
    backend_rep = caffe2.python.onnx.backend.prepare(onnx_model)
    positional_inputs = [
        item[1] if isinstance(item, tuple) else item for item in inputs
    ]
    actual_outputs = backend_rep.run(inputs=positional_inputs)
    np.testing.assert_almost_equal(actual_outputs, ref_outputs, decimal=3)
|
|
|
|
|
|
# Maps each ONNX model zoo name (key) to the name of the Caffe2 model (value)
# it is generated from. The names are identical except for "squeezenet",
# which is converted from the Caffe2 model "squeezenet_old". Entries that
# are commented out are not processed by the script.
model_mapping = {
    "bvlc_alexnet": "bvlc_alexnet",
    "bvlc_googlenet": "bvlc_googlenet",
    "bvlc_reference_caffenet": "bvlc_reference_caffenet",
    "bvlc_reference_rcnn_ilsvrc13": "bvlc_reference_rcnn_ilsvrc13",
    "densenet121": "densenet121",
    #'finetune_flickr_style': 'finetune_flickr_style',
    "inception_v1": "inception_v1",
    "inception_v2": "inception_v2",
    "resnet50": "resnet50",
    "shufflenet": "shufflenet",
    "squeezenet": "squeezenet_old",
    #'vgg16': 'vgg16',
    "vgg19": "vgg19",
    "zfnet512": "zfnet512",
}
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line driver. For every entry of `model_mapping` it:
    #   1. downloads the Caffe2 model and the currently published ONNX model,
    #   2. optionally deletes the existing `test_data_set*` directories,
    #   3. converts the Caffe2 model to ONNX and rewrites `model.onnx`,
    #   4. verifies the new model against all existing test data,
    #   5. generates and saves new test data sets,
    #   6. uploads the refreshed model directory (unless --only-local).
    parser = argparse.ArgumentParser(description="Update the ONNX models.")
    parser.add_argument("-v", action="store_true", default=False, help="verbose")
    parser.add_argument(
        "--local-dir",
        type=str,
        default=os.path.expanduser("~"),
        help="local dir to store Caffe2 and ONNX models",
    )
    parser.add_argument(
        "--no-cache",
        action="store_true",
        default=False,
        help="whether use local ONNX models",
    )
    parser.add_argument(
        "--clean-test-data",
        action="store_true",
        default=False,
        help="remove the old test data",
    )
    parser.add_argument(
        "--add-test-data", type=int, default=0, help="add new test data"
    )
    parser.add_argument(
        "--only-local",
        action="store_true",
        default=False,
        help="no upload including backup",
    )

    args = parser.parse_args()
    delete_test_data = args.clean_test_data
    add_test_data = args.add_test_data
    use_cache = not args.no_cache
    only_local = args.only_local

    root_dir = args.local_dir
    caffe2_zoo_dir = os.path.join(root_dir, ".caffe2", "models")
    onnx_zoo_dir = os.path.join(root_dir, ".onnx", "models")

    for onnx_model_name in model_mapping:
        c2_model_name = model_mapping[onnx_model_name]

        print(
            f"####### Processing ONNX model {onnx_model_name} ({c2_model_name} in Caffe2) #######"
        )
        download_caffe2_model(c2_model_name, caffe2_zoo_dir, use_cache=use_cache)
        download_onnx_model(
            onnx_model_name, onnx_zoo_dir, use_cache=use_cache, only_local=only_local
        )

        onnx_model_dir = os.path.join(onnx_zoo_dir, onnx_model_name)

        if delete_test_data:
            print("Deleting all the existing test data...")
            # NB: For now, we don't delete the npz files.
            # for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
            #     os.remove(f)
            for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
                shutil.rmtree(f)

        onnx_model, c2_init_net, c2_predict_net = caffe2_to_onnx(
            c2_model_name, os.path.join(caffe2_zoo_dir, c2_model_name)
        )

        print(f"Deleteing old ONNX {onnx_model_name} model...")
        for f in glob.glob(os.path.join(onnx_model_dir, "model*")):
            os.remove(f)

        print(f"Serializing generated ONNX {onnx_model_name} model ...")
        with open(os.path.join(onnx_model_dir, "model.onnx"), "wb") as model_file:
            model_file.write(onnx_model.SerializeToString())

        print(f"Verifying model {onnx_model_name} with ONNX model checker...")
        onnx.checker.check_model(onnx_model)

        total_existing_data_set = 0
        print(f"Verifying model {onnx_model_name} with existing test data...")
        # Test data stored as .npz archives with "inputs" and "outputs" entries.
        for f in glob.glob(os.path.join(onnx_model_dir, "*.npz")):
            test_data = np.load(f, encoding="bytes")
            inputs = list(test_data["inputs"])
            ref_outputs = list(test_data["outputs"])
            onnx_verify(onnx_model, inputs, ref_outputs)
            total_existing_data_set += 1
        # Test data stored as directories of serialized TensorProto files.
        for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
            inputs = []
            inputs_num = len(glob.glob(os.path.join(f, "input_*.pb")))
            for i in range(inputs_num):
                tensor = onnx.TensorProto()
                with open(os.path.join(f, f"input_{i}.pb"), "rb") as pf:
                    tensor.ParseFromString(pf.read())
                inputs.append(numpy_helper.to_array(tensor))
            ref_outputs = []
            ref_outputs_num = len(glob.glob(os.path.join(f, "output_*.pb")))
            for i in range(ref_outputs_num):
                tensor = onnx.TensorProto()
                with open(os.path.join(f, f"output_{i}.pb"), "rb") as pf:
                    tensor.ParseFromString(pf.read())
                ref_outputs.append(numpy_helper.to_array(tensor))
            onnx_verify(onnx_model, inputs, ref_outputs)
            total_existing_data_set += 1

        # First index that does not collide with an existing data set directory.
        starting_index = 0
        while os.path.exists(
            os.path.join(onnx_model_dir, f"test_data_set_{starting_index}")
        ):
            starting_index += 1

        # Decide the number of new data sets for THIS model only. A model
        # without any test data gets 3 sets by default; the command-line value
        # is left untouched so the default does not leak into later models.
        new_data_set_count = add_test_data
        if total_existing_data_set == 0 and new_data_set_count == 0:
            new_data_set_count = 3

        print(f"Generating {new_data_set_count} sets of new test data...")
        for i in range(starting_index, new_data_set_count + starting_index):
            data_dir = os.path.join(onnx_model_dir, f"test_data_set_{i}")
            os.makedirs(data_dir)
            inputs = generate_test_input_data(onnx_model, 255)
            ref_outputs = generate_test_output_data(c2_init_net, c2_predict_net, inputs)
            onnx_verify(onnx_model, inputs, ref_outputs)
            for index, named_input in enumerate(inputs):
                tensor = numpy_helper.from_array(named_input[1])
                with open(os.path.join(data_dir, f"input_{index}.pb"), "wb") as pb_file:
                    pb_file.write(tensor.SerializeToString())
            for index, output in enumerate(ref_outputs):
                tensor = numpy_helper.from_array(output)
                with open(os.path.join(data_dir, f"output_{index}.pb"), "wb") as pb_file:
                    pb_file.write(tensor.SerializeToString())

        # Drop the references to this model's objects before uploading and
        # moving on to the next model.
        del onnx_model
        del c2_init_net
        del c2_predict_net

        upload_onnx_model(
            onnx_model_name, onnx_zoo_dir, backup=False, only_local=only_local
        )

        print("\n\n")
|