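"""Device-consistency tests for two small Caffe2 vision models.

Each test builds a model, runs its parameter initialization once, and then
uses device_checker.DeviceChecker to verify that the training net produces
matching outputs (within a tolerance) on CPU and CUDA.
"""
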
import numpy as np
import unittest

from caffe2.proto import caffe2_pb2
from caffe2.python import core, cnn, workspace, device_checker, test_util


class TestMNISTLeNet(test_util.TestCase):
    def _MNISTNetworks(self):
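        """Build LeNet-style init and train nets for MNIST using raw core.Net."""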
        init_net = core.Net("init")
        filter1 = init_net.XavierFill([], "filter1", shape=[20, 1, 5, 5])
        bias1 = init_net.ConstantFill([], "bias1", shape=[20], value=0.0)
        filter2 = init_net.XavierFill([], "filter2", shape=[50, 20, 5, 5])
        bias2 = init_net.ConstantFill([], "bias2", shape=[50], value=0.0)
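        # After two conv/pool stages on 28x28 inputs (28 -> 24 -> 12 -> 8 -> 4),
        # the flattened feature map is 50 * 4 * 4 = 800, matching W3's shape.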
        W3 = init_net.XavierFill([], "W3", shape=[500, 800])
        B3 = init_net.ConstantFill([], "B3", shape=[500], value=0.0)
        W4 = init_net.XavierFill([], "W4", shape=[10, 500])
        B4 = init_net.ConstantFill([], "B4", shape=[10], value=0.0)
        data, label = init_net.TensorProtosDBInput(
            [],
            ["data", "label"],
            batch_size=64,
            db="gen/data/mnist/mnist-train-nchw-minidb",
            db_type="minidb"
        )
        LR = init_net.ConstantFill([], "LR", shape=[1], value=-0.1)
        ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
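
        # The train net chains off the init net's output blobs: two conv/pool
        # stages, then two fully connected layers and a softmax.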
        train_net = core.Net("train")
        conv1 = train_net.Conv(
            [data, filter1, bias1],
            "conv1",
            kernel=5,
            pad=0,
            stride=1,
            order="NCHW"
        )
        pool1 = conv1.MaxPool([], ["pool1"], kernel=2, stride=2, order="NCHW")
        conv2 = pool1.Conv(
            [filter2, bias2],
            "conv2",
            kernel=5,
            pad=0,
            stride=1,
            order="NCHW"
        )
        pool2 = conv2.MaxPool([], ["pool2"], kernel=2, stride=2, order="NCHW")
        flatten2 = pool2.Flatten([], "pool2_flatten")
        softmax = (
            flatten2.FC([W3, B3], "fc3")
            .Relu([], "fc3_relu")
            .FC([W4, B4], "pred")
            .Softmax([], "softmax")
        )
        # Cross entropy and accuracy.
        xent = softmax.LabelCrossEntropy([label], "xent")
        # The loss function.
        loss = xent.AveragedLoss([], ["loss"])
        # Compute gradients of the loss for all trainable parameters.
        grad_map = train_net.AddGradientOperators([loss])
        accuracy = softmax.Accuracy([label], "accuracy")
        # Parameter update: WeightedSum computes
        # param <- 1.0 * param + (-0.1) * grad, i.e. plain SGD,
        # since ONE holds 1.0 and LR holds -0.1.
        for param in [filter1, bias1, filter2, bias2, W3, B3, W4, B4]:
            train_net.WeightedSum([param, ONE, grad_map[str(param)], LR], param)
        return init_net, train_net

    def testMNISTNetworks(self):
        # First, collect all the randomly initialized parameters.
        init_net, train_net = self._MNISTNetworks()
        workspace.ResetWorkspace()
        workspace.RunNetOnce(init_net)
        inputs = {
            str(name): workspace.FetchBlob(str(name))
            for name in workspace.Blobs()
        }
        cpu_device = caffe2_pb2.DeviceOption()
        cpu_device.device_type = caffe2_pb2.CPU
        gpu_device = caffe2_pb2.DeviceOption()
        gpu_device.device_type = caffe2_pb2.CUDA
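
        # DeviceChecker runs the net once per device option and checks that
        # all result blobs agree across devices within the given tolerance.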
        checker = device_checker.DeviceChecker(1e-2, [cpu_device, gpu_device])
        ret = checker.CheckNet(train_net.Proto(), inputs)
        self.assertEqual(ret, True)
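

# A scaled-down AlexNet (no dropout), built with the cnn.CNNModelHelper
# wrapper and checked for CPU/CUDA agreement in both NCHW and NHWC orders.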
class TestMiniAlexNet(test_util.TestCase):
    def _MiniAlexNetNoDropout(self, order):
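        """Build a miniature AlexNet (no dropout) in the given storage order."""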
        # First, AlexNet using the cnn wrapper.
        model = cnn.CNNModelHelper(order, name="alexnet")
        conv1 = model.Conv(
            "data",
            "conv1",
            3,    # input channels
            16,   # output channels
            11,   # kernel size
            ("XavierFill", {}),
            ("ConstantFill", {}),
            stride=4,
            pad=0
        )
        relu1 = model.Relu(conv1, "relu1")
        norm1 = model.LRN(relu1, "norm1", size=5, alpha=0.0001, beta=0.75)
        pool1 = model.MaxPool(norm1, "pool1", kernel=3, stride=2)
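        # group=2 splits the input and output channels into two independent
        # halves, mirroring the two-GPU channel grouping of the original AlexNet.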
        conv2 = model.GroupConv(
            pool1,
            "conv2",
            16,   # input channels
            32,   # output channels
            5,    # kernel size
            ("XavierFill", {}),
            ("ConstantFill", {"value": 0.1}),
            group=2,
            stride=1,
            pad=2
        )
        relu2 = model.Relu(conv2, "relu2")
        norm2 = model.LRN(relu2, "norm2", size=5, alpha=0.0001, beta=0.75)
        pool2 = model.MaxPool(norm2, "pool2", kernel=3, stride=2)
        conv3 = model.Conv(
            pool2,
            "conv3",
            32,   # input channels
            64,   # output channels
            3,    # kernel size
            ("XavierFill", {"std": 0.01}),
            ("ConstantFill", {}),
            pad=1
        )
        relu3 = model.Relu(conv3, "relu3")
        conv4 = model.GroupConv(
            relu3,
            "conv4",
            64,   # input channels
            64,   # output channels
            3,    # kernel size
            ("XavierFill", {}),
            ("ConstantFill", {"value": 0.1}),
            group=2,
            pad=1
        )
        relu4 = model.Relu(conv4, "relu4")
        conv5 = model.GroupConv(
            relu4,
            "conv5",
            64,   # input channels
            32,   # output channels
            3,    # kernel size
            ("XavierFill", {}),
            ("ConstantFill", {"value": 0.1}),
            group=2,
            pad=1
        )
        relu5 = model.Relu(conv5, "relu5")
        pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2)
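        # With 227x227 inputs the spatial size here is 6x6
        # (227 -> 55 -> 27 -> 13 -> 6), so fc6 sees 32 * 6 * 6 = 1152 inputs.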
        fc6 = model.FC(
            pool5, "fc6", 1152, 1024, ("XavierFill", {}),
            ("ConstantFill", {"value": 0.1})
        )
        relu6 = model.Relu(fc6, "relu6")
        fc7 = model.FC(
            relu6, "fc7", 1024, 1024, ("XavierFill", {}),
            ("ConstantFill", {"value": 0.1})
        )
        relu7 = model.Relu(fc7, "relu7")
        fc8 = model.FC(
            relu7, "fc8", 1024, 5, ("XavierFill", {}),
            ("ConstantFill", {"value": 0.0})
        )
        pred = model.Softmax(fc8, "pred")
        xent = model.LabelCrossEntropy([pred, "label"], "xent")
        loss = model.AveragedLoss([xent], ["loss"])
        model.AddGradientOperators([loss])
        return model

    def _testMiniAlexNet(self, order):
        # First, collect all the randomly initialized parameters.
        model = self._MiniAlexNetNoDropout(order)
        workspace.ResetWorkspace()
        workspace.RunNetOnce(model.param_init_net)
        inputs = {
            str(name): workspace.FetchBlob(str(name))
            for name in model.params
        }
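        # Feed a random batch of 4 images in the requested memory layout:
        # channels-first for NCHW, channels-last for NHWC.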
        if order == "NCHW":
            inputs["data"] = np.random.rand(4, 3, 227, 227).astype(np.float32)
        else:
            inputs["data"] = np.random.rand(4, 227, 227, 3).astype(np.float32)
        inputs["label"] = np.array([1, 2, 3, 4]).astype(np.int32)

        cpu_device = caffe2_pb2.DeviceOption()
        cpu_device.device_type = caffe2_pb2.CPU
        gpu_device = caffe2_pb2.DeviceOption()
        gpu_device.device_type = caffe2_pb2.CUDA

        checker = device_checker.DeviceChecker(1e-2, [cpu_device, gpu_device])
        ret = checker.CheckNet(
            model.net.Proto(),
            inputs,
            # The pooling index blobs can be sensitive to small numerical
            # differences in the input, so we skip checking them.
            ignore=['_pool1_idx', '_pool2_idx', '_pool5_idx']
        )
        self.assertEqual(ret, True)

    def testMiniAlexNet(self):
        self._testMiniAlexNet("NCHW")
        self._testMiniAlexNet("NHWC")


if __name__ == '__main__':
    if not workspace.has_gpu_support:
        print('No GPU support. Skipping gpu test.')
    elif workspace.NumberOfGPUs() == 0:
        print('No GPU device. Skipping gpu test.')
    else:
        unittest.main()