mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary: Change log - Support rectangular cropping, where the height and width of the clip crop can be set separately. This is useful because most video resolutions are non-square, such as 240p, 360p and 480p, where the width is significantly larger than the height. - Comparison of training on UCF-101 using 112x112 crops versus 112x144 crops. - https://fburl.com/i0rw6y1k - Support 14-way multi-cropping per video clip at the testing stage to improve classification accuracy. Take the left-top, central-top, right-top, left-bottom, central-bottom, right-bottom and central-central crops as well as their mirrored versions, for 14 crops in total. - Comparison on the same model trained on UCF-101, using 1 clip per video. - RGB: f41014306 (w/o) vs. f41014868 (w/ multi-cropping): `0.64099 Vs 0.65796` - OF: f41014889 (w/o) vs. f41014913 (w/ multi-cropping): `0.65796 Vs 0.67624` - Support color jittering and color lighting on RGB data for training data augmentation. - Comparison of training on UCF-101 from scratch with and without color jittering and lighting: - https://fburl.com/k69zatul Reviewed By: HengCV Differential Revision: D6962620 fbshipit-source-id: 9b43478945874142727fea351ee04417218e6606
50 lines
1.9 KiB
Python
50 lines
1.9 KiB
Python
# Copyright (c) 2016-present, Facebook, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
##############################################################################
|
|
|
|
## @package tools
|
|
# Module caffe2.python.helpers.tools
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
def image_input(
    model, blob_in, blob_out, order="NCHW", use_gpu_transform=False, **kwargs
):
    """Add an ``ImageInput`` operator to ``model.net`` and return its outputs.

    Args:
        model: Object exposing a ``net`` attribute that provides the
            ``ImageInput`` and ``NHWC2NCHW`` operator builders.
        blob_in: Input blob(s), forwarded unchanged to ``ImageInput``.
        blob_out: Sequence (list or tuple) of output blob names. The first
            entry is the image blob; the remaining entries are forwarded
            unchanged.
        order: Requested layout of the image blob. For ``"NCHW"`` (the
            default) the image output is converted to NCHW; for any other
            value the ``ImageInput`` outputs are returned untouched.
        use_gpu_transform: Only consulted when ``order == "NCHW"``. When
            truthy, ``use_gpu_transform=1`` is passed to ``ImageInput`` and
            no explicit ``NHWC2NCHW`` operator is added.
        **kwargs: Extra operator arguments forwarded to ``ImageInput``.
            Must contain ``is_test``.

    Returns:
        Whatever ``ImageInput`` returns, except on the NCHW path without GPU
        transform, where a tuple is returned whose first element is the
        output of ``NHWC2NCHW``.

    Raises:
        AssertionError: If ``is_test`` is not supplied in ``kwargs``.
    """
    # Kept as an assert so callers observe the same exception type as before.
    assert 'is_test' in kwargs, "Argument 'is_test' is required"

    if order != "NCHW":
        return model.net.ImageInput(blob_in, blob_out, **kwargs)

    if use_gpu_transform:
        # GPU transform will handle NHWC -> NCHW
        kwargs['use_gpu_transform'] = 1
        return model.net.ImageInput(blob_in, blob_out, **kwargs)

    # Emit the image into a temporary "<name>_nhwc" blob and transpose it into
    # the requested output blob. list(...) lets blob_out be a tuple as well
    # as a list.
    nhwc_blob_out = [blob_out[0] + '_nhwc'] + list(blob_out[1:])
    outputs = list(model.net.ImageInput(blob_in, nhwc_blob_out, **kwargs))
    outputs[0] = model.net.NHWC2NCHW(outputs[0], blob_out[0])
    return tuple(outputs)
|
|
|
|
|
|
def video_input(model, blob_in, blob_out, **kwargs):
    """Add a ``VideoInput`` operator to ``model.net`` and return its outputs.

    Args:
        model: Object exposing a ``net`` attribute that provides the
            ``VideoInput`` operator builder.
        blob_in: Input blob(s), forwarded unchanged to ``VideoInput``.
        blob_out: Output blob name(s), forwarded unchanged to ``VideoInput``.
        **kwargs: Extra operator arguments forwarded to ``VideoInput``.

    Returns:
        The value returned by ``model.net.VideoInput``. The number of
        outputs can vary depending on ``kwargs``.
    """
    return model.net.VideoInput(blob_in, blob_out, **kwargs)
|