mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Add 'torch/lib/THNN/' from commit '4fe7059a315d156ecd080ff7bd5b4fe3d3a9efad'
git-subtree-dir: torch/lib/THNN git-subtree-mainline:c3f0c1e2e0git-subtree-split:4fe7059a31
This commit is contained in:
commit
035eb28e18
65
torch/lib/THNN/CMakeLists.txt
Normal file
65
torch/lib/THNN/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
|
||||
CMAKE_POLICY(VERSION 2.6)
|
||||
|
||||
IF(NOT Torch_FOUND)
|
||||
FIND_PACKAGE(Torch REQUIRED)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT THNN_INSTALL_LIB_SUBDIR)
|
||||
SET(THNN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THNN install library directory")
|
||||
ENDIF()
|
||||
|
||||
# Flags
|
||||
# When using MSVC
|
||||
IF(MSVC)
|
||||
# we want to respect the standard, and we are bored of those **** .
|
||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)
|
||||
ENDIF(MSVC)
|
||||
|
||||
IF (CMAKE_VERSION VERSION_LESS "3.1")
|
||||
SET(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
|
||||
ELSE ()
|
||||
SET(CMAKE_C_STANDARD 99)
|
||||
ENDIF ()
|
||||
|
||||
# OpenMP support?
|
||||
SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?")
|
||||
IF (APPLE AND CMAKE_COMPILER_IS_GNUCC)
|
||||
EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
|
||||
STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
|
||||
MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}")
|
||||
IF (DARWIN_VERSION GREATER 9)
|
||||
SET(APPLE_OPENMP_SUCKS 1)
|
||||
ENDIF (DARWIN_VERSION GREATER 9)
|
||||
EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion
|
||||
OUTPUT_VARIABLE GCC_VERSION)
|
||||
IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2)
|
||||
MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)")
|
||||
MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas")
|
||||
SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" FORCE)
|
||||
ENDIF ()
|
||||
ENDIF ()
|
||||
|
||||
IF (WITH_OPENMP)
|
||||
FIND_PACKAGE(OpenMP)
|
||||
IF(OPENMP_FOUND)
|
||||
MESSAGE(STATUS "Compiling with OpenMP support")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
||||
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
|
||||
ENDIF(OPENMP_FOUND)
|
||||
ENDIF (WITH_OPENMP)
|
||||
|
||||
LINK_DIRECTORIES("${Torch_INSTALL_LIB}")
|
||||
|
||||
SET(src init.c)
|
||||
ADD_LIBRARY(THNN MODULE init.c)
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
### Torch packages supposes libraries prefix is "lib"
|
||||
SET_TARGET_PROPERTIES(THNN PROPERTIES
|
||||
PREFIX "lib"
|
||||
IMPORT_PREFIX "lib")
|
||||
TARGET_LINK_LIBRARIES(THNN TH)
|
||||
|
||||
INSTALL(TARGETS THNN LIBRARY DESTINATION ${THNN_INSTALL_LIB_SUBDIR})
|
||||
32
torch/lib/THNN/README.md
Normal file
32
torch/lib/THNN/README.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# THNN
|
||||
|
||||
THNN is a library that gathers nn's C implementations of neural network modules. It's entirely free of Lua dependency and therefore can be used in any application that has a C FFI. Please note that it only contains quite low level functions, and an object oriented C/C++ wrapper will be created soon as another library.
|
||||
|
||||
There is also a CUDA counterpart of THNN (THCUNN) in the [cunn repository](https://github.com/torch/cunn/tree/master/lib/THCUNN).
|
||||
|
||||
## Links
|
||||
|
||||
* [API reference](doc/api_reference.md)
|
||||
* [Style guidelines](doc/style_guidelines.md)
|
||||
|
||||
## Motivation
|
||||
|
||||
Torch's neural network package (nn) provided many optimized C implementations of modules, but the source files contained Lua specific code and headers so they couldn't be easily compiled and included anywhere else.
|
||||
|
||||
THNN is based on the same code, but is written in pure C, so it can be easily included in other code. **Future C implementations should be committed to THNN.**
|
||||
|
||||
## API
|
||||
|
||||
THNN is a purely functional library. It provides 2-3 functions for each module, that perform the most important operations:
|
||||
|
||||
* **updateOutput** - applies the module to an input
|
||||
* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
|
||||
* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
|
||||
|
||||
For information on argument types please check the [API reference](doc/api_reference.md).
|
||||
|
||||
## Developer docs
|
||||
|
||||
* [Style guidelines](doc/style_guidelines.md)
|
||||
|
||||
This section will be expanded when FFI refactoring will be finished.
|
||||
25
torch/lib/THNN/THNN.h
Normal file
25
torch/lib/THNN/THNN.h
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#ifndef THNN_H
|
||||
#define THNN_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <TH.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME)
|
||||
|
||||
#define THIndexTensor THLongTensor
|
||||
#define THIndexTensor_(NAME) THLongTensor_ ## NAME
|
||||
|
||||
#define THIntegerTensor THIntTensor
|
||||
#define THIntegerTensor_(NAME) THIntTensor_ ## NAME
|
||||
|
||||
typedef long THIndex_t;
|
||||
typedef int THInteger_t;
|
||||
typedef void THNNState;
|
||||
|
||||
#include "generic/THNN.h"
|
||||
#include <THGenerateFloatTypes.h>
|
||||
|
||||
#endif
|
||||
1509
torch/lib/THNN/doc/api_reference.md
Normal file
1509
torch/lib/THNN/doc/api_reference.md
Normal file
File diff suppressed because it is too large
Load Diff
106
torch/lib/THNN/doc/generate_reference.lua
Normal file
106
torch/lib/THNN/doc/generate_reference.lua
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
--[[
|
||||
This script regenerates api_reference.md based on comments placed in THNN.h.
|
||||
]]--
|
||||
|
||||
local header = [[
|
||||
# API docs
|
||||
|
||||
This document only describes a THNN API. For a thorough review of all modules present here please refer to [nn's docs](http://github.com/torch/nn/tree/master/doc).
|
||||
|
||||
### Note on function names
|
||||
|
||||
Please remember, that because C doesn't support function overloading, functions taking different tensor types have different names. So e.g. for an Abs module, there are actually two updateOutput functions:
|
||||
|
||||
* `void THNN_FloatAbs_updateOutput(...)`
|
||||
* `void THNN_DoubleAbs_updateOutput(...)`
|
||||
|
||||
In these docs such function will be referred to as `void THNN_Abs_updateOutput(...)`, and it's up to developer to add a type prefix. `real` is an alias for that type.
|
||||
|
||||
### Argument types
|
||||
|
||||
Some arguments have additional tags placed in square brackets:
|
||||
* **[OUT]** - This is the output argument. It will be reshaped if needed.
|
||||
* **[OPTIONAL]** - This argument is optional and can be safely set to NULL
|
||||
* **[BUFFER]** - A buffer. `updateGradInput` and `accGradParameters` should get the same buffers that were used in `updateOutput` call.
|
||||
* **[MODIFIED]** - Some functions accept an `inplace` flag. If set to true, this argument might be modified (in addition to the output).
|
||||
|
||||
## Module list
|
||||
|
||||
These are all modules implemented in THNN:
|
||||
|
||||
]]
|
||||
|
||||
local hfile = io.open('../generic/THNN.h', 'r')
|
||||
local lines = hfile:read('*a'):split('\n')
|
||||
hfile:close()
|
||||
|
||||
-- Parse input
|
||||
local declarations = {}
|
||||
local current_declaration
|
||||
local declaration_module
|
||||
for i,line in ipairs(lines) do
|
||||
if line:sub(1, 6) == 'TH_API' then
|
||||
current_declaration = ''
|
||||
declaration_module = line:match('THNN_%((.+)_.+%)')
|
||||
end
|
||||
|
||||
if current_declaration then
|
||||
current_declaration = current_declaration .. line .. '\n'
|
||||
end
|
||||
|
||||
if line:match('%);') then
|
||||
current_declaration = current_declaration:sub(1, -2) -- remove a trailing newline
|
||||
declarations[declaration_module] = declarations[declaration_module] or {}
|
||||
table.insert(declarations[declaration_module], current_declaration)
|
||||
current_declaration = nil
|
||||
declaration_module = nil
|
||||
end
|
||||
end
|
||||
declarations["unfolded"] = nil
|
||||
|
||||
-- Sort modules
|
||||
modules = {}
|
||||
for k,_ in pairs(declarations) do table.insert(modules, k) end
|
||||
table.sort(modules)
|
||||
|
||||
-- Create an index
|
||||
local outfile = io.open('api_reference.md', 'w')
|
||||
outfile:write(header)
|
||||
for i, name in ipairs(modules) do
|
||||
outfile:write(string.format('* [%s](#%s)\n', name, name:lower()))
|
||||
end
|
||||
outfile:write('\n')
|
||||
|
||||
-- Write proper docs
|
||||
for i,name in ipairs(modules) do
|
||||
outfile:write('## ' .. name ..'\n')
|
||||
|
||||
for i,declaration in ipairs(declarations[name]) do
|
||||
|
||||
-- Write source code
|
||||
outfile:write('```C' .. '\n')
|
||||
local declaration_lines = declaration:split('\n')
|
||||
for i, line in ipairs(declaration_lines) do
|
||||
if i == 1 then
|
||||
line = line:gsub('TH_API ', ''):gsub('%(', ''):gsub('%)', '') .. '(' -- remove macro junk
|
||||
else
|
||||
line = line:gsub('%s*//.*$', '') -- remove the comment
|
||||
end
|
||||
outfile:write(line .. '\n')
|
||||
end
|
||||
outfile:write('```' .. '\n')
|
||||
|
||||
-- Describe arguments
|
||||
table.remove(declaration_lines, 1)
|
||||
for i,line in ipairs(declaration_lines) do
|
||||
local param, comment = line:match('^%s*(.*),%s*// (.*)$')
|
||||
if param == nil then param, comment = line:match('^%s*(.*)%);%s*// (.*)$') end
|
||||
|
||||
if param ~= nil then
|
||||
comment = comment:gsub('%[', '%*%*%['):gsub('%]', '%]%*%*') -- use bold font for tags
|
||||
outfile:write(string.format('`%s` - %s\n<br/>\n', param, comment))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
outfile:close()
|
||||
59
torch/lib/THNN/doc/style_guidelines.md
Normal file
59
torch/lib/THNN/doc/style_guidelines.md
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
## API design guidelines
|
||||
|
||||
Functions should return `void`.
|
||||
|
||||
All functions should accept arguments in the following order. `...` represent any module-specific parameters or buffers, disregarding whether they are used for writing or reading. Arguments in `...` below should be ordered like this:
|
||||
```
|
||||
[weight], [bias], [any buffers], [additional arguments], [optional arguments]
|
||||
```
|
||||
|
||||
### Modules
|
||||
```
|
||||
updateOutput: state, input, output, ...
|
||||
updateGradInput: state, input, gradOutput, gradInput, ...
|
||||
accGradParameters: state, input, gradOutput, [gradWeight], [gradBias], ...
|
||||
```
|
||||
|
||||
e.g.
|
||||
```C
|
||||
void THNN_(HardShrink_updateGradInput)(
|
||||
THNNState* state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
real lambda)
|
||||
```
|
||||
|
||||
### Criterions
|
||||
```
|
||||
updateOutput: state, input, target, output, ...
|
||||
updateGradInput: state, input, target, gradInput, ...
|
||||
```
|
||||
|
||||
e.g.
|
||||
|
||||
```C
|
||||
void THNN_(ClassNLLCriterion_updateOutput)(
|
||||
THNNState* state,
|
||||
THTensor *input,
|
||||
THLongTensor *target,
|
||||
THTensor *output,
|
||||
THTensor *weights,
|
||||
THTensor *total_weight,
|
||||
bool sizeAverage)
|
||||
```
|
||||
|
||||
## Code style guide
|
||||
|
||||
```C
|
||||
void THNN_Linear_updateOutput(
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias);
|
||||
//<- 10 ->
|
||||
```
|
||||
|
||||
All arguments should start on a new line after function name, and they should be indented using 10 spaces.
|
||||
|
||||
Use 2 spaces for block indentation.
|
||||
27
torch/lib/THNN/generic/Abs.c
Normal file
27
torch/lib/THNN/generic/Abs.c
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/Abs.c"
|
||||
#else
|
||||
|
||||
void THNN_(Abs_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output)
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
THTensor_(abs)(output, input);
|
||||
}
|
||||
|
||||
void THNN_(Abs_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput)
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
|
||||
real z = *input_data;
|
||||
*gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
39
torch/lib/THNN/generic/AbsCriterion.c
Normal file
39
torch/lib/THNN/generic/AbsCriterion.c
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/AbsCriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(AbsCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real sum = 0;
|
||||
|
||||
TH_TENSOR_APPLY2(real, input, real, target,
|
||||
sum += fabs(*input_data - *target_data);
|
||||
);
|
||||
|
||||
if (sizeAverage)
|
||||
sum /= THTensor_(nElement)(input);
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
}
|
||||
|
||||
void THNN_(AbsCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
|
||||
*gradInput_data = (*input_data - *target_data) >= 0 ? norm : -norm;
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
144
torch/lib/THNN/generic/BatchNormalization.c
Normal file
144
torch/lib/THNN/generic/BatchNormalization.c
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/BatchNormalization.c"
|
||||
#else
|
||||
|
||||
void THNN_(BatchNormalization_updateOutput)(
|
||||
THNNState *state, THTensor *input, THTensor *output,
|
||||
THTensor *weight, THTensor *bias,
|
||||
THTensor *running_mean, THTensor *running_var,
|
||||
THTensor *save_mean, THTensor *save_std,
|
||||
bool train, double momentum, double eps)
|
||||
{
|
||||
long nInput = THTensor_(size)(input, 1);
|
||||
long f,n = THTensor_(nElement)(input) / nInput;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (f = 0; f < nInput; ++f) {
|
||||
THTensor *in = THTensor_(newSelect)(input, 1, f);
|
||||
THTensor *out = THTensor_(newSelect)(output, 1, f);
|
||||
|
||||
real mean, invstd;
|
||||
|
||||
if (train) {
|
||||
// compute mean per input
|
||||
accreal sum = 0;
|
||||
TH_TENSOR_APPLY(real, in, sum += *in_data;);
|
||||
|
||||
mean = (real) sum / n;
|
||||
THTensor_(set1d)(save_mean, f, (real) mean);
|
||||
|
||||
// compute variance per input
|
||||
sum = 0;
|
||||
TH_TENSOR_APPLY(real, in,
|
||||
sum += (*in_data - mean) * (*in_data - mean););
|
||||
|
||||
if (sum == 0 && eps == 0.0) {
|
||||
invstd = 0;
|
||||
} else {
|
||||
invstd = (real) (1 / sqrt(sum/n + eps));
|
||||
}
|
||||
THTensor_(set1d)(save_std, f, (real) invstd);
|
||||
|
||||
// update running averages
|
||||
THTensor_(set1d)(running_mean, f,
|
||||
(real) (momentum * mean + (1 - momentum) * THTensor_(get1d)(running_mean, f)));
|
||||
|
||||
accreal unbiased_var = sum / (n - 1);
|
||||
THTensor_(set1d)(running_var, f,
|
||||
(real) (momentum * unbiased_var + (1 - momentum) * THTensor_(get1d)(running_var, f)));
|
||||
} else {
|
||||
mean = THTensor_(get1d)(running_mean, f);
|
||||
invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
|
||||
}
|
||||
|
||||
// compute output
|
||||
real w = weight ? THTensor_(get1d)(weight, f) : 1;
|
||||
real b = bias ? THTensor_(get1d)(bias, f) : 0;
|
||||
|
||||
TH_TENSOR_APPLY2(real, in, real, out,
|
||||
*out_data = (real) (((*in_data - mean) * invstd) * w + b););
|
||||
|
||||
THTensor_(free)(out);
|
||||
THTensor_(free)(in);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(BatchNormalization_backward)(
|
||||
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput,
|
||||
THTensor *gradWeight, THTensor *gradBias, THTensor *weight,
|
||||
THTensor *running_mean, THTensor *running_var,
|
||||
THTensor *save_mean, THTensor *save_std,
|
||||
bool train, double scale, double eps)
|
||||
{
|
||||
long nInput = THTensor_(size)(input, 1);
|
||||
long f,n = THTensor_(nElement)(input) / nInput;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (f = 0; f < nInput; ++f) {
|
||||
THTensor *in = THTensor_(newSelect)(input, 1, f);
|
||||
THTensor *gradOut = THTensor_(newSelect)(gradOutput, 1, f);
|
||||
real w = weight ? THTensor_(get1d)(weight, f) : 1;
|
||||
real mean, invstd;
|
||||
if (train) {
|
||||
mean = THTensor_(get1d)(save_mean, f);
|
||||
invstd = THTensor_(get1d)(save_std, f);
|
||||
} else {
|
||||
mean = THTensor_(get1d)(running_mean, f);
|
||||
invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
|
||||
}
|
||||
|
||||
// sum over all gradOutput in feature plane
|
||||
accreal sum = 0;
|
||||
TH_TENSOR_APPLY(real, gradOut, sum += *gradOut_data;);
|
||||
|
||||
// dot product of the Q(X) and gradOuput
|
||||
accreal dotp = 0;
|
||||
TH_TENSOR_APPLY2(real, in, real, gradOut,
|
||||
dotp += (*in_data - mean) * (*gradOut_data););
|
||||
|
||||
if (gradInput) {
|
||||
THTensor *gradIn = THTensor_(newSelect)(gradInput, 1, f);
|
||||
|
||||
if (train) {
|
||||
// when in training mode
|
||||
// Q(X) = X - E[x] ; i.e. input centered to zero mean
|
||||
// Y = Q(X) / σ ; i.e. BN output before weight and bias
|
||||
// dL/dX = (Q(dL/dY) - dot(Y, dL/dY) * Y) / σ * w
|
||||
|
||||
// projection of gradOutput on to output scaled by std
|
||||
real k = (real) dotp * invstd * invstd / n;
|
||||
TH_TENSOR_APPLY2(real, gradIn, real, in,
|
||||
*gradIn_data = (*in_data - mean) * k;);
|
||||
|
||||
accreal gradMean = sum / n;
|
||||
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
|
||||
*gradIn_data = (*gradOut_data - gradMean - *gradIn_data) * invstd * w;);
|
||||
|
||||
} else {
|
||||
// when in evaluation mode
|
||||
// Q(X) = X - running_mean ; i.e. input centered to zero mean
|
||||
// Y = Q(X) / running_std ; i.e. BN output before weight and bias
|
||||
// dL/dX = w / running_std
|
||||
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
|
||||
*gradIn_data = *gradOut_data * invstd * w;);
|
||||
}
|
||||
|
||||
THTensor_(free)(gradIn);
|
||||
}
|
||||
|
||||
if (gradWeight) {
|
||||
real val = THTensor_(get1d)(gradWeight, f);
|
||||
THTensor_(set1d)(gradWeight, f, val + scale * dotp * invstd);
|
||||
}
|
||||
|
||||
if (gradBias) {
|
||||
real val = THTensor_(get1d)(gradBias, f);
|
||||
THTensor_(set1d)(gradBias, f, val + scale * sum);
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOut);
|
||||
THTensor_(free)(in);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
147
torch/lib/THNN/generic/ClassNLLCriterion.c
Normal file
147
torch/lib/THNN/generic/ClassNLLCriterion.c
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/ClassNLLCriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(ClassNLLCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THIndexTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage,
|
||||
THTensor *weights,
|
||||
THTensor *total_weight)
|
||||
{
|
||||
int n_dims = THTensor_(nDimension)(input);
|
||||
int n_classes = THTensor_(size)(input, n_dims - 1);
|
||||
|
||||
if (THIndexTensor_(nDimension)(target) > 1) {
|
||||
THError("multi-target not supported");
|
||||
}
|
||||
if (THTensor_(nDimension)(input) > 2) {
|
||||
THError("input tensor should be 1D or 2D");
|
||||
}
|
||||
if (weights && THTensor_(nElement)(weights) != n_classes) {
|
||||
THError("weight tensor should be defined either for all or no classes");
|
||||
}
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
target = THIndexTensor_(newContiguous)(target);
|
||||
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
|
||||
|
||||
real *input_data = THTensor_(data)(input);
|
||||
THIndex_t *target_data = THIndexTensor_(data)(target);
|
||||
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
|
||||
real *output_data = THTensor_(data)(output);
|
||||
real *total_weight_data = THTensor_(data)(total_weight);
|
||||
|
||||
output_data[0] = total_weight_data[0] = 0.0;
|
||||
|
||||
if (THTensor_(nDimension)(input) == 1) {
|
||||
int cur_target = target_data[0] - TH_INDEX_BASE;
|
||||
THAssert(cur_target >= 0 && cur_target < n_classes);
|
||||
total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
|
||||
output_data[0] = -input_data[cur_target] * total_weight_data[0];
|
||||
} else if (THTensor_(nDimension)(input) == 2) {
|
||||
int batch_size = THTensor_(size)(input, 0);
|
||||
THAssert(THIndexTensor_(size)(target, 0) == batch_size);
|
||||
|
||||
int n_target = THTensor_(size)(input, 1);
|
||||
|
||||
int i;
|
||||
for (i = 0; i < batch_size; i++) {
|
||||
int cur_target = target_data[i] - TH_INDEX_BASE;
|
||||
THAssert(cur_target >= 0 && cur_target < n_classes);
|
||||
|
||||
real cur_weight = weights ? weights_data[cur_target] : 1.0f;
|
||||
total_weight_data[0] += cur_weight;
|
||||
output_data[0] -= input_data[i * n_target + cur_target] * cur_weight;
|
||||
}
|
||||
}
|
||||
|
||||
if (sizeAverage && total_weight_data[0]) {
|
||||
output_data[0] /= total_weight_data[0];
|
||||
}
|
||||
|
||||
if (weights) {
|
||||
THTensor_(free)(weights);
|
||||
}
|
||||
THTensor_(free)(input);
|
||||
THIndexTensor_(free)(target);
|
||||
}
|
||||
|
||||
void THNN_(ClassNLLCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THIndexTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage,
|
||||
THTensor *weights,
|
||||
THTensor *total_weight)
|
||||
{
|
||||
int n_dims = THTensor_(nDimension)(input);
|
||||
int n_classes = THTensor_(size)(input, n_dims - 1);
|
||||
|
||||
if (!THTensor_(isContiguous)(gradInput)) {
|
||||
THError("gradInput must be contiguous");
|
||||
}
|
||||
|
||||
real *total_weight_data = THTensor_(data)(total_weight);
|
||||
|
||||
if (!(*total_weight_data > 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (THIndexTensor_(nDimension)(target) > 1) {
|
||||
THError("multi-target not supported");
|
||||
}
|
||||
|
||||
if (THTensor_(nDimension)(input) > 2) {
|
||||
THError("input tensor should be 1D or 2D");
|
||||
}
|
||||
|
||||
if (weights && THTensor_(nElement)(weights) != n_classes) {
|
||||
THError("weight tensor should be defined either for all or no classes");
|
||||
}
|
||||
|
||||
target = THIndexTensor_(newContiguous)(target);
|
||||
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
|
||||
|
||||
THIndex_t *target_data = THIndexTensor_(data)(target);
|
||||
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
|
||||
real *gradInput_data = THTensor_(data)(gradInput);
|
||||
|
||||
if (THTensor_(nDimension)(input) == 1) {
|
||||
int cur_target = target_data[0] - TH_INDEX_BASE;
|
||||
THAssert(cur_target >= 0 && cur_target < n_classes);
|
||||
|
||||
gradInput_data[cur_target] =
|
||||
(!sizeAverage && weights) ? -weights_data[cur_target] : -1;
|
||||
|
||||
} else if (THTensor_(nDimension)(input) == 2) {
|
||||
int batch_size = THTensor_(size)(input, 0);
|
||||
THAssert(THIndexTensor_(size)(target, 0) == batch_size);
|
||||
|
||||
int n_target = THTensor_(size)(input, 1);
|
||||
|
||||
int i;
|
||||
for (i = 0; i < batch_size; i++){
|
||||
int cur_target = target_data[i] - TH_INDEX_BASE;
|
||||
|
||||
THAssert(cur_target >= 0 && cur_target < n_classes);
|
||||
|
||||
gradInput_data[i * n_target + cur_target] =
|
||||
-(weights ? weights_data[cur_target] : 1.0f);
|
||||
|
||||
if (sizeAverage && *total_weight_data) {
|
||||
gradInput_data[i * n_target + cur_target] /= *total_weight_data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
THIndexTensor_(free)(target);
|
||||
if (weights) {
|
||||
THTensor_(free)(weights);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
39
torch/lib/THNN/generic/DistKLDivCriterion.c
Normal file
39
torch/lib/THNN/generic/DistKLDivCriterion.c
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/DistKLDivCriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(DistKLDivCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real sum = 0;
|
||||
|
||||
TH_TENSOR_APPLY2(real, input, real, target,
|
||||
sum += *target_data > 0 ? *target_data * (log(*target_data) - *input_data) : 0;
|
||||
);
|
||||
|
||||
if (sizeAverage)
|
||||
sum /= THTensor_(nElement)(input);
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
}
|
||||
|
||||
void THNN_(DistKLDivCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
|
||||
*gradInput_data = *target_data > 0 ? norm * (-*target_data) : 0;
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
51
torch/lib/THNN/generic/ELU.c
Normal file
51
torch/lib/THNN/generic/ELU.c
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/ELU.c"
|
||||
#else
|
||||
|
||||
void THNN_(ELU_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
real alpha,
|
||||
bool inplace)
|
||||
{
|
||||
if(inplace) {
|
||||
TH_TENSOR_APPLY(real, input,
|
||||
if(*input_data <= 0) {
|
||||
*input_data = (exp(*input_data) - 1) * alpha;
|
||||
}
|
||||
);
|
||||
THTensor_(set)(output, input);
|
||||
} else {
|
||||
THTensor_(resizeAs)(output, input);
|
||||
TH_TENSOR_APPLY2(real, input, real, output,
|
||||
*output_data = *input_data <= 0 ? (exp(*input_data)-1)*alpha : *input_data;
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(ELU_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *output,
|
||||
real alpha,
|
||||
bool inplace)
|
||||
{
|
||||
if(inplace) {
|
||||
TH_TENSOR_APPLY2(real, gradOutput, real, output,
|
||||
if(*output_data <= 0) {
|
||||
*gradOutput_data *= *output_data + alpha;
|
||||
}
|
||||
);
|
||||
THTensor_(set)(gradInput, gradOutput);
|
||||
} else {
|
||||
THTensor_(resizeAs)(gradInput, output);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
|
||||
*gradInput_data = *output_data <= 0 ? *gradOutput_data * (*output_data + alpha) : *gradOutput_data;
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
39
torch/lib/THNN/generic/HardShrink.c
Normal file
39
torch/lib/THNN/generic/HardShrink.c
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/HardShrink.c"
|
||||
#else
|
||||
|
||||
void THNN_(HardShrink_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
real lambda)
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
|
||||
TH_TENSOR_APPLY2(real, output, real, input,
|
||||
if (*input_data > lambda)
|
||||
*output_data = *input_data;
|
||||
else if (*input_data < -lambda)
|
||||
*output_data = *input_data;
|
||||
else
|
||||
*output_data = 0;
|
||||
);
|
||||
}
|
||||
|
||||
void THNN_(HardShrink_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
real lambda)
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
|
||||
if (*input_data > lambda || *input_data < -lambda)
|
||||
*gradInput_data = *gradOutput_data;
|
||||
else
|
||||
*gradInput_data = 0;
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
127
torch/lib/THNN/generic/HardTanh.c
Normal file
127
torch/lib/THNN/generic/HardTanh.c
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/HardTanh.c"
|
||||
#else
|
||||
|
||||
void THNN_(HardTanh_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
real min_val,
|
||||
real max_val,
|
||||
bool inplace)
|
||||
{
|
||||
if (inplace)
|
||||
THTensor_(set)(output, input);
|
||||
else
|
||||
THTensor_(resizeAs)(output, input);
|
||||
|
||||
if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
|
||||
{
|
||||
if (inplace)
|
||||
TH_TENSOR_APPLY(real, input,
|
||||
if (*input_data < min_val)
|
||||
*input_data = min_val;
|
||||
else if (*input_data > max_val)
|
||||
*input_data = max_val;
|
||||
);
|
||||
TH_TENSOR_APPLY2(real, output, real, input,
|
||||
if (*input_data < min_val)
|
||||
*output_data = min_val;
|
||||
else if (*input_data <= max_val)
|
||||
*output_data = *input_data;
|
||||
else
|
||||
*output_data = max_val;
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
real* ptr_input = THTensor_(data)(input);
|
||||
real* ptr_output = THTensor_(data)(output);
|
||||
long i;
|
||||
long n = THTensor_(nElement)(input);
|
||||
|
||||
if (inplace)
|
||||
#pragma omp parallel for private(i)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ptr_input[i] < min_val)
|
||||
ptr_input[i] = min_val;
|
||||
else if (ptr_input[i] > max_val)
|
||||
ptr_input[i] = max_val;
|
||||
}
|
||||
else
|
||||
#pragma omp parallel for private(i)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ptr_input[i] < min_val)
|
||||
ptr_output[i] = min_val;
|
||||
else if (ptr_input[i] <= max_val)
|
||||
ptr_output[i] = ptr_input[i];
|
||||
else
|
||||
ptr_output[i] = max_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(HardTanh_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
real min_val,
|
||||
real max_val,
|
||||
bool inplace)
|
||||
{
|
||||
if (inplace)
|
||||
THTensor_(set)(gradInput, gradOutput);
|
||||
else
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
|
||||
if (input->nDimension == 1 ||
|
||||
!THTensor_(isContiguous)(input) ||
|
||||
!THTensor_(isContiguous)(gradOutput) ||
|
||||
!THTensor_(isContiguous)(gradInput))
|
||||
{
|
||||
if (inplace)
|
||||
{
|
||||
TH_TENSOR_APPLY2(real, gradOutput, real, input,
|
||||
if (*input_data < min_val || *input_data > max_val)
|
||||
*gradOutput_data = 0;
|
||||
);
|
||||
}
|
||||
else
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
|
||||
if (*input_data < min_val || *input_data > max_val)
|
||||
*gradInput_data = 0;
|
||||
else
|
||||
*gradInput_data = *gradOutput_data;
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
real* ptr_gradOutput = THTensor_(data)(gradOutput);
|
||||
real* ptr_gradInput = THTensor_(data)(gradInput);
|
||||
real* ptr_input = THTensor_(data)(input);
|
||||
long i;
|
||||
long n = THTensor_(nElement)(input);
|
||||
|
||||
if (inplace)
|
||||
#pragma omp parallel for private(i)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ptr_input[i] <= min_val || ptr_input[i] >= max_val)
|
||||
ptr_gradInput[i] = 0;
|
||||
}
|
||||
else
|
||||
#pragma omp parallel for private(i)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ptr_input[i] < min_val || ptr_input[i] > max_val)
|
||||
ptr_gradInput[i] = 0;
|
||||
else
|
||||
ptr_gradInput[i] = ptr_gradOutput[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
36
torch/lib/THNN/generic/L1Cost.c
Normal file
36
torch/lib/THNN/generic/L1Cost.c
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/L1Cost.c"
|
||||
#else
|
||||
|
||||
void THNN_(L1Cost_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output)
|
||||
{
|
||||
accreal sum = 0;
|
||||
|
||||
TH_TENSOR_APPLY(real, input,
|
||||
sum += fabs(*input_data);
|
||||
);
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
}
|
||||
|
||||
void THNN_(L1Cost_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput)
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY2(real, gradInput, real, input,
|
||||
if (*input_data > 0)
|
||||
*gradInput_data = 1;
|
||||
else if (*input_data < 0)
|
||||
*gradInput_data = -1;
|
||||
else
|
||||
*gradInput_data = 0;
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
54
torch/lib/THNN/generic/LeakyReLU.c
Normal file
54
torch/lib/THNN/generic/LeakyReLU.c
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/LeakyReLU.c"
|
||||
#else
|
||||
|
||||
void THNN_(LeakyReLU_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
real negval,
|
||||
bool inplace)
|
||||
{
|
||||
if (inplace)
|
||||
{
|
||||
TH_TENSOR_APPLY(real, input,
|
||||
if (*input_data <= 0)
|
||||
*input_data *= negval;
|
||||
);
|
||||
THTensor_(set)(output, input);
|
||||
}
|
||||
else
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
TH_TENSOR_APPLY2(real, output, real, input,
|
||||
*output_data = *input_data > 0 ? *input_data : *input_data * negval;
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(LeakyReLU_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
real negval,
|
||||
bool inplace)
|
||||
{
|
||||
if (inplace)
|
||||
{
|
||||
TH_TENSOR_APPLY2(real, gradOutput, real, input,
|
||||
if (*input_data <= 0)
|
||||
*gradOutput_data *= negval;
|
||||
);
|
||||
THTensor_(set)(gradInput, gradOutput);
|
||||
}
|
||||
else
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
|
||||
*gradInput_data = *input_data > 0 ? *gradOutput_data : *gradOutput_data * negval;
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
35
torch/lib/THNN/generic/LogSigmoid.c
Normal file
35
torch/lib/THNN/generic/LogSigmoid.c
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/LogSigmoid.c"
|
||||
#else
|
||||
|
||||
void THNN_(LogSigmoid_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *buffer)
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
THTensor_(resizeAs)(buffer, input);
|
||||
|
||||
TH_TENSOR_APPLY3(real, output, real, input, real, buffer,
|
||||
real z = exp(-*input_data);
|
||||
*buffer_data = z;
|
||||
*output_data = -log(1. + z);
|
||||
);
|
||||
}
|
||||
|
||||
void THNN_(LogSigmoid_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *buffer)
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, buffer);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer,
|
||||
real z = *buffer_data;
|
||||
*gradInput_data = *gradOutput_data * z / (1. + z);
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
110
torch/lib/THNN/generic/LogSoftMax.c
Normal file
110
torch/lib/THNN/generic/LogSoftMax.c
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/LogSoftMax.c"
|
||||
#else
|
||||
|
||||
void THNN_(LogSoftMax_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output)
|
||||
{
|
||||
real *input_data, *output_data;
|
||||
long nframe = 0, dim = 0;
|
||||
long t, d;
|
||||
|
||||
if (input->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
}
|
||||
else if (input->nDimension == 2)
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
THArgCheck(0, 2, "vector or matrix expected");
|
||||
}
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
THTensor_(resizeAs)(output, input);
|
||||
|
||||
real *input_data0 = THTensor_(data)(input);
|
||||
real *output_data0 = THTensor_(data)(output);
|
||||
|
||||
accreal logsum;
|
||||
real maxInput;
|
||||
#pragma omp parallel for private(t, d, maxInput, logsum, input_data, output_data)
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
logsum = 0;
|
||||
maxInput = -THInf;
|
||||
input_data = input_data0 + dim*t;
|
||||
output_data = output_data0 + dim*t;
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
maxInput = THMax(maxInput, input_data[d]);
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
logsum += exp(input_data[d] - maxInput);
|
||||
logsum = maxInput + log(logsum);
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
output_data[d] = input_data[d] - logsum;
|
||||
}
|
||||
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
void THNN_(LogSoftMax_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *output)
|
||||
{
|
||||
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
real *gradInput_data, *gradOutput_data, *output_data;
|
||||
long nframe = 0, dim = 0;
|
||||
long t, d;
|
||||
|
||||
if (output->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = output->size[0];
|
||||
}
|
||||
else if (output->nDimension == 2)
|
||||
{
|
||||
nframe = output->size[0];
|
||||
dim = output->size[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
THError("vector or matrix expected");
|
||||
}
|
||||
|
||||
THTensor_(resizeAs)(gradInput, output);
|
||||
real *gradInput_data0 = THTensor_(data)(gradInput);
|
||||
real *output_data0 = THTensor_(data)(output);
|
||||
real *gradOutput_data0 = THTensor_(data)(gradOutput);
|
||||
accreal sum;
|
||||
#pragma omp parallel for private(t, sum, d, gradInput_data, output_data, gradOutput_data)
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
sum = 0;
|
||||
gradInput_data = gradInput_data0 + dim*t;
|
||||
output_data = output_data0 + dim*t;
|
||||
gradOutput_data = gradOutput_data0 + dim*t;
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
sum += gradOutput_data[d];
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum;
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
213
torch/lib/THNN/generic/LookupTable.c
Normal file
213
torch/lib/THNN/generic/LookupTable.c
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/LookupTable.c"
|
||||
#else
|
||||
|
||||
static void THNN_(LookupTable_resetCount)(
|
||||
THInteger_t *count_data,
|
||||
THIndexTensor *input)
|
||||
{
|
||||
int i;
|
||||
THIndex_t *input_data = THIndexTensor_(data)(input);
|
||||
long numel = THIndexTensor_(nElement)(input);
|
||||
|
||||
for (i = 0; i<numel; i++)
|
||||
{
|
||||
long k = input_data[i] - TH_INDEX_BASE;
|
||||
count_data[k] = 0;
|
||||
}
|
||||
for (i = 0; i<numel; i++)
|
||||
{
|
||||
long k = input_data[i] - TH_INDEX_BASE;
|
||||
count_data[k]++;
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(LookupTable_accGradParameters)(
|
||||
THNNState *state,
|
||||
THIndexTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THIntegerTensor *count,
|
||||
THTensor *sorted,
|
||||
THTensor *indices,
|
||||
bool scaleGradByFreq,
|
||||
int paddingValue,
|
||||
real scale)
|
||||
{
|
||||
long i;
|
||||
THInteger_t *count_data = NULL;
|
||||
|
||||
if (scaleGradByFreq)
|
||||
{
|
||||
THIntegerTensor_(resize1d)(count, gradWeight->size[0]);
|
||||
count_data = THIntegerTensor_(data)(count);
|
||||
}
|
||||
|
||||
if (!THTensor_(isContiguous)(gradWeight))
|
||||
THError("gradWeight must be contiguous");
|
||||
if (!THIndexTensor_(isContiguous)(input))
|
||||
THError("input must be contiguous");
|
||||
if (THIndexTensor_(nDimension)(input) != 1 && THIndexTensor_(nDimension)(input) != 2)
|
||||
THError("input must be a vector or matrix");
|
||||
|
||||
THIndex_t *input_data = THIndexTensor_(data)(input);
|
||||
long numel = THIndexTensor_(nElement)(input);
|
||||
long numw = THTensor_(size)(gradWeight, 0);
|
||||
|
||||
// check that inputs are all within range
|
||||
for (i=0; i<numel; i++)
|
||||
if (input_data[i] < TH_INDEX_BASE || input_data[i] >= numw + TH_INDEX_BASE)
|
||||
THError("input out of range");
|
||||
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
real *gw = THTensor_(data)(gradWeight);
|
||||
real *go = THTensor_(data)(gradOutput);
|
||||
long stride = THTensor_(stride)(gradWeight, 0);
|
||||
|
||||
if (count_data)
|
||||
THNN_(LookupTable_resetCount)(count_data, input);
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (numel > 1000)
|
||||
{
|
||||
// The strategy is to parallelize over sections of the vocabulary, so that
|
||||
// thread 1 handles updates to gradWeight[0..nVocab/nThreads]. Every thread
|
||||
// has to traverse the entire input, but the dominating factor is the axpy
|
||||
// BLAS call.
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
|
||||
long start = tid * (numw/nthreads + 1);
|
||||
long end = start + (numw/nthreads + 1);
|
||||
for (i=0; i<numel; i++)
|
||||
{
|
||||
if (input_data[i] != paddingValue)
|
||||
{
|
||||
long k = input_data[i] - TH_INDEX_BASE;
|
||||
if (k >= start && k < end)
|
||||
{
|
||||
real scale_ = scale;
|
||||
if (count_data) scale_ /= count_data[k];
|
||||
THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOutput);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i=0; i<numel; i++)
|
||||
{
|
||||
if (input_data[i] != paddingValue)
|
||||
{
|
||||
long k = input_data[i] - TH_INDEX_BASE;
|
||||
real scale_ = scale;
|
||||
if (count_data) scale_ /= count_data[k];
|
||||
THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep the norm of weight smaller than maxNorm
|
||||
*/
|
||||
|
||||
static void THNN_(LookupTable_renormRow)(
|
||||
real *row_data,
|
||||
long stride,
|
||||
real maxNorm,
|
||||
real normType)
|
||||
{
|
||||
real norm = 0;
|
||||
real new_norm;
|
||||
long j;
|
||||
for (j=0; j<stride; j++)
|
||||
{
|
||||
if (normType == 1) {
|
||||
norm += fabs(row_data[j]);
|
||||
} else if (normType == 2) {
|
||||
norm += row_data[j] * row_data[j];
|
||||
} else {
|
||||
norm += pow(fabs(row_data[j]), normType);
|
||||
}
|
||||
}
|
||||
norm = pow(norm, 1.0 / normType);
|
||||
if (norm > maxNorm)
|
||||
{
|
||||
new_norm = maxNorm / (norm + 1e-7);
|
||||
for (j=0; j<stride; j++) {
|
||||
row_data[j] *= new_norm;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int THNN_(compare_THIndex)(const void* a, const void* b)
|
||||
{
|
||||
return *(const THIndex_t*)a < *(const THIndex_t*)b ? -1 : 1;
|
||||
}
|
||||
|
||||
void THNN_(LookupTable_renorm)(
|
||||
THNNState *state,
|
||||
THIndexTensor *idx,
|
||||
THTensor *weight,
|
||||
real maxNorm,
|
||||
real normType)
|
||||
{
|
||||
if (!THTensor_(isContiguous)(weight))
|
||||
THError("weight must be contiguous");
|
||||
if (!THIndexTensor_(isContiguous)(idx))
|
||||
THError("input must be contiguous");
|
||||
if (THIndexTensor_(nDimension)(idx) != 1)
|
||||
THError("idx must be a vector");
|
||||
if (normType <= 0)
|
||||
THError("non-positive-norm not supported");
|
||||
|
||||
long i;
|
||||
THIndex_t *row_idx = THIndexTensor_(data)(idx);
|
||||
long numel = THIndexTensor_(nElement)(idx);
|
||||
|
||||
long numw = THTensor_(size)(weight, 0);
|
||||
long stride = THTensor_(stride)(weight, 0);
|
||||
real *gw = THTensor_(data)(weight);
|
||||
for (i=0; i<numel; i++)
|
||||
if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE)
|
||||
THError("input out of range");
|
||||
// get unique indices
|
||||
qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
|
||||
long ptr = 0;
|
||||
for (i=0; i<numel; i++)
|
||||
if (i == 0 || row_idx[i] != row_idx[i-1])
|
||||
row_idx[ptr++] = row_idx[i];
|
||||
numel = ptr;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (numel > 1000)
|
||||
{
|
||||
// The strategy is to parallelize over the rows that appear in
|
||||
// row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
|
||||
// This distributes the work evenly to each thread.
|
||||
#pragma omp parallel for private(i)
|
||||
for (i=0; i<numel; i++)
|
||||
{
|
||||
long k = row_idx[i] - TH_INDEX_BASE;
|
||||
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
for (i=0; i<numel; i++)
|
||||
{
|
||||
long k = row_idx[i] - TH_INDEX_BASE;
|
||||
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
40
torch/lib/THNN/generic/MSECriterion.c
Normal file
40
torch/lib/THNN/generic/MSECriterion.c
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/MSECriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(MSECriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real sum = 0;
|
||||
|
||||
TH_TENSOR_APPLY2(real, input, real, target,
|
||||
real z = (*input_data - *target_data);
|
||||
sum += z*z;
|
||||
);
|
||||
|
||||
if (sizeAverage)
|
||||
sum /= THTensor_(nElement)(input);
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
}
|
||||
|
||||
void THNN_(MSECriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
|
||||
*gradInput_data = norm * (*input_data - *target_data);
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
42
torch/lib/THNN/generic/MarginCriterion.c
Normal file
42
torch/lib/THNN/generic/MarginCriterion.c
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/MarginCriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(MarginCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage,
|
||||
real margin)
|
||||
{
|
||||
real sum = 0;
|
||||
|
||||
TH_TENSOR_APPLY2(real, input, real, target,
|
||||
real z = (margin - *input_data * *target_data);
|
||||
sum += z>0 ? z : 0;
|
||||
);
|
||||
|
||||
if (sizeAverage)
|
||||
sum /= THTensor_(nElement)(input);
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
}
|
||||
|
||||
void THNN_(MarginCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage,
|
||||
real margin)
|
||||
{
|
||||
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
|
||||
*gradInput_data = (*input_data * *target_data) < margin ? -norm * *target_data : 0;
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
174
torch/lib/THNN/generic/MultiLabelMarginCriterion.c
Normal file
174
torch/lib/THNN/generic/MultiLabelMarginCriterion.c
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(MultiLabelMarginCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *output,
|
||||
THTensor *isTarget,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real *input_data, *target_data, *isTarget_data;
|
||||
long nframe, dim;
|
||||
long t, d, dt, ddt;
|
||||
real sum;
|
||||
|
||||
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
|
||||
|
||||
if (input->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size");
|
||||
}
|
||||
else
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size");
|
||||
}
|
||||
|
||||
THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range");
|
||||
THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range");
|
||||
|
||||
target = THTensor_(newContiguous)(target);
|
||||
input = THTensor_(newContiguous)(input);
|
||||
input_data = THTensor_(data)(input);
|
||||
target_data = THTensor_(data)(target);
|
||||
|
||||
THTensor_(resizeAs)(isTarget, target);
|
||||
THTensor_(zero)(isTarget);
|
||||
isTarget_data = THTensor_(data)(isTarget);
|
||||
|
||||
sum = 0;
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
for (ddt = 0; ddt < dim; ddt++)
|
||||
{
|
||||
long target_idx = (long)target_data[ddt] - TH_INDEX_BASE;
|
||||
if (target_idx < 0)
|
||||
break;
|
||||
isTarget_data[target_idx] = 1;
|
||||
}
|
||||
for (dt = 0; dt < dim; dt++)
|
||||
{
|
||||
long target_idx = (long)target_data[dt] - TH_INDEX_BASE;
|
||||
real input_target;
|
||||
if (target_idx < 0)
|
||||
break;
|
||||
|
||||
input_target = input_data[target_idx];
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
if (!isTarget_data[d])
|
||||
{
|
||||
real z = 1 - input_target + input_data[d];
|
||||
if (z > 0)
|
||||
sum += z;
|
||||
}
|
||||
}
|
||||
}
|
||||
input_data += dim;
|
||||
target_data += dim;
|
||||
isTarget_data += dim;
|
||||
}
|
||||
|
||||
sum /= dim;
|
||||
if (sizeAverage)
|
||||
sum /= nframe;
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(target);
|
||||
}
|
||||
|
||||
void THNN_(MultiLabelMarginCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *gradInput,
|
||||
THTensor *isTarget,
|
||||
bool sizeAverage)
|
||||
{
|
||||
real *input_data;
|
||||
real *gradInput_data;
|
||||
real *target_data;
|
||||
real *isTarget_data;
|
||||
long nframe, dim;
|
||||
long t, d, dt;
|
||||
real g;
|
||||
|
||||
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
|
||||
|
||||
if (input->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size");
|
||||
THArgCheck((isTarget->nDimension == 1) && (isTarget->size[0] == dim), 3, "inconsistent isTarget size");
|
||||
}
|
||||
else
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size");
|
||||
THArgCheck((isTarget->nDimension == 2) && (isTarget->size[0] == nframe) && (isTarget->size[1] == dim), 3, "inconsistent isTarget size");
|
||||
}
|
||||
|
||||
THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range");
|
||||
THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range");
|
||||
|
||||
THArgCheck(THTensor_(minall)(isTarget) >= 0, 3, "isTarget out of range");
|
||||
THArgCheck(THTensor_(maxall)(isTarget) <= 1, 3, "isTarget out of range");
|
||||
|
||||
target = THTensor_(newContiguous)(target);
|
||||
input = THTensor_(newContiguous)(input);
|
||||
isTarget = THTensor_(newContiguous)(isTarget);
|
||||
input_data = THTensor_(data)(input);
|
||||
target_data = THTensor_(data)(target);
|
||||
isTarget_data = THTensor_(data)(isTarget);
|
||||
|
||||
g = sizeAverage ? ( 1./((real)(nframe*dim)) ) : ( 1./((real)dim) );
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
for (dt = 0; dt < dim; dt++)
|
||||
{
|
||||
long target_idx = (long)target_data[dt] - TH_INDEX_BASE;
|
||||
real input_target;
|
||||
if (target_idx < 0)
|
||||
break;
|
||||
|
||||
input_target = input_data[target_idx];
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
if (!isTarget_data[d])
|
||||
{
|
||||
real z = 1 - input_target + input_data[d];
|
||||
if (z > 0)
|
||||
{
|
||||
gradInput_data[target_idx] -= g;
|
||||
gradInput_data[d] += g;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
input_data += dim;
|
||||
target_data += dim;
|
||||
isTarget_data += dim;
|
||||
gradInput_data += dim;
|
||||
}
|
||||
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(target);
|
||||
THTensor_(free)(isTarget);
|
||||
}
|
||||
|
||||
#endif
|
||||
159
torch/lib/THNN/generic/MultiMarginCriterion.c
Normal file
159
torch/lib/THNN/generic/MultiMarginCriterion.c
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c"
|
||||
#else
|
||||
|
||||
void THNN_(MultiMarginCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage,
|
||||
int p,
|
||||
THTensor *weights,
|
||||
real margin)
|
||||
{
|
||||
real *input_data, *target_data, *weights_data;
|
||||
long nframe, dim;
|
||||
long t, d;
|
||||
real sum;
|
||||
|
||||
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
|
||||
|
||||
if (input->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
|
||||
}
|
||||
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
real idx = THTensor_(get1d)(target, t);
|
||||
THArgCheck((idx >= TH_INDEX_BASE) && (idx < dim + TH_INDEX_BASE), 3, "target out of range");
|
||||
}
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
target = THTensor_(newContiguous)(target);
|
||||
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
|
||||
input_data = THTensor_(data)(input);
|
||||
target_data = THTensor_(data)(target);
|
||||
weights_data = weights ? THTensor_(data)(weights) : NULL;
|
||||
|
||||
sum = 0;
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
long target_idx = (long)(target_data[t] - TH_INDEX_BASE);
|
||||
real input_target = input_data[target_idx];
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
real z = margin - input_target + input_data[d];
|
||||
if (d == target_idx)
|
||||
continue;
|
||||
|
||||
if (z > 0) {
|
||||
real h = (p==1) ? z : z*z;
|
||||
if(weights_data)
|
||||
h *= weights_data[target_idx];
|
||||
sum += h;
|
||||
}
|
||||
}
|
||||
input_data += dim;
|
||||
}
|
||||
|
||||
sum /= dim;
|
||||
if(sizeAverage)
|
||||
sum /= nframe;
|
||||
|
||||
THTensor_(set1d)(output, 0, sum);
|
||||
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(target);
|
||||
if(weights)
|
||||
THTensor_(free)(weights);
|
||||
}
|
||||
|
||||
void THNN_(MultiMarginCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage,
|
||||
int p,
|
||||
THTensor *weights,
|
||||
real margin)
|
||||
{
|
||||
real *input_data;
|
||||
real *gradInput_data;
|
||||
real *target_data;
|
||||
real *weights_data;
|
||||
long nframe, dim;
|
||||
long t, d;
|
||||
real g;
|
||||
|
||||
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
|
||||
|
||||
if (input->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
|
||||
}
|
||||
|
||||
g = (sizeAverage ? 1./((real)(nframe*dim)) : 1./((real)dim));
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
target = THTensor_(newContiguous)(target);
|
||||
input_data = THTensor_(data)(input);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
|
||||
target_data = THTensor_(data)(target);
|
||||
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
|
||||
weights_data = weights ? THTensor_(data)(weights) : NULL;
|
||||
|
||||
for (t = 0; t < nframe; t++)
|
||||
{
|
||||
long target_idx = (long)(target_data[t]) - TH_INDEX_BASE;
|
||||
real input_target = input_data[target_idx];
|
||||
real gradInput_target = 0;
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
real z = margin - input_target + input_data[d];
|
||||
if (d == target_idx)
|
||||
continue;
|
||||
|
||||
if (z > 0)
|
||||
{
|
||||
real h = (p == 1) ? g : 2*g*z;
|
||||
if(weights_data)
|
||||
h *= weights_data[target_idx];
|
||||
gradInput_target -= h;
|
||||
gradInput_data[d] = h;
|
||||
}
|
||||
else
|
||||
gradInput_data[d] = 0;
|
||||
}
|
||||
gradInput_data[target_idx] = gradInput_target;
|
||||
|
||||
input_data += dim;
|
||||
gradInput_data += dim;
|
||||
}
|
||||
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(target);
|
||||
if(weights)
|
||||
THTensor_(free)(weights);
|
||||
}
|
||||
|
||||
#endif
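For reference, a minimal standalone sketch of the forward rule implemented above, written against a plain float array rather than THTensor; the helper name multi_margin_1d and the example values are illustrative only, and the weights and sizeAverage handling are omitted (this mirrors the p == 1 branch, including the division by dim):

#include <stdio.h>

/* Multi-class margin loss for one frame:
   sum over d != y of max(0, margin - x[y] + x[d]), divided by dim,
   as in the inner loop of MultiMarginCriterion_updateOutput. */
static float multi_margin_1d(const float *x, long dim, long y, float margin)
{
  float sum = 0;
  for (long d = 0; d < dim; d++) {
    if (d == y) continue;
    float z = margin - x[y] + x[d];
    if (z > 0) sum += z;
  }
  return sum / dim;
}

int main(void)
{
  const float scores[4] = {0.1f, 2.0f, 0.5f, 1.8f};
  printf("loss = %f\n", multi_margin_1d(scores, 4, 1, 1.0f)); /* target class 1 -> 0.2 */
  return 0;
}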
228
torch/lib/THNN/generic/PReLU.c
Normal file
@@ -0,0 +1,228 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/PReLU.c"
|
||||
#else
|
||||
|
||||
void THNN_(PReLU_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THIndex_t nOutputPlane)
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
|
||||
if (nOutputPlane == 0)
|
||||
{
|
||||
// handle shared parameter case
|
||||
real w = *THTensor_(data)(weight);
|
||||
TH_TENSOR_APPLY2(real, output, real, input,
|
||||
*output_data = (*input_data > 0) ? *input_data : w*(*input_data);
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
long bs, ks;
|
||||
{
|
||||
long input_ndim = THTensor_(nDimension)(input);
|
||||
switch (input_ndim)
|
||||
{
|
||||
case 1:
|
||||
bs = 1;
|
||||
ks = 1;
|
||||
break;
|
||||
case 2:
|
||||
bs = input->size[0];
|
||||
ks = 1;
|
||||
break;
|
||||
case 3:
|
||||
bs = 1;
|
||||
ks = input->size[1] * input->size[2];
|
||||
break;
|
||||
case 4:
|
||||
bs = input->size[0];
|
||||
ks = input->size[2] * input->size[3];
|
||||
break;
|
||||
}
|
||||
|
||||
if (input->size[(input_ndim + 1) % 2] != nOutputPlane)
|
||||
THError("wrong number of input planes");
|
||||
}
|
||||
|
||||
real *output_data = THTensor_(data)(output);
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *weight_data = THTensor_(data)(weight);
|
||||
THIndex_t i, j, k;
|
||||
#pragma omp parallel for private(j,k)
|
||||
for (i = 0; i < bs; ++i)
|
||||
{
|
||||
real* n_input_data = input_data + i*nOutputPlane*ks;
|
||||
real* n_output_data = output_data + i*nOutputPlane*ks;
|
||||
for (j = 0; j < nOutputPlane; ++j)
|
||||
{
|
||||
for (k = 0; k < ks; ++k)
|
||||
n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : weight_data[j] * n_input_data[k];
|
||||
n_input_data += ks;
|
||||
n_output_data += ks;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(PReLU_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THIndex_t nOutputPlane)
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
|
||||
if (nOutputPlane == 0)
|
||||
{
|
||||
real w = THTensor_(data)(weight)[0];
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
|
||||
if ((*input_data) > 0)
|
||||
*gradInput_data = *gradOutput_data;
|
||||
else
|
||||
*gradInput_data = w * (*gradOutput_data);
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
const real *input_data = THTensor_(data)(input);
|
||||
const real *gradOutput_data = THTensor_(data)(gradOutput);
|
||||
const real *weight_data = THTensor_(data)(weight);
|
||||
real *gradInput_data = THTensor_(data)(gradInput);
|
||||
|
||||
long bs, ks;
|
||||
{
|
||||
long input_ndim = THTensor_(nDimension)(input);
|
||||
switch (input_ndim)
|
||||
{
|
||||
case 1:
|
||||
bs = 1;
|
||||
ks = 1;
|
||||
break;
|
||||
case 2:
|
||||
bs = input->size[0];
|
||||
ks = 1;
|
||||
break;
|
||||
case 3:
|
||||
bs = 1;
|
||||
ks = input->size[1] * input->size[2];
|
||||
break;
|
||||
case 4:
|
||||
bs = input->size[0];
|
||||
ks = input->size[2] * input->size[3];
|
||||
break;
|
||||
}
|
||||
|
||||
if (input->size[(input_ndim + 1) % 2] != nOutputPlane)
|
||||
THError("wrong number of input planes");
|
||||
}
|
||||
|
||||
THIndex_t i, j, k;
|
||||
#pragma omp parallel for private(j,k)
|
||||
for (i = 0; i < bs; ++i)
|
||||
{
|
||||
const real *n_input_data = input_data + i*nOutputPlane*ks;
|
||||
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
|
||||
real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;
|
||||
|
||||
for (j = 0; j < nOutputPlane; ++j)
|
||||
{
|
||||
real w = weight_data[j];
|
||||
for (k = 0; k < ks; ++k)
|
||||
{
|
||||
if (n_input_data[k] > 0)
|
||||
n_gradInput_data[k] = n_gradOutput_data[k];
|
||||
else
|
||||
n_gradInput_data[k] = n_gradOutput_data[k] * w;
|
||||
}
|
||||
n_input_data += ks;
|
||||
n_gradInput_data += ks;
|
||||
n_gradOutput_data += ks;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(PReLU_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradWeightBuf,
|
||||
THTensor *gradWeightBuf2,
|
||||
THIndex_t nOutputPlane,
|
||||
real scale)
|
||||
{
|
||||
real *gradWeight_data = THTensor_(data)(gradWeight);
|
||||
|
||||
if (nOutputPlane == 0)
|
||||
{
|
||||
real sum = 0;
|
||||
TH_TENSOR_APPLY2(real, input, real, gradOutput,
|
||||
if ((*input_data) <= 0)
|
||||
sum += (*input_data) * (*gradOutput_data);
|
||||
);
|
||||
gradWeight_data[0] += scale * sum;
|
||||
}
|
||||
else
|
||||
{
|
||||
long bs, ks;
|
||||
{
|
||||
long input_ndim = THTensor_(nDimension)(input);
|
||||
switch (input_ndim)
|
||||
{
|
||||
case 1:
|
||||
bs = 1;
|
||||
ks = 1;
|
||||
break;
|
||||
case 2:
|
||||
bs = input->size[0];
|
||||
ks = 1;
|
||||
break;
|
||||
case 3:
|
||||
bs = 1;
|
||||
ks = input->size[1] * input->size[2];
|
||||
break;
|
||||
case 4:
|
||||
bs = input->size[0];
|
||||
ks = input->size[2] * input->size[3];
|
||||
break;
|
||||
}
|
||||
|
||||
if (input->size[(input_ndim + 1) % 2] != nOutputPlane)
|
||||
THError("wrong number of input planes");
|
||||
}
|
||||
|
||||
const real *input_data = THTensor_(data)(input);
|
||||
const real *gradOutput_data = THTensor_(data)(gradOutput);
|
||||
const real *weight_data = THTensor_(data)(weight);
|
||||
real *gradWeight_data = THTensor_(data)(gradWeight);
|
||||
|
||||
THIndex_t i, j, k;
|
||||
for (i = 0; i < bs; ++i)
|
||||
{
|
||||
const real *n_input_data = input_data + i*nOutputPlane*ks;
|
||||
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
|
||||
|
||||
for (j = 0; j < nOutputPlane; ++j)
|
||||
{
|
||||
real sum = 0;
|
||||
for (k = 0; k < ks; ++k)
|
||||
if (n_input_data[k] <= 0)
|
||||
sum += n_gradOutput_data[k] * n_input_data[k];
|
||||
gradWeight_data[j] += scale * sum;
|
||||
n_input_data += ks;
|
||||
n_gradOutput_data += ks;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
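A note on the two branches above: nOutputPlane == 0 selects a single weight shared by every element, otherwise there is one weight per channel (the channel dimension is 1 in batch mode, 0 otherwise, as the size check shows). A minimal standalone sketch of the per-element rule for one channel, using illustrative array names rather than the THNN API:

#include <stdio.h>

/* PReLU forward for one channel: y = x if x > 0, else w * x
   (mirrors the inner loop of PReLU_updateOutput). */
static void prelu_forward(const float *x, float *y, long n, float w)
{
  for (long k = 0; k < n; k++)
    y[k] = (x[k] > 0) ? x[k] : w * x[k];
}

int main(void)
{
  const float x[4] = {-2.0f, -0.5f, 0.0f, 3.0f};
  float y[4];
  prelu_forward(x, y, 4, 0.25f);
  for (int k = 0; k < 4; k++)
    printf("%g ", y[k]);   /* -0.5 -0.125 0 3 */
  printf("\n");
  return 0;
}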
127
torch/lib/THNN/generic/RReLU.c
Normal file
@@ -0,0 +1,127 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/RReLU.c"
|
||||
#else
|
||||
|
||||
void THNN_(RReLU_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *noise,
|
||||
real lower,
|
||||
real upper,
|
||||
bool train,
|
||||
bool inplace,
|
||||
THGenerator *generator)
|
||||
{
|
||||
if (train)
|
||||
{
|
||||
// get default random generator
|
||||
THTensor_(resizeAs)(noise, input);
|
||||
if (inplace)
|
||||
{
|
||||
TH_TENSOR_APPLY2(real, input, real, noise,
|
||||
if (*input_data <= 0)
|
||||
{
|
||||
const real r = (real)THRandom_uniform(generator, lower, upper);
|
||||
*input_data = (*input_data) * r;
|
||||
*noise_data = r;
|
||||
}
|
||||
else
|
||||
{
|
||||
*noise_data = 1;
|
||||
}
|
||||
);
|
||||
THTensor_(set)(output, input);
|
||||
}
|
||||
else
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
TH_TENSOR_APPLY3(real, input, real, output, real, noise,
|
||||
if (*input_data <= 0)
|
||||
{
|
||||
const real r = (real)THRandom_uniform(generator, lower, upper);
|
||||
*output_data = (*input_data) * r;
|
||||
*noise_data = r;
|
||||
}
|
||||
else
|
||||
{
|
||||
*output_data = *input_data;
|
||||
*noise_data = 1;
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const real negSlope = (lower + upper) / 2;
|
||||
if (inplace)
|
||||
{
|
||||
TH_TENSOR_APPLY(real, input,
|
||||
if (*input_data <= 0)
|
||||
{
|
||||
*input_data = *input_data * negSlope;
|
||||
}
|
||||
);
|
||||
THTensor_(set)(output, input);
|
||||
}
|
||||
else
|
||||
{
|
||||
THTensor_(resizeAs)(output, input);
|
||||
TH_TENSOR_APPLY2(real, input, real, output,
|
||||
const real r = (*input_data) <= 0 ? negSlope : 1;
|
||||
*output_data = *input_data * r;
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(RReLU_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *noise,
|
||||
real lower,
|
||||
real upper,
|
||||
bool train,
|
||||
bool inplace)
|
||||
{
|
||||
if (train && upper - lower > 1E-6) // e.g. if upper == lower, RReLU behaves like LeakyReLU
|
||||
{
|
||||
// multiply the gradient by the noise tensor
|
||||
if (inplace)
|
||||
{
|
||||
THTensor_(cmul)(gradOutput, gradOutput, noise);
|
||||
THTensor_(set)(gradInput, gradOutput);
|
||||
}
|
||||
else
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(cmul)(gradInput, gradOutput, noise);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// use constant factor for negative input values
|
||||
const real negSlope = (lower + upper) / 2;
|
||||
if (inplace)
|
||||
{
|
||||
TH_TENSOR_APPLY2(real, gradOutput, real, input,
|
||||
if (*input_data <= 0)
|
||||
{
|
||||
*gradOutput_data = (*gradOutput_data) * negSlope;
|
||||
}
|
||||
);
|
||||
THTensor_(set)(gradInput, gradOutput);
|
||||
}
|
||||
else
|
||||
{
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
|
||||
*gradInput_data = (*input_data) <= 0 ? (*gradOutput_data) * negSlope : (*gradOutput_data);
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
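On the control flow above: during training the negative slope is drawn per element from U(lower, upper) and stored in noise for the backward pass; at evaluation time the deterministic slope (lower + upper) / 2 is used, which is also what updateGradInput falls back to when upper - lower <= 1e-6. A small standalone sketch of the evaluation-mode rule (names and values are illustrative):

#include <stdio.h>

/* RReLU in evaluation mode: LeakyReLU with slope (lower + upper) / 2. */
static float rrelu_eval(float x, float lower, float upper)
{
  const float negSlope = (lower + upper) / 2;
  return (x <= 0) ? x * negSlope : x;
}

int main(void)
{
  printf("%g %g\n", rrelu_eval(-2.0f, 1.f/8, 1.f/3), rrelu_eval(2.0f, 1.f/8, 1.f/3));
  /* prints roughly -0.458333 2 */
  return 0;
}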
31
torch/lib/THNN/generic/Sigmoid.c
Normal file
@@ -0,0 +1,31 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Sigmoid.c"
#else

void THNN_(Sigmoid_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output)
{
  THTensor_(resizeAs)(output, input);

  TH_TENSOR_APPLY2(real, output, real, input,
    *output_data = 1./(1.+ exp(- *input_data));
  );
}

void THNN_(Sigmoid_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output)
{
  THTensor_(resizeAs)(gradInput, output);
  TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
    real z = *output_data;
    *gradInput_data = *gradOutput_data * (1. - z) * z;
  );
}

#endif
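Note that the backward pass above only needs the saved output, since sigmoid'(x) = y * (1 - y) with y = sigmoid(x). A standalone sketch of that identity (values are just an example):

#include <stdio.h>
#include <math.h>

int main(void)
{
  double x = 0.3;
  double y = 1. / (1. + exp(-x));      /* forward, as in Sigmoid_updateOutput */
  double dy = 1.0;                     /* some upstream gradient */
  double dx = dy * (1. - y) * y;       /* backward, as in Sigmoid_updateGradInput */
  printf("y = %f, dx = %f\n", y, dx);
  return 0;
}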
45
torch/lib/THNN/generic/SmoothL1Criterion.c
Normal file
@@ -0,0 +1,45 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SmoothL1Criterion.c"
#else

void THNN_(SmoothL1Criterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *output,
          bool sizeAverage)
{
  real sum = 0;
  TH_TENSOR_APPLY2(real, input, real, target,
    real z = fabs(*input_data - *target_data);
    sum += z < 1 ? 0.5*z*z : z - 0.5;
  );

  if (sizeAverage)
    sum /= THTensor_(nElement)(input);

  THTensor_(set1d)(output, 0, sum);
}

void THNN_(SmoothL1Criterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *gradInput,
          bool sizeAverage)
{
  real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);

  THTensor_(resizeAs)(gradInput, input);
  TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
    real x = *input_data - *target_data;
    if (x < -1.)
      *gradInput_data = - norm;
    else if (x > 1.)
      *gradInput_data = norm;
    else
      *gradInput_data = norm * x;
  );
}

#endif
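The per-element term above is the Huber-style smooth L1 loss: 0.5*z*z for |z| < 1 and |z| - 0.5 otherwise, whose derivative is clamped to [-1, 1] exactly as updateGradInput does. A standalone sketch of the element-wise forward term (helper name and values are illustrative):

#include <stdio.h>
#include <math.h>

/* Smooth L1 term for one element, as in SmoothL1Criterion_updateOutput. */
static double smooth_l1(double input, double target)
{
  double z = fabs(input - target);
  return z < 1 ? 0.5 * z * z : z - 0.5;
}

int main(void)
{
  printf("%f %f\n", smooth_l1(0.3, 0.0), smooth_l1(2.5, 0.0)); /* 0.045000 2.000000 */
  return 0;
}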
40
torch/lib/THNN/generic/SoftMarginCriterion.c
Normal file
@@ -0,0 +1,40 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftMarginCriterion.c"
#else

void THNN_(SoftMarginCriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *output,
          bool sizeAverage)
{
  real sum;

  sum = 0;
  TH_TENSOR_APPLY2(real, input, real, target,
    real z = log(1. + exp(-*input_data* *target_data));
    sum += z;)

  if(sizeAverage)
    sum /= THTensor_(nElement)(input);

  THTensor_(set1d)(output, 0, sum);
}

void THNN_(SoftMarginCriterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *gradInput,
          bool sizeAverage)
{
  real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);

  THTensor_(resizeAs)(gradInput, input);
  TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
    real z = exp(-*target_data * *input_data);
    *gradInput_data = -norm*(*target_data)*z/(1. + z);)
}

#endif
149
torch/lib/THNN/generic/SoftMax.c
Normal file
@@ -0,0 +1,149 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SoftMax.c"
|
||||
#else
|
||||
|
||||
void THNN_(SoftMax_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output)
|
||||
{
|
||||
real *input_data, *output_data;
|
||||
long nframe = 0, dim = 0, stride = 0;
|
||||
long t;
|
||||
|
||||
if (input->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
stride = 1;
|
||||
}
|
||||
else if (input->nDimension == 2)
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
stride = 1;
|
||||
}
|
||||
else if (input->nDimension == 3)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = input->size[0];
|
||||
stride = input->size[1]*input->size[2];
|
||||
}
|
||||
else if (input->nDimension == 4)
|
||||
{
|
||||
nframe = input->size[0];
|
||||
dim = input->size[1];
|
||||
stride = input->size[2]*input->size[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
|
||||
}
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
THTensor_(resizeAs)(output, input);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
#pragma omp parallel for private(t)
|
||||
for (t = 0; t < stride*nframe; t++)
|
||||
{
|
||||
real *input_ptr = input_data + (t/stride)*dim*stride + t % stride;
|
||||
real *output_ptr = output_data + (t/stride)*dim*stride + t % stride;
|
||||
|
||||
real inputMax = -THInf;
|
||||
accreal sum;
|
||||
|
||||
long d;
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
if (input_ptr[d*stride] >= inputMax) inputMax = input_ptr[d*stride];
|
||||
}
|
||||
|
||||
sum = 0;
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
real z = exp(input_ptr[d*stride] - inputMax);
|
||||
output_ptr[d*stride] = z;
|
||||
sum += z;
|
||||
}
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
{
|
||||
output_ptr[d*stride] *= 1/sum;
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
void THNN_(SoftMax_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *output)
|
||||
{
|
||||
real *gradInput_data, *gradOutput_data, *output_data;
|
||||
long nframe = 0, dim = 0, stride = 0;
|
||||
long t;
|
||||
|
||||
if (output->nDimension == 1)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = output->size[0];
|
||||
stride = 1;
|
||||
}
|
||||
else if (output->nDimension == 2)
|
||||
{
|
||||
nframe = output->size[0];
|
||||
dim = output->size[1];
|
||||
stride = 1;
|
||||
}
|
||||
else if (output->nDimension == 3)
|
||||
{
|
||||
nframe = 1;
|
||||
dim = output->size[0];
|
||||
stride = output->size[1]*output->size[2];
|
||||
}
|
||||
else if (output->nDimension == 4)
|
||||
{
|
||||
nframe = output->size[0];
|
||||
dim = output->size[1];
|
||||
stride = output->size[2]*output->size[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
THError("1D, 2D, 3D or 4D tensor expected");
|
||||
}
|
||||
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
output = THTensor_(newContiguous)(output);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, output);
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
output_data = THTensor_(data)(output);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
#pragma omp parallel for private(t)
|
||||
for (t = 0; t < stride*nframe; t++)
|
||||
{
|
||||
real *gradInput_ptr = gradInput_data + (t/stride)*dim*stride + t % stride;
|
||||
real *output_ptr = output_data + (t/stride)*dim*stride + t % stride;
|
||||
real *gradOutput_ptr = gradOutput_data + (t/stride)*dim*stride + t % stride;
|
||||
|
||||
long d;
|
||||
accreal sum = 0;
|
||||
for (d = 0; d < dim; d++)
|
||||
sum += (accreal)gradOutput_ptr[d*stride] * output_ptr[d*stride];
|
||||
|
||||
for (d = 0; d < dim; d++)
|
||||
gradInput_ptr[d*stride] = output_ptr[d*stride] * (gradOutput_ptr[d*stride] - sum);
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOutput);
|
||||
THTensor_(free)(output);
|
||||
}
|
||||
|
||||
#endif
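The forward loop above subtracts the per-slice maximum before exponentiating so that exp cannot overflow; the result is unchanged because softmax is shift-invariant. A minimal standalone version of the same scheme on a plain array (illustrative only, not the THNN API):

#include <stdio.h>
#include <math.h>

/* Numerically stable softmax over one slice, as in SoftMax_updateOutput. */
static void softmax(const double *in, double *out, long dim)
{
  double inputMax = in[0], sum = 0;
  for (long d = 1; d < dim; d++)
    if (in[d] > inputMax) inputMax = in[d];
  for (long d = 0; d < dim; d++) {
    out[d] = exp(in[d] - inputMax);   /* shift by the max for numerical stability */
    sum += out[d];
  }
  for (long d = 0; d < dim; d++)
    out[d] /= sum;
}

int main(void)
{
  const double x[3] = {1000.0, 1001.0, 1002.0};  /* exp(1000) alone would overflow */
  double y[3];
  softmax(x, y, 3);
  printf("%f %f %f\n", y[0], y[1], y[2]);        /* ~0.090 0.245 0.665 */
  return 0;
}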
42
torch/lib/THNN/generic/SoftPlus.c
Normal file
@@ -0,0 +1,42 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftPlus.c"
#else

void THNN_(SoftPlus_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          real beta,
          real threshold)
{
  THTensor_(resizeAs)(output, input);

  // f(x) = 1/beta * log(1 + exp(beta * x))
  TH_TENSOR_APPLY2(real, output, real, input,
    *output_data = (*input_data * beta) > threshold ? *input_data : THLog1p(exp(*input_data * beta)) / beta;
  );
}

void THNN_(SoftPlus_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output,
          real beta,
          real threshold)
{
  THTensor_(resizeAs)(gradInput, output);

  // d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1)
  // SINCE
  // y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1)
  // THEREFORE:
  // d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y)
  TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
    real z = exp(*output_data * beta);
    *gradInput_data = (*output_data * beta) > threshold ? *gradOutput_data : *gradOutput_data * (z - 1.)/z;
  );
}

#endif
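The threshold parameter above is a numerical guard: once beta*x is above the threshold, log1p(exp(beta*x))/beta is indistinguishable from x in floating point (and exp would overflow), so the input is passed through unchanged. A standalone sketch of the same rule using the C99 log1p (function name softplus and the values are illustrative):

#include <stdio.h>
#include <math.h>

/* f(x) = (1/beta) * log(1 + exp(beta*x)), with a pass-through above the threshold. */
static double softplus(double x, double beta, double threshold)
{
  return (x * beta > threshold) ? x : log1p(exp(x * beta)) / beta;
}

int main(void)
{
  printf("%f %f\n", softplus(0.0, 1.0, 20.0), softplus(50.0, 1.0, 20.0));
  /* 0.693147 50.000000 */
  return 0;
}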
39
torch/lib/THNN/generic/SoftShrink.c
Normal file
@@ -0,0 +1,39 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftShrink.c"
#else

void THNN_(SoftShrink_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          real lambda)
{
  THTensor_(resizeAs)(output, input);

  TH_TENSOR_APPLY2(real, output, real, input,
    if ((*input_data) > lambda)
      *output_data = *input_data - lambda;
    else if ((*input_data) < -lambda)
      *output_data = *input_data + lambda;
    else
      *output_data = 0;
  );
}

void THNN_(SoftShrink_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          real lambda)
{
  THTensor_(resizeAs)(gradInput, input);
  TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
    if ((*input_data) > lambda || (*input_data) < -lambda)
      *gradInput_data = (*gradOutput_data);
    else
      *gradInput_data = 0;
  );
}

#endif
550
torch/lib/THNN/generic/SparseLinear.c
Normal file
@@ -0,0 +1,550 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SparseLinear.c"
|
||||
#else
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#define ROW_PTR2(t, r) (THTensor_(data)(t) + (r) * (t)->stride[0])
|
||||
#define COL_PTR2(t, c) (THTensor_(data)(t) + (c) * (t)->stride[1])
|
||||
|
||||
static bool THNN_(checkLegacyInput)(THTensor* t)
|
||||
{
|
||||
return t->nDimension == 3 && t->size[2] == 2;
|
||||
}
|
||||
|
||||
static bool THNN_(checkInput)(THTensor* t)
|
||||
{
|
||||
return t->nDimension == 2 && t->size[1] == 3;
|
||||
}
|
||||
|
||||
static bool THNN_(checkSize2D)(THTensor* t, long size0, long size1)
|
||||
{
|
||||
return t->nDimension == 2 && t->size[0] == size0 && t->size[1] == size1;
|
||||
}
|
||||
|
||||
static bool THNN_(checkSize1D)(THTensor* t, long size0)
|
||||
{
|
||||
return t->nDimension == 1 && t->size[0] == size0;
|
||||
}
|
||||
|
||||
static void THNN_(set1d)(THTensor *t, long x0, real value) {
|
||||
THStorage_(set)(t->storage, t->storageOffset + x0*t->stride[0], value);
|
||||
}
|
||||
static real THNN_(get3d)(const THTensor *t, long x0, long x1, long x2) {
|
||||
return THStorage_(get)(t->storage, t->storageOffset +
|
||||
x0*t->stride[0] + x1*t->stride[1] + x2*t->stride[2]);
|
||||
}
|
||||
static real THNN_(get2d)(const THTensor *t, long x0, long x1) {
|
||||
return THStorage_(get)(t->storage, t->storageOffset +
|
||||
x0*t->stride[0] + x1*t->stride[1]);
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias)
|
||||
{
|
||||
long h, i, j, hp0, hp1;
|
||||
long outDim = THTensor_(size)(weight, 0);
|
||||
long inDim = THTensor_(size)(weight, 1);
|
||||
long batchSize = THTensor_(size)(output, 0);
|
||||
|
||||
THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
|
||||
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
|
||||
THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
|
||||
|
||||
long nnz = THTensor_(size)(input, 0);
|
||||
|
||||
THLongTensor * csr = THLongTensor_newWithSize1d(batchSize+1);
|
||||
THLongTensor_zero(csr);
|
||||
|
||||
//#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
|
||||
for (i=0; i<nnz; i++) {
|
||||
hp0 = (long)(THNN_(get2d)(input, i, 0)) - 1;
|
||||
hp1 = (i+1 == nnz) ?
|
||||
batchSize :
|
||||
(long)(THNN_(get2d)(input, i+1, 0)) - 1;
|
||||
if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
|
||||
THLongTensor_set1d(csr, h+1, i+1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// output = weight * input + bias
|
||||
THTensor_(zero)(output);
|
||||
#pragma omp parallel for private(h, i) schedule(static) if (nnz > 10000)
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
long i_start = THLongTensor_get1d(csr, h);
|
||||
long i_end = THLongTensor_get1d(csr, h+1);
|
||||
for (i = i_start; i < i_end; i++) {
|
||||
real val = THNN_(get2d)(input, i, 2);
|
||||
if (val == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
THBlas_(axpy)(outDim,
|
||||
val,
|
||||
COL_PTR2(weight, offset), weight->stride[0],
|
||||
ROW_PTR2(output, h), output->stride[1]);
|
||||
} else {
|
||||
THError("index out of bound. updateOutput: %d not between 1 and %d",
|
||||
offset + 1, inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
THTensor* output_row = THTensor_(new)();
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
THTensor_(select)(output_row, output, 0, h);
|
||||
THTensor_(cadd)(output_row, bias, 1.0, output_row);
|
||||
}
|
||||
THTensor_(free)(output_row);
|
||||
THLongTensor_free(csr);
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_legacyUpdateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias)
|
||||
{
|
||||
long h, i;
|
||||
long outDim = THTensor_(size)(weight, 0);
|
||||
long inDim = THTensor_(size)(weight, 1);
|
||||
|
||||
THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2");
|
||||
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
|
||||
THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
|
||||
|
||||
long batchSize = THTensor_(size)(input, 0);
|
||||
long nnz = THTensor_(size)(input, 1);
|
||||
THTensor_(resize2d)(output, batchSize, outDim);
|
||||
|
||||
// output = weight * input + bias
|
||||
THTensor_(zero)(output);
|
||||
#pragma omp parallel for private(h, i) schedule(static) if ( \
|
||||
batchSize > 1 && batchSize * nnz * outDim > 10000)
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
for (i = 0; i < nnz; i++) {
|
||||
real val = THNN_(get3d)(input, h, i, 1);
|
||||
if (val == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
THBlas_(axpy)(outDim,
|
||||
val,
|
||||
COL_PTR2(weight, offset), weight->stride[0],
|
||||
ROW_PTR2(output, h), output->stride[1]);
|
||||
} else {
|
||||
THError("index out of bound. updateOutput: %d not between 1 and %d",
|
||||
offset + 1, inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
THTensor* output_row = THTensor_(new)();
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
THTensor_(select)(output_row, output, 0, h);
|
||||
THTensor_(cadd)(output_row, bias, 1.0, output_row);
|
||||
}
|
||||
THTensor_(free)(output_row);
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
real weightDecay,
|
||||
real scale)
|
||||
{
|
||||
long h, i, col, hp0, hp1;
|
||||
long outDim = THTensor_(size)(weight, 0);
|
||||
long inDim = THTensor_(size)(weight, 1);
|
||||
|
||||
THArgCheck(THNN_(checkInput)(input), 2,
|
||||
"input must be in coo format, nnz x 3");
|
||||
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
|
||||
"gradWeight size wrong");
|
||||
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
|
||||
"gradBias size wrong");
|
||||
THArgCheck(THTensor_(isContiguous)(gradOutput), 1,
|
||||
"gradOutput must be contiguous");
|
||||
|
||||
long nnz = THTensor_(size)(input, 0);
|
||||
|
||||
THLongTensor* csc = THLongTensor_newWithSize1d(inDim+1);
|
||||
THLongTensor_zero(csc);
|
||||
|
||||
#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
|
||||
for (i = 0; i < nnz; i++) {
|
||||
hp0 = (long)(THNN_(get2d)(input, i, 1)) - 1;
|
||||
hp1 = (i+1 == nnz) ?
|
||||
inDim :
|
||||
(long)(THNN_(get2d)(input, i+1, 1)) - 1;
|
||||
if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
|
||||
THLongTensor_set1d(csc, h+1, i+1);
|
||||
}
|
||||
}
|
||||
|
||||
// gradWeight += gradOutput * input
|
||||
#pragma omp parallel for private(h, i, col) schedule(static) if (nnz > 10000)
|
||||
for (col = 0; col < inDim; col++) {
|
||||
long i_start = THLongTensor_get1d(csc, col);
|
||||
long i_end = THLongTensor_get1d(csc, col+1);
|
||||
for (i = i_start; i < i_end; i++) {
|
||||
real val = scale * THNN_(get2d)(input, i, 2);
|
||||
|
||||
h = (long)(THNN_(get2d)(input, i, 0)) - 1;
|
||||
long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
THBlas_(axpy)(outDim,
|
||||
val,
|
||||
ROW_PTR2(gradOutput, h), gradOutput->stride[1],
|
||||
COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
|
||||
} else {
|
||||
THError(
|
||||
"index out of bound. accGradParameters: %d not between 1 and %d",
|
||||
offset + 1,
|
||||
inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// gradBias += gradOutput
|
||||
THTensor* buf = THTensor_(new)();
|
||||
THTensor_(sum)(buf, gradOutput, 0);
|
||||
THTensor_(cadd)(gradBias, gradBias, scale, buf);
|
||||
THTensor_(free)(buf);
|
||||
THLongTensor_free(csc);
|
||||
|
||||
if (weightDecay != 0) {
|
||||
THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_legacyAccGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
real weightDecay,
|
||||
real scale)
|
||||
{
|
||||
long h, i;
|
||||
long outDim = THTensor_(size)(weight, 0);
|
||||
long inDim = THTensor_(size)(weight, 1);
|
||||
|
||||
THArgCheck(THNN_(checkLegacyInput)(input), 2,
|
||||
"input size must be batchsize x nnz x 2");
|
||||
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
|
||||
"gradWeight size wrong");
|
||||
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
|
||||
"gradBias size wrong");
|
||||
THArgCheck(THTensor_(isContiguous)(gradOutput), 1,
|
||||
"gradOutput must be contiguous");
|
||||
|
||||
long batchSize = THTensor_(size)(input, 0);
|
||||
long nnz = THTensor_(size)(input, 1);
|
||||
THTensor_(resize2d)(gradOutput, batchSize, outDim);
|
||||
|
||||
// gradWeight += gradOutput * input
|
||||
#pragma omp parallel for private(h, i) schedule(static) if (\
|
||||
batchSize * nnz * outDim > 10000)
|
||||
for (i = 0; i < nnz; i++) {
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
real val = scale * THNN_(get3d)(input, h, i, 1);
|
||||
if (val == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
THBlas_(axpy)(outDim,
|
||||
val,
|
||||
ROW_PTR2(gradOutput, h), gradOutput->stride[1],
|
||||
COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
|
||||
} else {
|
||||
THError(
|
||||
"index out of bound. accGradParameters: %d not between 1 and %d",
|
||||
offset + 1,
|
||||
inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// gradBias += gradOutput
|
||||
THTensor* gradOutput_row = THTensor_(new)();
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
THTensor_(select)(gradOutput_row, gradOutput, 0, h);
|
||||
THTensor_(cadd)(gradBias, gradBias, scale, gradOutput_row);
|
||||
}
|
||||
THTensor_(free)(gradOutput_row);
|
||||
|
||||
if (weightDecay != 0) {
|
||||
THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_updateParameters)(
|
||||
THNNState *state,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *lastInput,
|
||||
real learningRate)
|
||||
{
|
||||
long h, i;
|
||||
long outDim = weight->size[0];
|
||||
long inDim = weight->size[1];
|
||||
|
||||
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
|
||||
"gradWeight size wrong");
|
||||
THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong");
|
||||
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
|
||||
THArgCheck(THNN_(checkInput)(lastInput), 6,
|
||||
"input must be in coo format, nnz x 3");
|
||||
|
||||
|
||||
long nnz = THTensor_(size)(lastInput, 0);
|
||||
|
||||
// collect unique offsets of non-0 val in input
|
||||
THTensor* offsets = THTensor_(newWithSize1d)(nnz);
|
||||
long cnt = 0;
|
||||
for (i = 0; i < nnz; i++) {
|
||||
real val = THNN_(get2d)(lastInput, i, 2);
|
||||
if (val == 0) {
|
||||
continue;
|
||||
}
|
||||
long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
THNN_(set1d)(offsets, cnt++, offset);
|
||||
} else {
|
||||
THError(
|
||||
"index out of bound. updateParameters: %d not between 1 and %d",
|
||||
offset + 1,
|
||||
inDim);
|
||||
}
|
||||
}
|
||||
if (cnt == 0) return;
|
||||
THTensor_(resize1d)(offsets, cnt);
|
||||
|
||||
THTensor* uniqueOffsets = THTensor_(new)();
|
||||
THLongTensor* ri = THLongTensor_new();
|
||||
THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0);
|
||||
THLongTensor_free(ri);
|
||||
THTensor_(free)(offsets);
|
||||
|
||||
cnt = 1;
|
||||
real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets);
|
||||
for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) {
|
||||
if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) {
|
||||
uniqueOffsets_p[cnt++] = uniqueOffsets_p[i];
|
||||
}
|
||||
}
|
||||
THTensor_(resize1d)(uniqueOffsets, cnt);
|
||||
|
||||
// weight += -learningRate * gradWeight
|
||||
THTensor_(cadd)(bias, bias, -learningRate, gradBias);
|
||||
#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000)
|
||||
for (i = 0; i < cnt; i++) {
|
||||
long offset = (long)uniqueOffsets_p[i];
|
||||
THBlas_(axpy)(outDim,
|
||||
-learningRate,
|
||||
COL_PTR2(gradWeight, offset), gradWeight->stride[0],
|
||||
COL_PTR2(weight, offset), weight->stride[0]);
|
||||
}
|
||||
|
||||
THTensor_(free)(uniqueOffsets);
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_legacyUpdateParameters)(
|
||||
THNNState *state,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *lastInput,
|
||||
real learningRate)
|
||||
{
|
||||
long h, i;
|
||||
long outDim = weight->size[0];
|
||||
long inDim = weight->size[1];
|
||||
|
||||
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
|
||||
"gradWeight size wrong");
|
||||
THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong");
|
||||
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
|
||||
THArgCheck(THNN_(checkLegacyInput)(lastInput), 6,
|
||||
"input size must be batchsize x nnz x 2");
|
||||
|
||||
|
||||
long batchSize = THTensor_(size)(lastInput, 0);
|
||||
long nnz = THTensor_(size)(lastInput, 1);
|
||||
|
||||
// collect unique offsets of non-0 val in input
|
||||
THTensor* offsets = THTensor_(newWithSize1d)(batchSize * nnz);
|
||||
long cnt = 0;
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
for (i = 0; i < nnz; i++) {
|
||||
real val = THNN_(get3d)(lastInput, h, i, 1);
|
||||
if (val == 0 ) {
|
||||
continue;
|
||||
}
|
||||
long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
THNN_(set1d)(offsets, cnt++, offset);
|
||||
} else {
|
||||
THError(
|
||||
"index out of bound. updateParameters: %d not between 1 and %d",
|
||||
offset + 1,
|
||||
inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
THTensor_(resize1d)(offsets, cnt);
|
||||
|
||||
THTensor* uniqueOffsets = THTensor_(new)();
|
||||
THLongTensor* ri = THLongTensor_new();
|
||||
THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0);
|
||||
THLongTensor_free(ri);
|
||||
THTensor_(free)(offsets);
|
||||
|
||||
cnt = 1;
|
||||
real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets);
|
||||
for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) {
|
||||
if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) {
|
||||
uniqueOffsets_p[cnt++] = uniqueOffsets_p[i];
|
||||
}
|
||||
}
|
||||
THTensor_(resize1d)(uniqueOffsets, cnt);
|
||||
|
||||
// weight += -learningRate * gradWeight
|
||||
THTensor_(cadd)(bias, bias, -learningRate, gradBias);
|
||||
#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000)
|
||||
for (i = 0; i < cnt; i++) {
|
||||
long offset = (long)uniqueOffsets_p[i];
|
||||
THBlas_(axpy)(outDim,
|
||||
-learningRate,
|
||||
COL_PTR2(gradWeight, offset), gradWeight->stride[0],
|
||||
COL_PTR2(weight, offset), weight->stride[0]);
|
||||
}
|
||||
|
||||
THTensor_(free)(uniqueOffsets);
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_zeroGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *lastInput)
|
||||
{
|
||||
long h, i, j;
|
||||
|
||||
long outDim = gradWeight->size[0];
|
||||
long inDim = gradWeight->size[1];
|
||||
|
||||
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
|
||||
THArgCheck(THNN_(checkInput)(lastInput), 4,
|
||||
"input must be in coo format, nnz x 3");
|
||||
|
||||
THTensor_(zero)(gradBias);
|
||||
|
||||
long nnz = THTensor_(size)(lastInput, 0);
|
||||
|
||||
#pragma omp parallel for private(i, j) schedule(static) if ( \
|
||||
nnz * outDim > 10000)
|
||||
for (i = 0; i < nnz; i++) {
|
||||
if (THNN_(get2d)(lastInput, i, 2) == 0 ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
real* pGradWeight = COL_PTR2(gradWeight, offset);
|
||||
if (gradWeight->stride[0] == 1) {
|
||||
THVector_(fill)(pGradWeight, 0, outDim);
|
||||
} else {
|
||||
long stride = gradWeight->stride[0];
|
||||
for (j = 0; j < outDim; ++j) {
|
||||
pGradWeight[j * stride] = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
THError(
|
||||
"index out of bound. zeroGradParameters: %d not between 1 and %d",
|
||||
offset + 1,
|
||||
inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SparseLinear_legacyZeroGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *lastInput)
|
||||
{
|
||||
long h, i, j;
|
||||
|
||||
long outDim = gradWeight->size[0];
|
||||
long inDim = gradWeight->size[1];
|
||||
|
||||
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
|
||||
THArgCheck(THNN_(checkLegacyInput)(lastInput), 4,
|
||||
"input size must be batchsize x nnz x 2");
|
||||
|
||||
THTensor_(zero)(gradBias);
|
||||
|
||||
long batchSize = THTensor_(size)(lastInput, 0);
|
||||
long nnz = THTensor_(size)(lastInput, 1);
|
||||
|
||||
#pragma omp parallel for private(h, i, j) schedule(static) if ( \
|
||||
batchSize > 1 && batchSize * nnz * outDim > 10000)
|
||||
for (h = 0; h < batchSize; h++) {
|
||||
for (i = 0; i < nnz; i++) {
|
||||
if (THNN_(get3d)(lastInput, h, i, 1) == 0 ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
|
||||
if (offset >= 0 && offset < inDim) {
|
||||
real* pGradWeight = COL_PTR2(gradWeight, offset);
|
||||
if (gradWeight->stride[0] == 1) {
|
||||
THVector_(fill)(pGradWeight, 0, outDim);
|
||||
} else {
|
||||
long stride = gradWeight->stride[0];
|
||||
for (j = 0; j < outDim; ++j) {
|
||||
pGradWeight[j * stride] = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
THError(
|
||||
"index out of bound. zeroGradParameters: %d not between 1 and %d",
|
||||
offset + 1,
|
||||
inDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef ROW_PTR2
|
||||
#undef COL_PTR2
|
||||
|
||||
#endif
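For orientation, the non-legacy entry points above take the sparse input in the COO-like layout checked by THNN_(checkInput): an nnz x 3 tensor whose rows are (sample index, feature index, value) with 1-based indices (hence the "- 1" adjustments), and the CSR pointer construction appears to assume the rows are ordered by sample index. A small standalone sketch of how such rows map onto a dense batch; the array names and the example data are illustrative only:

#include <stdio.h>

/* Expand nnz x 3 rows (sample, feature, value), 1-based indices,
   into a dense batchSize x inDim matrix -- the layout consumed by
   SparseLinear_updateOutput. */
int main(void)
{
  const double coo[4][3] = {
    {1, 2, 0.5},   /* sample 1, feature 2 */
    {1, 7, 1.0},
    {2, 1, -2.0},
    {2, 7, 3.0},
  };
  const int batchSize = 2, inDim = 8, nnz = 4;
  double dense[2][8] = {{0}};

  for (int i = 0; i < nnz; i++) {
    int h = (int)coo[i][0] - 1;        /* 1-based sample index */
    int offset = (int)coo[i][1] - 1;   /* 1-based feature index */
    dense[h][offset] = coo[i][2];
  }

  for (int h = 0; h < batchSize; h++) {
    for (int d = 0; d < inDim; d++)
      printf("%5.1f ", dense[h][d]);
    printf("\n");
  }
  return 0;
}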
274
torch/lib/THNN/generic/SpatialAdaptiveMaxPooling.c
Normal file
@@ -0,0 +1,274 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialAdaptiveMaxPooling.c"
|
||||
#else
|
||||
|
||||
static void THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
|
||||
real *input_p,
|
||||
real *output_p,
|
||||
real *indx_p,
|
||||
real *indy_p,
|
||||
long nslices,
|
||||
long iwidth,
|
||||
long iheight,
|
||||
long owidth,
|
||||
long oheight,
|
||||
long stridew,
|
||||
long strideh,
|
||||
long strided)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
/* loop over output */
|
||||
long i, j;
|
||||
for(i = 0; i < oheight; i++)
|
||||
{
|
||||
int y_start = (int)floor((float)i / oheight * iheight);
|
||||
int y_end = (int)ceil((float)(i + 1) / oheight * iheight);
|
||||
int kH = y_end-y_start;
|
||||
|
||||
for(j = 0; j < owidth; j++)
|
||||
{
|
||||
|
||||
int x_start = (int)floor((float)j / owidth * iwidth);
|
||||
int x_end = (int)ceil((float)(j + 1) / owidth * iwidth);
|
||||
int kW = x_end-x_start;
|
||||
|
||||
/* local pointers */
|
||||
real *ip = input_p + k*strided + y_start*strideh + x_start*stridew;
|
||||
real *op = output_p + k*owidth*oheight + i*owidth + j;
|
||||
real *indyp = indy_p + k*owidth*oheight + i*owidth + j;
|
||||
real *indxp = indx_p + k*owidth*oheight + i*owidth + j;
|
||||
|
||||
/* compute local max: */
|
||||
long maxindex = -1;
|
||||
real maxval = -FLT_MAX;
|
||||
long tcntr = 0;
|
||||
int x,y;
|
||||
for(y = 0; y < kH; y++)
|
||||
{
|
||||
for(x = 0; x < kW; x++)
|
||||
{
|
||||
real val = *(ip + y*strideh + x*stridew);
|
||||
if (val > maxval)
|
||||
{
|
||||
maxval = val;
|
||||
maxindex = tcntr;
|
||||
}
|
||||
tcntr++;
|
||||
}
|
||||
}
|
||||
|
||||
/* set output to local max */
|
||||
*op = maxval;
|
||||
|
||||
/* store location of max (x,y) */
|
||||
*indyp = (int)(maxindex / kW) + TH_INDEX_BASE;
|
||||
*indxp = (maxindex % kW) + TH_INDEX_BASE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *indices,
|
||||
int owidth,
|
||||
int oheight)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
long nslices;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
|
||||
long istride_d;
|
||||
long istride_h;
|
||||
long istride_w;
|
||||
long istride_b;
|
||||
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
real *indices_data;
|
||||
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
istride_b = input->stride[0];
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimh-1];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
/* strides */
|
||||
istride_d = input->stride[dimh-1];
|
||||
istride_h = input->stride[dimh];
|
||||
istride_w = input->stride[dimw];
|
||||
|
||||
/* resize output */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize3d)(output, nslices, oheight, owidth);
|
||||
/* indices will contain i,j locations for each output point */
|
||||
THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
|
||||
indices_data+nslices*owidth*oheight, indices_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
istride_w,istride_h,
|
||||
istride_d);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
|
||||
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
|
||||
/* indices will contain i,j locations for each output point */
|
||||
THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
|
||||
indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
istride_w,istride_h,
|
||||
istride_d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
|
||||
real *gradInput_p,
|
||||
real *gradOutput_p,
|
||||
real *indx_p,
|
||||
real *indy_p,
|
||||
long nslices,
|
||||
long iwidth,
|
||||
long iheight,
|
||||
long owidth,
|
||||
long oheight)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
|
||||
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
|
||||
real *indx_p_k = indx_p + k*owidth*oheight;
|
||||
real *indy_p_k = indy_p + k*owidth*oheight;
|
||||
|
||||
/* calculate max points */
|
||||
long i, j;
|
||||
for(i = 0; i < oheight; i++)
|
||||
{
|
||||
int y_start = (int)floor((float) i / oheight * iheight);
|
||||
for(j = 0; j < owidth; j++)
|
||||
{
|
||||
int x_start = (int)floor((float) j / owidth * iwidth);
|
||||
/* retrieve position of max */
|
||||
long maxi = indy_p_k[i*owidth + j] - TH_INDEX_BASE + y_start;
|
||||
long maxj = indx_p_k[i*owidth + j] - TH_INDEX_BASE + x_start;
|
||||
|
||||
/* update gradient */
|
||||
gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialAdaptiveMaxPooling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *indices)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
int nslices;
|
||||
int iheight;
|
||||
int iwidth;
|
||||
int oheight;
|
||||
int owidth;
|
||||
real *gradInput_data;
|
||||
real *gradOutput_data;
|
||||
real *indices_data;
|
||||
|
||||
/* get contiguous gradOutput */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimh-1];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
oheight = gradOutput->size[dimh];
|
||||
owidth = gradOutput->size[dimw];
|
||||
|
||||
/* get raw pointers */
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
/* backprop */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
|
||||
indices_data+nslices*owidth*oheight, indices_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
|
||||
indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
258
torch/lib/THNN/generic/SpatialAveragePooling.c
Normal file
@@ -0,0 +1,258 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialAveragePooling.c"
|
||||
#else
|
||||
|
||||
void THNN_(SpatialAveragePooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH,
|
||||
bool ceil_mode,
|
||||
bool count_include_pad)
|
||||
{
|
||||
real *output_data;
|
||||
real *input_data;
|
||||
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimc = 0;
|
||||
long nbatch = 1;
|
||||
|
||||
long inputWidth;
|
||||
long inputHeight;
|
||||
long outputWidth;
|
||||
long outputHeight;
|
||||
long nInputPlane; // number of channels (or colors)
|
||||
|
||||
long k;
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
|
||||
THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, "pad should be smaller than half of kernel size");
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
dimc++;
|
||||
}
|
||||
|
||||
inputWidth = input->size[dimw];
|
||||
inputHeight = input->size[dimh];
|
||||
nInputPlane = input->size[dimc];
|
||||
|
||||
if(ceil_mode)
|
||||
{
|
||||
outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
|
||||
outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
|
||||
outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
|
||||
}
|
||||
if (padW || padH)
|
||||
{
|
||||
// ensure that the last pooling starts inside the image
|
||||
// needed to avoid problems in ceil mode
|
||||
if ((outputHeight - 1)*dH >= inputHeight + padH)
|
||||
--outputHeight;
|
||||
if ((outputWidth - 1)*dW >= inputWidth + padW)
|
||||
--outputWidth;
|
||||
}
|
||||
|
||||
THArgCheck(inputWidth >= kW - 2 * padW && inputHeight >= kH - 2 * padH, 2, "input image smaller than kernel size");
|
||||
|
||||
if (input->nDimension == 3)
|
||||
THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
|
||||
else
|
||||
THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
#pragma omp parallel for private(k)
|
||||
for(k = 0; k < nInputPlane; k++)
|
||||
{
|
||||
long p;
|
||||
for(p = 0; p < nbatch; p++)
|
||||
{
|
||||
long xx, yy;
|
||||
/* For all output pixels... */
|
||||
real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
|
||||
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
|
||||
long i;
|
||||
for(i = 0; i < outputWidth*outputHeight; i++)
|
||||
ptr_output[i] = 0;
|
||||
|
||||
for(yy = 0; yy < outputHeight; yy++)
|
||||
{
|
||||
for(xx = 0; xx < outputWidth; xx++)
|
||||
{
|
||||
/* Compute the mean of the input image... */
|
||||
long hstart = yy * dH - padH;
|
||||
long wstart = xx * dW - padW;
|
||||
long hend = fminf(hstart + kH, inputHeight + padH);
|
||||
long wend = fminf(wstart + kW, inputWidth + padW);
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
hstart = fmaxf(hstart, 0);
|
||||
wstart = fmaxf(wstart, 0);
|
||||
hend = fminf(hend, inputHeight);
|
||||
wend = fminf(wend, inputWidth);
|
||||
|
||||
real sum = 0;
|
||||
|
||||
int divide_factor;
|
||||
if(count_include_pad)
|
||||
divide_factor = pool_size;
|
||||
else
|
||||
divide_factor = (hend - hstart) * (wend - wstart);
|
||||
|
||||
long kx, ky;
|
||||
|
||||
for(ky = hstart; ky < hend; ky++)
|
||||
{
|
||||
for(kx = wstart; kx < wend; kx++)
|
||||
sum += ptr_input[ky*inputWidth + kx];
|
||||
}
|
||||
/* Update output */
|
||||
*ptr_output++ += sum/divide_factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
void THNN_(SpatialAveragePooling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH,
|
||||
bool ceil_mode,
|
||||
bool count_include_pad)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimc = 0;
|
||||
long nbatch = 1;
|
||||
|
||||
long inputWidth;
|
||||
long inputHeight;
|
||||
long outputWidth;
|
||||
long outputHeight;
|
||||
long nInputPlane; // number of channels (or colors)
|
||||
|
||||
real *gradOutput_data;
|
||||
real *input_data, *gradInput_data;
|
||||
|
||||
long k;
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
dimc++;
|
||||
}
|
||||
|
||||
inputWidth = input->size[dimw];
|
||||
inputHeight = input->size[dimh];
|
||||
nInputPlane = input->size[dimc];
|
||||
|
||||
if(ceil_mode)
|
||||
{
|
||||
outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
|
||||
outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
|
||||
outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
|
||||
}
|
||||
if (padW || padH)
|
||||
{
|
||||
// ensure that the last pooling starts inside the image
|
||||
// needed to avoid problems in ceil mode
|
||||
if ((outputHeight - 1)*dH >= inputHeight + padH)
|
||||
--outputHeight;
|
||||
if ((outputWidth - 1)*dW >= inputWidth + padW)
|
||||
--outputWidth;
|
||||
}
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous");
|
||||
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
#pragma omp parallel for private(k)
|
||||
for(k = 0; k < nInputPlane; k++)
|
||||
{
|
||||
long p;
|
||||
for(p = 0; p < nbatch; p++)
|
||||
{
|
||||
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
|
||||
long xx, yy;
|
||||
|
||||
real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
|
||||
real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
|
||||
|
||||
long i;
|
||||
for(i=0; i<inputWidth*inputHeight; i++)
|
||||
ptr_gi[i] = 0.0;
|
||||
|
||||
for(yy = 0; yy < outputHeight; yy++)
|
||||
{
|
||||
for(xx = 0; xx < outputWidth; xx++)
|
||||
{
|
||||
long hstart = yy * dH - padH;
|
||||
long wstart = xx * dW - padW;
|
||||
long hend = fminf(hstart + kH, inputHeight + padH);
|
||||
long wend = fminf(wstart + kW, inputWidth + padW);
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
hstart = fmaxf(hstart, 0);
|
||||
wstart = fmaxf(wstart, 0);
|
||||
hend = fminf(hend, inputHeight);
|
||||
wend = fminf(wend, inputWidth);
|
||||
|
||||
real z = *ptr_gradOutput++;
|
||||
|
||||
int divide_factor;
|
||||
if(count_include_pad)
|
||||
divide_factor = pool_size;
|
||||
else
|
||||
divide_factor = (hend - hstart) * (wend - wstart);
|
||||
|
||||
long kx, ky;
|
||||
for(ky = hstart ; ky < hend; ky++)
|
||||
{
|
||||
for(kx = wstart; kx < wend; kx++)
|
||||
ptr_gradInput[ky*inputWidth + kx] += z/divide_factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
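The output sizes computed above follow the usual pooling formula; with ceil_mode the division is rounded up, and the follow-up check drops a trailing window that would start entirely inside the padding. A small standalone sketch of that computation (the helper name pooled_size and the parameter values are just an example):

#include <stdio.h>
#include <math.h>

/* Output length of one spatial dimension, as in SpatialAveragePooling_updateOutput. */
static long pooled_size(long inputSize, int k, int d, int pad, int ceil_mode)
{
  long out = ceil_mode
    ? (long)(ceil((float)(inputSize - k + 2*pad) / d)) + 1
    : (long)(floor((float)(inputSize - k + 2*pad) / d)) + 1;
  /* ensure that the last pooling window starts inside the image */
  if (pad && (out - 1) * d >= inputSize + pad)
    --out;
  return out;
}

int main(void)
{
  printf("%ld %ld\n", pooled_size(7, 3, 2, 1, 0), pooled_size(7, 3, 2, 1, 1));
  /* 4 4 */
  return 0;
}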
128
torch/lib/THNN/generic/SpatialClassNLLCriterion.c
Normal file
@@ -0,0 +1,128 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialClassNLLCriterion.c"
|
||||
#else
|
||||
|
||||
#define INITIAL_CHECK \
|
||||
THArgCheck(THIndexTensor_(nDimension)(target) == 3, 3, \
|
||||
"only batches of spatial targets supported (3D tensors)"); \
|
||||
THArgCheck(THTensor_(nDimension)(input) == 4, 2, \
|
||||
"only batches of spatial inputs supported (4D tensors)"); \
|
||||
if (weights && THTensor_(nElement)(weights) != THTensor_(size)(input, 1)) { \
|
||||
THError("weight tensor should be defined either for all or no classes"); \
|
||||
} \
|
||||
\
|
||||
{ \
|
||||
long input0 = THTensor_(size)(input, 0); \
|
||||
long input1 = THTensor_(size)(input, 1); \
|
||||
long input2 = THTensor_(size)(input, 2); \
|
||||
long input3 = THTensor_(size)(input, 3); \
|
||||
long target0 = THIndexTensor_(size)(target, 0); \
|
||||
long target1 = THIndexTensor_(size)(target, 1); \
|
||||
long target2 = THIndexTensor_(size)(target, 2); \
|
||||
THAssertMsg(input0 == target0 && input2 == target1 && input3 == target2, \
|
||||
"size mismatch (got input: %ldx%ldx%ldx%ld, target: %ldx%ldx%ld)", \
|
||||
input0, input1, input2, input3, target0, target1, target2); \
|
||||
}
|
||||
|
||||
void THNN_(SpatialClassNLLCriterion_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THIndexTensor *target,
|
||||
THTensor *output,
|
||||
bool sizeAverage,
|
||||
THTensor *weights,
|
||||
THTensor *total_weight)
|
||||
{
|
||||
INITIAL_CHECK;
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
target = THIndexTensor_(newContiguous)(target);
|
||||
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
|
||||
|
||||
real *input_data = THTensor_(data)(input);
|
||||
THIndex_t *target_data = THIndexTensor_(data)(target);
|
||||
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
|
||||
real *output_data = THTensor_(data)(output);
|
||||
real *total_weight_data = THTensor_(data)(total_weight);
|
||||
|
||||
long batch_size = THTensor_(size)(input, 0);
|
||||
long n_classes = THTensor_(size)(input, 1);
|
||||
long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
|
||||
long sample_size = map_size * n_classes;
|
||||
|
||||
real total_weight_acc = 0;
|
||||
real output_acc = 0;
|
||||
for (int b = 0; b < batch_size; b++) {
|
||||
for (int elem = 0; elem < map_size; elem++) {
|
||||
int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
|
||||
THAssert(cur_target >= 0 && cur_target < n_classes);
|
||||
|
||||
real cur_weight = weights ? weights_data[cur_target] : 1.0f;
|
||||
total_weight_acc += cur_weight;
|
||||
output_acc -= input_data[b * sample_size + cur_target * map_size + elem] * cur_weight;
|
||||
}
|
||||
}
|
||||
*total_weight_data = total_weight_acc;
|
||||
*output_data = output_acc;
|
||||
|
||||
if (sizeAverage && *total_weight_data)
|
||||
*output_data /= *total_weight_data;
|
||||
|
||||
THTensor_(free)(input);
|
||||
THIndexTensor_(free)(target);
|
||||
if (weights)
|
||||
THTensor_(free)(weights);
|
||||
}
|
||||
|
||||
void THNN_(SpatialClassNLLCriterion_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THIndexTensor *target,
|
||||
THTensor *gradInput,
|
||||
bool sizeAverage,
|
||||
THTensor *weights,
|
||||
THTensor *total_weight)
|
||||
{
|
||||
INITIAL_CHECK;
|
||||
THArgCheck(THTensor_(isContiguous)(gradInput), 4,
|
||||
"gradInput must be contiguous");
|
||||
|
||||
real *total_weight_data = THTensor_(data)(total_weight);
|
||||
if (*total_weight_data <= 0)
|
||||
return;
|
||||
|
||||
target = THIndexTensor_(newContiguous)(target);
|
||||
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
|
||||
|
||||
THIndex_t *target_data = THIndexTensor_(data)(target);
|
||||
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
|
||||
real *gradInput_data = THTensor_(data)(gradInput);
|
||||
|
||||
long batch_size = THTensor_(size)(input, 0);
|
||||
long n_classes = THTensor_(size)(input, 1);
|
||||
long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
|
||||
long sample_size = map_size * n_classes;
|
||||
|
||||
real normalize = sizeAverage ? *total_weight_data : 1.0f;
|
||||
|
||||
int b;
|
||||
#pragma omp parallel for
|
||||
for (b = 0; b < batch_size; b++) {
|
||||
int elem;
|
||||
for (elem = 0; elem < map_size; elem++) {
|
||||
int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
|
||||
THAssert(cur_target >= 0 && cur_target < n_classes);
|
||||
|
||||
gradInput_data[b * sample_size + cur_target * map_size + elem] =
|
||||
-(weights ? weights_data[cur_target] : 1.0f) / normalize;
|
||||
}
|
||||
}
|
||||
|
||||
THIndexTensor_(free)(target);
|
||||
if (weights)
|
||||
THTensor_(free)(weights);
|
||||
}
|
||||
|
||||
#undef INITIAL_CHECK
|
||||
|
||||
#endif
|
||||
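The loop above accumulates the weighted negative log-probability of the target class at every spatial position and then optionally divides by the accumulated weight. A self-contained toy version (illustrative only; plain float arrays instead of THTensor, batch of one, two classes, a 1x2 map):

#include <stdio.h>

/* Toy spatial NLL: log-probabilities already computed per class and
   per pixel. Mirrors the accumulation loop in the patch. */
int main(void)
{
  float input[2][2] = { { -0.3f, -2.0f },    /* class 0, two pixels */
                        { -1.4f, -0.15f } }; /* class 1, two pixels */
  int target[2] = { 0, 1 };                  /* per-pixel target class */
  float weights[2] = { 1.0f, 1.0f };
  float total_weight = 0.0f, output = 0.0f;
  for (int elem = 0; elem < 2; elem++) {
    int t = target[elem];
    total_weight += weights[t];
    output -= input[t][elem] * weights[t];
  }
  output /= total_weight;                    /* sizeAverage */
  printf("loss = %f\n", output);             /* (0.3 + 0.15) / 2 */
  return 0;
}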
241
torch/lib/THNN/generic/SpatialConvolutionLocal.c
Normal file
@@ -0,0 +1,241 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialConvolutionLocal.c"
|
||||
#else
|
||||
|
||||
|
||||
static void THNN_(SpatialConvolutionLocal_updateOutput_frame)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput,
|
||||
int kW, int kH, int dW, int dH, int padW, int padH,
|
||||
long nInputPlane, long inputWidth, long inputHeight,
|
||||
long nOutputPlane, long outputWidth, long outputHeight)
|
||||
{
|
||||
long i;
|
||||
THTensor *output3d, *finput3d;
|
||||
|
||||
THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
|
||||
|
||||
THTensor_(copy)(output, bias);
|
||||
|
||||
output3d = THTensor_(newWithStorage3d)(output->storage, output->storageOffset,
|
||||
outputHeight*outputWidth, 1,
|
||||
nOutputPlane, outputHeight*outputWidth,
|
||||
1, nOutputPlane*outputHeight*outputWidth);
|
||||
|
||||
finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset,
|
||||
outputHeight*outputWidth, 1,
|
||||
kW*kH*nInputPlane, outputHeight*outputWidth,
|
||||
1, kW*kH*nInputPlane*outputHeight*outputWidth);
|
||||
// weight: oH*oW x nOutputPlane x nInputPlane*kH*kW
|
||||
// finput3d: oH*oW x nInputPlane*kH*kW x 1
|
||||
THTensor_(baddbmm)(output3d, 1.0, output3d, 1.0, weight, finput3d);
|
||||
// output3d: oH*oW x nOutputPlane x 1
|
||||
|
||||
THTensor_(free)(output3d);
|
||||
THTensor_(free)(finput3d);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionLocal_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
long inputWidth, long inputHeight,
|
||||
long outputWidth, long outputHeight)
|
||||
{
|
||||
long nInputPlane = THTensor_(size)(weight,2)/(kW*kH);
|
||||
long nOutputPlane = THTensor_(size)(weight,1);
|
||||
|
||||
if(input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
|
||||
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
|
||||
|
||||
THNN_(SpatialConvolutionLocal_updateOutput_frame)(input, output, weight, bias, finput,
|
||||
kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
|
||||
THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);
|
||||
|
||||
#pragma omp parallel for private(t)
|
||||
for(t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *input_t = THTensor_(newSelect)(input, 0, t);
|
||||
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
|
||||
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
|
||||
|
||||
THNN_(SpatialConvolutionLocal_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
|
||||
kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
|
||||
THTensor_(free)(input_t);
|
||||
THTensor_(free)(output_t);
|
||||
THTensor_(free)(finput_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void THNN_(SpatialConvolutionLocal_updateGradInput_frame)(THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput,
|
||||
int kW, int kH, int dW, int dH, int padW, int padH,
|
||||
long nInputPlane, long inputWidth, long inputHeight,
|
||||
long nOutputPlane, long outputWidth, long outputHeight)
|
||||
{
|
||||
THTensor *gradOutput3d, *fgradInput3d;
|
||||
gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
|
||||
outputHeight*outputWidth, 1,
|
||||
nOutputPlane, outputHeight*outputWidth,
|
||||
1, nOutputPlane*outputHeight*outputWidth);
|
||||
fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
|
||||
outputHeight*outputWidth, 1,
|
||||
kW*kH*nInputPlane, outputHeight*outputWidth,
|
||||
1, kW*kH*nInputPlane*outputHeight*outputWidth);
|
||||
// weight: oH*oW x nInputPlane*kH*kW x nOutputPlane
|
||||
// gradOutput3d: oH*oW x nOutputPlane x 1
|
||||
THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d);
|
||||
// fgradInput3d: oH*oW x nInputPlane*kH*kW x 1
|
||||
|
||||
THTensor_(free)(gradOutput3d);
|
||||
THTensor_(free)(fgradInput3d);
|
||||
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionLocal_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
long inputWidth, long inputHeight,
|
||||
long outputWidth, long outputHeight)
|
||||
{
|
||||
long nInputPlane = THTensor_(size)(weight,2)/(kW*kH);
|
||||
long nOutputPlane = THTensor_(size)(weight,1);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(resizeAs)(fgradInput, finput);
|
||||
THTensor_(transpose)(weight, weight, 1, 2);
|
||||
|
||||
if(input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
#pragma omp parallel for private(t)
|
||||
for(t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
|
||||
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
|
||||
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
|
||||
|
||||
THNN_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
|
||||
THTensor_(free)(gradInput_t);
|
||||
THTensor_(free)(gradOutput_t);
|
||||
THTensor_(free)(fgradInput_t);
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(transpose)(weight, weight, 1, 2);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialConvolutionLocal_accGradParameters_frame)(THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, real scale,
|
||||
int kW, int kH, int dW, int dH, int padW, int padH,
|
||||
long nInputPlane, long inputWidth, long inputHeight,
|
||||
long nOutputPlane, long outputWidth, long outputHeight)
|
||||
{
|
||||
|
||||
THTensor *gradOutput3d, *finput3d;
|
||||
gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
|
||||
outputHeight*outputWidth, 1,
|
||||
nOutputPlane, outputHeight*outputWidth,
|
||||
1, nOutputPlane*outputHeight*outputWidth);
|
||||
finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset,
|
||||
outputHeight*outputWidth, 1,
|
||||
1, kW*kH*nInputPlane*outputHeight*outputWidth,
|
||||
kW*kH*nInputPlane, outputHeight*outputWidth);
|
||||
// gradOutput3d: oH*oW x nOutputPlane x 1
|
||||
// finput3d: oH*oW x 1 x kW*kH*nInputPlane
|
||||
THTensor_(baddbmm)(gradWeight, 1.0, gradWeight, scale, gradOutput3d, finput3d);
|
||||
// gradWeight: oH*oW x nOutputPlane x kW*kH*nInputPlane
|
||||
|
||||
THTensor_(cadd)(gradBias, gradBias, scale, gradOutput);
|
||||
|
||||
THTensor_(free)(gradOutput3d);
|
||||
THTensor_(free)(finput3d);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionLocal_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
long inputWidth, long inputHeight,
|
||||
long outputWidth, long outputHeight,
|
||||
real scale)
|
||||
{
|
||||
long nInputPlane = THTensor_(size)(gradWeight,2)/(kW*kH);
|
||||
long nOutputPlane = THTensor_(size)(gradWeight,1);
|
||||
|
||||
if(input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale, kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
for(t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
|
||||
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
|
||||
|
||||
THNN_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale, kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
|
||||
THTensor_(free)(gradOutput_t);
|
||||
THTensor_(free)(finput_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
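In this locally connected variant every output location owns its own filter bank, so weight is laid out as (outputHeight*outputWidth) x nOutputPlane x (nInputPlane*kH*kW) and the forward and backward passes are batched matrix multiplies over output locations. A small sketch (illustrative sizes) of how nInputPlane is recovered from that layout, matching the division at the top of the functions above:

#include <stdio.h>

/* weight->size == { oH*oW, nOutputPlane, nInputPlane*kH*kW } */
int main(void)
{
  long oH = 4, oW = 4, nOutputPlane = 8, nInputPlane = 3;
  int kW = 5, kH = 5;
  long weight_size2 = nInputPlane * kH * kW;     /* 75 */
  long recovered = weight_size2 / (kW * kH);     /* 3, as in the patch */
  printf("locations=%ld filters/location=%ld nInputPlane=%ld\n",
         oH * oW, nOutputPlane, recovered);
  return 0;
}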
284
torch/lib/THNN/generic/SpatialConvolutionMM.c
Normal file
@@ -0,0 +1,284 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialConvolutionMM.c"
|
||||
#else
|
||||
|
||||
static void THNN_(SpatialConvolutionMM_updateOutput_frame)(
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *finput,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH,
|
||||
long nInputPlane,
|
||||
long inputWidth,
|
||||
long inputHeight,
|
||||
long nOutputPlane,
|
||||
long outputWidth,
|
||||
long outputHeight)
|
||||
{
|
||||
long i;
|
||||
THTensor *output2d;
|
||||
|
||||
THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
|
||||
|
||||
output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
|
||||
nOutputPlane, -1,
|
||||
outputHeight*outputWidth, -1);
|
||||
if (bias) {
|
||||
for(i = 0; i < nOutputPlane; i++)
|
||||
THVector_(fill)(output->storage->data+output->storageOffset+output->stride[0]*i, THTensor_(get1d)(bias, i), outputHeight*outputWidth);
|
||||
} else {
|
||||
THTensor_(zero)(output);
|
||||
}
|
||||
|
||||
THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
|
||||
|
||||
THTensor_(free)(output2d);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionMM_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH)
|
||||
{
|
||||
int dimf = 0;
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
|
||||
long nInputPlane;
|
||||
long inputWidth;
|
||||
long inputHeight;
|
||||
long nOutputPlane;
|
||||
long outputWidth;
|
||||
long outputHeight;
|
||||
|
||||
THArgCheck( input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
|
||||
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
dimf++;
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
nInputPlane = input->size[dimf];
|
||||
inputWidth = input->size[dimw];
|
||||
inputHeight = input->size[dimh];
|
||||
nOutputPlane = weight->size[0];
|
||||
outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
|
||||
outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
|
||||
|
||||
if (outputWidth < 1 || outputHeight < 1)
|
||||
THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",
|
||||
nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
|
||||
|
||||
if (nInputPlane*kW*kH != weight->size[1])
|
||||
THError("Wrong number of input channels! Input has %d channels, expected %d",nInputPlane,weight->size[1]/(kW*kH));
|
||||
|
||||
if(input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
|
||||
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
|
||||
|
||||
THNN_(SpatialConvolutionMM_updateOutput_frame)(input, output, weight, bias, finput,
|
||||
kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
|
||||
THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);
|
||||
|
||||
#pragma omp parallel for private(t)
|
||||
for(t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *input_t = THTensor_(newSelect)(input, 0, t);
|
||||
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
|
||||
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
|
||||
|
||||
THNN_(SpatialConvolutionMM_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
|
||||
kW, kH, dW, dH, padW, padH,
|
||||
nInputPlane, inputWidth, inputHeight,
|
||||
nOutputPlane, outputWidth, outputHeight);
|
||||
|
||||
THTensor_(free)(input_t);
|
||||
THTensor_(free)(output_t);
|
||||
THTensor_(free)(finput_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(
|
||||
THTensor *gradInput,
|
||||
THTensor *gradOutput,
|
||||
THTensor *weight,
|
||||
THTensor *fgradInput,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH)
|
||||
{
|
||||
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
|
||||
gradOutput->size[0], -1,
|
||||
gradOutput->size[1]*gradOutput->size[2], -1);
|
||||
THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
|
||||
THTensor_(free)(gradOutput2d);
|
||||
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH, gradInput->size[0], gradInput->size[2], gradInput->size[1], gradOutput->size[2], gradOutput->size[1]);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionMM_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH)
|
||||
{
|
||||
long nOutputPlane = weight->size[0];
|
||||
|
||||
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
|
||||
THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
|
||||
THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(resizeAs)(fgradInput, finput);
|
||||
// depending on the BLAS library, fgradInput (the result tensor) might
|
||||
// be left uninitialized when the beta/alpha coefficient is zero, which
|
||||
// could lead to weird behavior; hence, to be safe, zero it
|
||||
THTensor_(zero)(fgradInput);
|
||||
THTensor_(transpose)(weight, weight, 0, 1);
|
||||
|
||||
if(input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
#pragma omp parallel for private(t)
|
||||
for(t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
|
||||
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
|
||||
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
|
||||
|
||||
THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);
|
||||
|
||||
THTensor_(free)(gradInput_t);
|
||||
THTensor_(free)(gradOutput_t);
|
||||
THTensor_(free)(fgradInput_t);
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(transpose)(weight, weight, 0, 1);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialConvolutionMM_accGradParameters_frame)(
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *finput,
|
||||
real scale)
|
||||
{
|
||||
long i;
|
||||
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
|
||||
gradOutput->size[0], -1,
|
||||
gradOutput->size[1]*gradOutput->size[2], -1);
|
||||
|
||||
THTensor_(transpose)(finput, finput, 0, 1);
|
||||
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
|
||||
THTensor_(transpose)(finput, finput, 0, 1);
|
||||
|
||||
if (gradBias) {
|
||||
for(i = 0; i < gradBias->size[0]; i++)
|
||||
{
|
||||
long k;
|
||||
real sum = 0;
|
||||
real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
|
||||
for(k = 0; k < gradOutput2d->size[1]; k++)
|
||||
sum += data[k];
|
||||
(gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum;
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOutput2d);
|
||||
}
|
||||
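The bias part above is a plane-wise reduction: each bias gradient receives scale times the sum of its output plane's gradOutput entries. A toy standalone version (illustrative numbers):

#include <stdio.h>

/* One output plane with four gradOutput entries. */
int main(void)
{
  float gradOutput_plane[4] = { 0.5f, -0.25f, 1.0f, 0.75f };
  float scale = 0.1f, gradBias = 0.0f, sum = 0.0f;
  for (int k = 0; k < 4; k++)
    sum += gradOutput_plane[k];
  gradBias += scale * sum;                       /* 0.1 * 2.0 = 0.2 */
  printf("gradBias += %f\n", gradBias);
  return 0;
}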
|
||||
void THNN_(SpatialConvolutionMM_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH,
|
||||
real scale)
|
||||
{
|
||||
long nOutputPlane = gradWeight->size[0];
|
||||
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
|
||||
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
|
||||
if(input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
for(t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
|
||||
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
|
||||
|
||||
THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
|
||||
|
||||
THTensor_(free)(gradOutput_t);
|
||||
THTensor_(free)(finput_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
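SpatialConvolutionMM reduces the convolution to unfolded_copy (an im2col step) followed by a single GEMM against the (nOutputPlane) x (kW*kH*nInputPlane) weight matrix. A worked sketch (illustrative sizes) of the output-size arithmetic and the shape of the unfolded buffer:

#include <stdio.h>

/* Worked example of the output-size arithmetic used above:
   outputWidth = (inputWidth + 2*padW - kW) / dW + 1 */
int main(void)
{
  long inputWidth = 32, inputHeight = 32;
  int kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1;
  long outputWidth  = (inputWidth  + 2*padW - kW) / dW + 1;  /* 32 */
  long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;  /* 32 */
  long nInputPlane = 16, nOutputPlane = 64;
  /* finput (the unfolded input) becomes a (kW*kH*nInputPlane) x
     (outputHeight*outputWidth) matrix, so the whole convolution is
     one matrix product with the weight matrix. */
  printf("output: %ldx%ldx%ld, finput: %ldx%ld\n",
         nOutputPlane, outputHeight, outputWidth,
         (long)(kW*kH)*nInputPlane, outputHeight*outputWidth);
  return 0;
}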
259
torch/lib/THNN/generic/SpatialConvolutionMap.c
Normal file
@@ -0,0 +1,259 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c"
|
||||
#else
|
||||
|
||||
void THNN_(SpatialConvolutionMap_updateOutput)(
|
||||
THNNState *state, THTensor *input, THTensor *output, THTensor *weight, THTensor *bias,
|
||||
THTensor *connTable, int nInputPlane, int nOutputPlane,
|
||||
int dW, int dH)
|
||||
{
|
||||
THArgCheck(
|
||||
weight != NULL && weight->nDimension == 3
|
||||
&& connTable != NULL && connTable->size[0] == weight->size[0], 4,
|
||||
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
|
||||
);
|
||||
|
||||
real *weight_data = THTensor_(data)(weight);
|
||||
real *bias_data = THTensor_(data)(bias);
|
||||
real *connTable_data = THTensor_(data)(connTable);
|
||||
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimc = 0;
|
||||
long nbatch = 1;
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimc++;
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
const long kH = weight->size[1];
|
||||
const long kW = weight->size[2];
|
||||
|
||||
THArgCheck(input->size[dimc] >= nInputPlane, 2, "invalid number of input planes");
|
||||
THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size");
|
||||
|
||||
const long input_w = input->size[dimw];
|
||||
const long input_h = input->size[dimh];
|
||||
const long output_w = (input_w - kW) / dW + 1;
|
||||
const long output_h = (input_h - kH) / dH + 1;
|
||||
|
||||
if (input->nDimension == 3)
|
||||
THTensor_(resize3d)(output, nOutputPlane, output_h, output_w);
|
||||
else
|
||||
THTensor_(resize4d)(output, input->size[0], nOutputPlane, output_h, output_w);
|
||||
|
||||
/* contiguous */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
output = THTensor_(newContiguous)(output);
|
||||
|
||||
/* get raw pointers */
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *output_data = THTensor_(data)(output);
|
||||
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nOutputPlane; p++)
|
||||
{
|
||||
long m;
|
||||
for (m = 0; m < nbatch; m++)
|
||||
{
|
||||
/* add bias */
|
||||
real *ptr_output = output_data + p*output_w*output_h + m*nOutputPlane*output_w*output_h;
|
||||
long j, k;
|
||||
real z = bias_data[p];
|
||||
for (j = 0; j < output_h*output_w; j++)
|
||||
ptr_output[j] = z;
|
||||
|
||||
/* convolve all maps */
|
||||
int nweight = connTable->size[0];
|
||||
for (k = 0; k < nweight; k++)
|
||||
{
|
||||
/* get offsets for input/output */
|
||||
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
|
||||
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
|
||||
|
||||
if (o == p)
|
||||
{
|
||||
THTensor_(validXCorr2Dptr)(
|
||||
output_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h,
|
||||
1.0,
|
||||
input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w,
|
||||
weight_data + k*kW*kH,
|
||||
kH, kW,
|
||||
dH, dW
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(output);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionMap_updateGradInput)(
|
||||
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *weight, THTensor *bias,
|
||||
THTensor *connTable, int nInputPlane, int nOutputPlane,
|
||||
int dW, int dH)
|
||||
{
|
||||
THArgCheck(
|
||||
weight != NULL && weight->nDimension == 3
|
||||
&& connTable != NULL && connTable->size[0] == weight->size[0], 5,
|
||||
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
|
||||
);
|
||||
|
||||
real *weight_data = THTensor_(data)(weight);
|
||||
real *connTable_data = THTensor_(data)(connTable);
|
||||
|
||||
/* and dims */
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
const long input_h = input->size[dimh];
|
||||
const long input_w = input->size[dimw];
|
||||
const long output_h = gradOutput->size[dimh];
|
||||
const long output_w = gradOutput->size[dimw];
|
||||
const long kH = weight->size[1];
|
||||
const long kW = weight->size[2];
|
||||
|
||||
/* contiguous */
|
||||
gradInput = THTensor_(newContiguous)(gradInput);
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* Resize/Zero */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
/* get raw pointers */
|
||||
real *gradInput_data = THTensor_(data)(gradInput);
|
||||
real *gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nInputPlane; p++)
|
||||
{
|
||||
long m;
|
||||
for (m = 0; m < nbatch; m++)
|
||||
{
|
||||
long k;
|
||||
/* backward all */
|
||||
int nkernel = connTable->size[0];
|
||||
for (k = 0; k < nkernel; k++)
|
||||
{
|
||||
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
|
||||
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
|
||||
if (i == p)
|
||||
{
|
||||
/* gradient to input */
|
||||
THTensor_(fullConv2Dptr)(
|
||||
gradInput_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, 1.0,
|
||||
gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h, output_h, output_w,
|
||||
weight_data + k*kW*kH, kH, kW, dH, dW
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
THTensor_(free)(gradInput);
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
void THNN_(SpatialConvolutionMap_accGradParameters)(
|
||||
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
|
||||
THTensor *connTable, int nInputPlane, int nOutputPlane,
|
||||
int dW, int dH, real scale)
|
||||
{
|
||||
THArgCheck(
|
||||
gradWeight != NULL && gradWeight->nDimension == 3
|
||||
&& connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
|
||||
"3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
|
||||
);
|
||||
|
||||
real *gradWeight_data = THTensor_(data)(gradWeight);
|
||||
real *gradBias_data = THTensor_(data)(gradBias);
|
||||
|
||||
/* and dims */
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
const long input_h = input->size[dimh];
|
||||
const long input_w = input->size[dimw];
|
||||
const long output_h = gradOutput->size[dimh];
|
||||
const long output_w = gradOutput->size[dimw];
|
||||
const long kH = gradWeight->size[1];
|
||||
const long kW = gradWeight->size[2];
|
||||
|
||||
/* contiguous */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* get raw pointers */
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
long k;
|
||||
/* gradients wrt bias */
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nOutputPlane; k++)
|
||||
{
|
||||
long m;
|
||||
for (m = 0; m < nbatch; m++)
|
||||
{
|
||||
real *ptr_gradOutput = gradOutput_data + k*output_w*output_h + m*nOutputPlane*output_w*output_h;
|
||||
long l;
|
||||
for (l = 0; l < output_h*output_w; l++)
|
||||
gradBias_data[k] += scale*ptr_gradOutput[l];
|
||||
}
|
||||
}
|
||||
|
||||
/* gradients wrt weight */
|
||||
const int nkernel = connTable->size[0];
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nkernel; k++)
|
||||
{
|
||||
long m;
|
||||
for (m = 0; m < nbatch; m++)
|
||||
{
|
||||
int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
|
||||
int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
|
||||
|
||||
/* gradient to kernel */
|
||||
THTensor_(validXCorr2DRevptr)(
|
||||
gradWeight_data + k*kW*kH,
|
||||
scale,
|
||||
input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w,
|
||||
gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h , output_h, output_w,
|
||||
dH, dW
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
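The connection table consumed above is an nkernel x 2 tensor of (input plane, output plane) pairs, 1-based on the Lua side, which is why every lookup subtracts TH_INDEX_BASE. A minimal sketch (the table contents and the index base value of 1 are assumptions for illustration):

#include <stdio.h>

/* Illustrative connection table: 3 kernels wiring input planes to
   output planes, stored row-wise as {inputPlane, outputPlane}. */
int main(void)
{
  const int index_base = 1;                     /* Lua-style indexing */
  float connTable[3][2] = { {1, 1}, {2, 1}, {2, 2} };
  for (int k = 0; k < 3; k++) {
    int i = (int)connTable[k][0] - index_base;
    int o = (int)connTable[k][1] - index_base;
    printf("kernel %d: input plane %d -> output plane %d\n", k, i, o);
  }
  return 0;
}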
339
torch/lib/THNN/generic/SpatialDilatedConvolution.c
Normal file
@@ -0,0 +1,339 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialDilatedConvolution.c"
|
||||
#else
|
||||
|
||||
void THNN_(SpatialDilatedConvolution_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *columns,
|
||||
THTensor *ones,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
int dilationW, int dilationH)
|
||||
{
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
|
||||
THArgCheck(weight->nDimension == 4, 4, "weight tensor must be 4D (nOutputPlane,nInputPlane,kH,kW)");
|
||||
THArgCheck(!bias || weight->size[0] == bias->size[0], 4, "nOutputPlane mismatch in weight and bias");
|
||||
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
|
||||
// Params:
|
||||
int nInputPlane = weight->size[1];
|
||||
int nOutputPlane = weight->size[0];
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 3) {
|
||||
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
|
||||
} else {
|
||||
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
}
|
||||
|
||||
long inputWidth = input->size[3];
|
||||
long inputHeight = input->size[2];
|
||||
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
|
||||
if (outputWidth < 1 || outputHeight < 1)
|
||||
THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",
|
||||
nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(zero)(output);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
// Note: this buffer can be shared with other modules, it only ever gets increased,
|
||||
// and always contains ones.
|
||||
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize2d)(ones, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *output_n = THTensor_(new)();
|
||||
|
||||
// For each elt in batch, do:
|
||||
for (int elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(output_n, output, 0, elt);
|
||||
|
||||
// Do Bias first:
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m_ = nOutputPlane;
|
||||
long n_ = outputHeight * outputWidth;
|
||||
long k_ = 1;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
if (bias) {
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n_, m_, k_,
|
||||
1,
|
||||
THTensor_(data)(ones), k_,
|
||||
THTensor_(data)(bias), k_,
|
||||
0,
|
||||
THTensor_(data)(output_n), n_
|
||||
);
|
||||
} else {
|
||||
THTensor_(zero)(output_n);
|
||||
}
|
||||
|
||||
// Extract columns:
|
||||
THNN_(im2col)(
|
||||
THTensor_(data)(input_n),
|
||||
nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
|
||||
dilationH, dilationW,
|
||||
THTensor_(data)(columns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m = nOutputPlane;
|
||||
long n = columns->size[1];
|
||||
long k = nInputPlane*kH*kW;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 'n',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(columns), n,
|
||||
THTensor_(data)(weight), k,
|
||||
1,
|
||||
THTensor_(data)(output_n), n
|
||||
);
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(output_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0) {
|
||||
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialDilatedConvolution_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *gradColumns,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
int dilationW, int dilationH)
|
||||
{
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
|
||||
THArgCheck(weight->nDimension == 4, 4, "weight tensor must be 4D (nOutputPlane,nInputPlane,kH,kW)");
|
||||
THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
|
||||
THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");
|
||||
|
||||
// Params
|
||||
int nInputPlane = weight->size[1];
|
||||
int nOutputPlane = weight->size[0];
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 3) {
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
|
||||
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
|
||||
}
|
||||
|
||||
long inputWidth = input->size[3];
|
||||
long inputHeight = input->size[2];
|
||||
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(gradColumns, nInputPlane*kW*kH, outputHeight*outputWidth);
|
||||
THTensor_(zero)(gradColumns);
|
||||
|
||||
// Helpers
|
||||
THTensor *gradInput_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
// For each elt in batch, do:
|
||||
for (int elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per sample:
|
||||
THTensor_(select)(gradInput_n, gradInput, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m = nInputPlane*kW*kH;
|
||||
long n = gradColumns->size[1];
|
||||
long k = nOutputPlane;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 't',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(gradOutput_n), n,
|
||||
THTensor_(data)(weight), m,
|
||||
0,
|
||||
THTensor_(data)(gradColumns), n
|
||||
);
|
||||
|
||||
// Unpack columns back into input:
|
||||
THNN_(col2im)(
|
||||
THTensor_(data)(gradColumns),
|
||||
nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
|
||||
dilationH, dilationW,
|
||||
THTensor_(data)(gradInput_n)
|
||||
);
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(gradInput_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0) {
|
||||
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
|
||||
THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void THNN_(SpatialDilatedConvolution_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *columns,
|
||||
THTensor *ones,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
int dilationW, int dilationH,
|
||||
real scale)
|
||||
{
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
|
||||
THArgCheck(gradWeight->nDimension == 4, 4, "gradWeight tensor must be 4D (nOutputPlane,nInputPlane,kH,kW)");
|
||||
THArgCheck(!gradBias || gradWeight->size[0] == gradBias->size[0], 4, "nOutputPlane mismatch in gradWeight and gradBias");
|
||||
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
|
||||
// Params
|
||||
int nInputPlane = gradWeight->size[1];
|
||||
int nOutputPlane = gradWeight->size[0];
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 3) {
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
|
||||
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
|
||||
}
|
||||
|
||||
long inputWidth = input->size[3];
|
||||
long inputHeight = input->size[2];
|
||||
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize2d)(ones, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
// For each elt in batch, do:
|
||||
for (int elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// Extract columns:
|
||||
THNN_(im2col)(
|
||||
THTensor_(data)(input_n),
|
||||
nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
|
||||
dilationH, dilationW,
|
||||
THTensor_(data)(columns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m = nOutputPlane;
|
||||
long n = nInputPlane*kW*kH;
|
||||
long k = columns->size[1];
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n, m, k,
|
||||
scale,
|
||||
THTensor_(data)(columns), k,
|
||||
THTensor_(data)(gradOutput_n), k,
|
||||
1,
|
||||
THTensor_(data)(gradWeight), n
|
||||
);
|
||||
|
||||
// Do Bias:
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m_ = nOutputPlane;
|
||||
long k_ = outputHeight * outputWidth;
|
||||
|
||||
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
|
||||
if (gradBias) {
|
||||
THBlas_(gemv)(
|
||||
't',
|
||||
k_, m_,
|
||||
scale,
|
||||
THTensor_(data)(gradOutput_n), k_,
|
||||
THTensor_(data)(ones), 1,
|
||||
1,
|
||||
THTensor_(data)(gradBias), 1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize
|
||||
if (batch == 0) {
|
||||
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
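Dilation enlarges the effective kernel to dilation*(k-1)+1 before the usual output-size formula is applied. A worked sketch with illustrative sizes (kW=3, dilation 2, padding 2 keeps a 32-wide input at width 32):

#include <stdio.h>

/* Worked example of the dilated output-size formula used above:
   out = (in + 2*pad - (dilation*(k-1) + 1)) / stride + 1 */
int main(void)
{
  long inputWidth = 32;
  int kW = 3, dW = 1, padW = 2, dilationW = 2;
  long effectiveKW = dilationW * (kW - 1) + 1;                      /* 5 */
  long outputWidth = (inputWidth + 2*padW - effectiveKW) / dW + 1;  /* 32 */
  printf("effective kernel %ld -> output width %ld\n", effectiveKW, outputWidth);
  return 0;
}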
251
torch/lib/THNN/generic/SpatialFractionalMaxPooling.c
Normal file
@@ -0,0 +1,251 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialFractionalMaxPooling.c"
|
||||
#else
|
||||
|
||||
static long* THNN_(SpatialFractionalMaxPooling_generateIntervals)(
|
||||
real sample,
|
||||
long inputSize,
|
||||
long outputSize,
|
||||
int poolSize) {
|
||||
real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1);
|
||||
long* sequence = (long*) THAlloc(sizeof(long) * outputSize);
|
||||
|
||||
long i;
|
||||
for (i = 0; i < outputSize - 1; ++i) {
|
||||
sequence[i] =
|
||||
(long) ((i + sample) * alpha) - (long) (sample * alpha);
|
||||
}
|
||||
sequence[outputSize - 1] = inputSize - poolSize;
|
||||
|
||||
return sequence;
|
||||
}
|
||||
|
||||
static void THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
|
||||
real* input,
|
||||
real* output,
|
||||
real* indices,
|
||||
real* randomSamples,
|
||||
long numPlanes,
|
||||
long inputW, long inputH,
|
||||
long outputW, long outputH,
|
||||
int poolSizeW, int poolSizeH) {
|
||||
long plane;
|
||||
#pragma omp parallel for private(plane)
|
||||
for (plane = 0; plane < numPlanes; ++plane) {
|
||||
/* each plane contains 2 random samples, one for W and one for H */
|
||||
real* randomSamplesForPlane = randomSamples + plane * 2;
|
||||
|
||||
/* Generate interval sequence */
|
||||
long* sequenceW =
|
||||
THNN_(SpatialFractionalMaxPooling_generateIntervals)(
|
||||
randomSamplesForPlane[0], inputW, outputW, poolSizeW);
|
||||
long* sequenceH =
|
||||
THNN_(SpatialFractionalMaxPooling_generateIntervals)(
|
||||
randomSamplesForPlane[1], inputH, outputH, poolSizeH);
|
||||
|
||||
/* loop over output */
|
||||
long h, w;
|
||||
|
||||
real* inputForPlane = input + plane * inputW * inputH;
|
||||
real* outputForPlane = output + plane * outputW * outputH;
|
||||
real* indicesForPlane = indices + plane * outputW * outputH;
|
||||
|
||||
for (h = 0; h < outputH; ++h) {
|
||||
long inputHStart = sequenceH[h];
|
||||
|
||||
for (w = 0; w < outputW; ++w) {
|
||||
long inputWStart = sequenceW[w];
|
||||
|
||||
real maxVal = -THInf;
|
||||
long maxIndex = -1;
|
||||
|
||||
long h2, w2;
|
||||
for (h2 = inputHStart; h2 < inputHStart + poolSizeH; ++h2) {
|
||||
for (w2 = inputWStart; w2 < inputWStart + poolSizeW; ++w2) {
|
||||
THAssert(h2 >= 0 && h2 < inputH);
|
||||
THAssert(w2 >= 0 && w2 < inputW);
|
||||
|
||||
long planeIndex = h2 * inputW + w2;
|
||||
real val = inputForPlane[planeIndex];
|
||||
if (val > maxVal) {
|
||||
maxVal = val;
|
||||
maxIndex = planeIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
THAssert(maxVal != -THInf);
|
||||
THAssert(maxIndex != -1);
|
||||
|
||||
outputForPlane[h * outputW + w] = maxVal;
|
||||
/* +1 to lua index */
|
||||
indicesForPlane[h * outputW + w] = (real) maxIndex + TH_INDEX_BASE;
|
||||
}
|
||||
}
|
||||
|
||||
THFree(sequenceW);
|
||||
THFree(sequenceH);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialFractionalMaxPooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
int outputW, int outputH,
|
||||
int poolSizeW, int poolSizeH,
|
||||
THTensor *indices,
|
||||
THTensor *randomSamples) {
|
||||
|
||||
long numBatch = 1;
|
||||
int planeDim = 0;
|
||||
int heightDim = 1;
|
||||
int widthDim = 2;
|
||||
|
||||
long numInputDims = THTensor_(nDimension)(input);
|
||||
THArgCheck(numInputDims == 3 || numInputDims == 4, 2,
|
||||
"3D or 4D (batch mode) tensor expected");
|
||||
|
||||
if (numInputDims == 4) {
|
||||
numBatch = THTensor_(size)(input, 0);
|
||||
planeDim++;
|
||||
heightDim++;
|
||||
widthDim++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
long numPlanes = THTensor_(size)(input, planeDim);
|
||||
long inputH = THTensor_(size)(input, heightDim);
|
||||
long inputW = THTensor_(size)(input, widthDim);
|
||||
|
||||
THArgCheck(outputH + poolSizeH - 1 < inputH, 7,
|
||||
"poolSizeH too large relative to input height");
|
||||
THArgCheck(outputW + poolSizeW - 1 < inputW, 6,
|
||||
"poolSizeW too large relative to input width");
|
||||
|
||||
/* get contiguous input */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
|
||||
if (numInputDims == 3) {
|
||||
/* resize output */
|
||||
THTensor_(resize3d)(output, numPlanes, outputH, outputW);
|
||||
/* indices will contain the locations for each output point */
|
||||
THTensor_(resize3d)(indices, numPlanes, outputH, outputW);
|
||||
|
||||
THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
|
||||
THTensor_(data)(input),
|
||||
THTensor_(data)(output),
|
||||
THTensor_(data)(indices),
|
||||
THTensor_(data)(randomSamples),
|
||||
numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
|
||||
} else {
|
||||
THTensor_(resize4d)(output, numBatch, numPlanes, outputH, outputW);
|
||||
/* indices will contain the locations for each output point */
|
||||
THTensor_(resize4d)(indices, numBatch, numPlanes, outputH, outputW);
|
||||
|
||||
long batch;
|
||||
#pragma omp parallel for private(batch)
|
||||
for (batch = 0; batch < numBatch; ++batch) {
|
||||
THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
|
||||
THTensor_(data)(input) + batch * numPlanes * inputH * inputW,
|
||||
THTensor_(data)(output) + batch * numPlanes * outputH * outputW,
|
||||
THTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
|
||||
THTensor_(data)(randomSamples) + batch * numPlanes * 2,
|
||||
numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
|
||||
real* gradInput,
|
||||
real* gradOutput,
|
||||
real* indices,
|
||||
long numPlanes,
|
||||
long inputW, long inputH,
|
||||
long outputW, long outputH) {
|
||||
long plane;
|
||||
#pragma omp parallel for private(plane)
|
||||
for (plane = 0; plane < numPlanes; plane++) {
|
||||
real* gradInputForPlane = gradInput + plane * inputW * inputH;
|
||||
real* gradOutputForPlane = gradOutput + plane * outputW * outputH;
|
||||
real* indicesForPlane = indices + plane * outputW * outputH;
|
||||
|
||||
long h, w;
|
||||
for (h = 0; h < outputH; ++h) {
|
||||
for (w = 0; w < outputW; ++w) {
|
||||
long outputIndex = h * outputW + w;
|
||||
long index = indicesForPlane[outputIndex] - TH_INDEX_BASE;
|
||||
THAssert(index >= 0 && index < inputW * inputH);
|
||||
|
||||
gradInputForPlane[index] += gradOutputForPlane[outputIndex];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialFractionalMaxPooling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
int outputW, int outputH,
|
||||
int poolSizeW, int poolSizeH,
|
||||
THTensor *indices) {
|
||||
|
||||
long numBatch = 1;
|
||||
int planeDim = 0;
|
||||
int heightDim = 1;
|
||||
int widthDim = 2;
|
||||
|
||||
long numInputDims = THTensor_(nDimension)(input);
|
||||
if (numInputDims == 4) {
|
||||
numBatch = THTensor_(size)(input, 0);
|
||||
planeDim = 1;
|
||||
heightDim++;
|
||||
widthDim++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
long numPlanes = THTensor_(size)(input, planeDim);
|
||||
long inputH = THTensor_(size)(input, heightDim);
|
||||
long inputW = THTensor_(size)(input, widthDim);
|
||||
|
||||
THArgCheck(outputW == THTensor_(size)(gradOutput, widthDim), 3,
|
||||
"gradOutput width unexpected");
|
||||
THArgCheck(outputH == THTensor_(size)(gradOutput, heightDim), 3,
|
||||
"gradOutput height unexpected");
|
||||
|
||||
/* get contiguous gradOutput */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
/* backprop */
|
||||
if (numInputDims == 3) {
|
||||
THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
|
||||
THTensor_(data)(gradInput),
|
||||
THTensor_(data)(gradOutput),
|
||||
THTensor_(data)(indices),
|
||||
numPlanes, inputW, inputH, outputW, outputH);
|
||||
} else {
|
||||
long batch;
|
||||
#pragma omp parallel for private(batch)
|
||||
for (batch = 0; batch < numBatch; ++batch) {
|
||||
THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
|
||||
THTensor_(data)(gradInput) + batch * numPlanes * inputH * inputW,
|
||||
THTensor_(data)(gradOutput) + batch * numPlanes * outputH * outputW,
|
||||
THTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
|
||||
numPlanes, inputW, inputH, outputW, outputH);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
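The interval generator above spaces outputSize pooling windows pseudo-randomly over the input: alpha is the average step, the per-plane random sample shifts the grid, and the last window is pinned to the end. A standalone copy with concrete numbers (illustrative only):

#include <stdio.h>
#include <stdlib.h>

/* Same interval generation as above, for inputSize=10, outputSize=5,
   poolSize=2 and a random sample of 0.37. */
int main(void)
{
  long inputSize = 10, outputSize = 5;
  int poolSize = 2;
  double sample = 0.37;
  double alpha = (double)(inputSize - poolSize) / (double)(outputSize - 1);
  long *seq = malloc(sizeof(long) * outputSize);
  for (long i = 0; i < outputSize - 1; ++i)
    seq[i] = (long)((i + sample) * alpha) - (long)(sample * alpha);
  seq[outputSize - 1] = inputSize - poolSize;   /* last window is pinned */
  for (long i = 0; i < outputSize; ++i)
    printf("window %ld starts at %ld\n", i, seq[i]);
  free(seq);
  return 0;
}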
385
torch/lib/THNN/generic/SpatialFullConvolution.c
Normal file
@@ -0,0 +1,385 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialFullConvolution.c"
|
||||
#else
|
||||
|
||||
static void THNN_(im2col)(const real* data_im, const int channels,
|
||||
const int height, const int width, const int kernel_h, const int kernel_w,
|
||||
const int pad_h, const int pad_w,
|
||||
const int stride_h, const int stride_w,
|
||||
const int dilation_h, const int dilation_w,
|
||||
real* data_col) {
|
||||
const int height_col = (height + 2 * pad_h -
|
||||
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
|
||||
const int width_col = (width + 2 * pad_w -
|
||||
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
|
||||
const int channels_col = channels * kernel_h * kernel_w;
|
||||
for (int c_col = 0; c_col < channels_col; ++c_col) {
|
||||
int w_offset = c_col % kernel_w;
|
||||
int h_offset = (c_col / kernel_w) % kernel_h;
|
||||
int c_im = c_col / kernel_h / kernel_w;
|
||||
for (int h_col = 0; h_col < height_col; ++h_col) {
|
||||
for (int w_col = 0; w_col < width_col; ++w_col) {
|
||||
int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
|
||||
int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
|
||||
data_col[(c_col * height_col + h_col) * width_col + w_col] =
|
||||
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
|
||||
data_im[(c_im * height + h_im) * width + w_im] : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(col2im)(const real* data_col, const int channels,
|
||||
const int height, const int width, const int kernel_h, const int kernel_w,
|
||||
const int pad_h, const int pad_w,
|
||||
const int stride_h, const int stride_w,
|
||||
const int dilation_h, const int dilation_w,
|
||||
real* data_im) {
|
||||
memset(data_im, 0, sizeof(real) * height * width * channels);
|
||||
const int height_col = (height + 2 * pad_h -
|
||||
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
|
||||
const int width_col = (width + 2 * pad_w -
|
||||
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
|
||||
const int channels_col = channels * kernel_h * kernel_w;
|
||||
for (int c_col = 0; c_col < channels_col; ++c_col) {
|
||||
int w_offset = c_col % kernel_w;
|
||||
int h_offset = (c_col / kernel_w) % kernel_h;
|
||||
int c_im = c_col / kernel_h / kernel_w;
|
||||
for (int h_col = 0; h_col < height_col; ++h_col) {
|
||||
for (int w_col = 0; w_col < width_col; ++w_col) {
|
||||
int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
|
||||
int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
|
||||
if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
|
||||
data_im[(c_im * height + h_im) * width + w_im] +=
|
||||
data_col[(c_col * height_col + h_col) * width_col + w_col];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
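Each row of the im2col buffer corresponds to one (channel, kernel row, kernel column) triple, recovered from the flat index as in the loops above. A small sketch of that decomposition (the sample index is arbitrary):

#include <stdio.h>

/* How a column-channel index decomposes in the im2col loop above:
   c_col = (c_im * kernel_h + h_offset) * kernel_w + w_offset. */
int main(void)
{
  const int kernel_h = 3, kernel_w = 3;
  int c_col = 14;                               /* arbitrary example */
  int w_offset = c_col % kernel_w;              /* 2 */
  int h_offset = (c_col / kernel_w) % kernel_h; /* 1 */
  int c_im = c_col / kernel_h / kernel_w;       /* 1 */
  printf("c_col=%d -> channel %d, kernel row %d, kernel col %d\n",
         c_col, c_im, h_offset, w_offset);
  return 0;
}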
|
||||
void THNN_(SpatialFullConvolution_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *columns,
|
||||
THTensor *ones,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
int adjW, int adjH)
|
||||
{
|
||||
int nInputPlane = THTensor_(size)(weight,0);
|
||||
int nOutputPlane = THTensor_(size)(weight,1);
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 3) {
|
||||
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
|
||||
} else {
|
||||
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
}
|
||||
|
||||
long inputWidth = input->size[3];
|
||||
long inputHeight = input->size[2];
|
||||
long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
|
||||
long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
|
||||
THTensor_(zero)(columns);
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
// Note: this buffer can be shared with other modules, it only ever gets increased,
|
||||
// and always contains ones.
|
||||
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize2d)(ones, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *output_n = THTensor_(new)();
|
||||
|
||||
int elt;
|
||||
// For each elt in batch, do:
|
||||
for (elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(output_n, output, 0, elt);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
long m = weight->size[1] * weight->size[2] * weight->size[3];
|
||||
long n = columns->size[1];
|
||||
long k = weight->size[0];
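// This gemm computes columns = weight^T * input_n, i.e. an
// (nOutputPlane*kW*kH) x (inputHeight*inputWidth) column buffer.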
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 't',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(input_n), n,
|
||||
THTensor_(data)(weight), m,
|
||||
0,
|
||||
THTensor_(data)(columns), n
|
||||
);
|
||||
|
||||
// Unpack columns back into input:
|
||||
THNN_(col2im)(
|
||||
THTensor_(data)(columns),
|
||||
nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
|
||||
1, 1,
|
||||
THTensor_(data)(output_n)
|
||||
);
|
||||
|
||||
// Do Bias after:
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
long m_ = nOutputPlane;
|
||||
long n_ = outputHeight * outputWidth;
|
||||
long k_ = 1;
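// Rank-1 update with the ones buffer: adds bias[c] to every spatial
// location of output plane c.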
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
if (bias) {
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n_, m_, k_,
|
||||
1,
|
||||
THTensor_(data)(ones), k_,
|
||||
THTensor_(data)(bias), k_,
|
||||
1,
|
||||
THTensor_(data)(output_n), n_
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(output_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0) {
|
||||
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialFullConvolution_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *gradColumns,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
int adjW, int adjH)
|
||||
{
|
||||
int nInputPlane = THTensor_(size)(weight,0);
|
||||
int nOutputPlane = THTensor_(size)(weight,1);
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 3) {
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
|
||||
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
|
||||
}
|
||||
|
||||
long inputWidth = input->size[3];
|
||||
long inputHeight = input->size[2];
|
||||
long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
|
||||
long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);
|
||||
|
||||
// Helpers
|
||||
THTensor *gradInput_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
int elt;
|
||||
// For each elt in batch, do:
|
||||
for (elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per sample:
|
||||
THTensor_(select)(gradInput_n, gradInput, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// Extract columns:
|
||||
THNN_(im2col)(
|
||||
THTensor_(data)(gradOutput_n),
|
||||
nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
|
||||
1, 1,
|
||||
THTensor_(data)(gradColumns)
|
||||
);
|
||||
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
long m = weight->size[0];
|
||||
long n = gradColumns->size[1];
|
||||
long k = weight->size[1] * weight->size[2] * weight->size[3];
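// gradInput = weight * im2col(gradOutput): the backward pass of the transposed
// convolution is a regular convolution of gradOutput with the same weights.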
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 'n',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(gradColumns), n,
|
||||
THTensor_(data)(weight), k,
|
||||
0,
|
||||
THTensor_(data)(gradInput_n), n
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// Free
|
||||
THTensor_(free)(gradInput_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0) {
|
||||
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
|
||||
THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void THNN_(SpatialFullConvolution_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *columns,
|
||||
THTensor *ones,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
int padW, int padH,
|
||||
int adjW, int adjH,
|
||||
real scale)
|
||||
{
|
||||
int nInputPlane = THTensor_(size)(gradWeight,0);
|
||||
int nOutputPlane = THTensor_(size)(gradWeight,1);
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 3) {
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
|
||||
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
|
||||
}
|
||||
|
||||
long inputWidth = input->size[3];
|
||||
long inputHeight = input->size[2];
|
||||
long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
|
||||
long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize2d)(ones, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
int elt;
|
||||
// For each elt in batch, do:
|
||||
for (elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// Extract columns:
|
||||
THNN_(im2col)(
|
||||
THTensor_(data)(gradOutput_n),
|
||||
nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
|
||||
1, 1,
|
||||
THTensor_(data)(columns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
long n = columns->size[0]; // nOutputPlane * kh * kw
|
||||
long m = input_n->size[0]; // nInputPlane
|
||||
long k = columns->size[1]; // inputHeight * inputWidth
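// Accumulates gradWeight += scale * input_n * columns^T, an
// nInputPlane x (nOutputPlane*kW*kH) update matching the weight layout.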
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n, m, k,
|
||||
scale,
|
||||
THTensor_(data)(columns), k,
|
||||
THTensor_(data)(input_n), k,
|
||||
1,
|
||||
THTensor_(data)(gradWeight), n
|
||||
);
|
||||
|
||||
|
||||
// Do Bias:
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
long m_ = nOutputPlane;
|
||||
long k_ = outputHeight * outputWidth;
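// gemv against the ones vector sums gradOutput over all spatial positions,
// accumulating scale * sum into gradBias for each output plane.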
|
||||
|
||||
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
|
||||
if (gradBias) {
|
||||
THBlas_(gemv)(
|
||||
't',
|
||||
k_, m_,
|
||||
scale,
|
||||
THTensor_(data)(gradOutput_n), k_,
|
||||
THTensor_(data)(ones), 1,
|
||||
1,
|
||||
THTensor_(data)(gradBias), 1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize
|
||||
if (batch == 0) {
|
||||
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
|
||||
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
212
torch/lib/THNN/generic/SpatialFullConvolutionMap.c
Normal file
@ -0,0 +1,212 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialFullConvolutionMap.c"
|
||||
#else
|
||||
|
||||
void THNN_(SpatialFullConvolutionMap_updateOutput)(
|
||||
THNNState *state, THTensor *input, THTensor *output_, THTensor *weight, THTensor *bias,
|
||||
THTensor *connTable, int nInputPlane, int nOutputPlane,
|
||||
int dW, int dH)
|
||||
{
|
||||
THArgCheck(
|
||||
weight != NULL && weight->nDimension == 3
|
||||
&& connTable != NULL && connTable->size[0] == weight->size[0], 4,
|
||||
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
|
||||
);
|
||||
|
||||
const int kH = (int)weight->size[1];
|
||||
const int kW = (int)weight->size[2];
|
||||
|
||||
THArgCheck(input != NULL && input->nDimension == 3, 2, "3D tensor expected");
|
||||
THArgCheck(input->size[0] >= nInputPlane, 2, "invalid number of input planes");
|
||||
|
||||
THTensor_(resize3d)(
|
||||
output_, nOutputPlane,
|
||||
(input->size[1] - 1) * dH + kH,
|
||||
(input->size[2] - 1) * dW + kW
|
||||
);
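/* a full ("transposed") convolution enlarges the map: out = (in - 1)*stride + kernel */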
|
||||
|
||||
/* contiguous */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
THTensor* output = THTensor_(newContiguous)(output_);
|
||||
|
||||
/* get raw pointers */
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *output_data = THTensor_(data)(output);
|
||||
real *weight_data = THTensor_(data)(weight);
|
||||
real *bias_data = THTensor_(data)(bias);
|
||||
real *connTable_data = THTensor_(data)(connTable);
|
||||
|
||||
/* and dims */
|
||||
const long input_h = input->size[1];
|
||||
const long input_w = input->size[2];
|
||||
const long output_h = output->size[1];
|
||||
const long output_w = output->size[2];
|
||||
const long weight_h = weight->size[1];
|
||||
const long weight_w = weight->size[2];
|
||||
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nOutputPlane; p++)
|
||||
{
|
||||
/* add bias */
|
||||
real *ptr_output = output_data + p*output_w*output_h;
|
||||
long j;
|
||||
int nweight;
|
||||
long k;
|
||||
|
||||
for (j = 0; j < output_h*output_w; j++)
|
||||
ptr_output[j] = bias_data[p];
|
||||
|
||||
/* convolve all maps */
|
||||
nweight = connTable->size[0];
|
||||
for (k = 0; k < nweight; k++)
|
||||
{
|
||||
/* get offsets for input/output */
|
||||
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
|
||||
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
|
||||
|
||||
if (o == p)
|
||||
{
|
||||
THTensor_(fullConv2Dptr)(
|
||||
output_data + o*output_w*output_h,
|
||||
1.0,
|
||||
input_data + i*input_w*input_h, input_h, input_w,
|
||||
weight_data + k*weight_w*weight_h, weight_h, weight_w,
|
||||
dH, dW
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
THTensor_(free)(input);
|
||||
THTensor_(freeCopyTo)(output, output_);
|
||||
}
|
||||
|
||||
void THNN_(SpatialFullConvolutionMap_updateGradInput)(
|
||||
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput_, THTensor *weight, THTensor *bias,
|
||||
THTensor *connTable, int nInputPlane, int nOutputPlane,
|
||||
int dW, int dH)
|
||||
{
|
||||
THArgCheck(
|
||||
weight != NULL && weight->nDimension == 3
|
||||
&& connTable != NULL && connTable->size[0] == weight->size[0], 5,
|
||||
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
|
||||
);
|
||||
|
||||
/* contiguous */
|
||||
THTensor* gradInput = THTensor_(newContiguous)(gradInput_);
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* Resize/Zero */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
/* get raw pointers */
|
||||
real *gradInput_data = THTensor_(data)(gradInput);
|
||||
real *gradOutput_data = THTensor_(data)(gradOutput);
|
||||
real *weight_data = THTensor_(data)(weight);
|
||||
real *connTable_data = THTensor_(data)(connTable);
|
||||
|
||||
/* and dims */
|
||||
const long input_h = input->size[1];
|
||||
const long input_w = input->size[2];
|
||||
const long output_h = gradOutput->size[1];
|
||||
const long output_w = gradOutput->size[2];
|
||||
const long kH = weight->size[1];
|
||||
const long kW = weight->size[2];
|
||||
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nInputPlane; p++)
|
||||
{
|
||||
long k;
|
||||
/* backward all */
|
||||
int nkernel = connTable->size[0];
|
||||
for (k = 0; k < nkernel; k++)
|
||||
{
|
||||
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
|
||||
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
|
||||
if (i == p)
|
||||
{
|
||||
/* gradient to input */
|
||||
THTensor_(validXCorr2Dptr)(
|
||||
gradInput_data + i*input_w*input_h,
|
||||
1.0,
|
||||
gradOutput_data + o*output_w*output_h, output_h, output_w,
|
||||
weight_data + k*kW*kH, kH, kW,
|
||||
dH, dW
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
THTensor_(freeCopyTo)(gradInput, gradInput_);
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
void THNN_(SpatialFullConvolutionMap_accGradParameters)(
|
||||
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
|
||||
THTensor *connTable, int nInputPlane, int nOutputPlane,
|
||||
int dW, int dH, real scale)
|
||||
{
|
||||
THArgCheck(
|
||||
gradWeight != NULL && gradWeight->nDimension == 3
|
||||
&& connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
|
||||
"3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
|
||||
);
|
||||
|
||||
/* contiguous */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* get raw pointers */
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *gradOutput_data = THTensor_(data)(gradOutput);
|
||||
real *gradWeight_data = THTensor_(data)(gradWeight);
|
||||
real *gradBias_data = THTensor_(data)(gradBias);
|
||||
|
||||
/* and dims */
|
||||
const long input_h = input->size[1];
|
||||
const long input_w = input->size[2];
|
||||
const long output_h = gradOutput->size[1];
|
||||
const long output_w = gradOutput->size[2];
|
||||
const long weight_h = gradWeight->size[1];
|
||||
const long weight_w = gradWeight->size[2];
|
||||
|
||||
/* gradients wrt bias */
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nOutputPlane; k++)
|
||||
{
|
||||
real *ptr_gradOutput = gradOutput_data + k*output_w*output_h;
|
||||
long l;
|
||||
for (l = 0; l < output_h*output_w; l++)
|
||||
gradBias_data[k] += scale*ptr_gradOutput[l];
|
||||
}
|
||||
|
||||
/* gradients wrt weight */
|
||||
int nkernel = connTable->size[0];
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nkernel; k++)
|
||||
{
|
||||
int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
|
||||
int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
|
||||
|
||||
/* gradient to kernel */
|
||||
THTensor_(validXCorr2DRevptr)(
|
||||
gradWeight_data + k*weight_w*weight_h,
|
||||
scale,
|
||||
gradOutput_data + o*output_w*output_h, output_h, output_w,
|
||||
input_data + i*input_w*input_h, input_h, input_w,
|
||||
dH, dW
|
||||
);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
300
torch/lib/THNN/generic/SpatialMaxPooling.c
Normal file
@ -0,0 +1,300 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c"
|
||||
#else
|
||||
|
||||
static void THNN_(SpatialMaxPooling_updateOutput_frame)(
|
||||
real *input_p,
|
||||
real *output_p,
|
||||
real *ind_p,
|
||||
long nslices,
|
||||
long iwidth,
|
||||
long iheight,
|
||||
long owidth,
|
||||
long oheight,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
/* loop over output */
|
||||
long i, j;
|
||||
real *ip = input_p + k*iwidth*iheight;
|
||||
for(i = 0; i < oheight; i++)
|
||||
{
|
||||
for(j = 0; j < owidth; j++)
|
||||
{
|
||||
long hstart = i * dH - padH;
|
||||
long wstart = j * dW - padW;
|
||||
long hend = fminf(hstart + kH, iheight);
|
||||
long wend = fminf(wstart + kW, iwidth);
|
||||
hstart = fmaxf(hstart, 0);
|
||||
wstart = fmaxf(wstart, 0);
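/* clip the pooling window to the input so only in-bounds pixels are scanned */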
|
||||
|
||||
/* local pointers */
|
||||
real *op = output_p + k*owidth*oheight + i*owidth + j;
|
||||
real *indp = ind_p + k*owidth*oheight + i*owidth + j;
|
||||
|
||||
/* compute local max: */
|
||||
long maxindex = -1;
|
||||
real maxval = -THInf;
|
||||
long tcntr = 0;
|
||||
long x,y;
|
||||
for(y = hstart; y < hend; y++)
|
||||
{
|
||||
for(x = wstart; x < wend; x++)
|
||||
{
|
||||
tcntr = y*iwidth + x;
|
||||
real val = *(ip + tcntr);
|
||||
if (val > maxval)
|
||||
{
|
||||
maxval = val;
|
||||
maxindex = tcntr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* set output to local max */
|
||||
*op = maxval;
|
||||
|
||||
/* store location of max */
|
||||
*indp = maxindex + TH_INDEX_BASE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialMaxPooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *indices,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH,
|
||||
bool ceil_mode)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
long nslices;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
long oheight;
|
||||
long owidth;
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
real *indices_data;
|
||||
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2, "input image smaller than kernel size");
|
||||
|
||||
THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, "pad should be smaller than half of kernel size");
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimh-1];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
if (ceil_mode)
|
||||
{
|
||||
oheight = (long)(ceil((float)(iheight - kH + 2*padH) / dH)) + 1;
|
||||
owidth = (long)(ceil((float)(iwidth - kW + 2*padW) / dW)) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
oheight = (long)(floor((float)(iheight - kH + 2*padH) / dH)) + 1;
|
||||
owidth = (long)(floor((float)(iwidth - kW + 2*padW) / dW)) + 1;
|
||||
}
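/* output size: (in + 2*pad - kernel)/stride + 1, rounded up in ceil mode and down otherwise */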
|
||||
|
||||
if (padW || padH)
|
||||
{
|
||||
// ensure that the last pooling starts inside the image
|
||||
if ((oheight - 1)*dH >= iheight + padH)
|
||||
--oheight;
|
||||
if ((owidth - 1)*dW >= iwidth + padW)
|
||||
--owidth;
|
||||
}
|
||||
|
||||
/* get contiguous input */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
|
||||
/* resize output */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize3d)(output, nslices, oheight, owidth);
|
||||
/* indices will contain the locations for each output point */
|
||||
THTensor_(resize3d)(indices, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
THNN_(SpatialMaxPooling_updateOutput_frame)(input_data, output_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
kW, kH, dW, dH,
|
||||
padW, padH);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
|
||||
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
|
||||
/* indices will contain the locations for each output point */
|
||||
THTensor_(resize4d)(indices, nbatch, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialMaxPooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight,
|
||||
indices_data+p*nslices*owidth*oheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
kW, kH, dW, dH,
|
||||
padW, padH);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialMaxPooling_updateGradInput_frame)(
|
||||
real *gradInput_p,
|
||||
real *gradOutput_p,
|
||||
real *ind_p,
|
||||
long nslices,
|
||||
long iwidth,
|
||||
long iheight,
|
||||
long owidth,
|
||||
long oheight,
|
||||
int dW,
|
||||
int dH)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
|
||||
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
|
||||
real *ind_p_k = ind_p + k*owidth*oheight;
|
||||
|
||||
/* calculate max points */
|
||||
long i, j;
|
||||
for(i = 0; i < oheight; i++)
|
||||
{
|
||||
for(j = 0; j < owidth; j++)
|
||||
{
|
||||
/* retrieve position of max */
|
||||
long maxp = ind_p_k[i*owidth + j] - TH_INDEX_BASE;
|
||||
/* update gradient */
|
||||
gradInput_p_k[maxp] += gradOutput_p_k[i*owidth + j];
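/* overlapping windows may share an argmax, so gradients are accumulated with += */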
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialMaxPooling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *indices,
|
||||
int kW,
|
||||
int kH,
|
||||
int dW,
|
||||
int dH,
|
||||
int padW,
|
||||
int padH,
|
||||
bool ceil_mode)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
int nslices;
|
||||
int iheight;
|
||||
int iwidth;
|
||||
int oheight;
|
||||
int owidth;
|
||||
real *gradInput_data;
|
||||
real *gradOutput_data;
|
||||
real *indices_data;
|
||||
|
||||
/* get contiguous gradOutput */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimh-1];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
oheight = gradOutput->size[dimh];
|
||||
owidth = gradOutput->size[dimw];
|
||||
|
||||
/* get raw pointers */
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
/* backprop */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
dW, dH);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
|
||||
indices_data+p*nslices*owidth*oheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
dW, dH);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
223
torch/lib/THNN/generic/SpatialMaxUnpooling.c
Normal file
@ -0,0 +1,223 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialMaxUnpooling.c"
|
||||
#else
|
||||
|
||||
static void THNN_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *output_p,
|
||||
real *ind_p,
|
||||
long nslices,
|
||||
long iwidth, long iheight,
|
||||
long owidth, long oheight)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
real *output_p_k = output_p + k*owidth*oheight;
|
||||
real *input_p_k = input_p + k*iwidth*iheight;
|
||||
real *ind_p_k = ind_p + k*iwidth*iheight;
|
||||
|
||||
long i, j, maxp;
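/* scatter each input value to the output position recorded by the paired
   max-pooling layer; all other output positions stay zero */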
|
||||
for(i = 0; i < iheight; i++)
|
||||
{
|
||||
for(j = 0; j < iwidth; j++)
|
||||
{
|
||||
maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
|
||||
if(maxp<0 || maxp>=owidth*oheight){
|
||||
THError("invalid max index %d, owidth= %d, oheight= %d",maxp,owidth,oheight);
|
||||
}
|
||||
output_p_k[maxp] = input_p_k[i*iwidth + j]; /* update output */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialMaxUnpooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *indices,
|
||||
int owidth, int oheight)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int nbatch = 1;
|
||||
int nslices;
|
||||
int iheight;
|
||||
int iwidth;
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
real *indices_data;
|
||||
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
|
||||
if (!THTensor_(isSameSizeAs)(input, indices)){
|
||||
THError("Invalid input size w.r.t current indices size");
|
||||
}
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimh-1];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
|
||||
/* get contiguous input and indices */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
indices = THTensor_(newContiguous)(indices);
|
||||
|
||||
/* resize output */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize3d)(output, nslices, oheight, owidth);
|
||||
THTensor_(zero)(output);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
|
||||
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
|
||||
THTensor_(zero)(output);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight,
|
||||
indices_data+p*nslices*iwidth*iheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(indices);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
|
||||
real *ind_p,
|
||||
long nslices,
|
||||
long iwidth, long iheight,
|
||||
long owidth, long oheight)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
|
||||
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
|
||||
real *ind_p_k = ind_p + k*iwidth*iheight;
|
||||
|
||||
long i, j, maxp;
|
||||
for(i = 0; i < iheight; i++)
|
||||
{
|
||||
for(j = 0; j < iwidth; j++)
|
||||
{
|
||||
maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
|
||||
if(maxp<0 || maxp>=owidth*oheight){
|
||||
THError("invalid max index %d, owidth= %d, oheight= %d",maxp,owidth,oheight);
|
||||
}
|
||||
gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialMaxUnpooling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *indices,
|
||||
int owidth, int oheight)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int nbatch = 1;
|
||||
int nslices;
|
||||
int iheight;
|
||||
int iwidth;
|
||||
real *gradInput_data;
|
||||
real *gradOutput_data;
|
||||
real *indices_data;
|
||||
|
||||
if (!THTensor_(isSameSizeAs)(input, indices)){
|
||||
THError("Invalid input size w.r.t current indices size");
|
||||
}
|
||||
|
||||
/* get contiguous gradOutput and indices */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
indices = THTensor_(newContiguous)(indices);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimh-1];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
|
||||
if(owidth!=gradOutput->size[dimw] || oheight!=gradOutput->size[dimh]){
|
||||
THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d", oheight, owidth,gradOutput->size[dimh],gradOutput->size[dimw]);
|
||||
}
|
||||
|
||||
/* get raw pointers */
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
/* backprop */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
|
||||
indices_data+p*nslices*iwidth*iheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
THTensor_(free)(indices);
|
||||
}
|
||||
|
||||
#endif
|
||||
255
torch/lib/THNN/generic/SpatialReflectionPadding.c
Normal file
@ -0,0 +1,255 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialReflectionPadding.c"
|
||||
#else
|
||||
|
||||
static void THNN_(SpatialReflectionPadding_updateOutput_frame)(
|
||||
real *input_p, real *output_p,
|
||||
long nslices,
|
||||
long iwidth, long iheight,
|
||||
long owidth, long oheight,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int iStartX = fmax(0, -pad_l);
|
||||
int iStartY = fmax(0, -pad_t);
|
||||
int oStartX = fmax(0, pad_l);
|
||||
int oStartY = fmax(0, pad_t);
|
||||
|
||||
long k, ip_x, ip_y;
|
||||
#pragma omp parallel for private(k, ip_x, ip_y)
|
||||
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
long i, j;
|
||||
for (i = 0; i < oheight; i++) {
|
||||
for (j = 0; j < owidth; j++) {
|
||||
if (j < pad_l) {
|
||||
ip_x = pad_l * 2 - j;
|
||||
} else if (j >= pad_l && j < iwidth + pad_l) {
|
||||
ip_x = j;
|
||||
} else {
|
||||
ip_x = (iwidth + pad_l - 1) * 2 - j;
|
||||
}
|
||||
ip_x = ip_x - oStartX + iStartX;
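/* reflection padding: the left pad mirrors about the first input column and the
   right pad about the last one (edge excluded); rows are handled the same way below */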
|
||||
|
||||
if (i < pad_t) {
|
||||
ip_y = pad_t * 2 - i;
|
||||
} else if (i >= pad_t && i < iheight + pad_t) {
|
||||
ip_y = i;
|
||||
} else {
|
||||
ip_y = (iheight + pad_t - 1) * 2 - i;
|
||||
}
|
||||
ip_y = ip_y - oStartY + iStartY;
|
||||
|
||||
real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
|
||||
real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
|
||||
*dest_p = *src_p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialReflectionPadding_updateOutput)(THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimslices = 0;
|
||||
long nbatch = 1;
|
||||
long nslices;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
long oheight;
|
||||
long owidth;
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
|
||||
THArgCheck(input->nDimension == 3 ||
|
||||
input->nDimension == 4 , 2, "input must be 3 or 4-dimensional");
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
dimslices++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimslices];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
oheight = iheight + pad_t + pad_b;
|
||||
owidth = iwidth + pad_l + pad_r;
|
||||
|
||||
THArgCheck(owidth >= 1 || oheight >= 1 , 2, "input is too small");
|
||||
|
||||
/* get contiguous input */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
|
||||
/* resize output */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize3d)(output, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
THNN_(SpatialReflectionPadding_updateOutput_frame)(input_data, output_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
|
||||
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialReflectionPadding_updateOutput_frame)(
|
||||
input_data+p*nslices*iwidth*iheight,
|
||||
output_data+p*nslices*owidth*oheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialReflectionPadding_updateGradInput_frame)(
|
||||
real *ginput_p, real *goutput_p,
|
||||
long nslices,
|
||||
long iwidth, long iheight,
|
||||
long owidth, long oheight,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int iStartX = fmax(0, -pad_l);
|
||||
int iStartY = fmax(0, -pad_t);
|
||||
int oStartX = fmax(0, pad_l);
|
||||
int oStartY = fmax(0, pad_t);
|
||||
|
||||
long k, ip_x, ip_y;
|
||||
#pragma omp parallel for private(k, ip_x, ip_y)
|
||||
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
long i, j;
|
||||
for (i = 0; i < oheight; i++) {
|
||||
for (j = 0; j < owidth; j++) {
|
||||
if (j < pad_l) {
|
||||
ip_x = pad_l * 2 - j;
|
||||
} else if (j >= pad_l && j < iwidth + pad_l) {
|
||||
ip_x = j;
|
||||
} else {
|
||||
ip_x = (iwidth + pad_l - 1) * 2 - j;
|
||||
}
|
||||
ip_x = ip_x - oStartX + iStartX;
|
||||
|
||||
if (i < pad_t) {
|
||||
ip_y = pad_t * 2 - i;
|
||||
} else if (i >= pad_t && i < iheight + pad_t) {
|
||||
ip_y = i;
|
||||
} else {
|
||||
ip_y = (iheight + pad_t - 1) * 2 - i;
|
||||
}
|
||||
ip_y = ip_y - oStartY + iStartY;
|
||||
|
||||
real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
|
||||
real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
|
||||
*dest_p += *src_p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialReflectionPadding_updateGradInput)(THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimslices = 0;
|
||||
long nbatch = 1;
|
||||
long nslices;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
long oheight;
|
||||
long owidth;
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
dimslices++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimslices];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
oheight = iheight + pad_t + pad_b;
|
||||
owidth = iwidth + pad_l + pad_r;
|
||||
|
||||
THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
|
||||
"gradOutput width unexpected");
|
||||
THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
|
||||
"gradOutput height unexpected");
|
||||
|
||||
/* get contiguous gradOutput */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
/* backprop */
|
||||
if (input->nDimension == 3) {
|
||||
THNN_(SpatialReflectionPadding_updateGradInput_frame)(
|
||||
THTensor_(data)(gradInput),
|
||||
THTensor_(data)(gradOutput),
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
} else {
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++) {
|
||||
THNN_(SpatialReflectionPadding_updateGradInput_frame)(
|
||||
THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
|
||||
THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
254
torch/lib/THNN/generic/SpatialReplicationPadding.c
Normal file
@ -0,0 +1,254 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialReplicationPadding.c"
|
||||
#else
|
||||
|
||||
static void THNN_(SpatialReplicationPadding_updateOutput_frame)(
|
||||
real *input_p, real *output_p,
|
||||
long nslices,
|
||||
long iwidth, long iheight,
|
||||
long owidth, long oheight,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int iStartX = fmax(0, -pad_l);
|
||||
int iStartY = fmax(0, -pad_t);
|
||||
int oStartX = fmax(0, pad_l);
|
||||
int oStartY = fmax(0, pad_t);
|
||||
|
||||
long k, ip_x, ip_y;
|
||||
#pragma omp parallel for private(k, ip_x, ip_y)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
long i, j;
|
||||
for (i = 0; i < oheight; i++) {
|
||||
for (j = 0; j < owidth; j++) {
|
||||
if (j < pad_l) {
|
||||
ip_x = pad_l;
|
||||
} else if (j >= pad_l && j < iwidth + pad_l) {
|
||||
ip_x = j;
|
||||
} else {
|
||||
ip_x = iwidth + pad_l - 1;
|
||||
}
|
||||
ip_x = ip_x - oStartX + iStartX;
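/* replication padding: out-of-range columns clamp to the nearest input edge
   column; rows are clamped the same way below */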
|
||||
|
||||
if (i < pad_t) {
|
||||
ip_y = pad_t;
|
||||
} else if (i >= pad_t && i < iheight + pad_t) {
|
||||
ip_y = i;
|
||||
} else {
|
||||
ip_y = iheight + pad_t - 1;
|
||||
}
|
||||
ip_y = ip_y - oStartY + iStartY;
|
||||
|
||||
real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
|
||||
real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
|
||||
*dest_p = *src_p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialReplicationPadding_updateOutput)(THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimslices = 0;
|
||||
long nbatch = 1;
|
||||
long nslices;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
long oheight;
|
||||
long owidth;
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4,
|
||||
2, "input must be 3 or 4-dimensional");
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
dimslices++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimslices];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
oheight = iheight + pad_t + pad_b;
|
||||
owidth = iwidth + pad_l + pad_r;
|
||||
|
||||
THArgCheck(owidth >= 1 || oheight >= 1 , 2, "input is too small");
|
||||
|
||||
/* get contiguous input */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
|
||||
/* resize output */
|
||||
if (input->nDimension == 3)
|
||||
{
|
||||
THTensor_(resize3d)(output, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
THNN_(SpatialReplicationPadding_updateOutput_frame)(input_data, output_data,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
|
||||
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(SpatialReplicationPadding_updateOutput_frame)(
|
||||
input_data+p*nslices*iwidth*iheight,
|
||||
output_data+p*nslices*owidth*oheight,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
static void THNN_(SpatialReplicationPadding_updateGradInput_frame)(
|
||||
real *ginput_p, real *goutput_p,
|
||||
long nslices,
|
||||
long iwidth, long iheight,
|
||||
long owidth, long oheight,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int iStartX = fmax(0, -pad_l);
|
||||
int iStartY = fmax(0, -pad_t);
|
||||
int oStartX = fmax(0, pad_l);
|
||||
int oStartY = fmax(0, pad_t);
|
||||
|
||||
long k, ip_x, ip_y;
|
||||
#pragma omp parallel for private(k, ip_x, ip_y)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
long i, j;
|
||||
for (i = 0; i < oheight; i++) {
|
||||
for (j = 0; j < owidth; j++) {
|
||||
if (j < pad_l) {
|
||||
ip_x = pad_l;
|
||||
} else if (j >= pad_l && j < iwidth + pad_l) {
|
||||
ip_x = j;
|
||||
} else {
|
||||
ip_x = iwidth + pad_l - 1;
|
||||
}
|
||||
ip_x = ip_x - oStartX + iStartX;
|
||||
|
||||
if (i < pad_t) {
|
||||
ip_y = pad_t;
|
||||
} else if (i >= pad_t && i < iheight + pad_t) {
|
||||
ip_y = i;
|
||||
} else {
|
||||
ip_y = iheight + pad_t - 1;
|
||||
}
|
||||
ip_y = ip_y - oStartY + iStartY;
|
||||
|
||||
real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
|
||||
real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
|
||||
*dest_p += *src_p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialReplicationPadding_updateGradInput)(THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
int pad_l, int pad_r,
|
||||
int pad_t, int pad_b)
|
||||
{
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
int dimslices = 0;
|
||||
long nbatch = 1;
|
||||
long nslices;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
long oheight;
|
||||
long owidth;
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
dimslices++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimslices];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
oheight = iheight + pad_t + pad_b;
|
||||
owidth = iwidth + pad_l + pad_r;
|
||||
|
||||
THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
|
||||
"gradOutput width unexpected");
|
||||
THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
|
||||
"gradOutput height unexpected");
|
||||
|
||||
/* get contiguous gradOutput */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
/* backprop */
|
||||
if (input->nDimension == 3) {
|
||||
THNN_(SpatialReplicationPadding_updateGradInput_frame)(
|
||||
THTensor_(data)(gradInput),
|
||||
THTensor_(data)(gradOutput),
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
} else {
|
||||
long p;
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++) {
|
||||
THNN_(SpatialReplicationPadding_updateGradInput_frame)(
|
||||
THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
|
||||
THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
|
||||
nslices,
|
||||
iwidth, iheight,
|
||||
owidth, oheight,
|
||||
pad_l, pad_r,
|
||||
pad_t, pad_b);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
267
torch/lib/THNN/generic/SpatialSubSampling.c
Normal file
@ -0,0 +1,267 @@
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialSubSampling.c"
|
||||
#else
|
||||
|
||||
void THNN_(SpatialSubSampling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
int kW, int kH,
|
||||
int dW, int dH)
|
||||
{
|
||||
|
||||
real *weight_data = THTensor_(data)(weight);
|
||||
real *bias_data = THTensor_(data)(bias);
|
||||
real *output_data;
|
||||
real *input_data;
|
||||
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
|
||||
long inputWidth;
|
||||
long inputHeight;
|
||||
long outputWidth;
|
||||
long outputHeight;
|
||||
|
||||
int nInputPlane = THTensor_(size)(weight,0);
|
||||
|
||||
long k;
|
||||
|
||||
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
inputWidth = input->size[dimw];
|
||||
inputHeight = input->size[dimh];
|
||||
outputWidth = (inputWidth - kW) / dW + 1;
|
||||
outputHeight = (inputHeight - kH) / dH + 1;
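/* subsampling computes bias[k] + weight[k] * (sum over each kW x kH window):
   a learned, scaled sum-pooling with one weight and one bias per input plane */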
|
||||
|
||||
THArgCheck(input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
|
||||
THArgCheck(inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
|
||||
|
||||
if (input->nDimension == 3)
|
||||
THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
|
||||
else
|
||||
THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
|
||||
#pragma omp parallel for private(k)
|
||||
for(k = 0; k < nInputPlane; k++)
|
||||
{
|
||||
long p;
|
||||
for(p = 0; p < nbatch; p++)
|
||||
{
|
||||
long xx, yy;
|
||||
/* For all output pixels... */
|
||||
real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
|
||||
/* Get the good mask for (k,i) (k out, i in) */
|
||||
real the_weight = weight_data[k];
|
||||
/* Initialize to the bias */
|
||||
real z = bias_data[k];
|
||||
long i;
|
||||
for(i = 0; i < outputWidth*outputHeight; i++)
|
||||
ptr_output[i] = z;
|
||||
|
||||
for(yy = 0; yy < outputHeight; yy++)
|
||||
{
|
||||
for(xx = 0; xx < outputWidth; xx++)
|
||||
{
|
||||
/* Compute the mean of the input image... */
|
||||
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
|
||||
real sum = 0;
|
||||
long kx, ky;
|
||||
|
||||
for(ky = 0; ky < kH; ky++)
|
||||
{
|
||||
for(kx = 0; kx < kW; kx++)
|
||||
sum += ptr_input[kx];
|
||||
ptr_input += inputWidth; /* next input line */
|
||||
}
|
||||
/* Update output */
|
||||
*ptr_output++ += the_weight*sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
void THNN_(SpatialSubSampling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
int kW, int kH,
|
||||
int dW, int dH)
|
||||
{
|
||||
|
||||
int dimw = 2;
|
||||
int dimh = 1;
|
||||
long nbatch = 1;
|
||||
|
||||
long inputWidth;
|
||||
long inputHeight;
|
||||
long outputWidth;
|
||||
long outputHeight;
|
||||
|
||||
int nInputPlane = THTensor_(size)(weight,0);
|
||||
|
||||
real *weight_data;
|
||||
real *gradOutput_data;
|
||||
real *input_data, *gradInput_data;
|
||||
|
||||
long k;
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
nbatch = input->size[0];
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
inputWidth = input->size[dimw];
|
||||
inputHeight = input->size[dimh];
|
||||
outputWidth = (inputWidth - kW) / dW + 1;
|
||||
outputHeight = (inputHeight - kH) / dH + 1;
|
||||
|
||||
weight_data = THTensor_(data)(weight);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
#pragma omp parallel for private(k)
|
||||
for(k = 0; k < nInputPlane; k++)
|
||||
{
|
||||
long p;
|
||||
for(p = 0; p < nbatch; p++)
|
||||
{
|
||||
real the_weight = weight_data[k];
|
||||
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
|
||||
long xx, yy;
|
||||
|
||||
real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
|
||||
long i;
|
||||
for(i=0; i<inputWidth*inputHeight; i++)
|
||||
ptr_gi[i] = 0.0;
|
||||
|
||||
for(yy = 0; yy < outputHeight; yy++)
|
||||
{
|
||||
for(xx = 0; xx < outputWidth; xx++)
|
||||
{
|
||||
real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
|
||||
real z = *ptr_gradOutput++ * the_weight;
|
||||
long kx, ky;
|
||||
|
||||
for(ky = 0; ky < kH; ky++)
|
||||
{
|
||||
for(kx = 0; kx < kW; kx++)
|
||||
ptr_gradInput[kx] += z;
|
||||
ptr_gradInput += inputWidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialSubSampling_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
int kW, int kH,
|
||||
int dW, int dH,
|
||||
real scale)
|
||||
{
|
||||
long nbatch = 1;
|
||||
long dimw = 2;
|
||||
long dimh = 1;
|
||||
|
||||
long inputWidth;
|
||||
long inputHeight;
|
||||
long outputWidth;
|
||||
long outputHeight;
|
||||
|
||||
int nInputPlane = THTensor_(size)(gradWeight,0);
|
||||
|
||||
real *gradWeight_data;
|
||||
real *gradBias_data;
|
||||
real *gradOutput_data;
|
||||
real *input_data;
|
||||
|
||||
long k;
|
||||
|
||||
if (input->nDimension == 4) {
|
||||
dimw++;
|
||||
dimh++;
|
||||
nbatch = input->size[0];
|
||||
}
|
||||
|
||||
inputWidth = input->size[dimw];
|
||||
inputHeight = input->size[dimh];
|
||||
outputWidth = (inputWidth - kW) / dW + 1;
|
||||
outputHeight = (inputHeight - kH) / dH + 1;
|
||||
|
||||
gradWeight_data = THTensor_(data)(gradWeight);
|
||||
gradBias_data = THTensor_(data)(gradBias);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
|
||||
input = THTensor_(newContiguous)(input);
|
||||
input_data = THTensor_(data)(input);
|
||||
|
||||
#pragma omp parallel for private(k)
|
||||
for(k = 0; k < nInputPlane; k++)
|
||||
{
|
||||
long p;
|
||||
for(p = 0; p < nbatch; p++)
|
||||
{
|
||||
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
|
||||
real sum;
|
||||
long xx, yy;
|
||||
long i;
|
||||
|
||||
sum = 0;
|
||||
for(i = 0; i < outputWidth*outputHeight; i++)
|
||||
sum += ptr_gradOutput[i];
|
||||
gradBias_data[k] += scale*sum;
|
||||
|
||||
sum = 0;
|
||||
for(yy = 0; yy < outputHeight; yy++)
|
||||
{
|
||||
for(xx = 0; xx < outputWidth; xx++)
|
||||
{
|
||||
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
|
||||
real z = *ptr_gradOutput++;
|
||||
long kx, ky;
|
||||
|
||||
for(ky = 0; ky < kH; ky++)
|
||||
{
|
||||
for(kx = 0; kx < kW; kx++)
|
||||
sum += z * ptr_input[kx];
|
||||
ptr_input += inputWidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
gradWeight_data[k] += scale*sum;
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
#endif
|
||||
127
torch/lib/THNN/generic/SpatialUpSamplingBilinear.c
Normal file
@ -0,0 +1,127 @@
// Adapted from interp.cpp from Caffe util by Pauline Luc
|
||||
// Originally developed by George Papandreou
|
||||
|
||||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/SpatialUpSamplingBilinear.c"
|
||||
#else
|
||||
|
||||
void THNN_(SpatialUpSamplingBilinear_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output){
|
||||
input = THTensor_(newContiguous)(input);
|
||||
output = THTensor_(newContiguous)(output);
|
||||
THTensor_(zero)(output);
|
||||
real *idata = THTensor_(data)(input);
|
||||
real *odata = THTensor_(data)(output);
|
||||
int channels = THTensor_(size)(input, 0) * THTensor_(size)(input, 1);
|
||||
int height1 = THTensor_(size)(input, 2);
|
||||
int width1 = THTensor_(size)(input, 3);
|
||||
int height2 = THTensor_(size)(output, 2);
|
||||
int width2 = THTensor_(size)(output, 3);
|
||||
THAssert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
|
||||
// special case: just copy
|
||||
if (height1 == height2 && width1 == width2) {
|
||||
for (int h2 = 0; h2 < height2; ++h2) {
|
||||
const int h1 = h2;
|
||||
for (int w2 = 0; w2 < width2; ++w2) {
|
||||
const int w1 = w2;
|
||||
const real* pos1 = &idata[h1 * width1 + w1];
|
||||
real* pos2 = &odata[h2 * width2 + w2];
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
pos2[0] = pos1[0];
|
||||
pos1 += width1 * height1;
|
||||
pos2 += width2 * height2;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
const float rheight =(height2 > 1) ? (float)(height1 - 1)/(height2 - 1) : 0.f;
|
||||
const float rwidth = (width2 > 1) ? (float)(width1 - 1) / (width2 - 1) : 0.f;
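// rheight/rwidth map output coordinates to fractional input coordinates; h1/w1 are
// the top-left source pixel and the lambda terms are the bilinear interpolation weights.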
|
||||
for (int h2 = 0; h2 < height2; ++h2) {
|
||||
const float h1r = rheight * h2;
|
||||
const int h1 = h1r;
|
||||
const int h1p = (h1 < height1 - 1) ? 1 : 0;
|
||||
const real h1lambda = h1r - h1;
|
||||
const real h0lambda = (real)1. - h1lambda;
|
||||
for (int w2 = 0; w2 < width2; ++w2) {
|
||||
const float w1r = rwidth * w2;
|
||||
const int w1 = w1r;
|
||||
const int w1p = (w1 < width1 - 1) ? 1 : 0;
|
||||
const real w1lambda = w1r - w1;
|
||||
const real w0lambda = (real)1. - w1lambda;
|
||||
const real* pos1 = &idata[h1 * width1 + w1];
|
||||
real* pos2 = &odata[h2 * width2 + w2];
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
pos2[0] = h0lambda * (w0lambda * pos1[0]+ w1lambda * pos1[w1p])
|
||||
+ h1lambda * (w0lambda * pos1[h1p * width1]
|
||||
+ w1lambda * pos1[h1p * width1 + w1p]);
|
||||
pos1 += width1 * height1;
|
||||
pos2 += width2 * height2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput){
|
||||
gradInput = THTensor_(newContiguous)(gradInput);
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
THTensor_(zero)(gradInput);
|
||||
real *data1 = THTensor_(data)(gradInput);
|
||||
real *data2 = THTensor_(data)(gradOutput);
|
||||
int channels = THTensor_(size)(gradInput, 0) * THTensor_(size)(gradInput, 1);
|
||||
int height1 = THTensor_(size)(gradInput, 2);
|
||||
int width1 = THTensor_(size)(gradInput, 3);
|
||||
int height2 = THTensor_(size)(gradOutput, 2);
|
||||
int width2 = THTensor_(size)(gradOutput, 3);
|
||||
THAssert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
|
||||
// special case: same-size matching grids
|
||||
if (height1 == height2 && width1 == width2) {
|
||||
for (int h2 = 0; h2 < height2; ++h2) {
|
||||
const int h1 = h2;
|
||||
for (int w2 = 0; w2 < width2; ++w2) {
|
||||
const int w1 = w2;
|
||||
real* pos1 = &data1[h1 * width1 + w1];
|
||||
const real* pos2 = &data2[h2 * width2 + w2];
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
pos1[0] += pos2[0];
|
||||
pos1 += width1 * height1;
|
||||
pos2 += width2 * height2;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
const float rheight =(height2 > 1) ? (float)(height1 - 1)/(height2 - 1) : 0.f;
|
||||
const float rwidth = (width2 > 1) ? (float)(width1 - 1)/(width2 - 1) : 0.f;
|
||||
for (int h2 = 0; h2 < height2; ++h2) {
|
||||
const float h1r = rheight * h2;
|
||||
const int h1 = h1r;
|
||||
const int h1p = (h1 < height1 - 1) ? 1 : 0;
|
||||
const real h1lambda = h1r - h1;
|
||||
const real h0lambda = (real)1. - h1lambda;
|
||||
for (int w2 = 0; w2 < width2; ++w2) {
|
||||
const float w1r = rwidth * w2;
|
||||
const int w1 = w1r;
|
||||
const int w1p = (w1 < width1 - 1) ? 1 : 0;
|
||||
const real w1lambda = w1r - w1;
|
||||
const real w0lambda = (real)1. - w1lambda;
|
||||
real* pos1 = &data1[h1 * width1 + w1];
|
||||
const real* pos2 = &data2[h2 * width2 + w2];
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
pos1[0] += h0lambda * w0lambda * pos2[0];
|
||||
pos1[w1p] += h0lambda * w1lambda * pos2[0];
|
||||
pos1[h1p * width1] += h1lambda * w0lambda * pos2[0];
|
||||
pos1[h1p * width1 + w1p] += h1lambda * w1lambda * pos2[0];
|
||||
pos1 += width1 * height1;
|
||||
pos2 += width2 * height2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
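For reference, the kernel above maps every output pixel (h2, w2) back to a fractional source coordinate and blends the four neighbouring input pixels with the h0/h1 and w0/w1 lambda weights. A minimal standalone C sketch of that index/weight arithmetic follows; the 2x2 input and 4x4 output sizes are illustrative assumptions, not values taken from this commit.

#include <stdio.h>

int main(void)
{
  /* Upsample a single-channel 2x2 grid to 4x4 using the same ratio and
     lambda weights as SpatialUpSamplingBilinear_updateOutput. */
  const int height1 = 2, width1 = 2;    /* input size  */
  const int height2 = 4, width2 = 4;    /* output size */
  const float in[2][2] = { {0.f, 1.f}, {2.f, 3.f} };

  const float rheight = (height2 > 1) ? (float)(height1 - 1) / (height2 - 1) : 0.f;
  const float rwidth  = (width2  > 1) ? (float)(width1  - 1) / (width2  - 1) : 0.f;

  for (int h2 = 0; h2 < height2; ++h2) {
    const float h1r = rheight * h2;
    const int h1 = (int)h1r;
    const int h1p = (h1 < height1 - 1) ? 1 : 0;   /* step to next row, clamped at the border */
    const float h1lambda = h1r - h1;
    const float h0lambda = 1.f - h1lambda;
    for (int w2 = 0; w2 < width2; ++w2) {
      const float w1r = rwidth * w2;
      const int w1 = (int)w1r;
      const int w1p = (w1 < width1 - 1) ? 1 : 0;
      const float w1lambda = w1r - w1;
      const float w0lambda = 1.f - w1lambda;
      const float out =
          h0lambda * (w0lambda * in[h1][w1]       + w1lambda * in[h1][w1 + w1p]) +
          h1lambda * (w0lambda * in[h1 + h1p][w1] + w1lambda * in[h1 + h1p][w1 + w1p]);
      printf("%5.2f ", out);
    }
    printf("\n");
  }
  return 0;
}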
143
torch/lib/THNN/generic/SpatialUpSamplingNearest.c
Normal file
@@ -0,0 +1,143 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialUpSamplingNearest.c"
#else

void THNN_(SpatialUpSamplingNearest_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    int scale_factor)
{
  int dW = scale_factor;
  int dH = scale_factor;
  int xDim = input->nDimension-2;
  int yDim = input->nDimension-1;

  // dims
  int idim = input->nDimension;  // Guaranteed to be between 3 and 5
  int osz0 = output->size[0];
  int osz1 = output->size[1];
  int osz2 = output->size[2];
  int osz3 = 1;
  if (idim > 3) {
    osz3 = output->size[3];
  }

  // get strides
  long *is = input->stride;
  long *os = output->stride;

  // get raw pointers
  real *pin = THTensor_(data)(input);
  real *pout = THTensor_(data)(output);

  // perform the upsampling
  int i0, i1, i2, i3, isrc, idst;
  int iout[4];  // Output indices
  int iin[4];   // Input indices

  for (i0 = 0; i0 < osz0; i0++) {
    iout[0] = i0;
    iin[0] = i0;
    for (i1 = 0; i1 < osz1; i1++) {
      iout[1] = i1;
      iin[1] = i1;
      for (i2 = 0; i2 < osz2; i2++) {
        iout[2] = i2;
        iin[2] = i2;
        for (i3 = 0; i3 < osz3; i3++) {
          iout[3] = i3;
          iin[3] = i3;

          // set the indices for the upsampled dimensions
          iin[xDim] = iout[xDim] / dW;
          iin[yDim] = iout[yDim] / dH;

          idst = i0*os[0] + i1*os[1] + i2*os[2];
          isrc = iin[0]*is[0] + iin[1]*is[1] + iin[2]*is[2];
          if (idim > 3) {
            idst += i3*os[3];
            isrc += iin[3]*is[3];
          }

          pout[idst] = pin[isrc];
        }
      }
    }
  }
}

void THNN_(SpatialUpSamplingNearest_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    int scale_factor)
{
  int dW = scale_factor;
  int dH = scale_factor;
  int xDim = gradInput->nDimension-2;
  int yDim = gradInput->nDimension-1;

  // dims
  int idim = gradInput->nDimension;  // Guaranteed to be between 3 and 5
  int isz0 = gradInput->size[0];
  int isz1 = gradInput->size[1];
  int isz2 = gradInput->size[2];
  int isz3 = 1;
  if (idim > 3) {
    isz3 = gradInput->size[3];
  }

  // get strides
  long *is = gradInput->stride;
  long *os = gradOutput->stride;

  // get raw pointers
  real *pin = THTensor_(data)(gradInput);
  real *pout = THTensor_(data)(gradOutput);

  // perform the upsampling
  int i0, i1, i2, i3, isrc, idst, x, y;
  int iin[4];   // Input indices
  int iout[4];  // Output indices

  THTensor_(zero)(gradInput);

  for (i0 = 0; i0 < isz0; i0++) {
    iin[0] = i0;
    iout[0] = i0;
    for (i1 = 0; i1 < isz1; i1++) {
      iin[1] = i1;
      iout[1] = i1;
      for (i2 = 0; i2 < isz2; i2++) {
        iin[2] = i2;
        iout[2] = i2;
        for (i3 = 0; i3 < isz3; i3++) {
          iin[3] = i3;
          iout[3] = i3;

          idst = i0*is[0] + i1*is[1] + i2*is[2];
          if (idim > 3) {
            idst += i3*is[3];
          }

          // Now accumulate the gradients from gradOutput
          for (y = 0; y < dH; y++) {
            for (x = 0; x < dW; x++) {
              iout[xDim] = dW * iin[xDim] + x;
              iout[yDim] = dH * iin[yDim] + y;
              isrc = iout[0]*os[0] + iout[1]*os[1] + iout[2]*os[2];
              if (idim > 3) {
                isrc += iout[3]*os[3];
              }
              pin[idst] += pout[isrc];
            }
          }
        }
      }
    }
  }
}

#endif
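For reference, the forward pass above reads input position iout / scale_factor for every output position along an upsampled dimension. A minimal standalone C sketch of that index map; the 1-D input and scale factor are illustrative assumptions, not values from this commit.

#include <stdio.h>

int main(void)
{
  /* Nearest-neighbour upsampling index map: out[iout] = in[iout / scale_factor]. */
  const int scale_factor = 2;
  const int inputWidth = 3;
  const int outputWidth = inputWidth * scale_factor;
  const float in[3] = {10.f, 20.f, 30.f};
  for (int iout = 0; iout < outputWidth; iout++) {
    const int iin = iout / scale_factor;   /* integer division, i.e. floor */
    printf("out[%d] = in[%d] = %g\n", iout, iin, in[iin]);
  }
  return 0;
}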
50
torch/lib/THNN/generic/Sqrt.c
Normal file
@@ -0,0 +1,50 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Sqrt.c"
#else

void THNN_(Sqrt_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    real eps)
{
  THTensor_(resizeAs)(output, input);
  THTensor_(sqrt)(output, input);
}

void THNN_(Sqrt_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *output)
{
  THTensor_(resizeAs)(gradInput, input);

  if (output->nDimension == 1 ||
      !THTensor_(isContiguous)(output) ||
      !THTensor_(isContiguous)(gradOutput) ||
      !THTensor_(isContiguous)(gradInput))
  {
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
      *gradInput_data = (*output_data == 0.0) ? 0.0 : (0.5 * (*gradOutput_data / *output_data));
    );
  }
  else
  {
    real *gradOutput_data = THTensor_(data)(gradOutput);
    real *gradInput_data = THTensor_(data)(gradInput);
    real *output_data = THTensor_(data)(output);
    long i;
#pragma omp parallel for private(i)
    for(i = 0; i < THTensor_(nElement)(output); i++)
    {
      if (output_data[i] == 0.0)
        gradInput_data[i] = 0.0;
      else
        gradInput_data[i] = 0.5 * (gradOutput_data[i] / output_data[i]);
    }
  }
}

#endif
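For reference, the backward pass above applies d sqrt(x)/dx = 1/(2*sqrt(x)) = 1/(2*output), guarding the zero case. A minimal standalone C sketch with illustrative values, not taken from this commit.

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* Forward: y = sqrt(x); backward: dL/dx = dL/dy * 0.5 / y, with y == 0 mapped to 0. */
  const double x[3] = {4.0, 9.0, 0.0};
  const double gradOutput[3] = {1.0, 1.0, 1.0};
  for (int i = 0; i < 3; i++) {
    const double y = sqrt(x[i]);
    const double gradInput = (y == 0.0) ? 0.0 : 0.5 * gradOutput[i] / y;
    printf("x=%g  y=%g  dL/dx=%g\n", x[i], y, gradInput);
  }
  return 0;
}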
58
torch/lib/THNN/generic/Square.c
Normal file
@@ -0,0 +1,58 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Square.c"
#else

void THNN_(Square_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output)
{
  THTensor_(resizeAs)(output, input);

  if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
  {
    TH_TENSOR_APPLY2(real, output, real, input,
      *output_data = (*input_data) * (*input_data);
    );
  }
  else
  {
    real *output_data = THTensor_(data)(output);
    real *input_data = THTensor_(data)(input);
    long i;
#pragma omp parallel for private(i)
    for (i = 0; i < THTensor_(nElement)(input); i++)
      output_data[i] = input_data[i]*input_data[i];
  }
}

void THNN_(Square_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput)
{
  THTensor_(resizeAs)(gradInput, input);

  if (input->nDimension == 1 ||
      !THTensor_(isContiguous)(input) ||
      !THTensor_(isContiguous)(gradOutput) ||
      !THTensor_(isContiguous)(gradInput))
  {
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
      *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);
    );
  }
  else
  {
    real *gradOutput_data = THTensor_(data)(gradOutput);
    real *gradInput_data = THTensor_(data)(gradInput);
    real *input_data = THTensor_(data)(input);
    long i;
#pragma omp parallel for private(i)
    for (i = 0; i < THTensor_(nElement)(gradInput); i++)
      gradInput_data[i] = 2.0 * gradOutput_data[i] * input_data[i];
  }
}

#endif
1167
torch/lib/THNN/generic/THNN.h
Normal file
File diff suppressed because it is too large
49
torch/lib/THNN/generic/Tanh.c
Normal file
@@ -0,0 +1,49 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Tanh.c"
#else

void THNN_(Tanh_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output)
{
  THTensor_(resizeAs)(output, input);
  THTensor_(tanh)(output, input);
}

void THNN_(Tanh_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *output)
{
  THTensor_(resizeAs)(gradInput, output);

  if (output->nDimension == 1 ||
      !THTensor_(isContiguous)(output) ||
      !THTensor_(isContiguous)(gradOutput) ||
      !THTensor_(isContiguous)(gradInput))
  {
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
      real z = *output_data;
      *gradInput_data = *gradOutput_data * (1. - z*z);
    );
  }
  else
  {
    real* ptr_gradOutput = THTensor_(data)(gradOutput);
    real* ptr_gradInput = THTensor_(data)(gradInput);
    real* ptr_output = THTensor_(data)(output);
    long i;

#pragma omp parallel for private(i)
    for (i = 0; i < THTensor_(nElement)(gradInput); i++)
    {
      real z = ptr_output[i];
      ptr_gradInput[i] = ptr_gradOutput[i] * (1. - z*z);
    }
  }
}

#endif
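For reference, the backward pass above reuses the saved forward output, since d tanh(x)/dx = 1 - tanh(x)^2. A minimal standalone C sketch with an illustrative input value, not taken from this commit.

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* Backward pass of tanh in terms of the saved output y = tanh(x). */
  const double x = 0.5;
  const double y = tanh(x);                  /* saved forward output */
  const double gradOutput = 1.0;
  const double gradInput = gradOutput * (1.0 - y * y);
  printf("tanh(%g) = %g, gradInput = %g\n", x, y, gradInput);
  return 0;
}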
349
torch/lib/THNN/generic/TemporalConvolution.c
Normal file
@@ -0,0 +1,349 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalConvolution.c"
#else

void THNN_(TemporalConvolution_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias,
    int kW,
    int dW,
    int inputFrameSize,
    int outputFrameSize)
{
  THTensor *outputWindow, *inputWindow;
  int nInputFrame, nOutputFrame;
  long k, i;

  int dimS = 0; // sequence dimension
  int dimF = 1; // feature dimension

  THArgCheck(input->nDimension == 2 || input->nDimension == 3, 2, "2D or 3D(batch mode) tensor expected");

  if (input->nDimension == 3)
  {
    dimS = 1;
    dimF = 2;
  }
  THArgCheck(input->size[dimF] == inputFrameSize, 2, "invalid input frame size");
  THArgCheck(input->size[dimS] >= kW, 2, "input sequence smaller than kernel size");

  input = THTensor_(newContiguous)(input);
  outputWindow = THTensor_(new)();
  inputWindow = THTensor_(new)();

  nInputFrame = input->size[dimS];
  nOutputFrame = (nInputFrame - kW) / dW + 1;

  if (input->nDimension == 2)
  {
    THTensor_(resize2d)(output,
                        nOutputFrame,
                        outputFrameSize);

    /* bias first */
    for(k = 0; k < nOutputFrame; k++)
    {
      THTensor_(select)(outputWindow, output, 0, k);
      THTensor_(copy)(outputWindow, bias);
    }

    /* ouch */
    for(k = 0; nOutputFrame > 0; k++)
    {
      long outputFrameStride = (kW-1)/dW+1;
      long inputFrameStride = outputFrameStride*dW;
      long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
      nOutputFrame -= nFrame;

      THTensor_(setStorage2d)(inputWindow, input->storage,
                              input->storageOffset+k*dW*input->size[1],
                              nFrame, inputFrameStride*input->size[1],
                              kW*input->size[1], 1);

      THTensor_(setStorage2d)(outputWindow, output->storage,
                              output->storageOffset + k*output->size[1],
                              nFrame, outputFrameStride*output->size[1],
                              output->size[1], 1);

      THTensor_(transpose)(weight, NULL, 0, 1);
      THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight);
      THTensor_(transpose)(weight, NULL, 0, 1);
    }
  }
  else
  {
    THTensor *outputSample = THTensor_(new)();
    THTensor *inputSample = THTensor_(new)();
    int nBatchFrame = input->size[0];

    THTensor_(resize3d)(output,
                        nBatchFrame,
                        nOutputFrame,
                        outputFrameSize);

    for(i = 0; i < nBatchFrame; i++)
    {
      THTensor_(select)(outputSample, output, 0, i);
      THTensor_(select)(inputSample, input, 0, i);
      long nOutputSampleFrame = nOutputFrame;

      /* bias first */
      for(k = 0; k < nOutputFrame; k++)
      {
        THTensor_(select)(outputWindow, outputSample, 0, k);
        THTensor_(copy)(outputWindow, bias);
      }

      /* ouch */
      for(k = 0; nOutputSampleFrame > 0; k++)
      {
        long outputFrameStride = (kW-1)/dW+1;
        long inputFrameStride = outputFrameStride*dW;
        long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
        nOutputSampleFrame -= nFrame;

        THTensor_(setStorage2d)(inputWindow, inputSample->storage,
                                inputSample->storageOffset+k*dW*inputSample->size[1],
                                nFrame, inputFrameStride*inputSample->size[1],
                                kW*inputSample->size[1], 1);

        THTensor_(setStorage2d)(outputWindow, outputSample->storage,
                                outputSample->storageOffset + k*outputSample->size[1],
                                nFrame, outputFrameStride*outputSample->size[1],
                                outputSample->size[1], 1);

        THTensor_(transpose)(weight, NULL, 0, 1);
        THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight);
        THTensor_(transpose)(weight, NULL, 0, 1);
      }
    }
    THTensor_(free)(outputSample);
    THTensor_(free)(inputSample);
  }

  THTensor_(free)(outputWindow);
  THTensor_(free)(inputWindow);
  THTensor_(free)(input);
}

void THNN_(TemporalConvolution_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    int kW,
    int dW)
{
  long nInputFrame;
  long nOutputFrame;

  THTensor *gradOutputWindow;
  THTensor *gradInputWindow;
  long k, i;

  int dimS = 0; // sequence dimension
  int dimF = 1; // feature dimension

  if (gradOutput->nDimension == 3)
  {
    dimS = 1;
    dimF = 2;
  }

  nInputFrame = input->size[dimS];
  nOutputFrame = gradOutput->size[dimS];

  gradOutputWindow = THTensor_(new)();
  gradInputWindow = THTensor_(new)();

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (gradOutput->nDimension == 2)
  {
    /* ouch */
    for(k = 0; nOutputFrame > 0; k++)
    {
      long outputFrameStride = (kW-1)/dW+1;
      long inputFrameStride = outputFrameStride*dW;
      long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
      nOutputFrame -= nFrame;

      THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
                              gradOutput->storageOffset + k*gradOutput->size[1],
                              nFrame, outputFrameStride*gradOutput->size[1],
                              gradOutput->size[1], 1);

      THTensor_(setStorage2d)(gradInputWindow, gradInput->storage,
                              gradInput->storageOffset+k*dW*gradInput->size[1],
                              nFrame, inputFrameStride*gradInput->size[1],
                              kW*gradInput->size[1], 1);

      THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
    }
  }
  else
  {
    THTensor *gradOutputSample = THTensor_(new)();
    THTensor *gradInputSample = THTensor_(new)();
    int nBatchFrame = input->size[0];

    for(i = 0; i < nBatchFrame; i++)
    {
      THTensor_(select)(gradOutputSample, gradOutput, 0, i);
      THTensor_(select)(gradInputSample, gradInput, 0, i);
      int nOutputSampleFrame = nOutputFrame;

      /* ouch */
      for(k = 0; nOutputSampleFrame > 0; k++)
      {
        long outputFrameStride = (kW-1)/dW+1;
        long inputFrameStride = outputFrameStride*dW;
        long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
        nOutputSampleFrame -= nFrame;

        THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage,
                                gradOutputSample->storageOffset + k*gradOutputSample->size[1],
                                nFrame, outputFrameStride*gradOutputSample->size[1],
                                gradOutputSample->size[1], 1);

        THTensor_(setStorage2d)(gradInputWindow, gradInputSample->storage,
                                gradInputSample->storageOffset+k*dW*gradInputSample->size[1],
                                nFrame, inputFrameStride*gradInputSample->size[1],
                                kW*gradInputSample->size[1], 1);

        THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
      }
    }
    THTensor_(free)(gradOutputSample);
    THTensor_(free)(gradInputSample);
  }

  THTensor_(free)(gradOutputWindow);
  THTensor_(free)(gradInputWindow);
}

void THNN_(TemporalConvolution_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    int kW,
    int dW,
    real scale)
{
  long nInputFrame;
  long nOutputFrame;

  THTensor *gradOutputWindow;
  THTensor *inputWindow;
  long k, i;

  int dimS = 0; // sequence dimension
  int dimF = 1; // feature dimension

  if (gradOutput->nDimension == 3)
  {
    dimS = 1;
    dimF = 2;
  }

  nInputFrame = input->size[dimS];
  nOutputFrame = gradOutput->size[dimS];

  input = THTensor_(newContiguous)(input);
  gradOutputWindow = THTensor_(new)();
  inputWindow = THTensor_(new)();

  if (input->nDimension == 2)
  {
    /* bias first */
    for(k = 0; k < nOutputFrame; k++)
    {
      THTensor_(select)(gradOutputWindow, gradOutput, 0, k);
      THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
    }

    /* ouch */
    for(k = 0; nOutputFrame > 0; k++)
    {
      long outputFrameStride = (kW-1)/dW+1;
      long inputFrameStride = outputFrameStride*dW;
      long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
      nOutputFrame -= nFrame;

      THTensor_(setStorage2d)(inputWindow, input->storage,
                              input->storageOffset+k*dW*input->size[1],
                              nFrame, inputFrameStride*input->size[1],
                              kW*input->size[1], 1);

      THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
                              gradOutput->storageOffset + k*gradOutput->size[1],
                              nFrame, outputFrameStride*gradOutput->size[1],
                              gradOutput->size[1], 1);

      THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
      THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow);
      THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
    }
  }
  else
  {
    THTensor *gradOutputSample = THTensor_(new)();
    THTensor *inputSample = THTensor_(new)();
    int nBatchFrame = input->size[0];

    for(i = 0; i < nBatchFrame; i++)
    {
      THTensor_(select)(gradOutputSample, gradOutput, 0, i);
      THTensor_(select)(inputSample, input, 0, i);
      int nOutputSampleFrame = nOutputFrame;

      /* bias first */
      for(k = 0; k < nOutputFrame; k++)
      {
        THTensor_(select)(gradOutputWindow, gradOutputSample, 0, k);
        THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
      }

      /* ouch */
      for(k = 0; nOutputSampleFrame > 0; k++)
      {
        long outputFrameStride = (kW-1)/dW+1;
        long inputFrameStride = outputFrameStride*dW;
        long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
        nOutputSampleFrame -= nFrame;

        THTensor_(setStorage2d)(inputWindow, inputSample->storage,
                                inputSample->storageOffset+k*dW*inputSample->size[1],
                                nFrame, inputFrameStride*inputSample->size[1],
                                kW*inputSample->size[1], 1);

        THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage,
                                gradOutputSample->storageOffset + k*gradOutputSample->size[1],
                                nFrame, outputFrameStride*gradOutputSample->size[1],
                                gradOutputSample->size[1], 1);

        THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
        THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow);
        THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
      }
    }
    THTensor_(free)(gradOutputSample);
    THTensor_(free)(inputSample);
  }

  THTensor_(free)(gradOutputWindow);
  THTensor_(free)(inputWindow);
  THTensor_(free)(input);
}

#endif
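For reference, the "/* ouch */" loops above split the nOutputFrame = (nInputFrame - kW) / dW + 1 output frames into passes of non-overlapping input windows, so that each pass can be issued as a single addmm. A minimal standalone C sketch of that frame bookkeeping; nInputFrame, kW, and dW are illustrative assumptions, not values from this commit.

#include <stdio.h>

int main(void)
{
  /* Reproduce the frame bookkeeping of the strided passes: output frames
     k, k + outputFrameStride, k + 2*outputFrameStride, ... read from
     non-overlapping input windows, so each pass maps to one matrix multiply. */
  const int nInputFrame = 10, kW = 3, dW = 1;
  int nOutputFrame = (nInputFrame - kW) / dW + 1;
  printf("nOutputFrame = %d\n", nOutputFrame);
  for (int k = 0; nOutputFrame > 0; k++) {
    const int outputFrameStride = (kW - 1) / dW + 1;
    const int inputFrameStride = outputFrameStride * dW;
    const int nFrame = (nInputFrame - k * dW - kW) / inputFrameStride + 1;
    nOutputFrame -= nFrame;
    printf("pass k=%d handles %d output frames (stride %d)\n", k, nFrame, outputFrameStride);
  }
  return 0;
}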
235
torch/lib/THNN/generic/TemporalMaxPooling.c
Normal file
@@ -0,0 +1,235 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalMaxPooling.c"
#else

void THNN_(TemporalMaxPooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *indices,
    int kW,
    int dW)
{
  long niframe;
  long framesize;
  long noframe;

  real *input_data;
  real *output_data;
  real *indices_data;

  long t, y;

  int dimS = 0; // sequence dimension
  int dimF = 1; // feature dimension

  THArgCheck(input->nDimension == 2 || input->nDimension == 3, 2, "2D or 3D(batch mode) tensor expected");

  if (input->nDimension == 3)
  {
    dimS = 1;
    dimF = 2;
  }
  THArgCheck(input->size[dimS] >= kW, 2, "input sequence smaller than kernel size");

  /* sizes */
  niframe = input->size[dimS];
  framesize = input->size[dimF];
  noframe = (niframe - kW) / dW + 1;

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 2)
  {
    /* resize output */
    THTensor_(resize2d)(output, noframe, framesize);

    /* indices will contain index locations for each output point */
    THTensor_(resize2d)(indices, noframe, framesize);

    /* get raw pointers */
    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    for(t = 0; t < noframe; t++)
    {
      real *ip = input_data + t*framesize*dW;
      real *op = output_data + t*framesize;
      real *xp = indices_data + t*framesize;
#pragma omp parallel for private(y)
      for(y = 0; y < framesize; y++)
      {
        /* compute local max: */
        long maxindex = -1;
        real maxval = -THInf;
        long x;
        for(x = 0; x < kW; x++)
        {
          real val = ip[x*framesize+y];
          if (val > maxval)
          {
            maxval = val;
            maxindex = x;
          }
        }

        /* set output to local max */
        op[y] = maxval;
        xp[y] = (real)maxindex;
      }
    }
  }
  else
  {
    /* number of batch frames */
    long nbframe = input->size[0];
    long i;

    /* resize output */
    THTensor_(resize3d)(output, nbframe, noframe, framesize);

    /* indices will contain index locations for each output point */
    THTensor_(resize3d)(indices, nbframe, noframe, framesize);

    /* get raw pointers */
    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    for(i = 0; i < nbframe; i++)
    {
      real *inputSample_data = input_data + i*niframe*framesize;
      real *outputSample_data = output_data + i*noframe*framesize;
      real *indicesSample_data = indices_data + i*noframe*framesize;

      for(t = 0; t < noframe; t++)
      {
        real *ip = inputSample_data + t*framesize*dW;
        real *op = outputSample_data + t*framesize;
        real *xp = indicesSample_data + t*framesize;

#pragma omp parallel for private(y)
        for(y = 0; y < framesize; y++)
        {
          /* compute local max: */
          long maxindex = -1;
          real maxval = -THInf;
          long x;
          for(x = 0; x < kW; x++)
          {
            real val = ip[x*framesize+y];
            if (val > maxval)
            {
              maxval = val;
              maxindex = x;
            }
          }

          /* set output to local max */
          op[y] = maxval;
          xp[y] = (real)maxindex;
        }
      }
    }
  }

  /* cleanup */
  THTensor_(free)(input);
}

void THNN_(TemporalMaxPooling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *indices,
    int kW,
    int dW)
{
  long niframe;
  int noframe;
  long framesize;

  real *gradInput_data;
  real *gradOutput_data;
  real *indices_data;

  long t, y;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize and zero */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  int dimS = 0; // sequence dimension
  int dimF = 1; // feature dimension

  if (input->nDimension == 3)
  {
    dimS = 1;
    dimF = 2;
  }
  /* sizes */
  niframe = input->size[dimS];
  noframe = gradOutput->size[dimS];
  framesize = gradOutput->size[dimF];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THTensor_(data)(indices);

  if (input->nDimension == 2)
  {
    for(t = 0; t < noframe; t++)
    {
      real *gip = gradInput_data + t*framesize*dW;
      real *gop = gradOutput_data + t*framesize;
      real *xp = indices_data + t*framesize;
#pragma omp parallel for private(y)
      for(y = 0; y < framesize; y++)
      {
        /* compute local max: */
        long maxindex = (long)xp[y];
        gip[maxindex*framesize+y] += gop[y];
      }
    }
  }
  else
  {
    /* number of batch frames */
    long nbframe = input->size[0];
    long i;

    for(i = 0; i < nbframe; i++)
    {
      real *gradInputSample_data = gradInput_data + i*niframe*framesize;
      real *gradOutputSample_data = gradOutput_data + i*noframe*framesize;
      real *indicesSample_data = indices_data + i*noframe*framesize;

      for(t = 0; t < noframe; t++)
      {
        real *gip = gradInputSample_data + t*framesize*dW;
        real *gop = gradOutputSample_data + t*framesize;
        real *xp = indicesSample_data + t*framesize;
#pragma omp parallel for private(y)
        for(y = 0; y < framesize; y++)
        {
          /* compute local max: */
          long maxindex = (long)xp[y];
          gip[maxindex*framesize+y] += gop[y];
        }
      }
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}

#endif
116
torch/lib/THNN/generic/TemporalSubSampling.c
Normal file
@@ -0,0 +1,116 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalSubSampling.c"
#else

void THNN_(TemporalSubSampling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias,
    int kW,
    int dW,
    int inputFrameSize)
{
  THTensor *outputFrame, *inputWindow;
  int nInputFrame, nOutputFrame;
  long k;

  THArgCheck(input->nDimension == 2, 2, "2D tensor expected");
  THArgCheck(input->size[1] == inputFrameSize, 2, "invalid input frame size");
  THArgCheck(input->size[0] >= kW, 2, "input sequence smaller than kernel size");

  outputFrame = THTensor_(new)();
  inputWindow = THTensor_(new)();

  nInputFrame = input->size[0];
  nOutputFrame = (nInputFrame - kW) / dW + 1;

  THTensor_(resize2d)(output,
                      nOutputFrame,
                      inputFrameSize);

  for(k = 0; k < nOutputFrame; k++)
  {
    THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
    THTensor_(select)(outputFrame, output, 0, k);
    THTensor_(sum)(outputFrame, inputWindow, 0);
    THTensor_(cmul)(outputFrame, outputFrame, weight);
    THTensor_(cadd)(outputFrame, outputFrame, 1, bias);
  }

  THTensor_(free)(outputFrame);
  THTensor_(free)(inputWindow);
}

void THNN_(TemporalSubSampling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    int kW,
    int dW)
{
  THTensor *gradOutputFrame;
  THTensor *gradInputWindow, *buffer, *kwunit;
  long k;

  gradOutputFrame = THTensor_(new)();
  gradInputWindow = THTensor_(new)();
  buffer = THTensor_(new)();
  kwunit = THTensor_(newWithSize1d)(kW);

  THTensor_(fill)(kwunit, 1);
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  for(k = 0; k < gradOutput->size[0]; k++)
  {
    THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW);
    THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
    THTensor_(cmul)(buffer, weight, gradOutputFrame);
    THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer);
  }

  THTensor_(free)(gradOutputFrame);
  THTensor_(free)(gradInputWindow);
  THTensor_(free)(buffer);
  THTensor_(free)(kwunit);
}

void THNN_(TemporalSubSampling_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    int kW,
    int dW,
    real scale)
{
  THTensor *gradOutputFrame;
  THTensor *inputWindow, *buffer;
  long k;

  gradOutputFrame = THTensor_(new)();
  inputWindow = THTensor_(new)();
  buffer = THTensor_(new)();

  for(k = 0; k < gradOutput->size[0]; k++)
  {
    THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
    THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
    THTensor_(sum)(buffer, inputWindow, 0);
    THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame);
    THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame);
  }

  THTensor_(free)(gradOutputFrame);
  THTensor_(free)(inputWindow);
  THTensor_(free)(buffer);
}

#endif
58
torch/lib/THNN/generic/Threshold.c
Normal file
@@ -0,0 +1,58 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Threshold.c"
#else

void THNN_(Threshold_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    real threshold,
    real val,
    bool inplace)
{
  if (inplace)
  {
    TH_TENSOR_APPLY(real, input,
      if (*input_data <= threshold)
        *input_data = val;
    );
    THTensor_(set)(output, input);
  }
  else
  {
    THTensor_(resizeAs)(output, input);
    TH_TENSOR_APPLY2(real, output, real, input,
      *output_data = (*input_data > threshold) ? *input_data : val;
    );
  }
}

void THNN_(Threshold_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    real threshold,
    bool inplace)
{
  if (inplace)
  {
    TH_TENSOR_APPLY2(real, gradOutput, real, input,
      if ((*input_data) <= threshold)
        *gradOutput_data = 0;
    );
    THTensor_(set)(gradInput, gradOutput);
  }
  else
  {
    THTensor_(resizeAs)(gradInput, input);
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
      if ((*input_data) > threshold)
        *gradInput_data = *gradOutput_data;
      else
        *gradInput_data = 0;
    );
  }
}

#endif
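For reference, the module above computes y = x where x > threshold and y = val elsewhere, and lets the gradient through only where x > threshold; with threshold = 0 and val = 0 this is ReLU. A minimal standalone C sketch with illustrative values, not taken from this commit.

#include <stdio.h>

int main(void)
{
  /* y = (x > threshold) ? x : val; gradient passes only where x > threshold. */
  const double threshold = 0.0, val = 0.0;   /* ReLU configuration */
  const double x[4] = {-2.0, -0.5, 0.5, 2.0};
  for (int i = 0; i < 4; i++) {
    const double y = (x[i] > threshold) ? x[i] : val;
    const double dydx = (x[i] > threshold) ? 1.0 : 0.0;
    printf("x=%5.1f  y=%5.1f  dy/dx=%g\n", x[i], y, dydx);
  }
  return 0;
}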
309
torch/lib/THNN/generic/VolumetricAveragePooling.c
Normal file
@@ -0,0 +1,309 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c"
#else

static void THNN_(VolumetricAveragePooling_updateOutput_frame)(
    real *input_p,
    real *output_p,
    long nslices,
    long itime,
    long iwidth,
    long iheight,
    long otime,
    long owidth,
    long oheight,
    int kT,
    int kW,
    int kH,
    int dT,
    int dW,
    int dH)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* loop over output */
    long i, j, ti;
    for (ti = 0; ti < otime; ti++)
    {
      for (i = 0; i < oheight; i++)
      {
        for (j = 0; j < owidth; j++)
        {
          /* local pointers */
          real *ip = input_p + k * itime * iwidth * iheight
            + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
          real *op = output_p + k * otime * owidth * oheight
            + ti * owidth * oheight + i * owidth + j;

          /* compute local sum: */
          real sum = 0.0;
          int x, y, z;

          for (z = 0; z < kT; z++)
          {
            for (y = 0; y < kH; y++)
            {
              for (x = 0; x < kW; x++)
              {
                sum += *(ip + z * iwidth * iheight + y * iwidth + x);
              }
            }
          }

          /* set output to local average */
          *op = sum / (kT * kW * kH);
        }
      }
    }
  }
}

void THNN_(VolumetricAveragePooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    int kT,
    int kW,
    int kH,
    int dT,
    int dW,
    int dH)
{
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;

  THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
    "4D or 5D (batch-mode) tensor expected"
  );

  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
    "input image smaller than kernel size"
  );

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  otime   = (itime - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth  = (iwidth - kW) / dW + 1;

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

    THNN_(VolumetricAveragePooling_updateOutput_frame)(
      input_data, output_data, nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH,
      dT, dW, dH
    );
  }
  else /* batch mode */
  {
    long p;
    long nBatch = input->size[0];

    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++)
    {
      THNN_(VolumetricAveragePooling_updateOutput_frame)(
        input_data + p * istride, output_data + p * ostride, nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH,
        dT, dW, dH
      );
    }
  }

  /* cleanup */
  THTensor_(free)(input);
}

static void THNN_(VolumetricAveragePooling_updateGradInput_frame)(
    real *gradInput_p,
    real *gradOutput_p,
    long nslices,
    long itime,
    long iwidth,
    long iheight,
    long otime,
    long owidth,
    long oheight,
    int kT,
    int kW,
    int kH,
    int dT,
    int dW,
    int dH)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* loop over output */
    long i, j, ti;
    for (ti = 0; ti < otime; ti++)
    {
      for (i = 0; i < oheight; i++)
      {
        for (j = 0; j < owidth; j++)
        {
          /* local pointers */
          real *ip = gradInput_p + k * itime * iwidth * iheight
            + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
          real *op = gradOutput_p + k * otime * owidth * oheight
            + ti * owidth * oheight + i * owidth + j;

          /* scatter gradients out to footprint: */
          real val = *op / (kT * kW * kH);
          int x, y, z;
          for (z = 0; z < kT; z++)
          {
            for (y = 0; y < kH; y++)
            {
              for (x = 0; x < kW; x++)
              {
                *(ip + z * iwidth * iheight + y * iwidth + x) += val;
              }
            }
          }
        }
      }
    }
  }
}

void THNN_(VolumetricAveragePooling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    int kT,
    int kW,
    int kH,
    int dT,
    int dW,
    int dH)
{
  int nslices;
  int itime;
  int iheight;
  int iwidth;
  int otime;
  int oheight;
  int owidth;
  real *gradInput_data;
  real *gradOutput_data;

  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  otime   = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth  = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* backprop */
  if (input->nDimension == 4) /* non-batch mode */
  {
    THNN_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH,
      dT, dW, dH
    );
  }
  else /* batch mode */
  {
    long p;
    long nBatch = input->size[0];

    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++)
    {
      THNN_(VolumetricAveragePooling_updateGradInput_frame)(
        gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH,
        dT, dW, dH
      );
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}

#endif
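For reference, the pooling above uses valid (unpadded) windows, so each output extent is (input - kernel) / stride + 1 and every output value is the window sum divided by kT*kH*kW. A minimal standalone C sketch of that size arithmetic; the sizes are illustrative assumptions, not values from this commit.

#include <stdio.h>

int main(void)
{
  /* Valid (unpadded) pooling output sizes: o = (i - k) / d + 1 along t/h/w. */
  const int itime = 8, iheight = 16, iwidth = 16;
  const int kT = 2, kH = 2, kW = 2;
  const int dT = 2, dH = 2, dW = 2;
  const int otime   = (itime   - kT) / dT + 1;
  const int oheight = (iheight - kH) / dH + 1;
  const int owidth  = (iwidth  - kW) / dW + 1;
  printf("output: %d x %d x %d, each value = window sum / %d\n",
         otime, oheight, owidth, kT * kH * kW);
  return 0;
}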
247
torch/lib/THNN/generic/VolumetricConvolution.c
Normal file
@@ -0,0 +1,247 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricConvolution.c"
#else

void THNN_(VolumetricConvolution_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias,
    THTensor *finput,     // only used by cuda impl
    THTensor *fgradInput, // only used by cuda impl
    int dT,
    int dW,
    int dH,
    int pT,
    int pW,
    int pH)
{
  THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version

  THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
    "4D or 5D (batch-mode) tensor expected"
  );

  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimt++;
    dimh++;
    dimw++;
  }

  long nOutputPlane = weight->size[0];
  long kT = weight->size[2];
  long kH = weight->size[3];
  long kW = weight->size[4];
  long inputDepth  = input->size[dimt];
  long inputHeight = input->size[dimh];
  long inputWidth  = input->size[dimw];
  long outputDepth  = (inputDepth - kT) / dT + 1;
  long outputWidth  = (inputWidth - kW) / dW + 1;
  long outputHeight = (inputHeight - kH) / dH + 1;
  THTensor *outn = THTensor_(new)();
  long i, j;
  if (input->nDimension == 4) /* non-batch mode */
  {
    THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);

    /* add bias */
    for (i = 0; i < bias->size[0]; i++)
    {
      THTensor_(select)(outn, output, 0, i);
      THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
    }

    /* do convolutions */
    THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X");
  }
  else /* batch mode */
  {
    long nBatch = input->size[0];
    THTensor_(resize5d)(output, nBatch, nOutputPlane, outputDepth, outputHeight, outputWidth);
    THTensor *inb = THTensor_(new)();
    THTensor *outb = THTensor_(new)();

    /* loop over batches */
    for (j = 0; j < nBatch; j++)
    {
      THTensor_(select)(inb, input, 0, j);
      THTensor_(select)(outb, output, 0, j);

      /* add bias */
      for (i = 0; i < bias->size[0]; i++)
      {
        THTensor_(select)(outn, outb, 0, i);
        THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
      }

      /* do convolutions */
      THTensor_(conv3Dmv)(outb, 1.0, 1.0, inb, weight, dT, dH, dW, "V", "X");
    }

    THTensor_(free)(inb);
    THTensor_(free)(outb);
  }
  THTensor_(free)(outn);
}

void THNN_(VolumetricConvolution_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *finput, // only used by cuda impl
    int dT,
    int dW,
    int dH,
    int pT,
    int pW,
    int pH)
{
  THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version

  THArgCheck(weight->nDimension == 5, 4,
    "5D weight tensor is expected (nOutputPlane x nInputPlane x kT x kH x kW)"
  );

  int nOutputPlane = (int)weight->size[0];

  THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3,
    "4D or 5D (batch-mode) tensor expected"
  );

  int dimPlane = 0;
  if (gradOutput->nDimension == 5)
  {
    dimPlane++;
  }

  THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1,
    "Number of output features is not equal to nOutputPlane"
  );

  /* gradient to input */
  THTensor *tweight = THTensor_(newTranspose)(weight, 0, 1);
  if (gradOutput->nDimension == 4) /* non-batch mode */
  {
    THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C");
  }
  else /* batch mode */
  {
    long nBatch = gradOutput->size[0];
    THTensor *ginpb = THTensor_(new)();
    THTensor *goutb = THTensor_(new)();
    long j;

    THTensor_(resize5d)(gradInput,
      input->size[0], input->size[1], input->size[2], input->size[3], input->size[4]
    );

    /* loop over batches */
    for (j = 0; j < nBatch; j++)
    {
      THTensor_(select)(ginpb, gradInput, 0, j);
      THTensor_(select)(goutb, gradOutput, 0, j);
      THTensor_(conv3Dmv)(ginpb, 0.0, 1.0, goutb, tweight, dT, dH, dW, "F", "C");
    }
    THTensor_(free)(ginpb);
    THTensor_(free)(goutb);
  }

  THTensor_(free)(tweight);
}

void THNN_(VolumetricConvolution_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *finput,     // only used by cuda impl
    THTensor *fgradInput, // only used by cuda impl
    int dT,
    int dW,
    int dH,
    int pT,
    int pW,
    int pH,
    real scale)
{
  THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version

  THArgCheck(gradWeight->nDimension == 5, 4,
    "5D gradWeight tensor is expected (nOutputPlane x nInputPlane x kT x kH x kW)"
  );

  int nOutputPlane = (int)gradWeight->size[0];

  THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
    "gradBias tensor has wrong size"
  );

  long k;
  real *gradBias_data;
  THTensor *gradOutSlice;
  int dimPlane = 0;
  if (gradOutput->nDimension == 5)
  {
    dimPlane++;
  }

  THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1,
    "Number of output features is not equal to nOutputPlane"
  );

  if (gradOutput->nDimension == 4) /* non-batch mode */
  {
    /* gradient to bias */
    gradBias_data = THTensor_(data)(gradBias);
    gradOutSlice = THTensor_(new)();
    for (k = 0; k < nOutputPlane; k++)
    {
      THTensor_(select)(gradOutSlice, gradOutput, 0, k);
      gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
    }
    THTensor_(free)(gradOutSlice);

    /* gradient to kernels */
    THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW);
  }
  else /* batch mode */
  {
    long nBatch = gradOutput->size[0];
    THTensor *inpb = THTensor_(new)();
    THTensor *goutb = THTensor_(new)();
    long j;

    /* loop over batches */
    for (j = 0; j < nBatch; j++)
    {
      THTensor_(select)(inpb, input, 0, j);
      THTensor_(select)(goutb, gradOutput, 0, j);

      /* gradient to bias */
      gradBias_data = THTensor_(data)(gradBias);
      gradOutSlice = THTensor_(new)();
      for (k = 0; k < nOutputPlane; k++)
      {
        THTensor_(select)(gradOutSlice, goutb, 0, k);
        gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
      }
      THTensor_(free)(gradOutSlice);

      /* gradient to kernels */
      THTensor_(conv3DRevger)(gradWeight, 1.0, scale, inpb, goutb, dT, dH, dW);
    }
    THTensor_(free)(inpb);
    THTensor_(free)(goutb);
  }
}

#endif
518
torch/lib/THNN/generic/VolumetricConvolutionMM.c
Normal file
518
torch/lib/THNN/generic/VolumetricConvolutionMM.c
Normal file
|
|
@ -0,0 +1,518 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/VolumetricConvolutionMM.c"
|
||||
#else
|
||||
|
||||
/* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */
|
||||
static void THNN_(unfolded_acc_vol)(
|
||||
THTensor *finput,
|
||||
THTensor *input,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH,
|
||||
int nInputPlane,
|
||||
int inputDepth,
|
||||
int inputWidth,
|
||||
int inputHeight,
|
||||
int outputDepth,
|
||||
int outputWidth,
|
||||
int outputHeight)
|
||||
{
|
||||
int nip;
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *finput_data = THTensor_(data)(finput);
|
||||
|
||||
//#pragma omp parallel for private(nip)
|
||||
for (nip = 0; nip < nInputPlane; nip++)
|
||||
{
|
||||
int kt, kw, kh, t, y, x, it, ix, iy;
|
||||
for (kt = 0; kt < kT; kt++)
|
||||
{
|
||||
for (kh = 0; kh < kH; kh++)
|
||||
{
|
||||
for (kw = 0; kw < kW; kw++)
|
||||
{
|
||||
real *src = finput_data
|
||||
+ nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth)
|
||||
+ kt * (kH*kW*outputDepth*outputHeight*outputWidth)
|
||||
+ kh * (kW*outputDepth*outputHeight*outputWidth)
|
||||
+ kw * (outputDepth*outputHeight*outputWidth);
|
||||
|
||||
real *dst = input_data + nip*(inputDepth*inputHeight*inputWidth);
|
||||
if (pT > 0 || pH > 0 || pW > 0)
|
||||
{
|
||||
for (t = 0; t < outputDepth; t++)
|
||||
{
|
||||
it = t*dT - pT + kt;
|
||||
for (y = 0; y < outputHeight; y++)
|
||||
{
|
||||
iy = y*dH - pH + kh;
|
||||
for (x = 0; x < outputWidth; x++)
|
||||
{
|
||||
ix = x*dW - pW + kw;
|
||||
if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth)
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
THVector_(add)(dst+it*inputHeight*inputWidth+iy*inputWidth+ix, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (t = 0; t < outputDepth; t++)
|
||||
{
|
||||
it = t*dT + kt;
|
||||
for (y = 0; y < outputHeight; y++)
|
||||
{
|
||||
iy = y*dH + kh;
|
||||
for(x = 0; x < outputWidth; x++)
|
||||
{
|
||||
ix = x*dW + kw;
|
||||
THVector_(add)(dst+it*inputHeight*inputWidth+iy*inputWidth+ix, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(unfolded_copy_vol)(
|
||||
THTensor *finput,
|
||||
THTensor *input,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH,
|
||||
int nInputPlane,
|
||||
int inputDepth,
|
||||
int inputWidth,
|
||||
int inputHeight,
|
||||
int outputDepth,
|
||||
int outputWidth,
|
||||
int outputHeight)
|
||||
{
|
||||
long k;
|
||||
real *input_data = THTensor_(data)(input);
|
||||
real *finput_data = THTensor_(data)(finput);
|
||||
// #pragma omp parallel for private(k)
|
||||
for (k = 0; k < nInputPlane*kT*kH*kW; k++)
|
||||
{
|
||||
int nip = k / (kT*kH*kW);
|
||||
int rest = k % (kT*kH*kW);
|
||||
int kt = rest / (kH*kW);
|
||||
rest = rest % (kH*kW);
|
||||
int kh = rest / kW;
|
||||
int kw = rest % kW;
|
||||
int t,x,y,it,ix,iy;
|
||||
real *dst = finput_data
|
||||
+ nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth)
|
||||
+ kt * (kH*kW*outputDepth*outputHeight*outputWidth)
|
||||
+ kh * (kW*outputDepth*outputHeight*outputWidth)
|
||||
+ kw * (outputDepth*outputHeight*outputWidth);
|
||||
real *src = input_data + nip*(inputDepth*inputHeight*inputWidth);
|
||||
|
||||
if (pT > 0 || pH > 0 || pW > 0)
|
||||
{
|
||||
for (t = 0; t < outputDepth; t++)
|
||||
{
|
||||
it = t*dT - pT + kt;
|
||||
for (y = 0; y < outputHeight; y++)
|
||||
{
|
||||
iy = y*dH - pH + kh;
|
||||
for (x = 0; x < outputWidth; x++)
|
||||
{
|
||||
ix = x*dW - pW + kw;
|
||||
if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth)
|
||||
memset(dst+t*outputHeight*outputWidth+y*outputWidth+x, 0, sizeof(real)*(1));
|
||||
else
|
||||
memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (t = 0; t < outputDepth; t++)
|
||||
{
|
||||
it = t*dT + kt;
|
||||
for (y = 0; y < outputHeight; y++)
|
||||
{
|
||||
iy = y*dH + kh;
|
||||
for(x = 0; x < outputWidth; x++)
|
||||
{
|
||||
ix = x*dW + kw;
|
||||
memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(VolumetricConvolutionMM_updateOutput_frame)(
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *finput,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH,
|
||||
long nInputPlane,
|
||||
long inputDepth,
|
||||
long inputWidth,
|
||||
long inputHeight,
|
||||
long nOutputPlane,
|
||||
long outputDepth,
|
||||
long outputWidth,
|
||||
long outputHeight)
|
||||
{
|
||||
long i;
|
||||
THTensor *output2d;
|
||||
|
||||
THNN_(unfolded_copy_vol)(
|
||||
finput, input,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH,
|
||||
nInputPlane,
|
||||
inputDepth, inputWidth, inputHeight,
|
||||
outputDepth, outputWidth, outputHeight
|
||||
);
|
||||
|
||||
output2d = THTensor_(newWithStorage2d)(
|
||||
output->storage, output->storageOffset, nOutputPlane, -1,
|
||||
outputDepth*outputHeight*outputWidth, -1
|
||||
);
|
||||
|
||||
for (i = 0; i < nOutputPlane; i++)
|
||||
{
|
||||
THVector_(fill)(
|
||||
output->storage->data+output->storageOffset+output->stride[0]*i,
|
||||
THTensor_(get1d)(bias, i),
|
||||
outputDepth*outputHeight*outputWidth
|
||||
);
|
||||
}
|
||||
|
||||
THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
|
||||
|
||||
THTensor_(free)(output2d);
|
||||
}
|
||||
|
||||
void THNN_(VolumetricConvolutionMM_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *finput,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
int dimf = 0;
|
||||
int dimt = 1;
|
||||
int dimh = 2;
|
||||
int dimw = 3;
|
||||
|
||||
long nInputPlane;
|
||||
long inputDepth;
|
||||
long inputHeight;
|
||||
long inputWidth;
|
||||
long nOutputPlane;
|
||||
long outputDepth;
|
||||
long outputHeight;
|
||||
long outputWidth;
|
||||
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
|
||||
"4D or 5D(batch mode) tensor expected"
|
||||
);
|
||||
|
||||
if (input->nDimension == 5)
|
||||
{
|
||||
dimf++;
|
||||
dimt++;
|
||||
dimh++;
|
||||
dimw++;
|
||||
}
|
||||
|
||||
nInputPlane = input->size[dimf];
|
||||
inputDepth = input->size[dimt];
|
||||
inputHeight = input->size[dimh];
|
||||
inputWidth = input->size[dimw];
|
||||
nOutputPlane = weight->size[0];
|
||||
outputDepth = (inputDepth + 2*pT - kT) / dT + 1;
|
||||
outputHeight = (inputHeight + 2*pH - kH) / dH + 1;
|
||||
outputWidth = (inputWidth + 2*pW - kW) / dW + 1;
|
||||
|
||||
if (outputWidth < 1 || outputHeight < 1)
|
||||
{
|
||||
THError(
|
||||
"Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
|
||||
nInputPlane, inputDepth, inputHeight, inputWidth,
|
||||
nOutputPlane, outputDepth, outputHeight, outputWidth
|
||||
);
|
||||
}
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
THTensor_(resize2d)(finput, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth);
|
||||
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
|
||||
THNN_(VolumetricConvolutionMM_updateOutput_frame)(
|
||||
input, output, weight, bias, finput,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH,
|
||||
nInputPlane, inputDepth, inputWidth, inputHeight,
|
||||
nOutputPlane, outputDepth, outputWidth, outputHeight
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
THTensor_(resize3d)(finput, T, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth);
|
||||
THTensor_(resize5d)(output, T, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
|
||||
// #pragma omp parallel for private(t)
|
||||
for (t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *input_t = THTensor_(newSelect)(input, 0, t);
|
||||
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
|
||||
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
|
||||
|
||||
THNN_(VolumetricConvolutionMM_updateOutput_frame)(
|
||||
input_t, output_t, weight, bias, finput_t,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH,
|
||||
nInputPlane, inputDepth, inputWidth, inputHeight,
|
||||
nOutputPlane, outputDepth, outputWidth, outputHeight
|
||||
);
|
||||
|
||||
THTensor_(free)(input_t);
|
||||
THTensor_(free)(output_t);
|
||||
THTensor_(free)(finput_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
|
||||
THTensor *gradInput,
|
||||
THTensor *gradOutput,
|
||||
THTensor *weight,
|
||||
THTensor *fgradInput,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
|
||||
gradOutput->storage, gradOutput->storageOffset,
|
||||
gradOutput->size[0], -1,
|
||||
gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
|
||||
);
|
||||
|
||||
THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
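  /* The caller transposes weight before invoking this frame function, so the
     addmm above computes fgradInput (nInputPlane*kT*kW*kH x oD*oH*oW) =
     weight^T * gradOutput2d; unfolded_acc_vol below scatter-adds these
     unfolded gradient columns back into gradInput. */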
|
||||
THTensor_(free)(gradOutput2d);
|
||||
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
THNN_(unfolded_acc_vol)(
|
||||
fgradInput, gradInput,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH,
|
||||
gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2],
|
||||
gradOutput->size[1], gradOutput->size[3], gradOutput->size[2]
|
||||
);
|
||||
}
|
||||
|
||||
void THNN_(VolumetricConvolutionMM_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
// the number of input/output planes and the kernel size are indirectly defined by the weight tensor
|
||||
THArgCheck(weight->nDimension == 2, 4,
|
||||
"2D weight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))"
|
||||
);
|
||||
|
||||
int nOutputPlane = (int)weight->size[0];
|
||||
|
||||
THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 1,
|
||||
"Number of output features is not equal to nOutputPlane"
|
||||
);
|
||||
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(resizeAs)(fgradInput, finput);
|
||||
// depending on the BLAS library, fgradInput (result tensor) might
|
||||
// be left uninitialized on zero alpha, which might lead to weird behavior
|
||||
// hence, to be safe, zero it
|
||||
THTensor_(zero)(fgradInput);
|
||||
THTensor_(transpose)(weight, weight, 0, 1);
|
||||
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
|
||||
gradInput, gradOutput, weight, fgradInput,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
//#pragma omp parallel for private(t)
|
||||
for (t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
|
||||
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
|
||||
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
|
||||
|
||||
THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
|
||||
gradInput_t, gradOutput_t, weight, fgradInput_t,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
|
||||
THTensor_(free)(gradInput_t);
|
||||
THTensor_(free)(gradOutput_t);
|
||||
THTensor_(free)(fgradInput_t);
|
||||
}
|
||||
}
|
||||
|
||||
THTensor_(transpose)(weight, weight, 0, 1);
|
||||
}
|
||||
|
||||
static void THNN_(VolumetricConvolutionMM_accGradParameters_frame)(
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *finput,
|
||||
real scale)
|
||||
{
|
||||
long i;
|
||||
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
|
||||
gradOutput->storage, gradOutput->storageOffset,
|
||||
gradOutput->size[0], -1,
|
||||
gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
|
||||
);
|
||||
|
||||
THTensor_(transpose)(finput, finput, 0, 1);
|
||||
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
|
||||
THTensor_(transpose)(finput, finput, 0, 1);
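  /* The transpose/addmm/transpose sequence above accumulates
     gradWeight += scale * gradOutput2d * finput^T, i.e. the correlation of the
     output gradients with the unfolded input patches; the bias gradient is the
     per-plane sum of gradOutput handled in the loop below. */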
|
||||
|
||||
for (i = 0; i < gradBias->size[0]; i++)
|
||||
{
|
||||
long k;
|
||||
real sum = 0;
|
||||
real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
|
||||
for (k = 0; k < gradOutput2d->size[1]; k++)
|
||||
sum += data[k];
|
||||
|
||||
(gradBias->storage->data + gradBias->storageOffset)[i] += scale * sum;
|
||||
}
|
||||
|
||||
THTensor_(free)(gradOutput2d);
|
||||
}
|
||||
|
||||
void THNN_(VolumetricConvolutionMM_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *finput,
|
||||
real scale)
|
||||
{
|
||||
THArgCheck(gradWeight->nDimension == 2, 4,
|
||||
"2D gradWeight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))"
|
||||
);
|
||||
|
||||
int nOutputPlane = (int)gradWeight->size[0];
|
||||
|
||||
THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
|
||||
"gradBias tensor has wrong size"
|
||||
);
|
||||
|
||||
THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 3,
|
||||
"Number of output features is not equal to nOutputPlane"
|
||||
);
|
||||
|
||||
if (input->nDimension == 4) // non-batch mode
|
||||
{
|
||||
THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
|
||||
}
|
||||
else // batch mode
|
||||
{
|
||||
long T = input->size[0];
|
||||
long t;
|
||||
|
||||
for (t = 0; t < T; t++)
|
||||
{
|
||||
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
|
||||
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
|
||||
|
||||
THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
|
||||
|
||||
THTensor_(free)(gradOutput_t);
|
||||
THTensor_(free)(finput_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
356
torch/lib/THNN/generic/VolumetricDilatedConvolution.c
Normal file
@@ -0,0 +1,356 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/VolumetricDilatedConvolution.c"
|
||||
#else
|
||||
|
||||
void THNN_(VolumetricDilatedConvolution_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *columns,
|
||||
THTensor *ones,
|
||||
int kT, int kW, int kH,
|
||||
int dT, int dW, int dH,
|
||||
int padT, int padW, int padH,
|
||||
int dilationT, int dilationW, int dilationH)
|
||||
{
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected, but got: %d", input->nDimension);
|
||||
THArgCheck(weight->nDimension == 5, 4, "weight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
|
||||
THArgCheck(!bias || weight->size[0] == bias->size[0], 4, "nOutputPlane mismatch in weight and bias");
|
||||
THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
|
||||
// Params:
|
||||
int nInputPlane = weight->size[1];
|
||||
int nOutputPlane = weight->size[0];
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 4) {
|
||||
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match. Expected: %d, got %d", nInputPlane, input->size[0]);
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
|
||||
} else {
|
||||
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match. Expected: %d, got %d", nInputPlane, input->size[1]);
|
||||
}
|
||||
|
||||
long inputDepth = input->size[2];
|
||||
long inputHeight = input->size[3];
|
||||
long inputWidth = input->size[4];
|
||||
long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
|
||||
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
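  // Same output-size formula as the non-dilated case, but with the kernel's
  // effective extent dilationW*(kW-1)+1: e.g. kW = 3, dilationW = 2 spans 5
  // input columns, so inputWidth = 16, padW = 2, dW = 1 gives (16 + 4 - 5)/1 + 1 = 16.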
|
||||
|
||||
if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
|
||||
THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
|
||||
nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(zero)(output);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
// Note: this buffer can be shared with other modules, it only ever gets increased,
|
||||
// and always contains ones.
|
||||
if (ones->nDimension != 3 ||
|
||||
ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *output_n = THTensor_(new)();
|
||||
|
||||
// For each elt in batch, do:
|
||||
for (int elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(output_n, output, 0, elt);
|
||||
|
||||
// Do Bias first:
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m_ = nOutputPlane;
|
||||
long n_ = outputDepth * outputHeight * outputWidth;
|
||||
long k_ = 1;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
if (bias) {
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n_, m_, k_,
|
||||
1,
|
||||
THTensor_(data)(ones), k_,
|
||||
THTensor_(data)(bias), k_,
|
||||
0,
|
||||
THTensor_(data)(output_n), n_
|
||||
);
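      // Because TH tensors are row-major while gemm is column-major, the call
      // above effectively computes output_n = bias * ones^T: every location of
      // output plane j is initialized to bias[j] before the convolution term is
      // accumulated below.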
|
||||
} else {
|
||||
THTensor_(zero)(output_n);
|
||||
}
|
||||
|
||||
// Extract columns:
|
||||
THNN_(vol2col)(
|
||||
THTensor_(data)(input_n),
|
||||
nInputPlane, inputDepth, inputHeight, inputWidth,
|
||||
kT, kH, kW, padT, padH, padW, dT, dH, dW,
|
||||
dilationT, dilationH, dilationW,
|
||||
THTensor_(data)(columns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m = nOutputPlane;
|
||||
long n = columns->size[1];
|
||||
long k = nInputPlane*kT*kH*kW;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 'n',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(columns), n,
|
||||
THTensor_(data)(weight), k,
|
||||
1,
|
||||
THTensor_(data)(output_n), n
|
||||
);
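    // In row-major terms this accumulates
    // output_n (nOutputPlane x oD*oH*oW) += weight (nOutputPlane x k) * columns,
    // with k = nInputPlane*kT*kH*kW; the swapped operand order compensates for
    // gemm's column-major convention.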
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(output_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0) {
|
||||
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricDilatedConvolution_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *gradColumns,
|
||||
int kT, int kW, int kH,
|
||||
int dT, int dW, int dH,
|
||||
int padT, int padW, int padH,
|
||||
int dilationT, int dilationW, int dilationH)
|
||||
{
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected");
|
||||
THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, "4D or 5D (batch mode) tensor is expected");
|
||||
THArgCheck(weight->nDimension == 5, 4, "weight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
|
||||
THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
|
||||
// Params
|
||||
int nInputPlane = weight->size[1];
|
||||
int nOutputPlane = weight->size[0];
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 4) {
|
||||
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
|
||||
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
|
||||
} else {
|
||||
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
}
|
||||
|
||||
long inputDepth = input->size[2];
|
||||
long inputWidth = input->size[4];
|
||||
long inputHeight = input->size[3];
|
||||
long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
|
||||
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(gradColumns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
|
||||
THTensor_(zero)(gradColumns);
|
||||
|
||||
// Helpers
|
||||
THTensor *gradInput_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
// For each elt in batch, do:
|
||||
for (int elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per sample:
|
||||
THTensor_(select)(gradInput_n, gradInput, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m = nInputPlane*kT*kW*kH;
|
||||
long n = gradColumns->size[1];
|
||||
long k = nOutputPlane;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 't',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(gradOutput_n), n,
|
||||
THTensor_(data)(weight), m,
|
||||
0,
|
||||
THTensor_(data)(gradColumns), n
|
||||
);
|
||||
|
||||
// Unpack columns back into input:
|
||||
THNN_(col2vol)(
|
||||
THTensor_(data)(gradColumns),
|
||||
nInputPlane, inputDepth, inputHeight, inputWidth,
|
||||
kT, kH, kW, padT, padH, padW, dT, dH, dW,
|
||||
dilationT, dilationH, dilationW,
|
||||
THTensor_(data)(gradInput_n)
|
||||
);
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(gradInput_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0) {
|
||||
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricDilatedConvolution_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *columns,
|
||||
THTensor *ones,
|
||||
int kT, int kW, int kH,
|
||||
int dT, int dW, int dH,
|
||||
int padT, int padW, int padH,
|
||||
int dilationT, int dilationW, int dilationH,
|
||||
real scale)
|
||||
{
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected");
|
||||
THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, "4D or 5D (batch mode) tensor is expected");
|
||||
THArgCheck(gradWeight->nDimension == 5, 4, "gradWeight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
|
||||
THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
|
||||
THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
|
||||
THArgCheck(!gradBias || gradWeight->size[0] == gradBias->size[0], 4, "nOutputPlane mismatch in gradWeight and gradBias");
|
||||
|
||||
// Params
|
||||
int nInputPlane = gradWeight->size[1];
|
||||
int nOutputPlane = gradWeight->size[0];
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 4) {
|
||||
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
|
||||
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
|
||||
} else {
|
||||
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
}
|
||||
|
||||
long inputDepth = input->size[2];
|
||||
long inputWidth = input->size[4];
|
||||
long inputHeight = input->size[3];
|
||||
long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
|
||||
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
|
||||
// Batch size + input planes
|
||||
long batchSize = input->size[0];
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
// For each elt in batch, do:
|
||||
for (int elt = 0; elt < batchSize; elt ++) {
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// Extract columns:
|
||||
THNN_(vol2col)(
|
||||
THTensor_(data)(input_n),
|
||||
nInputPlane, inputDepth, inputHeight, inputWidth,
|
||||
kT, kH, kW, padT, padH, padW, dT, dH, dW,
|
||||
dilationT, dilationH, dilationW,
|
||||
THTensor_(data)(columns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m = nOutputPlane;
|
||||
long n = nInputPlane*kT*kW*kH;
|
||||
long k = columns->size[1];
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n, m, k,
|
||||
scale,
|
||||
THTensor_(data)(columns), k,
|
||||
THTensor_(data)(gradOutput_n), k,
|
||||
1,
|
||||
THTensor_(data)(gradWeight), n
|
||||
);
|
||||
|
||||
// Do Bias:
|
||||
// M,N,K are dims of matrix A and B
|
||||
long m_ = nOutputPlane;
|
||||
long k_ = outputDepth * outputHeight * outputWidth;
|
||||
|
||||
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
|
||||
if (gradBias) {
|
||||
THBlas_(gemv)(
|
||||
't',
|
||||
k_, m_,
|
||||
scale,
|
||||
THTensor_(data)(gradOutput_n), k_,
|
||||
THTensor_(data)(ones), 1,
|
||||
1,
|
||||
THTensor_(data)(gradBias), 1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize
|
||||
if (batch == 0) {
|
||||
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
469
torch/lib/THNN/generic/VolumetricFullConvolution.c
Normal file
@@ -0,0 +1,469 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/VolumetricFullConvolution.c"
|
||||
#else
|
||||
|
||||
static void THNN_(vol2col)(
|
||||
const real *data_vol, const int channels,
|
||||
const int depth, const int height, const int width,
|
||||
const int kT, const int kH, const int kW,
|
||||
const int pT, const int pH, const int pW,
|
||||
const int dT, const int dH, const int dW,
|
||||
const int dilationT, const int dilationH, const int dilationW,
|
||||
real *data_col)
|
||||
{
|
||||
int c, t, h, w;
|
||||
int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
|
||||
int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
int channels_col = channels * kT * kH * kW;
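  /* Each row c of data_col corresponds to one (channel, kt, kh, kw) offset
     (decoded just below) and each column to one output location; positions that
     fall into the padding are written as 0. */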
|
||||
for (c = 0; c < channels_col; ++c)
|
||||
{
|
||||
int w_offset = c % kW;
|
||||
int h_offset = (c / kW) % kH;
|
||||
int t_offset = (c / kW / kH) % kT;
|
||||
int c_vol = c / kT / kH / kW;
|
||||
for (t = 0; t < depth_col; ++t)
|
||||
{
|
||||
for (h = 0; h < height_col; ++h)
|
||||
{
|
||||
for (w = 0; w < width_col; ++w)
|
||||
{
|
||||
int t_pad = t * dT - pT + t_offset * dilationT;
|
||||
int h_pad = h * dH - pH + h_offset * dilationH;
|
||||
int w_pad = w * dW - pW + w_offset * dilationW;
|
||||
if (t_pad >= 0 && t_pad < depth &&
|
||||
h_pad >= 0 && h_pad < height &&
|
||||
w_pad >= 0 && w_pad < width)
|
||||
data_col[((c * depth_col + t) * height_col + h) * width_col + w] =
|
||||
data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad];
|
||||
else
|
||||
data_col[((c * depth_col + t) * height_col + h) * width_col + w] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void THNN_(col2vol)(
|
||||
const real* data_col, const int channels,
|
||||
const int depth, const int height, const int width,
|
||||
const int kT, const int kH, const int kW,
|
||||
const int pT, const int pH, const int pW,
|
||||
const int dT, const int dH, const int dW,
|
||||
const int dilationT, const int dilationH, const int dilationW,
|
||||
real* data_vol)
|
||||
{
|
||||
int c, t, h, w;
|
||||
memset(data_vol, 0, sizeof(real) * depth * height * width * channels);
|
||||
int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
|
||||
int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
|
||||
int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
|
||||
int channels_col = channels * kT * kH * kW;
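  /* col2vol is the adjoint of vol2col: instead of copying values out of the
     volume it accumulates (+=) every column entry back into its source voxel,
     which is why data_vol is zeroed first -- overlapping kernel windows sum up. */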
|
||||
for (c = 0; c < channels_col; ++c)
|
||||
{
|
||||
int w_offset = c % kW;
|
||||
int h_offset = (c / kW) % kH;
|
||||
int t_offset = (c / kW / kH) % kT;
|
||||
int c_vol = c / kT / kH / kW;
|
||||
for (t = 0; t < depth_col; ++t)
|
||||
{
|
||||
for (h = 0; h < height_col; ++h)
|
||||
{
|
||||
for (w = 0; w < width_col; ++w)
|
||||
{
|
||||
int t_pad = t * dT - pT + t_offset * dilationT;
|
||||
int h_pad = h * dH - pH + h_offset * dilationH;
|
||||
int w_pad = w * dW - pW + w_offset * dilationW;
|
||||
if (t_pad >= 0 && t_pad < depth &&
|
||||
h_pad >= 0 && h_pad < height &&
|
||||
w_pad >= 0 && w_pad < width)
|
||||
data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad] +=
|
||||
data_col[((c * depth_col + t) * height_col + h) * width_col + w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricFullConvolution_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input, // 4D or 5D (batch) tensor
|
||||
THTensor *output,
|
||||
THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
|
||||
THTensor *bias,
|
||||
THTensor *finput, // internal columns buffer
|
||||
THTensor *fgradInput, // internal ones buffer
|
||||
int dT, int dW, int dH, // stride of the convolution
|
||||
int pT, int pW, int pH, // padding
|
||||
int aT, int aW, int aH) // extra output adjustment
|
||||
{
|
||||
THTensor *columns = finput;
|
||||
THTensor *ones = fgradInput;
|
||||
|
||||
// the number of input & output planes and the kernel size are indirectly defined by the weight tensor
|
||||
THArgCheck(weight->nDimension == 5, 4,
|
||||
"5D weight tensor is expected (nInputPlane x nOutputPlane x kT x kH x kW)"
|
||||
);
|
||||
|
||||
const int nInputPlane = (int)weight->size[0];
|
||||
const int nOutputPlane = (int)weight->size[1];
|
||||
const int kT = (int)weight->size[2];
|
||||
const int kH = (int)weight->size[3];
|
||||
const int kW = (int)weight->size[4];
|
||||
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
|
||||
"4D or 5D (batch mode) tensor is expected"
|
||||
);
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
|
||||
}
|
||||
|
||||
const long inputWidth = input->size[4];
|
||||
const long inputHeight = input->size[3];
|
||||
const long inputDepth = input->size[2];
|
||||
const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
|
||||
const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
|
||||
const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
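  // Transposed-convolution output size: the inverse of the forward relation
  // o = (i + 2*p - k)/d + 1, with the aT/aH/aW adjustment disambiguating
  // between the forward input sizes that floor division maps to the same
  // output (typically chosen in [0, stride)).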
|
||||
|
||||
// Batch size + input planes
|
||||
const long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
|
||||
THTensor_(zero)(columns);
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
// Note: this buffer can be shared with other modules, it only ever gets increased,
|
||||
// and always contains ones.
|
||||
if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth)
|
||||
{
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *output_n = THTensor_(new)();
|
||||
|
||||
int elt;
|
||||
// For each elt in batch, do:
|
||||
for (elt = 0; elt < batchSize; ++elt)
|
||||
{
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(output_n, output, 0, elt);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
const long m = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
|
||||
const long n = columns->size[1];
|
||||
const long k = weight->size[0];
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 't',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(input_n), n,
|
||||
THTensor_(data)(weight), m,
|
||||
0,
|
||||
THTensor_(data)(columns), n
|
||||
);
|
||||
|
||||
// Unpack columns back into input:
|
||||
THNN_(col2vol)(
|
||||
THTensor_(data)(columns),
|
||||
nOutputPlane, outputDepth, outputHeight, outputWidth,
|
||||
kT, kH, kW,
|
||||
pT, pH, pW,
|
||||
dT, dH, dW,
|
||||
1, 1, 1,
|
||||
THTensor_(data)(output_n)
|
||||
);
|
||||
|
||||
// Do Bias after:
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
const long m_ = nOutputPlane;
|
||||
const long n_ = outputDepth * outputHeight * outputWidth;
|
||||
const long k_ = 1;
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n_, m_, k_,
|
||||
1,
|
||||
THTensor_(data)(ones), k_,
|
||||
THTensor_(data)(bias), k_,
|
||||
1,
|
||||
THTensor_(data)(output_n), n_
|
||||
);
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(output_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0)
|
||||
{
|
||||
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricFullConvolution_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *weight,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput, // only used by cuda impl
|
||||
int dT, int dW, int dH, // stride
|
||||
int pT, int pW, int pH, // padding
|
||||
int aT, int aW, int aH) // extra output adjustment
|
||||
{
|
||||
THTensor *gradColumns = finput;
|
||||
|
||||
// the number of input & output planes and the kernel size are indirectly defined by the weight tensor
|
||||
THArgCheck(weight->nDimension == 5, 4,
|
||||
"5D weight tensor is expected (nInputPlane x nOutputPlane x kT x kH x kW)"
|
||||
);
|
||||
|
||||
const int nInputPlane = (int)weight->size[0];
|
||||
const int nOutputPlane = (int)weight->size[1];
|
||||
const int kT = (int)weight->size[2];
|
||||
const int kH = (int)weight->size[3];
|
||||
const int kW = (int)weight->size[4];
|
||||
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
|
||||
"4D or 5D (batch mode) tensor is expected"
|
||||
);
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
|
||||
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
|
||||
}
|
||||
|
||||
const long inputWidth = input->size[4];
|
||||
const long inputHeight = input->size[3];
|
||||
const long inputDepth = input->size[2];
|
||||
const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
|
||||
const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
|
||||
const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
|
||||
|
||||
// Batch size + input planes
|
||||
const long batchSize = input->size[0];
|
||||
|
||||
// Resize output
|
||||
THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
|
||||
|
||||
// Helpers
|
||||
THTensor *gradInput_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
int elt;
|
||||
// For each elt in batch, do:
|
||||
for (elt = 0; elt < batchSize; ++elt)
|
||||
{
|
||||
// Matrix multiply per sample:
|
||||
THTensor_(select)(gradInput_n, gradInput, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// Extract columns:
|
||||
THNN_(vol2col)(
|
||||
THTensor_(data)(gradOutput_n),
|
||||
nOutputPlane, outputDepth, outputHeight, outputWidth,
|
||||
kT, kH, kW,
|
||||
pT, pH, pW,
|
||||
dT, dH, dW,
|
||||
1, 1, 1,
|
||||
THTensor_(data)(gradColumns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
const long m = weight->size[0];
|
||||
const long n = gradColumns->size[1];
|
||||
const long k = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
'n', 'n',
|
||||
n, m, k,
|
||||
1,
|
||||
THTensor_(data)(gradColumns), n,
|
||||
THTensor_(data)(weight), k,
|
||||
0,
|
||||
THTensor_(data)(gradInput_n), n
|
||||
);
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(gradInput_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize output
|
||||
if (batch == 0)
|
||||
{
|
||||
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricFullConvolution_accGradParameters)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradWeight,
|
||||
THTensor *gradBias,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput,
|
||||
int dT, int dW, int dH, // stride
|
||||
int pT, int pW, int pH, // padding
|
||||
int aT, int aW, int aH, // extra output adjustment
|
||||
real scale)
|
||||
{
|
||||
// the number of input & output planes and the kernel size are indirectly defined by the gradWeight tensor
|
||||
THArgCheck(gradWeight->nDimension == 5, 4,
|
||||
"5D gradWeight tensor is expected (nInputPlane x nOutputPlane x kT x kH x kW)"
|
||||
);
|
||||
|
||||
int nInputPlane = (int)gradWeight->size[0];
|
||||
int nOutputPlane = (int)gradWeight->size[1];
|
||||
int kT = (int)gradWeight->size[2];
|
||||
int kH = (int)gradWeight->size[3];
|
||||
int kW = (int)gradWeight->size[4];
|
||||
|
||||
THTensor *columns = finput;
|
||||
THTensor *ones = fgradInput;
|
||||
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
|
||||
"4D or 5D (batch mode) tensor is expected"
|
||||
);
|
||||
|
||||
int batch = 1;
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
// Force batch
|
||||
batch = 0;
|
||||
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
|
||||
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
|
||||
}
|
||||
|
||||
const long inputWidth = input->size[4];
|
||||
const long inputHeight = input->size[3];
|
||||
const long inputDepth = input->size[2];
|
||||
const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
|
||||
const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
|
||||
const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
|
||||
|
||||
// Batch size + input planes
|
||||
const long batchSize = input->size[0];
|
||||
|
||||
// Define a buffer of ones, for bias accumulation
|
||||
if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth)
|
||||
{
|
||||
// Resize plane and fill with ones...
|
||||
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(fill)(ones, 1);
|
||||
}
|
||||
|
||||
// Resize temporary columns
|
||||
THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
|
||||
|
||||
// Helpers
|
||||
THTensor *input_n = THTensor_(new)();
|
||||
THTensor *gradOutput_n = THTensor_(new)();
|
||||
|
||||
int elt;
|
||||
// For each elt in batch, do:
|
||||
for (elt = 0; elt < batchSize; ++elt)
|
||||
{
|
||||
// Matrix multiply per output:
|
||||
THTensor_(select)(input_n, input, 0, elt);
|
||||
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
|
||||
|
||||
// Extract columns:
|
||||
THNN_(vol2col)(
|
||||
THTensor_(data)(gradOutput_n), nOutputPlane,
|
||||
outputDepth, outputHeight, outputWidth,
|
||||
kT, kH, kW,
|
||||
pT, pH, pW,
|
||||
dT, dH, dW,
|
||||
1, 1, 1,
|
||||
THTensor_(data)(columns)
|
||||
);
|
||||
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
const long n = columns->size[0]; // nOutputPlane * kt * kh * kw
|
||||
const long m = input_n->size[0]; // nInputPlane
|
||||
const long k = columns->size[1]; // inputHeight * inputWidth
|
||||
|
||||
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
|
||||
THBlas_(gemm)(
|
||||
't', 'n',
|
||||
n, m, k,
|
||||
scale,
|
||||
THTensor_(data)(columns), k,
|
||||
THTensor_(data)(input_n), k,
|
||||
1,
|
||||
THTensor_(data)(gradWeight), n
|
||||
);
|
||||
|
||||
// Do Bias:
|
||||
// M,N,K are dims of matrix A and B
|
||||
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
|
||||
const long m_ = nOutputPlane;
|
||||
const long k_ = outputDepth * outputHeight * outputWidth;
|
||||
|
||||
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
|
||||
THBlas_(gemv)(
|
||||
't',
|
||||
k_, m_,
|
||||
scale,
|
||||
THTensor_(data)(gradOutput_n), k_,
|
||||
THTensor_(data)(ones), 1,
|
||||
1,
|
||||
THTensor_(data)(gradBias), 1
|
||||
);
|
||||
}
|
||||
|
||||
// Free
|
||||
THTensor_(free)(input_n);
|
||||
THTensor_(free)(gradOutput_n);
|
||||
|
||||
// Resize
|
||||
if (batch == 0)
|
||||
{
|
||||
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
|
||||
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
392
torch/lib/THNN/generic/VolumetricMaxPooling.c
Normal file
@@ -0,0 +1,392 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
|
||||
#else
|
||||
|
||||
static void THNN_(VolumetricMaxPooling_updateOutput_frame)(
|
||||
real *input_p,
|
||||
real *output_p,
|
||||
real *indz_p,
|
||||
long nslices,
|
||||
long itime,
|
||||
long iwidth,
|
||||
long iheight,
|
||||
long otime,
|
||||
long owidth,
|
||||
long oheight,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
/* loop over output */
|
||||
long i, j, ti;
|
||||
for (ti = 0; ti < otime; ti++)
|
||||
{
|
||||
for (i = 0; i < oheight; i++)
|
||||
{
|
||||
for (j = 0; j < owidth; j++)
|
||||
{
|
||||
/* local pointers */
|
||||
|
||||
long start_t = ti * dT - pT;
|
||||
long start_h = i * dH - pH;
|
||||
long start_w = j * dW - pW;
|
||||
|
||||
long kernel_t = fminf(kT, kT + start_t);
|
||||
long kernel_h = fminf(kH, kH + start_h);
|
||||
long kernel_w = fminf(kW, kW + start_w);
|
||||
|
||||
start_t = fmaxf(start_t, 0);
|
||||
start_h = fmaxf(start_h, 0);
|
||||
start_w = fmaxf(start_w, 0);
|
||||
|
||||
real *ip = input_p + k * itime * iwidth * iheight
|
||||
+ start_t * iwidth * iheight + start_h * iwidth + start_w;
|
||||
real *op = output_p + k * otime * owidth * oheight
|
||||
+ ti * owidth * oheight + i * owidth + j;
|
||||
real *indzp = indz_p + k * otime * owidth * oheight
|
||||
+ ti * owidth * oheight + i * owidth + j;
|
||||
|
||||
/* compute local max: */
|
||||
real maxval = -THInf;
|
||||
int x,y,z;
|
||||
int mx, my, mz;
|
||||
|
||||
for (z = 0; z < kernel_t; z++)
|
||||
{
|
||||
for (y = 0; y < kernel_h; y++)
|
||||
{
|
||||
for (x = 0; x < kernel_w; x++)
|
||||
{
|
||||
if ((start_t + z < itime) && (start_h + y < iheight) && (start_w + x < iwidth))
|
||||
{
|
||||
real val = *(ip + z * iwidth * iheight + y * iwidth + x);
|
||||
if (val > maxval)
|
||||
{
|
||||
maxval = val;
|
||||
// Store indices w.r.t the kernel dimension
|
||||
mz = z + (kT - kernel_t);
|
||||
my = y + (kH - kernel_h);
|
||||
mx = x + (kW - kernel_w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// set max values
|
||||
((unsigned char*)(indzp))[0] = mz;
|
||||
((unsigned char*)(indzp))[1] = my;
|
||||
((unsigned char*)(indzp))[2] = mx;
|
||||
((unsigned char*)(indzp))[3] = 0;
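          /* The kernel-relative argmax offsets are packed into the first three
             bytes of this output real; updateGradInput unpacks them the same
             way, so each offset must fit in an unsigned char. */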
|
||||
|
||||
/* set output to local max */
|
||||
*op = maxval;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricMaxPooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *indices,
|
||||
int kT,
|
||||
int kW,
|
||||
int kH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH,
|
||||
bool ceilMode)
|
||||
{
|
||||
long nslices;
|
||||
long itime;
|
||||
long iheight;
|
||||
long iwidth;
|
||||
long otime;
|
||||
long oheight;
|
||||
long owidth;
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
real *indices_data;
|
||||
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
|
||||
"4D or 5D (batch-mode) tensor expected"
|
||||
);
|
||||
|
||||
int dimN = 0;
|
||||
int dimt = 1;
|
||||
int dimh = 2;
|
||||
int dimw = 3;
|
||||
|
||||
if (input->nDimension == 5)
|
||||
{
|
||||
dimN++;
|
||||
dimt++;
|
||||
dimh++;
|
||||
dimw++;
|
||||
}
|
||||
|
||||
THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
|
||||
"input image smaller than kernel size"
|
||||
);
|
||||
|
||||
THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
|
||||
"pad should be smaller than half of kernel size"
|
||||
);
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimN];
|
||||
itime = input->size[dimt];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
if (ceilMode)
|
||||
{
|
||||
otime = (int)(ceil((float)(itime - kT + 2 * pT) / dT) + 1);
|
||||
oheight = (int)(ceil((float)(iheight - kH + 2 * pH) / dH) + 1);
|
||||
owidth = (int)(ceil((float)(iwidth - kW + 2 * pW) / dW) + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
otime = (int)(floor((float)(itime - kT + 2 * pT) / dT) + 1);
|
||||
oheight = (int)(floor((float)(iheight - kH + 2 * pH) / dH) + 1);
|
||||
owidth = (int)(floor((float)(iwidth - kW + 2 * pW) / dW) + 1);
|
||||
}
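  /* Output size is (i - k + 2*p)/d + 1 with ceil or floor rounding depending on
     ceilMode; the padding check below then shrinks the output so the last
     pooling window still starts inside the image. */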
|
||||
|
||||
if (pT || pW || pH)
|
||||
{
|
||||
// ensure that the last pooling starts inside the image
|
||||
if ((otime - 1)*dT >= itime + pT)
|
||||
--otime;
|
||||
if ((oheight - 1)*dH >= iheight + pH)
|
||||
--oheight;
|
||||
if ((owidth - 1)*dW >= iwidth + pW)
|
||||
--owidth;
|
||||
}
|
||||
|
||||
/* get contiguous input */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
|
||||
if (input->nDimension == 4) /* non-batch mode */
|
||||
{
|
||||
/* resize output */
|
||||
THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
|
||||
/* indices will contain ti,i,j uchar locations packed into float/double */
|
||||
THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
THNN_(VolumetricMaxPooling_updateOutput_frame)(
|
||||
input_data, output_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
itime, iwidth, iheight,
|
||||
otime, owidth, oheight,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
}
|
||||
else /* batch mode */
|
||||
{
|
||||
long p;
|
||||
long nBatch = input->size[0];
|
||||
|
||||
long istride = nslices * itime * iwidth * iheight;
|
||||
long ostride = nslices * otime * owidth * oheight;
|
||||
|
||||
/* resize output */
|
||||
THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
|
||||
/* indices will contain ti,i,j locations for each output point */
|
||||
THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p=0; p < nBatch; p++)
|
||||
{
|
||||
THNN_(VolumetricMaxPooling_updateOutput_frame)(
|
||||
input_data + p * istride,
|
||||
output_data + p * ostride,
|
||||
indices_data + p * ostride,
|
||||
nslices,
|
||||
itime, iwidth, iheight,
|
||||
otime, owidth, oheight,
|
||||
kT, kW, kH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
}
|
||||
|
||||
static void THNN_(VolumetricMaxPooling_updateGradInput_frame)(
|
||||
real *gradInput_p,
|
||||
real *gradOutput_p,
|
||||
real *indz_p,
|
||||
long nslices,
|
||||
long itime,
|
||||
long iwidth,
|
||||
long iheight,
|
||||
long otime,
|
||||
long owidth,
|
||||
long oheight,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight;
|
||||
real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
|
||||
real *indz_p_k = indz_p + k * otime * owidth * oheight;
|
||||
|
||||
/* calculate max points */
|
||||
long ti, i, j;
|
||||
for (ti = 0; ti < otime; ti++)
|
||||
{
|
||||
for (i = 0; i < oheight; i++)
|
||||
{
|
||||
for (j = 0; j < owidth; j++)
|
||||
{
|
||||
/* retrieve position of max */
|
||||
real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
|
||||
long maxti = ((unsigned char*)(indzp))[0] + ti * dT - pT;
|
||||
long maxi = ((unsigned char*)(indzp))[1] + i * dH - pH;
|
||||
long maxj = ((unsigned char*)(indzp))[2] + j * dW - pW;
|
||||
|
||||
/* update gradient */
|
||||
gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
|
||||
gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricMaxPooling_updateGradInput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *gradOutput,
|
||||
THTensor *gradInput,
|
||||
THTensor *indices,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
int nslices;
|
||||
int itime;
|
||||
int iheight;
|
||||
int iwidth;
|
||||
int otime;
|
||||
int oheight;
|
||||
int owidth;
|
||||
real *gradInput_data;
|
||||
real *gradOutput_data;
|
||||
real *indices_data;
|
||||
|
||||
int dimN = 0;
|
||||
int dimt = 1;
|
||||
int dimh = 2;
|
||||
int dimw = 3;
|
||||
|
||||
/* get contiguous gradOutput */
|
||||
gradOutput = THTensor_(newContiguous)(gradOutput);
|
||||
|
||||
/* resize */
|
||||
THTensor_(resizeAs)(gradInput, input);
|
||||
THTensor_(zero)(gradInput);
|
||||
|
||||
if (input->nDimension == 5)
|
||||
{
|
||||
dimN++;
|
||||
dimt++;
|
||||
dimh++;
|
||||
dimw++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimN];
|
||||
itime = input->size[dimt];
|
||||
iheight = input->size[dimh];
|
||||
iwidth = input->size[dimw];
|
||||
otime = gradOutput->size[dimt];
|
||||
oheight = gradOutput->size[dimh];
|
||||
owidth = gradOutput->size[dimw];
|
||||
|
||||
/* get raw pointers */
|
||||
gradInput_data = THTensor_(data)(gradInput);
|
||||
gradOutput_data = THTensor_(data)(gradOutput);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
/* backprop */
|
||||
if (input->nDimension == 4) /* non-batch mode*/
|
||||
{
|
||||
THNN_(VolumetricMaxPooling_updateGradInput_frame)(
|
||||
gradInput_data, gradOutput_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
itime, iwidth, iheight,
|
||||
otime, owidth, oheight,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
}
|
||||
else /* batch mode */
|
||||
{
|
||||
long p;
|
||||
long nBatch = input->size[0];
|
||||
|
||||
long istride = nslices * itime * iwidth * iheight;
|
||||
long ostride = nslices * otime * owidth * oheight;
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nBatch; p++)
|
||||
{
|
||||
THNN_(VolumetricMaxPooling_updateGradInput_frame)(
|
||||
gradInput_data + p * istride,
|
||||
gradOutput_data + p * ostride,
|
||||
indices_data + p * ostride,
|
||||
nslices,
|
||||
itime, iwidth, iheight,
|
||||
otime, owidth, oheight,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(gradOutput);
|
||||
}
|
||||
|
||||
#endif
|
||||
325
torch/lib/THNN/generic/VolumetricMaxUnpooling.c
Normal file
@@ -0,0 +1,325 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "generic/VolumetricMaxUnpooling.c"
|
||||
#else
|
||||
|
||||
static void THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
|
||||
real *input_p,
|
||||
real *output_p,
|
||||
real *ind_p,
|
||||
long nslices,
|
||||
long iT,
|
||||
long iW,
|
||||
long iH,
|
||||
long oT,
|
||||
long oW,
|
||||
long oH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
long k;
|
||||
#pragma omp parallel for private(k)
|
||||
for (k = 0; k < nslices; k++)
|
||||
{
|
||||
long ti, i, j, maxz, maxy, maxx;
|
||||
for (ti = 0; ti < iT; ti++)
|
||||
{
|
||||
for (i = 0; i < iH; i++)
|
||||
{
|
||||
for (j = 0; j < iW; j++)
|
||||
{
|
||||
long start_t = ti * dT - pT;
|
||||
long start_h = i * dH - pH;
|
||||
long start_w = j * dW - pW;
|
||||
|
||||
//real *output_p_k = output_p + k*oT*oW*oH + ti*oW*oH*dT + i*oW*dH + j*dW;
|
||||
real *input_p_k = input_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
|
||||
real *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
|
||||
|
||||
maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */
|
||||
maxy = ((unsigned char*)(ind_p_k))[1];
|
||||
maxx = ((unsigned char*)(ind_p_k))[2];
|
||||
|
||||
if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0 || start_t+maxz>=oT || start_h+maxy>=oH || start_w+maxx>=oW)
|
||||
{
|
||||
THError(
|
||||
"invalid max index z= %d, y= %d, x= %d, oT= %d, oW= %d, oH= %d",
|
||||
start_t+maxz, start_h+maxy, start_w+maxx, oT, oW, oH
|
||||
);
|
||||
}
|
||||
output_p[k*oT*oW*oH + oH*oW*(start_t+maxz) + oW*(start_h+maxy) + (start_w+maxx)] = *input_p_k; /* update output */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void THNN_(VolumetricMaxUnpooling_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
THTensor *output,
|
||||
THTensor *indices,
|
||||
int oT,
|
||||
int oW,
|
||||
int oH,
|
||||
int dT,
|
||||
int dW,
|
||||
int dH,
|
||||
int pT,
|
||||
int pW,
|
||||
int pH)
|
||||
{
|
||||
int dimw = 3;
|
||||
int dimh = 2;
|
||||
int dimt = 1;
|
||||
int nbatch = 1;
|
||||
int nslices;
|
||||
int iT;
|
||||
int iH;
|
||||
int iW;
|
||||
real *input_data;
|
||||
real *output_data;
|
||||
real *indices_data;
|
||||
|
||||
THArgCheck(input->nDimension == 4 || input->nDimension == 5 , 2,
|
||||
"4D or 5D (batch mode) tensor expected"
|
||||
);
|
||||
|
||||
if (!THTensor_(isSameSizeAs)(input, indices))
|
||||
{
|
||||
THError("Invalid input size w.r.t current indices size");
|
||||
}
|
||||
|
||||
if (input->nDimension == 5)
|
||||
{
|
||||
nbatch = input->size[0];
|
||||
dimt++;
|
||||
dimw++;
|
||||
dimh++;
|
||||
}
|
||||
|
||||
/* sizes */
|
||||
nslices = input->size[dimt-1];
|
||||
iT = input->size[dimt];
|
||||
iH = input->size[dimh];
|
||||
iW = input->size[dimw];
|
||||
|
||||
/* get contiguous input */
|
||||
input = THTensor_(newContiguous)(input);
|
||||
indices = THTensor_(newContiguous)(indices);
|
||||
|
||||
/* resize output */
|
||||
if (input->nDimension == 4)
|
||||
{
|
||||
THTensor_(resize4d)(output, nslices, oT, oH, oW);
|
||||
THTensor_(zero)(output);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
|
||||
input_data, output_data,
|
||||
indices_data,
|
||||
nslices,
|
||||
iT, iW, iH,
|
||||
oT, oW, oH,
|
||||
dT, dW, dH, pT, pW, pH
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
long p;
|
||||
|
||||
THTensor_(resize5d)(output, nbatch, nslices, oT, oH, oW);
|
||||
THTensor_(zero)(output);
|
||||
|
||||
input_data = THTensor_(data)(input);
|
||||
output_data = THTensor_(data)(output);
|
||||
indices_data = THTensor_(data)(indices);
|
||||
|
||||
#pragma omp parallel for private(p)
|
||||
for (p = 0; p < nbatch; p++)
|
||||
{
|
||||
THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
|
||||
input_data+p*nslices*iT*iW*iH,
|
||||
output_data+p*nslices*oT*oW*oH,
|
||||
indices_data+p*nslices*iT*iW*iH,
|
||||
nslices,
|
||||
iT, iW, iH,
|
||||
oT, oW, oH,
|
||||
dT, dW, dH,
|
||||
pT, pW, pH
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
THTensor_(free)(input);
|
||||
THTensor_(free)(indices);
|
||||
}
|
||||
|
||||
static void THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
          real *gradInput_p,
          real *gradOutput_p,
          real *ind_p,
          long nslices,
          long iT,
          long iW,
          long iH,
          long oT,
          long oW,
          long oH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    long ti, i, j, maxz, maxy, maxx;
    for (ti = 0; ti < iT; ti++)
    {
      for (i = 0; i < iH; i++)
      {
        for (j = 0; j < iW; j++)
        {
          long start_t = ti * dT - pT;
          long start_h = i * dH - pH;
          long start_w = j * dW - pW;

          real *gradInput_p_k = gradInput_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
          //real *gradOutput_p_k = gradOutput_p + k*oT*oW*oH + ti*oW*oH*dT + i*oW*dH + j*dW;
          real *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;

          maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */
          maxy = ((unsigned char*)(ind_p_k))[1];
          maxx = ((unsigned char*)(ind_p_k))[2];

          if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0
              || start_t+maxz>=oT || start_h+maxy>=oH || start_w+maxx>=oW)
          {
            THError(
              "invalid max index z= %ld, y= %ld, x= %ld, oT= %ld, oW= %ld, oH= %ld",
              start_t+maxz, start_h+maxy, start_w+maxx, oT, oW, oH
            );
          }
          *gradInput_p_k = gradOutput_p[k*oT*oW*oH + oH*oW*(start_t+maxz) + oW*(start_h+maxy) + (start_w+maxx)]; /* update gradient */
        }
      }
    }
  }
}

void THNN_(VolumetricMaxUnpooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *indices,
          int oT,
          int oW,
          int oH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH)
{
  int dimw = 3;
  int dimh = 2;
  int dimt = 1;
  int nbatch = 1;
  int nslices;
  int iT;
  int iH;
  int iW;
  real *gradInput_data;
  real *gradOutput_data;
  real *indices_data;

  if (!THTensor_(isSameSizeAs)(input, indices))
  {
    THError("Invalid input size w.r.t current indices size");
  }

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  indices = THTensor_(newContiguous)(indices);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5)
  {
    nbatch = input->size[0];
    dimt++;
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimt-1];
  iT = input->size[dimt];
  iH = input->size[dimh];
  iW = input->size[dimw];

  if (oT != gradOutput->size[dimt] || oW != gradOutput->size[dimw] || oH != gradOutput->size[dimh])
  {
    THError(
      "Inconsistent gradOutput size. oT= %d, oH= %d, oW= %d, gradOutput: %ldx%ld",
      oT, oH, oW, gradOutput->size[dimh], gradOutput->size[dimw]
    );
  }

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 4)
  {
    THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data,
      indices_data,
      nslices,
      iT, iW, iH,
      oT, oW, oH,
      dT, dW, dH,
      pT, pW, pH
    );
  }
  else
  {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
        gradInput_data+p*nslices*iT*iW*iH,
        gradOutput_data+p*nslices*oT*oW*oH,
        indices_data+p*nslices*iT*iW*iH,
        nslices,
        iT, iW, iH,
        oT, oW, oH,
        dT, dW, dH,
        pT, pW, pH
      );
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
  THTensor_(free)(indices);
}

#endif
301
torch/lib/THNN/generic/VolumetricReplicationPadding.c
Normal file
@@ -0,0 +1,301 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c"
#else
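/* note: the kernels in this file implement edge replication by clamping each
   output coordinate back to the nearest valid input coordinate. Along the
   width, with non-negative padding, the three-way branch below reduces to
   ip_x = clamp(j, pleft, iwidth + pleft - 1) - pleft; the iStart/oStart
   offsets extend the same formula to negative (cropping) padding. */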
static void THNN_(VolumetricReplicationPadding_updateOutput_frame)(
  real *input_p, real *output_p,
  long nslices,
  long iwidth, long iheight, long idepth,
  long owidth, long oheight, long odepth,
  int pleft, int pright,
  int ptop, int pbottom,
  int pfront, int pback)
{
  int iStartX = fmax(0, -pleft);
  int iStartY = fmax(0, -ptop);
  int iStartZ = fmax(0, -pfront);
  int oStartX = fmax(0, pleft);
  int oStartY = fmax(0, ptop);
  int oStartZ = fmax(0, pfront);

  long k, ip_x, ip_y, ip_z;
#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
  for (k = 0; k < nslices; k++) {
    long i, j, z;
    for (z = 0; z < odepth; z++) {
      for (i = 0; i < oheight; i++) {
        for (j = 0; j < owidth; j++) {
          if (j < pleft) {
            ip_x = pleft;
          } else if (j >= pleft && j < iwidth + pleft) {
            ip_x = j;
          } else {
            ip_x = iwidth + pleft - 1;
          }
          ip_x = ip_x - oStartX + iStartX;

          if (i < ptop) {
            ip_y = ptop;
          } else if (i >= ptop && i < iheight + ptop) {
            ip_y = i;
          } else {
            ip_y = iheight + ptop - 1;
          }
          ip_y = ip_y - oStartY + iStartY;

          if (z < pfront) {
            ip_z = pfront;
          } else if (z >= pfront && z < idepth + pfront) {
            ip_z = z;
          } else {
            ip_z = idepth + pfront - 1;
          }
          ip_z = ip_z - oStartZ + iStartZ;

          real *dest_p = output_p + k * owidth * oheight * odepth +
                         z * owidth * oheight + i * owidth + j;
          real *src_p = input_p + k * iwidth * iheight * idepth +
                        ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
          *dest_p = *src_p;
        }
      }
    }
  }
}
void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state,
                                                      THTensor *input,
                                                      THTensor *output,
                                                      int pleft, int pright,
                                                      int ptop, int pbottom,
                                                      int pfront, int pback)
{
  int dimw = 3;
  int dimh = 2;
  int dimd = 1;
  int dimslices = 0;
  long nbatch = 1;
  long nslices;
  long idepth;
  long iheight;
  long iwidth;
  long odepth;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;

  THArgCheck(input->nDimension == 4 || input->nDimension == 5,
             2, "input must be 4 or 5-dimensional");

  if (input->nDimension == 5)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
    dimd++;
    dimslices++;
  }

  /* sizes */
  nslices = input->size[dimslices];
  idepth = input->size[dimd];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  odepth = idepth + pfront + pback;
  oheight = iheight + ptop + pbottom;
  owidth = iwidth + pleft + pright;

  THArgCheck(owidth >= 1 || oheight >= 1 || odepth >= 1, 2,
             "input is too small");

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  /* resize output */
  if (input->nDimension == 4)
  {
    THTensor_(resize4d)(output, nslices, odepth, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

    THNN_(VolumetricReplicationPadding_updateOutput_frame)(
      input_data, output_data, nslices, iwidth, iheight, idepth,
      owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront,
      pback);
  }
  else
  {
    long p;

    THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(VolumetricReplicationPadding_updateOutput_frame)(
        input_data + p * nslices * iwidth * iheight * idepth,
        output_data + p * nslices * owidth * oheight * odepth,
        nslices,
        iwidth, iheight, idepth,
        owidth, oheight, odepth,
        pleft, pright,
        ptop, pbottom,
        pfront, pback);
    }
  }

  /* cleanup */
  THTensor_(free)(input);
}
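/* note: the backward kernel below reuses the forward output->input mapping
   but accumulates (*dest_p += *src_p) instead of assigning, since with
   replication several output positions can map to the same border input
   element and their gradients must be summed. */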
static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
  real *ginput_p, real *goutput_p,
  long nslices,
  long iwidth, long iheight, long idepth,
  long owidth, long oheight, long odepth,
  int pleft, int pright,
  int ptop, int pbottom,
  int pfront, int pback)
{
  int iStartX = fmax(0, -pleft);
  int iStartY = fmax(0, -ptop);
  int iStartZ = fmax(0, -pfront);
  int oStartX = fmax(0, pleft);
  int oStartY = fmax(0, ptop);
  int oStartZ = fmax(0, pfront);

  long k, ip_x, ip_y, ip_z;
#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
  for (k = 0; k < nslices; k++) {
    long i, j, z;
    for (z = 0; z < odepth; z++) {
      for (i = 0; i < oheight; i++) {
        for (j = 0; j < owidth; j++) {
          if (j < pleft) {
            ip_x = pleft;
          } else if (j >= pleft && j < iwidth + pleft) {
            ip_x = j;
          } else {
            ip_x = iwidth + pleft - 1;
          }
          ip_x = ip_x - oStartX + iStartX;

          if (i < ptop) {
            ip_y = ptop;
          } else if (i >= ptop && i < iheight + ptop) {
            ip_y = i;
          } else {
            ip_y = iheight + ptop - 1;
          }
          ip_y = ip_y - oStartY + iStartY;

          if (z < pfront) {
            ip_z = pfront;
          } else if (z >= pfront && z < idepth + pfront) {
            ip_z = z;
          } else {
            ip_z = idepth + pfront - 1;
          }
          ip_z = ip_z - oStartZ + iStartZ;

          real *src_p = goutput_p + k * owidth * oheight * odepth +
                        z * owidth * oheight + i * owidth + j;
          real *dest_p = ginput_p + k * iwidth * iheight * idepth +
                         ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
          *dest_p += *src_p;
        }
      }
    }
  }
}

void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state,
                                                         THTensor *input,
                                                         THTensor *gradOutput,
                                                         THTensor *gradInput,
                                                         int pleft, int pright,
                                                         int ptop, int pbottom,
                                                         int pfront, int pback)
{
  int dimw = 3;
  int dimh = 2;
  int dimd = 1;
  int dimslices = 0;
  long nbatch = 1;
  long nslices;
  long idepth;
  long iheight;
  long iwidth;
  long odepth;
  long oheight;
  long owidth;

  if (input->nDimension == 5)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
    dimd++;
    dimslices++;
  }

  /* sizes */
  nslices = input->size[dimslices];
  idepth = input->size[dimd];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  odepth = idepth + pfront + pback;
  oheight = iheight + ptop + pbottom;
  owidth = iwidth + pleft + pright;

  THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
             "gradOutput width unexpected");
  THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
             "gradOutput height unexpected");
  THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3,
             "gradOutput depth unexpected");

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* backprop */
  if (input->nDimension == 4) {
    THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
      THTensor_(data)(gradInput),
      THTensor_(data)(gradOutput),
      nslices,
      iwidth, iheight, idepth,
      owidth, oheight, odepth,
      pleft, pright,
      ptop, pbottom,
      pfront, pback);
  } else {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++) {
      THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
        THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth,
        THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth,
        nslices,
        iwidth, iheight, idepth,
        owidth, oheight, odepth,
        pleft, pright,
        ptop, pbottom,
        pfront, pback);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}

#endif
158
torch/lib/THNN/generic/unfold.c
Normal file
@@ -0,0 +1,158 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/unfold.c"
#else

#ifdef _WIN32
# include <windows.h>
#endif

/* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */
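/* note: unfolded_acc is the accumulation adjoint of unfolded_copy below
   (a col2im-style scatter): each row of finput is added back into the input
   image at the location its (plane, kh, kw) tap was read from, so overlapping
   taps accumulate into the same input element. That overlap is the write
   hazard referred to above, which is why parallelism here is only over input
   planes. */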
void THNN_(unfolded_acc)(
          THTensor *finput,
          THTensor *input,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int nInputPlane,
          int inputWidth,
          int inputHeight,
          int outputWidth,
          int outputHeight)
{
#ifdef _WIN32
  LONG_PTR nip;
#else
  size_t nip;
#endif

  real *input_data = THTensor_(data)(input);
  real *finput_data = THTensor_(data)(finput);

#pragma omp parallel for private(nip)
  for(nip = 0; nip < nInputPlane; nip++)
  {
    size_t kw, kh, y, x;
    long long ix = 0, iy = 0;
    for(kh = 0; kh < kH; kh++)
    {
      for(kw = 0; kw < kW; kw++)
      {
        real *src = finput_data + nip*(kH*kW*outputHeight*outputWidth) + kh*(kW*outputHeight*outputWidth) + kw*(outputHeight*outputWidth);
        real *dst = input_data + nip*(inputHeight*inputWidth);
        if (padW > 0 || padH > 0) {
          size_t lpad, rpad;
          for(y = 0; y < outputHeight; y++) {
            iy = (long long)(y*dH - padH + kh);
            if (iy < 0 || iy >= inputHeight) {
            } else {
              if (dW == 1) {
                ix = (long long)(0 - padW + kw);
                lpad = fmaxf(0,(int)(padW-kw));
                rpad = fmaxf(0,(int)(padW-(kW-kw-1)));
                THVector_(add)(dst+(size_t)(iy*inputWidth+ix+lpad), src+(size_t)(y*outputWidth+lpad), 1, outputWidth - lpad - rpad); /* note: THVector_add could handle 1 value better */
              }
              else {
                for (x=0; x<outputWidth; x++){
                  ix = (long long)(x*dW - padW + kw);
                  if (ix < 0 || ix >= inputWidth){
                  } else
                    THVector_(add)(dst+(size_t)(iy*inputWidth+ix), src+(size_t)(y*outputWidth+x), 1, 1);
                }
              }
            }
          }
        } else {
          for(y = 0; y < outputHeight; y++) {
            iy = (long long)(y*dH + kh);
            ix = (long long)(0 + kw);
            if (dW == 1)
              THVector_(add)(dst+(size_t)(iy*inputWidth+ix), src+(size_t)(y*outputWidth), 1, outputWidth); /* note: THVector_add could handle 1 value better */
            else {
              for(x = 0; x < outputWidth; x++)
                THVector_(add)(dst+(size_t)(iy*inputWidth+ix+x*dW), src+(size_t)(y*outputWidth+x), 1, 1);
            }
          }
        }
      }
    }
  }
}
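/* note: unfolded_copy is the im2col step used by the *MM convolution
   implementations: finput is laid out as nInputPlane*kH*kW rows of
   outputHeight*outputWidth values, one row per (plane, kh, kw) kernel tap,
   holding the input value that tap sees at every output position, with zeros
   written wherever the tap falls into the padding. */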
void THNN_(unfolded_copy)(
          THTensor *finput,
          THTensor *input,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int nInputPlane,
          int inputWidth,
          int inputHeight,
          int outputWidth,
          int outputHeight)
{
  long k;
  real *input_data = THTensor_(data)(input);
  real *finput_data = THTensor_(data)(finput);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane*kH*kW; k++) {
    size_t nip = k / (kH*kW);
    size_t rest = k % (kH*kW);
    size_t kh = rest / kW;
    size_t kw = rest % kW;
    size_t x, y;
    long long ix, iy;
    real *dst = finput_data + nip*(kH*kW*outputHeight*outputWidth) + kh*(kW*outputHeight*outputWidth) + kw*(outputHeight*outputWidth);
    real *src = input_data + nip*(inputHeight*inputWidth);
    if (padW > 0 || padH > 0) {
      size_t lpad, rpad;
      for(y = 0; y < outputHeight; y++) {
        iy = (long long)(y*dH - padH + kh);
        if (iy < 0 || iy >= inputHeight) {
          memset(dst+y*outputWidth, 0, sizeof(real)*outputWidth);
        } else {
          if (dW == 1) {
            ix = (long long)(0 - padW + kw);
            lpad = fmaxf(0,(int)(padW-kw));
            rpad = fmaxf(0,(int)(padW-(kW-kw-1)));
            if (outputWidth-rpad-lpad <= 0) {
              memset(dst+(size_t)(y*outputWidth), 0, sizeof(real)*outputWidth);
            } else {
              if (lpad > 0) memset(dst+y*outputWidth, 0, sizeof(real)*lpad);
              memcpy(dst+(size_t)(y*outputWidth+lpad), src+(size_t)(iy*inputWidth+ix+lpad), sizeof(real)*(outputWidth-rpad-lpad));
              if (rpad > 0) memset(dst+y*outputWidth + outputWidth - rpad, 0, sizeof(real)*rpad);
            }
          }
          else {
            for (x=0; x<outputWidth; x++){
              ix = (long long)(x*dW - padW + kw);
              if (ix < 0 || ix >= inputWidth)
                memset(dst+(size_t)(y*outputWidth+x), 0, sizeof(real)*1);
              else
                memcpy(dst+(size_t)(y*outputWidth+x), src+(size_t)(iy*inputWidth+ix), sizeof(real)*(1));
            }
          }
        }
      }
    } else {
      for(y = 0; y < outputHeight; y++) {
        iy = (long long)(y*dH + kh);
        ix = (long long)(0 + kw);
        if (dW == 1)
          memcpy(dst+(size_t)(y*outputWidth), src+(size_t)(iy*inputWidth+ix), sizeof(real)*outputWidth);
        else {
          for (x=0; x<outputWidth; x++)
            memcpy(dst+(size_t)(y*outputWidth+x), src+(size_t)(iy*inputWidth+ix+x*dW), sizeof(real)*(1));
        }
      }
    }
  }
}

#endif
182
torch/lib/THNN/init.c
Normal file
@@ -0,0 +1,182 @@
#include "TH.h"
|
||||
#include "THNN.h"
|
||||
|
||||
#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
|
||||
#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)
|
||||
|
||||
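/* note: this file follows TH's generic-source pattern: every generic .c file
   included below is compiled once per floating-point type through
   THGenerateFloatTypes.h, which defines `real` and the THNN_() name-mangling
   macro, so e.g. generic/Abs.c yields both THNN_FloatAbs_updateOutput and
   THNN_DoubleAbs_updateOutput. */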
#include "generic/Abs.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/AbsCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/ClassNLLCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialClassNLLCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/DistKLDivCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/ELU.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/HardShrink.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/HardTanh.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/L1Cost.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/LeakyReLU.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/LogSigmoid.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/LogSoftMax.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/LookupTable.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/MSECriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/MarginCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SoftMarginCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/MultiLabelMarginCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/MultiMarginCriterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/PReLU.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/RReLU.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/Sigmoid.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SmoothL1Criterion.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SoftMax.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SoftPlus.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SoftShrink.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SparseLinear.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/Sqrt.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/Square.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/Tanh.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/Threshold.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/TemporalConvolution.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/TemporalSubSampling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/TemporalMaxPooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/BatchNormalization.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/unfold.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialConvolutionMap.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialConvolutionMM.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialConvolutionLocal.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialFullConvolution.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialFullConvolutionMap.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialDilatedConvolution.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialAdaptiveMaxPooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialAveragePooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialFractionalMaxPooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialMaxPooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialMaxUnpooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialSubSampling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialUpSamplingNearest.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialUpSamplingBilinear.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricAveragePooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricConvolution.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricConvolutionMM.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricFullConvolution.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricDilatedConvolution.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricMaxPooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricMaxUnpooling.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialReflectionPadding.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/SpatialReplicationPadding.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||
|
||||
#include "generic/VolumetricReplicationPadding.c"
|
||||
#include "THGenerateFloatTypes.h"
|
||||