Add 'torch/lib/THNN/' from commit '4fe7059a315d156ecd080ff7bd5b4fe3d3a9efad'

git-subtree-dir: torch/lib/THNN
git-subtree-mainline: c3f0c1e2e0
git-subtree-split: 4fe7059a31
Adam Paszke 2016-08-04 10:58:50 -07:00
commit 035eb28e18
67 changed files with 14062 additions and 0 deletions

torch/lib/THNN/CMakeLists.txt Normal file
@@ -0,0 +1,65 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
CMAKE_POLICY(VERSION 2.6)
IF(NOT Torch_FOUND)
FIND_PACKAGE(Torch REQUIRED)
ENDIF()
IF(NOT THNN_INSTALL_LIB_SUBDIR)
SET(THNN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THNN install library directory")
ENDIF()
# Flags
# When using MSVC
IF(MSVC)
# we want to respect the standard, and we are bored of those **** .
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)
ENDIF(MSVC)
IF (CMAKE_VERSION VERSION_LESS "3.1")
SET(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
ELSE ()
SET(CMAKE_C_STANDARD 99)
ENDIF ()
# OpenMP support?
SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?")
IF (APPLE AND CMAKE_COMPILER_IS_GNUCC)
EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}")
IF (DARWIN_VERSION GREATER 9)
SET(APPLE_OPENMP_SUCKS 1)
ENDIF (DARWIN_VERSION GREATER 9)
EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion
OUTPUT_VARIABLE GCC_VERSION)
IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2)
MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)")
MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas")
SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" FORCE)
ENDIF ()
ENDIF ()
IF (WITH_OPENMP)
FIND_PACKAGE(OpenMP)
IF(OPENMP_FOUND)
MESSAGE(STATUS "Compiling with OpenMP support")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
ENDIF(OPENMP_FOUND)
ENDIF (WITH_OPENMP)
LINK_DIRECTORIES("${Torch_INSTALL_LIB}")
SET(src init.c)
ADD_LIBRARY(THNN MODULE ${src})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
### Torch packages expect the library prefix to be "lib"
SET_TARGET_PROPERTIES(THNN PROPERTIES
PREFIX "lib"
IMPORT_PREFIX "lib")
TARGET_LINK_LIBRARIES(THNN TH)
INSTALL(TARGETS THNN LIBRARY DESTINATION ${THNN_INSTALL_LIB_SUBDIR})

torch/lib/THNN/README.md Normal file
@@ -0,0 +1,32 @@
# THNN
THNN is a library that gathers nn's C implementations of neural network modules. It's entirely free of any Lua dependency and can therefore be used in any application that has a C FFI. Please note that it only contains quite low-level functions; an object-oriented C/C++ wrapper will be created soon as another library.
There is also a CUDA counterpart of THNN (THCUNN) in the [cunn repository](https://github.com/torch/cunn/tree/master/lib/THCUNN).
## Links
* [API reference](doc/api_reference.md)
* [Style guidelines](doc/style_guidelines.md)
## Motivation
Torch's neural network package (nn) provided many optimized C implementations of modules, but the source files contained Lua-specific code and headers, so they couldn't easily be compiled and included anywhere else.
THNN is based on the same code, but is written in pure C, so it can easily be included in other code. **Future C implementations should be committed to THNN.**
## API
THNN is a purely functional library. It provides two to three functions for each module, which perform the most important operations:
* **updateOutput** - applies the module to an input
* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
For information on argument types, please check the [API reference](doc/api_reference.md).
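As a quick illustration, here is a minimal, hypothetical C program using the functional API. It assumes TH and THNN are built and linked; the `THNNState *` argument is unused by the current C implementations, so `NULL` is passed:
```C
#include <TH.h>
#include <THNN.h>

int main(void)
{
  /* Hypothetical usage sketch: apply the Abs module to a float vector. */
  THFloatTensor *input  = THFloatTensor_newWithSize1d(3);
  THFloatTensor *output = THFloatTensor_new();

  THFloatTensor_set1d(input, 0, -1.5f);
  THFloatTensor_set1d(input, 1,  0.0f);
  THFloatTensor_set1d(input, 2,  2.0f);

  /* updateOutput resizes `output` to match `input` and fills it. */
  THNN_FloatAbs_updateOutput(NULL, input, output);
  /* output now holds {1.5, 0.0, 2.0} */

  THFloatTensor_free(output);
  THFloatTensor_free(input);
  return 0;
}
```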
## Developer docs
* [Style guidelines](doc/style_guidelines.md)
This section will be expanded once the FFI refactoring is finished.

torch/lib/THNN/THNN.h Normal file
@@ -0,0 +1,25 @@
#ifndef THNN_H
#define THNN_H
#include <stdbool.h>
#include <TH.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME)
#define THIndexTensor THLongTensor
#define THIndexTensor_(NAME) THLongTensor_ ## NAME
#define THIntegerTensor THIntTensor
#define THIntegerTensor_(NAME) THIntTensor_ ## NAME
typedef long THIndex_t;
typedef int THInteger_t;
typedef void THNNState;
#include "generic/THNN.h"
#include <THGenerateFloatTypes.h>
#endif
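The last two includes instantiate every generic declaration once per floating-point type: `THNN_(NAME)` concatenates to `THNN_Float<NAME>` or `THNN_Double<NAME>`, and `THTensor` resolves to the matching tensor type. Schematically, a generic `Abs_updateOutput` declaration expands to:
```C
/* Sketch of the declarations generated from
 *   TH_API void THNN_(Abs_updateOutput)(
 *             THNNState *state, THTensor *input, THTensor *output);
 * when generic/THNN.h is included through THGenerateFloatTypes.h: */
void THNN_FloatAbs_updateOutput(THNNState *state,
                                THFloatTensor *input, THFloatTensor *output);
void THNN_DoubleAbs_updateOutput(THNNState *state,
                                 THDoubleTensor *input, THDoubleTensor *output);
```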

File diff suppressed because it is too large

@@ -0,0 +1,106 @@
--[[
This script regenerates api_reference.md based on comments placed in THNN.h.
]]--
local header = [[
# API docs
This document only describes the THNN API. For a thorough review of all modules present here please refer to [nn's docs](http://github.com/torch/nn/tree/master/doc).
### Note on function names
Please remember that because C doesn't support function overloading, functions taking different tensor types have different names. For example, for the Abs module there are actually two updateOutput functions:
* `void THNN_FloatAbs_updateOutput(...)`
* `void THNN_DoubleAbs_updateOutput(...)`
In these docs such functions will be referred to as `void THNN_Abs_updateOutput(...)`, and it's up to the developer to add a type prefix. `real` is an alias for that type.
### Argument types
Some arguments have additional tags placed in square brackets:
* **[OUT]** - This is the output argument. It will be reshaped if needed.
* **[OPTIONAL]** - This argument is optional and can be safely set to NULL.
* **[BUFFER]** - A buffer. `updateGradInput` and `accGradParameters` should get the same buffers that were used in the `updateOutput` call.
* **[MODIFIED]** - Some functions accept an `inplace` flag. If set to true, this argument might be modified (in addition to the output).
## Module list
These are all modules implemented in THNN:
]]
local hfile = io.open('../generic/THNN.h', 'r')
local lines = hfile:read('*a'):split('\n')
hfile:close()
-- Parse input
local declarations = {}
local current_declaration
local declaration_module
for i,line in ipairs(lines) do
if line:sub(1, 6) == 'TH_API' then
current_declaration = ''
declaration_module = line:match('THNN_%((.+)_.+%)')
end
if current_declaration then
current_declaration = current_declaration .. line .. '\n'
end
if line:match('%);') then
current_declaration = current_declaration:sub(1, -2) -- remove a trailing newline
declarations[declaration_module] = declarations[declaration_module] or {}
table.insert(declarations[declaration_module], current_declaration)
current_declaration = nil
declaration_module = nil
end
end
declarations["unfolded"] = nil
-- Sort modules
local modules = {}
for k,_ in pairs(declarations) do table.insert(modules, k) end
table.sort(modules)
-- Create an index
local outfile = io.open('api_reference.md', 'w')
outfile:write(header)
for i, name in ipairs(modules) do
outfile:write(string.format('* [%s](#%s)\n', name, name:lower()))
end
outfile:write('\n')
-- Write proper docs
for i,name in ipairs(modules) do
outfile:write('## ' .. name ..'\n')
for i,declaration in ipairs(declarations[name]) do
-- Write source code
outfile:write('```C' .. '\n')
local declaration_lines = declaration:split('\n')
for i, line in ipairs(declaration_lines) do
if i == 1 then
line = line:gsub('TH_API ', ''):gsub('%(', ''):gsub('%)', '') .. '(' -- remove macro junk
else
line = line:gsub('%s*//.*$', '') -- remove the comment
end
outfile:write(line .. '\n')
end
outfile:write('```' .. '\n')
-- Describe arguments
table.remove(declaration_lines, 1)
for i,line in ipairs(declaration_lines) do
local param, comment = line:match('^%s*(.*),%s*// (.*)$')
if param == nil then param, comment = line:match('^%s*(.*)%);%s*// (.*)$') end
if param ~= nil then
comment = comment:gsub('%[', '%*%*%['):gsub('%]', '%]%*%*') -- use bold font for tags
outfile:write(string.format('`%s` - %s\n<br/>\n', param, comment))
end
end
end
end
outfile:close()
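Note: `split` is not part of stock Lua's string library; this script relies on an environment (such as torch's `th` interpreter) that extends strings with a `split` method, and it is presumably run from the `doc/` directory so that the relative paths `../generic/THNN.h` and `api_reference.md` resolve correctly.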

torch/lib/THNN/doc/style_guidelines.md Normal file
@@ -0,0 +1,59 @@
## API design guidelines
Functions should return `void`.
All functions should accept arguments in the following order. `...` represents any module-specific parameters or buffers, disregarding whether they are used for writing or reading. Arguments in `...` below should be ordered like this:
```
[weight], [bias], [any buffers], [additional arguments], [optional arguments]
```
### Modules
```
updateOutput: state, input, output, ...
updateGradInput: state, input, gradOutput, gradInput, ...
accGradParameters: state, input, gradOutput, [gradWeight], [gradBias], ...
```
e.g.
```C
void THNN_(HardShrink_updateGradInput)(
THNNState* state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
real lambda)
```
### Criterions
```
updateOutput: state, input, target, output, ...
updateGradInput: state, input, target, gradInput, ...
```
e.g.
```C
void THNN_(ClassNLLCriterion_updateOutput)(
THNNState* state,
THTensor *input,
THLongTensor *target,
THTensor *output,
THTensor *weights,
THTensor *total_weight,
bool sizeAverage)
```
## Code style guide
```C
void THNN_Linear_updateOutput(
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias);
//<- 10 ->
```
All arguments should start on a new line after the function name, and they should be indented using 10 spaces.
Use 2 spaces for block indentation.

torch/lib/THNN/generic/Abs.c Normal file
@@ -0,0 +1,27 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Abs.c"
#else
void THNN_(Abs_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
THTensor_(resizeAs)(output, input);
THTensor_(abs)(output, input);
}
void THNN_(Abs_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput)
{
THTensor_(resizeAs)(gradInput, input);
// d|x|/dx = sign(x); sign(0) is taken to be +1 here
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
real z = *input_data;
*gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);
);
}
#endif

torch/lib/THNN/generic/AbsCriterion.c Normal file
@@ -0,0 +1,39 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/AbsCriterion.c"
#else
void THNN_(AbsCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, target,
sum += fabs(*input_data - *target_data);
);
if (sizeAverage)
sum /= THTensor_(nElement)(input);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(AbsCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage)
{
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
*gradInput_data = (*input_data - *target_data) >= 0 ? norm : -norm;
);
}
#endif

torch/lib/THNN/generic/BatchNormalization.c Normal file
@@ -0,0 +1,144 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/BatchNormalization.c"
#else
void THNN_(BatchNormalization_updateOutput)(
THNNState *state, THTensor *input, THTensor *output,
THTensor *weight, THTensor *bias,
THTensor *running_mean, THTensor *running_var,
THTensor *save_mean, THTensor *save_std,
bool train, double momentum, double eps)
{
long nInput = THTensor_(size)(input, 1);
long f;
long n = THTensor_(nElement)(input) / nInput;
#pragma omp parallel for
for (f = 0; f < nInput; ++f) {
THTensor *in = THTensor_(newSelect)(input, 1, f);
THTensor *out = THTensor_(newSelect)(output, 1, f);
real mean, invstd;
if (train) {
// compute mean per input
accreal sum = 0;
TH_TENSOR_APPLY(real, in, sum += *in_data;);
mean = (real) sum / n;
THTensor_(set1d)(save_mean, f, (real) mean);
// compute variance per input
sum = 0;
TH_TENSOR_APPLY(real, in,
sum += (*in_data - mean) * (*in_data - mean););
if (sum == 0 && eps == 0.0) {
invstd = 0;
} else {
invstd = (real) (1 / sqrt(sum/n + eps));
}
THTensor_(set1d)(save_std, f, (real) invstd);
// update running averages
THTensor_(set1d)(running_mean, f,
(real) (momentum * mean + (1 - momentum) * THTensor_(get1d)(running_mean, f)));
accreal unbiased_var = sum / (n - 1);
THTensor_(set1d)(running_var, f,
(real) (momentum * unbiased_var + (1 - momentum) * THTensor_(get1d)(running_var, f)));
} else {
mean = THTensor_(get1d)(running_mean, f);
invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
}
// compute output
real w = weight ? THTensor_(get1d)(weight, f) : 1;
real b = bias ? THTensor_(get1d)(bias, f) : 0;
TH_TENSOR_APPLY2(real, in, real, out,
*out_data = (real) (((*in_data - mean) * invstd) * w + b););
THTensor_(free)(out);
THTensor_(free)(in);
}
}
void THNN_(BatchNormalization_backward)(
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput,
THTensor *gradWeight, THTensor *gradBias, THTensor *weight,
THTensor *running_mean, THTensor *running_var,
THTensor *save_mean, THTensor *save_std,
bool train, double scale, double eps)
{
long nInput = THTensor_(size)(input, 1);
long f;
long n = THTensor_(nElement)(input) / nInput;
#pragma omp parallel for
for (f = 0; f < nInput; ++f) {
THTensor *in = THTensor_(newSelect)(input, 1, f);
THTensor *gradOut = THTensor_(newSelect)(gradOutput, 1, f);
real w = weight ? THTensor_(get1d)(weight, f) : 1;
real mean, invstd;
if (train) {
mean = THTensor_(get1d)(save_mean, f);
invstd = THTensor_(get1d)(save_std, f);
} else {
mean = THTensor_(get1d)(running_mean, f);
invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
}
// sum over all gradOutput in feature plane
accreal sum = 0;
TH_TENSOR_APPLY(real, gradOut, sum += *gradOut_data;);
// dot product of Q(X) and gradOutput
accreal dotp = 0;
TH_TENSOR_APPLY2(real, in, real, gradOut,
dotp += (*in_data - mean) * (*gradOut_data););
if (gradInput) {
THTensor *gradIn = THTensor_(newSelect)(gradInput, 1, f);
if (train) {
// when in training mode
// Q(X) = X - E[x] ; i.e. input centered to zero mean
// Y = Q(X) / σ ; i.e. BN output before weight and bias
// dL/dX = (Q(dL/dY) - dot(Y, dL/dY) * Y) / σ * w
// projection of gradOutput on to output scaled by std
real k = (real) dotp * invstd * invstd / n;
TH_TENSOR_APPLY2(real, gradIn, real, in,
*gradIn_data = (*in_data - mean) * k;);
accreal gradMean = sum / n;
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
*gradIn_data = (*gradOut_data - gradMean - *gradIn_data) * invstd * w;);
} else {
// when in evaluation mode
// Q(X) = X - running_mean ; i.e. input centered to zero mean
// Y = Q(X) / running_std ; i.e. BN output before weight and bias
// dL/dX = dL/dY * w / running_std
TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
*gradIn_data = *gradOut_data * invstd * w;);
}
THTensor_(free)(gradIn);
}
if (gradWeight) {
real val = THTensor_(get1d)(gradWeight, f);
THTensor_(set1d)(gradWeight, f, val + scale * dotp * invstd);
}
if (gradBias) {
real val = THTensor_(get1d)(gradBias, f);
THTensor_(set1d)(gradBias, f, val + scale * sum);
}
THTensor_(free)(gradOut);
THTensor_(free)(in);
}
}
#endif
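For reference, the training-mode branch of `BatchNormalization_backward` above computes the standard batch-norm input gradient. A sketch in math form, with $\hat{x}_i = (x_i - \mu)\,\mathrm{invstd}$, $g_i = \partial L/\partial y_i$, and $n$ elements per feature plane:
```latex
\frac{\partial L}{\partial x_i}
  = w \cdot \mathrm{invstd}
    \left( g_i - \frac{1}{n}\sum_j g_j
           - \hat{x}_i \cdot \frac{1}{n}\sum_j g_j \hat{x}_j \right)
```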

torch/lib/THNN/generic/ClassNLLCriterion.c Normal file
@@ -0,0 +1,147 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/ClassNLLCriterion.c"
#else
void THNN_(ClassNLLCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THIndexTensor *target,
THTensor *output,
bool sizeAverage,
THTensor *weights,
THTensor *total_weight)
{
int n_dims = THTensor_(nDimension)(input);
int n_classes = THTensor_(size)(input, n_dims - 1);
if (THIndexTensor_(nDimension)(target) > 1) {
THError("multi-target not supported");
}
if (THTensor_(nDimension)(input) > 2) {
THError("input tensor should be 1D or 2D");
}
if (weights && THTensor_(nElement)(weights) != n_classes) {
THError("weight tensor should be defined either for all or no classes");
}
input = THTensor_(newContiguous)(input);
target = THIndexTensor_(newContiguous)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
real *input_data = THTensor_(data)(input);
THIndex_t *target_data = THIndexTensor_(data)(target);
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
real *output_data = THTensor_(data)(output);
real *total_weight_data = THTensor_(data)(total_weight);
output_data[0] = total_weight_data[0] = 0.0;
if (THTensor_(nDimension)(input) == 1) {
int cur_target = target_data[0] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
output_data[0] = -input_data[cur_target] * total_weight_data[0];
} else if (THTensor_(nDimension)(input) == 2) {
int batch_size = THTensor_(size)(input, 0);
THAssert(THIndexTensor_(size)(target, 0) == batch_size);
int n_target = THTensor_(size)(input, 1);
int i;
for (i = 0; i < batch_size; i++) {
int cur_target = target_data[i] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
real cur_weight = weights ? weights_data[cur_target] : 1.0f;
total_weight_data[0] += cur_weight;
output_data[0] -= input_data[i * n_target + cur_target] * cur_weight;
}
}
if (sizeAverage && total_weight_data[0]) {
output_data[0] /= total_weight_data[0];
}
if (weights) {
THTensor_(free)(weights);
}
THTensor_(free)(input);
THIndexTensor_(free)(target);
}
void THNN_(ClassNLLCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THIndexTensor *target,
THTensor *gradInput,
bool sizeAverage,
THTensor *weights,
THTensor *total_weight)
{
int n_dims = THTensor_(nDimension)(input);
int n_classes = THTensor_(size)(input, n_dims - 1);
if (!THTensor_(isContiguous)(gradInput)) {
THError("gradInput must be contiguous");
}
real *total_weight_data = THTensor_(data)(total_weight);
if (!(*total_weight_data > 0)) {
return;
}
if (THIndexTensor_(nDimension)(target) > 1) {
THError("multi-target not supported");
}
if (THTensor_(nDimension)(input) > 2) {
THError("input tensor should be 1D or 2D");
}
if (weights && THTensor_(nElement)(weights) != n_classes) {
THError("weight tensor should be defined either for all or no classes");
}
target = THIndexTensor_(newContiguous)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
THIndex_t *target_data = THIndexTensor_(data)(target);
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
real *gradInput_data = THTensor_(data)(gradInput);
if (THTensor_(nDimension)(input) == 1) {
int cur_target = target_data[0] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
gradInput_data[cur_target] =
(!sizeAverage && weights) ? -weights_data[cur_target] : -1;
} else if (THTensor_(nDimension)(input) == 2) {
int batch_size = THTensor_(size)(input, 0);
THAssert(THIndexTensor_(size)(target, 0) == batch_size);
int n_target = THTensor_(size)(input, 1);
int i;
for (i = 0; i < batch_size; i++){
int cur_target = target_data[i] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
gradInput_data[i * n_target + cur_target] =
-(weights ? weights_data[cur_target] : 1.0f);
if (sizeAverage && *total_weight_data) {
gradInput_data[i * n_target + cur_target] /= *total_weight_data;
}
}
}
THIndexTensor_(free)(target);
if (weights) {
THTensor_(free)(weights);
}
}
#endif
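In math form, `ClassNLLCriterion_updateOutput` above computes the weighted negative log-likelihood over a batch of log-probabilities $x$ with targets $y$ (weights $w$ default to 1; the division happens only when `sizeAverage` is set and the total weight is nonzero):
```latex
\ell(x, y) = -\sum_{i=1}^{N} w_{y_i}\, x_{i,\,y_i},
\qquad
\ell_{\mathrm{avg}}(x, y) = \frac{\ell(x, y)}{\sum_{i=1}^{N} w_{y_i}}
```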

torch/lib/THNN/generic/DistKLDivCriterion.c Normal file
@@ -0,0 +1,39 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/DistKLDivCriterion.c"
#else
void THNN_(DistKLDivCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, target,
sum += *target_data > 0 ? *target_data * (log(*target_data) - *input_data) : 0;
);
if (sizeAverage)
sum /= THTensor_(nElement)(input);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(DistKLDivCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage)
{
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
*gradInput_data = *target_data > 0 ? norm * (-*target_data) : 0;
);
}
#endif

torch/lib/THNN/generic/ELU.c Normal file
@@ -0,0 +1,51 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/ELU.c"
#else
void THNN_(ELU_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real alpha,
bool inplace)
{
if(inplace) {
TH_TENSOR_APPLY(real, input,
if(*input_data <= 0) {
*input_data = (exp(*input_data) - 1) * alpha;
}
);
THTensor_(set)(output, input);
} else {
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, input, real, output,
*output_data = *input_data <= 0 ? (exp(*input_data)-1)*alpha : *input_data;
);
}
}
void THNN_(ELU_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output,
real alpha,
bool inplace)
{
if(inplace) {
TH_TENSOR_APPLY2(real, gradOutput, real, output,
if(*output_data <= 0) {
*gradOutput_data *= *output_data + alpha;
}
);
THTensor_(set)(gradInput, gradOutput);
} else {
THTensor_(resizeAs)(gradInput, output);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
*gradInput_data = *output_data <= 0 ? *gradOutput_data * (*output_data + alpha) : *gradOutput_data;
);
}
}
#endif
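The backward pass above avoids recomputing `exp` by exploiting a property of ELU: for $x \le 0$ the output is $y = \alpha(e^{x} - 1)$, hence
```latex
\frac{dy}{dx} = \alpha e^{x} = y + \alpha
```
which is exactly why the gradient kernels multiply `*gradOutput_data` by `*output_data + alpha` for non-positive outputs.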

torch/lib/THNN/generic/HardShrink.c Normal file
@@ -0,0 +1,39 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/HardShrink.c"
#else
void THNN_(HardShrink_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real lambda)
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, output, real, input,
if (*input_data > lambda)
*output_data = *input_data;
else if (*input_data < -lambda)
*output_data = *input_data;
else
*output_data = 0;
);
}
void THNN_(HardShrink_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
real lambda)
{
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
if (*input_data > lambda || *input_data < -lambda)
*gradInput_data = *gradOutput_data;
else
*gradInput_data = 0;
);
}
#endif

torch/lib/THNN/generic/HardTanh.c Normal file
@@ -0,0 +1,127 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/HardTanh.c"
#else
void THNN_(HardTanh_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real min_val,
real max_val,
bool inplace)
{
if (inplace)
THTensor_(set)(output, input);
else
THTensor_(resizeAs)(output, input);
if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
{
if (inplace)
TH_TENSOR_APPLY(real, input,
if (*input_data < min_val)
*input_data = min_val;
else if (*input_data > max_val)
*input_data = max_val;
);
else
TH_TENSOR_APPLY2(real, output, real, input,
if (*input_data < min_val)
*output_data = min_val;
else if (*input_data <= max_val)
*output_data = *input_data;
else
*output_data = max_val;
);
}
else
{
real* ptr_input = THTensor_(data)(input);
real* ptr_output = THTensor_(data)(output);
long i;
long n = THTensor_(nElement)(input);
if (inplace)
#pragma omp parallel for private(i)
for (i = 0; i < n; i++)
{
if (ptr_input[i] < min_val)
ptr_input[i] = min_val;
else if (ptr_input[i] > max_val)
ptr_input[i] = max_val;
}
else
#pragma omp parallel for private(i)
for (i = 0; i < n; i++)
{
if (ptr_input[i] < min_val)
ptr_output[i] = min_val;
else if (ptr_input[i] <= max_val)
ptr_output[i] = ptr_input[i];
else
ptr_output[i] = max_val;
}
}
}
void THNN_(HardTanh_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
real min_val,
real max_val,
bool inplace)
{
if (inplace)
THTensor_(set)(gradInput, gradOutput);
else
THTensor_(resizeAs)(gradInput, input);
if (input->nDimension == 1 ||
!THTensor_(isContiguous)(input) ||
!THTensor_(isContiguous)(gradOutput) ||
!THTensor_(isContiguous)(gradInput))
{
if (inplace)
{
TH_TENSOR_APPLY2(real, gradOutput, real, input,
if (*input_data < min_val || *input_data > max_val)
*gradOutput_data = 0;
);
}
else
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
if (*input_data < min_val || *input_data > max_val)
*gradInput_data = 0;
else
*gradInput_data = *gradOutput_data;
);
}
else
{
real* ptr_gradOutput = THTensor_(data)(gradOutput);
real* ptr_gradInput = THTensor_(data)(gradInput);
real* ptr_input = THTensor_(data)(input);
long i;
long n = THTensor_(nElement)(input);
if (inplace)
#pragma omp parallel for private(i)
for (i = 0; i < n; i++)
{
if (ptr_input[i] <= min_val || ptr_input[i] >= max_val)
ptr_gradInput[i] = 0;
}
else
#pragma omp parallel for private(i)
for (i = 0; i < n; i++)
{
if (ptr_input[i] < min_val || ptr_input[i] > max_val)
ptr_gradInput[i] = 0;
else
ptr_gradInput[i] = ptr_gradOutput[i];
}
}
}
#endif

torch/lib/THNN/generic/L1Cost.c Normal file
@@ -0,0 +1,36 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/L1Cost.c"
#else
void THNN_(L1Cost_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
accreal sum = 0;
TH_TENSOR_APPLY(real, input,
sum += fabs(*input_data);
);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(L1Cost_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput)
{
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY2(real, gradInput, real, input,
if (*input_data > 0)
*gradInput_data = 1;
else if (*input_data < 0)
*gradInput_data = -1;
else
*gradInput_data = 0;
);
}
#endif

torch/lib/THNN/generic/LeakyReLU.c Normal file
@@ -0,0 +1,54 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/LeakyReLU.c"
#else
void THNN_(LeakyReLU_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real negval,
bool inplace)
{
if (inplace)
{
TH_TENSOR_APPLY(real, input,
if (*input_data <= 0)
*input_data *= negval;
);
THTensor_(set)(output, input);
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = *input_data > 0 ? *input_data : *input_data * negval;
);
}
}
void THNN_(LeakyReLU_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
real negval,
bool inplace)
{
if (inplace)
{
TH_TENSOR_APPLY2(real, gradOutput, real, input,
if (*input_data <= 0)
*gradOutput_data *= negval;
);
THTensor_(set)(gradInput, gradOutput);
}
else
{
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
*gradInput_data = *input_data > 0 ? *gradOutput_data : *gradOutput_data * negval;
);
}
}
#endif

torch/lib/THNN/generic/LogSigmoid.c Normal file
@@ -0,0 +1,35 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/LogSigmoid.c"
#else
void THNN_(LogSigmoid_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *buffer)
{
THTensor_(resizeAs)(output, input);
THTensor_(resizeAs)(buffer, input);
TH_TENSOR_APPLY3(real, output, real, input, real, buffer,
real z = exp(-*input_data);
*buffer_data = z;
*output_data = -log(1. + z);
);
}
void THNN_(LogSigmoid_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *buffer)
{
THTensor_(resizeAs)(gradInput, buffer);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer,
real z = *buffer_data;
*gradInput_data = *gradOutput_data * z / (1. + z);
);
}
#endif

torch/lib/THNN/generic/LogSoftMax.c Normal file
@@ -0,0 +1,110 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/LogSoftMax.c"
#else
void THNN_(LogSoftMax_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
real *input_data, *output_data;
long nframe = 0, dim = 0;
long t, d;
if (input->nDimension == 1)
{
nframe = 1;
dim = input->size[0];
}
else if (input->nDimension == 2)
{
nframe = input->size[0];
dim = input->size[1];
}
else
{
THArgCheck(0, 2, "vector or matrix expected");
}
input = THTensor_(newContiguous)(input);
THTensor_(resizeAs)(output, input);
real *input_data0 = THTensor_(data)(input);
real *output_data0 = THTensor_(data)(output);
accreal logsum;
real maxInput;
#pragma omp parallel for private(t, d, maxInput, logsum, input_data, output_data)
for (t = 0; t < nframe; t++)
{
logsum = 0;
maxInput = -THInf;
input_data = input_data0 + dim*t;
output_data = output_data0 + dim*t;
for (d = 0; d < dim; d++)
maxInput = THMax(maxInput, input_data[d]);
for (d = 0; d < dim; d++)
logsum += exp(input_data[d] - maxInput);
logsum = maxInput + log(logsum);
for (d = 0; d < dim; d++)
output_data[d] = input_data[d] - logsum;
}
THTensor_(free)(input);
}
void THNN_(LogSoftMax_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output)
{
gradOutput = THTensor_(newContiguous)(gradOutput);
real *gradInput_data, *gradOutput_data, *output_data;
long nframe = 0, dim = 0;
long t, d;
if (output->nDimension == 1)
{
nframe = 1;
dim = output->size[0];
}
else if (output->nDimension == 2)
{
nframe = output->size[0];
dim = output->size[1];
}
else
{
THError("vector or matrix expected");
}
THTensor_(resizeAs)(gradInput, output);
real *gradInput_data0 = THTensor_(data)(gradInput);
real *output_data0 = THTensor_(data)(output);
real *gradOutput_data0 = THTensor_(data)(gradOutput);
accreal sum;
#pragma omp parallel for private(t, sum, d, gradInput_data, output_data, gradOutput_data)
for (t = 0; t < nframe; t++)
{
sum = 0;
gradInput_data = gradInput_data0 + dim*t;
output_data = output_data0 + dim*t;
gradOutput_data = gradOutput_data0 + dim*t;
for (d = 0; d < dim; d++)
sum += gradOutput_data[d];
for (d = 0; d < dim; d++)
gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum;
}
THTensor_(free)(gradOutput);
}
#endif
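Both loops above use the standard log-sum-exp stabilization: the row maximum is subtracted before exponentiating so `exp` never overflows. In math form, the forward and backward passes compute:
```latex
y_i = x_i - \Bigl(\max_j x_j + \log\!\sum_j e^{\,x_j - \max_k x_k}\Bigr),
\qquad
\frac{\partial L}{\partial x_i} = g_i - e^{\,y_i} \sum_j g_j
```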

torch/lib/THNN/generic/LookupTable.c Normal file
@@ -0,0 +1,213 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/LookupTable.c"
#else
static void THNN_(LookupTable_resetCount)(
THInteger_t *count_data,
THIndexTensor *input)
{
long i;
THIndex_t *input_data = THIndexTensor_(data)(input);
long numel = THIndexTensor_(nElement)(input);
for (i = 0; i<numel; i++)
{
long k = input_data[i] - TH_INDEX_BASE;
count_data[k] = 0;
}
for (i = 0; i<numel; i++)
{
long k = input_data[i] - TH_INDEX_BASE;
count_data[k]++;
}
}
void THNN_(LookupTable_accGradParameters)(
THNNState *state,
THIndexTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THIntegerTensor *count,
THTensor *sorted,
THTensor *indices,
bool scaleGradByFreq,
int paddingValue,
real scale)
{
long i;
THInteger_t *count_data = NULL;
if (scaleGradByFreq)
{
THIntegerTensor_(resize1d)(count, gradWeight->size[0]);
count_data = THIntegerTensor_(data)(count);
}
if (!THTensor_(isContiguous)(gradWeight))
THError("gradWeight must be contiguous");
if (!THIndexTensor_(isContiguous)(input))
THError("input must be contiguous");
if (THIndexTensor_(nDimension)(input) != 1 && THIndexTensor_(nDimension)(input) != 2)
THError("input must be a vector or matrix");
THIndex_t *input_data = THIndexTensor_(data)(input);
long numel = THIndexTensor_(nElement)(input);
long numw = THTensor_(size)(gradWeight, 0);
// check that inputs are all within range
for (i=0; i<numel; i++)
if (input_data[i] < TH_INDEX_BASE || input_data[i] >= numw + TH_INDEX_BASE)
THError("input out of range");
gradOutput = THTensor_(newContiguous)(gradOutput);
real *gw = THTensor_(data)(gradWeight);
real *go = THTensor_(data)(gradOutput);
long stride = THTensor_(stride)(gradWeight, 0);
if (count_data)
THNN_(LookupTable_resetCount)(count_data, input);
#ifdef _OPENMP
if (numel > 1000)
{
// The strategy is to parallelize over sections of the vocabulary, so that
// thread 1 handles updates to gradWeight[0..nVocab/nThreads]. Every thread
// has to traverse the entire input, but the dominating factor is the axpy
// BLAS call.
#pragma omp parallel private(i)
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
long start = tid * (numw/nthreads + 1);
long end = start + (numw/nthreads + 1);
for (i=0; i<numel; i++)
{
if (input_data[i] != paddingValue)
{
long k = input_data[i] - TH_INDEX_BASE;
if (k >= start && k < end)
{
real scale_ = scale;
if (count_data) scale_ /= count_data[k];
THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
}
}
}
}
THTensor_(free)(gradOutput);
return;
}
#endif
for (i=0; i<numel; i++)
{
if (input_data[i] != paddingValue)
{
long k = input_data[i] - TH_INDEX_BASE;
real scale_ = scale;
if (count_data) scale_ /= count_data[k];
THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
}
}
THTensor_(free)(gradOutput);
}
/*
* Keep the norm of weight smaller than maxNorm
*/
static void THNN_(LookupTable_renormRow)(
real *row_data,
long stride,
real maxNorm,
real normType)
{
real norm = 0;
real new_norm;
long j;
for (j=0; j<stride; j++)
{
if (normType == 1) {
norm += fabs(row_data[j]);
} else if (normType == 2) {
norm += row_data[j] * row_data[j];
} else {
norm += pow(fabs(row_data[j]), normType);
}
}
norm = pow(norm, 1.0 / normType);
if (norm > maxNorm)
{
new_norm = maxNorm / (norm + 1e-7);
for (j=0; j<stride; j++) {
row_data[j] *= new_norm;
}
}
}
static int THNN_(compare_THIndex)(const void* a, const void* b)
{
return *(const THIndex_t*)a < *(const THIndex_t*)b ? -1 : 1;
}
void THNN_(LookupTable_renorm)(
THNNState *state,
THIndexTensor *idx,
THTensor *weight,
real maxNorm,
real normType)
{
if (!THTensor_(isContiguous)(weight))
THError("weight must be contiguous");
if (!THIndexTensor_(isContiguous)(idx))
THError("input must be contiguous");
if (THIndexTensor_(nDimension)(idx) != 1)
THError("idx must be a vector");
if (normType <= 0)
THError("non-positive-norm not supported");
long i;
THIndex_t *row_idx = THIndexTensor_(data)(idx);
long numel = THIndexTensor_(nElement)(idx);
long numw = THTensor_(size)(weight, 0);
long stride = THTensor_(stride)(weight, 0);
real *gw = THTensor_(data)(weight);
for (i=0; i<numel; i++)
if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE)
THError("input out of range");
// get unique indices
qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
long ptr = 0;
for (i=0; i<numel; i++)
if (i == 0 || row_idx[i] != row_idx[i-1])
row_idx[ptr++] = row_idx[i];
numel = ptr;
#ifdef _OPENMP
if (numel > 1000)
{
// The strategy is to parallelize over the rows that appear in
// row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
// This distributes the work evenly to each thread.
#pragma omp parallel for private(i)
for (i=0; i<numel; i++)
{
long k = row_idx[i] - TH_INDEX_BASE;
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
}
return;
}
#endif
for (i=0; i<numel; i++)
{
long k = row_idx[i] - TH_INDEX_BASE;
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
}
}
#endif

torch/lib/THNN/generic/MSECriterion.c Normal file
@@ -0,0 +1,40 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/MSECriterion.c"
#else
void THNN_(MSECriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, target,
real z = (*input_data - *target_data);
sum += z*z;
);
if (sizeAverage)
sum /= THTensor_(nElement)(input);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(MSECriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage)
{
real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.);
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
*gradInput_data = norm * (*input_data - *target_data);
);
}
#endif

torch/lib/THNN/generic/MarginCriterion.c Normal file
@@ -0,0 +1,42 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/MarginCriterion.c"
#else
void THNN_(MarginCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage,
real margin)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, target,
real z = (margin - *input_data * *target_data);
sum += z>0 ? z : 0;
);
if (sizeAverage)
sum /= THTensor_(nElement)(input);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(MarginCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage,
real margin)
{
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
*gradInput_data = (*input_data * *target_data) < margin ? -norm * *target_data : 0;
);
}
#endif

torch/lib/THNN/generic/MultiLabelMarginCriterion.c Normal file
@@ -0,0 +1,174 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c"
#else
void THNN_(MultiLabelMarginCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
THTensor *isTarget,
bool sizeAverage)
{
real *input_data, *target_data, *isTarget_data;
long nframe, dim;
long t, d, dt, ddt;
real sum;
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
if (input->nDimension == 1)
{
nframe = 1;
dim = input->size[0];
THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size");
}
else
{
nframe = input->size[0];
dim = input->size[1];
THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size");
}
THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range");
THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range");
target = THTensor_(newContiguous)(target);
input = THTensor_(newContiguous)(input);
input_data = THTensor_(data)(input);
target_data = THTensor_(data)(target);
THTensor_(resizeAs)(isTarget, target);
THTensor_(zero)(isTarget);
isTarget_data = THTensor_(data)(isTarget);
sum = 0;
for (t = 0; t < nframe; t++)
{
for (ddt = 0; ddt < dim; ddt++)
{
long target_idx = (long)target_data[ddt] - TH_INDEX_BASE;
if (target_idx < 0)
break;
isTarget_data[target_idx] = 1;
}
for (dt = 0; dt < dim; dt++)
{
long target_idx = (long)target_data[dt] - TH_INDEX_BASE;
real input_target;
if (target_idx < 0)
break;
input_target = input_data[target_idx];
for (d = 0; d < dim; d++)
{
if (!isTarget_data[d])
{
real z = 1 - input_target + input_data[d];
if (z > 0)
sum += z;
}
}
}
input_data += dim;
target_data += dim;
isTarget_data += dim;
}
sum /= dim;
if (sizeAverage)
sum /= nframe;
THTensor_(set1d)(output, 0, sum);
THTensor_(free)(input);
THTensor_(free)(target);
}
void THNN_(MultiLabelMarginCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
THTensor *isTarget,
bool sizeAverage)
{
real *input_data;
real *gradInput_data;
real *target_data;
real *isTarget_data;
long nframe, dim;
long t, d, dt;
real g;
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
if (input->nDimension == 1)
{
nframe = 1;
dim = input->size[0];
THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size");
THArgCheck((isTarget->nDimension == 1) && (isTarget->size[0] == dim), 3, "inconsistent isTarget size");
}
else
{
nframe = input->size[0];
dim = input->size[1];
THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size");
THArgCheck((isTarget->nDimension == 2) && (isTarget->size[0] == nframe) && (isTarget->size[1] == dim), 3, "inconsistent isTarget size");
}
THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range");
THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range");
THArgCheck(THTensor_(minall)(isTarget) >= 0, 3, "isTarget out of range");
THArgCheck(THTensor_(maxall)(isTarget) <= 1, 3, "isTarget out of range");
target = THTensor_(newContiguous)(target);
input = THTensor_(newContiguous)(input);
isTarget = THTensor_(newContiguous)(isTarget);
input_data = THTensor_(data)(input);
target_data = THTensor_(data)(target);
isTarget_data = THTensor_(data)(isTarget);
g = sizeAverage ? ( 1./((real)(nframe*dim)) ) : ( 1./((real)dim) );
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
gradInput_data = THTensor_(data)(gradInput);
for (t = 0; t < nframe; t++)
{
for (dt = 0; dt < dim; dt++)
{
long target_idx = (long)target_data[dt] - TH_INDEX_BASE;
real input_target;
if (target_idx < 0)
break;
input_target = input_data[target_idx];
for (d = 0; d < dim; d++)
{
if (!isTarget_data[d])
{
real z = 1 - input_target + input_data[d];
if (z > 0)
{
gradInput_data[target_idx] -= g;
gradInput_data[d] += g;
}
}
}
}
input_data += dim;
target_data += dim;
isTarget_data += dim;
gradInput_data += dim;
}
THTensor_(free)(input);
THTensor_(free)(target);
THTensor_(free)(isTarget);
}
#endif

torch/lib/THNN/generic/MultiMarginCriterion.c Normal file
@@ -0,0 +1,159 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c"
#else
void THNN_(MultiMarginCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage,
int p,
THTensor *weights,
real margin)
{
real *input_data, *target_data, *weights_data;
long nframe, dim;
long t, d;
real sum;
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
if (input->nDimension == 1)
{
nframe = 1;
dim = input->size[0];
}
else
{
nframe = input->size[0];
dim = input->size[1];
THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
}
for (t = 0; t < nframe; t++)
{
real idx = THTensor_(get1d)(target, t);
THArgCheck((idx >= TH_INDEX_BASE) && (idx < dim + TH_INDEX_BASE), 3, "target out of range");
}
input = THTensor_(newContiguous)(input);
target = THTensor_(newContiguous)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
input_data = THTensor_(data)(input);
target_data = THTensor_(data)(target);
weights_data = weights ? THTensor_(data)(weights) : NULL;
sum = 0;
for (t = 0; t < nframe; t++)
{
long target_idx = (long)(target_data[t] - TH_INDEX_BASE);
real input_target = input_data[target_idx];
for (d = 0; d < dim; d++)
{
real z = margin - input_target + input_data[d];
if (d == target_idx)
continue;
if (z > 0) {
real h = (p==1) ? z : z*z;
if(weights_data)
h *= weights_data[target_idx];
sum += h;
}
}
input_data += dim;
}
sum /= dim;
if(sizeAverage)
sum /= nframe;
THTensor_(set1d)(output, 0, sum);
THTensor_(free)(input);
THTensor_(free)(target);
if(weights)
THTensor_(free)(weights);
}
void THNN_(MultiMarginCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage,
int p,
THTensor *weights,
real margin)
{
real *input_data;
real *gradInput_data;
real *target_data;
real *weights_data;
long nframe, dim;
long t, d;
real g;
THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
if (input->nDimension == 1)
{
nframe = 1;
dim = input->size[0];
}
else
{
nframe = input->size[0];
dim = input->size[1];
THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
}
g = (sizeAverage ? 1./((real)(nframe*dim)) : 1./((real)dim));
input = THTensor_(newContiguous)(input);
target = THTensor_(newContiguous)(target);
input_data = THTensor_(data)(input);
THTensor_(resizeAs)(gradInput, input);
gradInput_data = THTensor_(data)(gradInput);
target_data = THTensor_(data)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
weights_data = weights ? THTensor_(data)(weights) : NULL;
for (t = 0; t < nframe; t++)
{
long target_idx = (long)(target_data[t]) - TH_INDEX_BASE;
real input_target = input_data[target_idx];
real gradInput_target = 0;
for (d = 0; d < dim; d++)
{
real z = margin - input_target + input_data[d];
if (d == target_idx)
continue;
if (z > 0)
{
real h = (p == 1) ? g : 2*g*z;
if(weights_data)
h *= weights_data[target_idx];
gradInput_target -= h;
gradInput_data[d] = h;
}
else
gradInput_data[d] = 0;
}
gradInput_data[target_idx] = gradInput_target;
input_data += dim;
gradInput_data += dim;
}
THTensor_(free)(input);
THTensor_(free)(target);
if(weights)
THTensor_(free)(weights);
}
#endif
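Per sample, `MultiMarginCriterion_updateOutput` above implements the multi-class margin loss with target class $y$, optional per-class weights $w$ (applied at the target index), and $p \in \{1, 2\}$:
```latex
\ell(x, y) = \frac{1}{\mathrm{dim}}
  \sum_{d \ne y} w_{y} \cdot \max\bigl(0,\ \mathrm{margin} - x_{y} + x_{d}\bigr)^{p}
```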

torch/lib/THNN/generic/PReLU.c Normal file
@@ -0,0 +1,228 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/PReLU.c"
#else
void THNN_(PReLU_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THIndex_t nOutputPlane)
{
THTensor_(resizeAs)(output, input);
if (nOutputPlane == 0)
{
// handle shared parameter case
real w = *THTensor_(data)(weight);
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = (*input_data > 0) ? *input_data : w*(*input_data);
);
}
else
{
long bs, ks;
{
long input_ndim = THTensor_(nDimension)(input);
switch (input_ndim)
{
case 1:
bs = 1;
ks = 1;
break;
case 2:
bs = input->size[0];
ks = 1;
break;
case 3:
bs = 1;
ks = input->size[1] * input->size[2];
break;
case 4:
bs = input->size[0];
ks = input->size[2] * input->size[3];
break;
}
if (input->size[(input_ndim + 1) % 2] != nOutputPlane)
THError("wrong number of input planes");
}
real *output_data = THTensor_(data)(output);
real *input_data = THTensor_(data)(input);
real *weight_data = THTensor_(data)(weight);
THIndex_t i, j, k;
#pragma omp parallel for private(j,k)
for (i = 0; i < bs; ++i)
{
real* n_input_data = input_data + i*nOutputPlane*ks;
real* n_output_data = output_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
for (k = 0; k < ks; ++k)
n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : weight_data[j] * n_input_data[k];
n_input_data += ks;
n_output_data += ks;
}
}
}
}
void THNN_(PReLU_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THIndex_t nOutputPlane)
{
THTensor_(resizeAs)(gradInput, input);
if (nOutputPlane == 0)
{
real w = THTensor_(data)(weight)[0];
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
if ((*input_data) > 0)
*gradInput_data = *gradOutput_data;
else
*gradInput_data = w * (*gradOutput_data);
);
}
else
{
const real *input_data = THTensor_(data)(input);
const real *gradOutput_data = THTensor_(data)(gradOutput);
const real *weight_data = THTensor_(data)(weight);
real *gradInput_data = THTensor_(data)(gradInput);
long bs, ks;
{
long input_ndim = THTensor_(nDimension)(input);
switch (input_ndim)
{
case 1:
bs = 1;
ks = 1;
break;
case 2:
bs = input->size[0];
ks = 1;
break;
case 3:
bs = 1;
ks = input->size[1] * input->size[2];
break;
case 4:
bs = input->size[0];
ks = input->size[2] * input->size[3];
break;
}
if (input->size[(input_ndim + 1) % 2] != nOutputPlane)
THError("wrong number of input planes");
}
THIndex_t i, j, k;
#pragma omp parallel for private(j,k)
for (i = 0; i < bs; ++i)
{
const real *n_input_data = input_data + i*nOutputPlane*ks;
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
real w = weight_data[j];
for (k = 0; k < ks; ++k)
{
if (n_input_data[k] > 0)
n_gradInput_data[k] = n_gradOutput_data[k];
else
n_gradInput_data[k] = n_gradOutput_data[k] * w;
}
n_input_data += ks;
n_gradInput_data += ks;
n_gradOutput_data += ks;
}
}
}
}
void THNN_(PReLU_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *gradWeight,
THTensor *gradWeightBuf,
THTensor *gradWeightBuf2,
THIndex_t nOutputPlane,
real scale)
{
real *gradWeight_data = THTensor_(data)(gradWeight);
if (nOutputPlane == 0)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, gradOutput,
if ((*input_data) <= 0)
sum += (*input_data) * (*gradOutput_data);
);
gradWeight_data[0] += scale * sum;
}
else
{
long bs, ks;
{
long input_ndim = THTensor_(nDimension)(input);
switch (input_ndim)
{
case 1:
bs = 1;
ks = 1;
break;
case 2:
bs = input->size[0];
ks = 1;
break;
case 3:
bs = 1;
ks = input->size[1] * input->size[2];
break;
case 4:
bs = input->size[0];
ks = input->size[2] * input->size[3];
break;
}
if (input->size[(input_ndim + 1) % 2] != nOutputPlane)
THError("wrong number of input planes");
}
const real *input_data = THTensor_(data)(input);
const real *gradOutput_data = THTensor_(data)(gradOutput);
const real *weight_data = THTensor_(data)(weight);
real *gradWeight_data = THTensor_(data)(gradWeight);
THIndex_t i, j, k;
for (i = 0; i < bs; ++i)
{
const real *n_input_data = input_data + i*nOutputPlane*ks;
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
real sum = 0;
for (k = 0; k < ks; ++k)
if (n_input_data[k] <= 0)
sum += n_gradOutput_data[k] * n_input_data[k];
gradWeight_data[j] += scale * sum;
n_input_data += ks;
n_gradOutput_data += ks;
}
}
}
}
#endif

torch/lib/THNN/generic/RReLU.c Normal file
@@ -0,0 +1,127 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/RReLU.c"
#else
void THNN_(RReLU_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *noise,
real lower,
real upper,
bool train,
bool inplace,
THGenerator *generator)
{
if (train)
{
// draw slopes from the random generator supplied by the caller
THTensor_(resizeAs)(noise, input);
if (inplace)
{
TH_TENSOR_APPLY2(real, input, real, noise,
if (*input_data <= 0)
{
const real r = (real)THRandom_uniform(generator, lower, upper);
*input_data = (*input_data) * r;
*noise_data = r;
}
else
{
*noise_data = 1;
}
);
THTensor_(set)(output, input);
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY3(real, input, real, output, real, noise,
if (*input_data <= 0)
{
const real r = (real)THRandom_uniform(generator, lower, upper);
*output_data = (*input_data) * r;
*noise_data = r;
}
else
{
*output_data = *input_data;
*noise_data = 1;
}
);
}
}
else
{
const real negSlope = (lower + upper) / 2;
if (inplace)
{
TH_TENSOR_APPLY(real, input,
if (*input_data <= 0)
{
*input_data = *input_data * negSlope;
}
);
THTensor_(set)(output, input);
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, input, real, output,
const real r = (*input_data) <= 0 ? negSlope : 1;
*output_data = *input_data * r;
);
}
}
}
void THNN_(RReLU_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *noise,
real lower,
real upper,
bool train,
bool inplace)
{
if (train && upper - lower > 1E-6) // e.g. if upper == lower, RReLU behaves like LeakyReLU
{
// multiply the gradient by the noise tensor
if (inplace)
{
THTensor_(cmul)(gradOutput, gradOutput, noise);
THTensor_(set)(gradInput, gradOutput);
}
else
{
THTensor_(resizeAs)(gradInput, input);
THTensor_(cmul)(gradInput, gradOutput, noise);
}
}
else
{
// use constant factor for negative input values
const real negSlope = (lower + upper) / 2;
if (inplace)
{
TH_TENSOR_APPLY2(real, gradOutput, real, input,
if (*input_data <= 0)
{
*gradOutput_data = (*gradOutput_data) * negSlope;
}
);
THTensor_(set)(gradInput, gradOutput);
}
else
{
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
*gradInput_data = (*input_data) <= 0 ? (*gradOutput_data) * negSlope : (*gradOutput_data);
);
}
}
}
#endif

torch/lib/THNN/generic/Sigmoid.c Normal file
@@ -0,0 +1,31 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Sigmoid.c"
#else
void THNN_(Sigmoid_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = 1./(1.+ exp(- *input_data));
);
}
void THNN_(Sigmoid_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output)
{
THTensor_(resizeAs)(gradInput, output);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
real z = *output_data;
*gradInput_data = *gradOutput_data * (1. - z) * z;
);
}
#endif

torch/lib/THNN/generic/SmoothL1Criterion.c Normal file
@@ -0,0 +1,45 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SmoothL1Criterion.c"
#else
void THNN_(SmoothL1Criterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, target,
real z = fabs(*input_data - *target_data);
sum += z < 1 ? 0.5*z*z : z - 0.5;
);
if (sizeAverage)
sum /= THTensor_(nElement)(input);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(SmoothL1Criterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage)
{
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
real x = *input_data - *target_data;
if (x < -1.)
*gradInput_data = - norm;
else if (x > 1.)
*gradInput_data = norm;
else
*gradInput_data = norm * x;
);
}
#endif
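The element-wise loss above is the Huber-style smooth L1 on the difference $d = x - t$; its gradient, as computed in `updateGradInput`, saturates at $\pm\,\mathrm{norm}$:
```latex
\ell(d) = \begin{cases} \tfrac{1}{2}\,d^2 & \text{if } |d| < 1 \\ |d| - \tfrac{1}{2} & \text{otherwise} \end{cases}
```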

torch/lib/THNN/generic/SoftMarginCriterion.c Normal file
@@ -0,0 +1,40 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftMarginCriterion.c"
#else
void THNN_(SoftMarginCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *output,
bool sizeAverage)
{
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, target,
real z = log(1. + exp(-*input_data * *target_data));
sum += z;
);
if(sizeAverage)
sum /= THTensor_(nElement)(input);
THTensor_(set1d)(output, 0, sum);
}
void THNN_(SoftMarginCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *target,
THTensor *gradInput,
bool sizeAverage)
{
real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
real z = exp(-*target_data * *input_data);
*gradInput_data = -norm*(*target_data)*z/(1. + z);
);
}
#endif

torch/lib/THNN/generic/SoftMax.c Normal file
@@ -0,0 +1,149 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftMax.c"
#else
void THNN_(SoftMax_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
real *input_data, *output_data;
long nframe = 0, dim = 0, stride = 0;
long t;
if (input->nDimension == 1)
{
nframe = 1;
dim = input->size[0];
stride = 1;
}
else if (input->nDimension == 2)
{
nframe = input->size[0];
dim = input->size[1];
stride = 1;
}
else if (input->nDimension == 3)
{
nframe = 1;
dim = input->size[0];
stride = input->size[1]*input->size[2];
}
else if (input->nDimension == 4)
{
nframe = input->size[0];
dim = input->size[1];
stride = input->size[2]*input->size[3];
}
else
{
THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
}
input = THTensor_(newContiguous)(input);
THTensor_(resizeAs)(output, input);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(t)
for (t = 0; t < stride*nframe; t++)
{
real *input_ptr = input_data + (t/stride)*dim*stride + t % stride;
real *output_ptr = output_data + (t/stride)*dim*stride + t % stride;
real inputMax = -THInf;
accreal sum;
long d;
for (d = 0; d < dim; d++)
{
if (input_ptr[d*stride] >= inputMax) inputMax = input_ptr[d*stride];
}
sum = 0;
for (d = 0; d < dim; d++)
{
real z = exp(input_ptr[d*stride] - inputMax);
output_ptr[d*stride] = z;
sum += z;
}
for (d = 0; d < dim; d++)
{
output_ptr[d*stride] *= 1/sum;
}
}
THTensor_(free)(input);
}
void THNN_(SoftMax_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output)
{
real *gradInput_data, *gradOutput_data, *output_data;
long nframe = 0, dim = 0, stride = 0;
long t;
if (output->nDimension == 1)
{
nframe = 1;
dim = output->size[0];
stride = 1;
}
else if (output->nDimension == 2)
{
nframe = output->size[0];
dim = output->size[1];
stride = 1;
}
else if (output->nDimension == 3)
{
nframe = 1;
dim = output->size[0];
stride = output->size[1]*output->size[2];
}
else if (output->nDimension == 4)
{
nframe = output->size[0];
dim = output->size[1];
stride = output->size[2]*output->size[3];
}
else
{
THError("1D, 2D, 3D or 4D tensor expected");
}
gradOutput = THTensor_(newContiguous)(gradOutput);
output = THTensor_(newContiguous)(output);
THTensor_(resizeAs)(gradInput, output);
gradInput_data = THTensor_(data)(gradInput);
output_data = THTensor_(data)(output);
gradOutput_data = THTensor_(data)(gradOutput);
#pragma omp parallel for private(t)
for (t = 0; t < stride*nframe; t++)
{
real *gradInput_ptr = gradInput_data + (t/stride)*dim*stride + t % stride;
real *output_ptr = output_data + (t/stride)*dim*stride + t % stride;
real *gradOutput_ptr = gradOutput_data + (t/stride)*dim*stride + t % stride;
long d;
accreal sum = 0;
for (d = 0; d < dim; d++)
sum += (accreal)gradOutput_ptr[d*stride] * output_ptr[d*stride];
for (d = 0; d < dim; d++)
gradInput_ptr[d*stride] = output_ptr[d*stride] * (gradOutput_ptr[d*stride] - sum);
}
THTensor_(free)(gradOutput);
THTensor_(free)(output);
}
#endif
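As in LogSoftMax, the forward pass subtracts the per-slice maximum before exponentiating for numerical stability, and the backward pass applies the softmax Jacobian as a single vector product:
```latex
y_i = \frac{e^{\,x_i - \max_k x_k}}{\sum_j e^{\,x_j - \max_k x_k}},
\qquad
\frac{\partial L}{\partial x_i} = y_i \Bigl( g_i - \sum_j g_j\, y_j \Bigr)
```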

torch/lib/THNN/generic/SoftPlus.c Normal file
@@ -0,0 +1,42 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftPlus.c"
#else
void THNN_(SoftPlus_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real beta,
real threshold)
{
THTensor_(resizeAs)(output, input);
// f(x) = 1/beta * log(1 + exp(beta * x))
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = (*input_data * beta) > threshold ? *input_data : THLog1p(exp(*input_data * beta)) / beta;
);
}
void THNN_(SoftPlus_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output,
real beta,
real threshold)
{
THTensor_(resizeAs)(gradInput, output);
// d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1)
// SINCE
// y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1)
// THEREFORE:
// d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y)
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
real z = exp(*output_data * beta);
*gradInput_data = (*output_data * beta) > threshold ? *gradOutput_data : *gradOutput_data * (z - 1.)/z;
);
}
#endif

torch/lib/THNN/generic/SoftShrink.c Normal file
@@ -0,0 +1,39 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftShrink.c"
#else
void THNN_(SoftShrink_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real lambda)
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, output, real, input,
if ((*input_data) > lambda)
*output_data = *input_data - lambda;
else if ((*input_data) < -lambda)
*output_data = *input_data + lambda;
else
*output_data = 0;
);
}
void THNN_(SoftShrink_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
real lambda)
{
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
if ((*input_data) > lambda || (*input_data) < -lambda)
*gradInput_data = (*gradOutput_data);
else
*gradInput_data = 0;
);
}
#endif
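/*
 * Illustration, not part of the diff above: soft shrinkage maps x to
 * x - lambda above lambda, x + lambda below -lambda, and 0 in the dead zone,
 * so its derivative is 1 outside [-lambda, lambda] and 0 inside -- exactly the
 * two branches of the kernels above. A hedged one-liner sketch:
 */
static double softshrink_sketch(double x, double lambda)
{
  return x > lambda ? x - lambda : (x < -lambda ? x + lambda : 0);
}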

View File

@ -0,0 +1,550 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SparseLinear.c"
#else
#ifdef _OPENMP
#include <omp.h>
#endif
#define ROW_PTR2(t, r) (THTensor_(data)(t) + (r) * (t)->stride[0])
#define COL_PTR2(t, c) (THTensor_(data)(t) + (c) * (t)->stride[1])
static bool THNN_(checkLegacyInput)(THTensor* t)
{
return t->nDimension == 3 && t->size[2] == 2;
}
static bool THNN_(checkInput)(THTensor* t)
{
return t->nDimension == 2 && t->size[1] == 3;
}
static bool THNN_(checkSize2D)(THTensor* t, long size0, long size1)
{
return t->nDimension == 2 && t->size[0] == size0 && t->size[1] == size1;
}
static bool THNN_(checkSize1D)(THTensor* t, long size0)
{
return t->nDimension == 1 && t->size[0] == size0;
}
static void THNN_(set1d)(THTensor *t, long x0, real value) {
THStorage_(set)(t->storage, t->storageOffset + x0*t->stride[0], value);
}
static real THNN_(get3d)(const THTensor *t, long x0, long x1, long x2) {
return THStorage_(get)(t->storage, t->storageOffset +
x0*t->stride[0] + x1*t->stride[1] + x2*t->stride[2]);
}
static real THNN_(get2d)(const THTensor *t, long x0, long x1) {
return THStorage_(get)(t->storage, t->storageOffset +
x0*t->stride[0] + x1*t->stride[1]);
}
void THNN_(SparseLinear_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias)
{
long h, i, j, hp0, hp1;
long outDim = THTensor_(size)(weight, 0);
long inDim = THTensor_(size)(weight, 1);
long batchSize = THTensor_(size)(output, 0);
THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
long nnz = THTensor_(size)(input, 0);
THLongTensor * csr = THLongTensor_newWithSize1d(batchSize+1);
THLongTensor_zero(csr);
//#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
for (i=0; i<nnz; i++) {
hp0 = (long)(THNN_(get2d)(input, i, 0)) - 1;
hp1 = (i+1 == nnz) ?
batchSize :
(long)(THNN_(get2d)(input, i+1, 0)) - 1;
if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
THLongTensor_set1d(csr, h+1, i+1);
}
}
// output = weight * input + bias
THTensor_(zero)(output);
#pragma omp parallel for private(h, i) schedule(static) if (nnz > 10000)
for (h = 0; h < batchSize; h++) {
long i_start = THLongTensor_get1d(csr, h);
long i_end = THLongTensor_get1d(csr, h+1);
for (i = i_start; i < i_end; i++) {
real val = THNN_(get2d)(input, i, 2);
if (val == 0) {
continue;
}
long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
if (offset >= 0 && offset < inDim) {
THBlas_(axpy)(outDim,
val,
COL_PTR2(weight, offset), weight->stride[0],
ROW_PTR2(output, h), output->stride[1]);
} else {
THError("index out of bound. updateOutput: %d not between 1 and %d",
offset + 1, inDim);
}
}
}
THTensor* output_row = THTensor_(new)();
for (h = 0; h < batchSize; h++) {
THTensor_(select)(output_row, output, 0, h);
THTensor_(cadd)(output_row, bias, 1.0, output_row);
}
THTensor_(free)(output_row);
THLongTensor_free(csr);
}
void THNN_(SparseLinear_legacyUpdateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias)
{
long h, i;
long outDim = THTensor_(size)(weight, 0);
long inDim = THTensor_(size)(weight, 1);
THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2");
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
long batchSize = THTensor_(size)(input, 0);
long nnz = THTensor_(size)(input, 1);
THTensor_(resize2d)(output, batchSize, outDim);
// output = weight * input + bias
THTensor_(zero)(output);
#pragma omp parallel for private(h, i) schedule(static) if ( \
batchSize > 1 && batchSize * nnz * outDim > 10000)
for (h = 0; h < batchSize; h++) {
for (i = 0; i < nnz; i++) {
real val = THNN_(get3d)(input, h, i, 1);
if (val == 0) {
continue;
}
long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1;
if (offset >= 0 && offset < inDim) {
THBlas_(axpy)(outDim,
val,
COL_PTR2(weight, offset), weight->stride[0],
ROW_PTR2(output, h), output->stride[1]);
} else {
THError("index out of bound. updateOutput: %d not between 1 and %d",
offset + 1, inDim);
}
}
}
THTensor* output_row = THTensor_(new)();
for (h = 0; h < batchSize; h++) {
THTensor_(select)(output_row, output, 0, h);
THTensor_(cadd)(output_row, bias, 1.0, output_row);
}
THTensor_(free)(output_row);
}
void THNN_(SparseLinear_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *weight,
THTensor *bias,
real weightDecay,
real scale)
{
long h, i, col, hp0, hp1;
long outDim = THTensor_(size)(weight, 0);
long inDim = THTensor_(size)(weight, 1);
THArgCheck(THNN_(checkInput)(input), 2,
"input must be in coo format, nnz x 3");
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
"gradWeight size wrong");
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
"gradBias size wrong");
THArgCheck(THTensor_(isContiguous)(gradOutput), 3,
"gradOutput must be contiguous");
long nnz = THTensor_(size)(input, 0);
THLongTensor* csc = THLongTensor_newWithSize1d(inDim+1);
THLongTensor_zero(csc);
#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
for (i = 0; i < nnz; i++) {
hp0 = (long)(THNN_(get2d)(input, i, 1)) - 1;
hp1 = (i+1 == nnz) ?
inDim :
(long)(THNN_(get2d)(input, i+1, 1)) - 1;
if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
THLongTensor_set1d(csc, h+1, i+1);
}
}
// gradWeight += gradOutput * input
#pragma omp parallel for private(h, i, col) schedule(static) if (nnz > 10000)
for (col = 0; col < inDim; col++) {
long i_start = THLongTensor_get1d(csc, col);
long i_end = THLongTensor_get1d(csc, col+1);
for (i = i_start; i < i_end; i++) {
real val = scale * THNN_(get2d)(input, i, 2);
h = (long)(THNN_(get2d)(input, i, 0)) - 1;
long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
if (offset >= 0 && offset < inDim) {
THBlas_(axpy)(outDim,
val,
ROW_PTR2(gradOutput, h), gradOutput->stride[1],
COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
} else {
THError(
"index out of bounds. accGradParameters: %ld not between 1 and %ld",
offset + 1,
inDim);
}
}
}
// gradBias += gradOutput
THTensor* buf = THTensor_(new)();
THTensor_(sum)(buf, gradOutput, 0);
THTensor_(cadd)(gradBias, gradBias, scale, buf);
THTensor_(free)(buf);
THLongTensor_free(csc);
if (weightDecay != 0) {
THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
}
}
void THNN_(SparseLinear_legacyAccGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *weight,
THTensor *bias,
real weightDecay,
real scale)
{
long h, i;
long outDim = THTensor_(size)(weight, 0);
long inDim = THTensor_(size)(weight, 1);
THArgCheck(THNN_(checkLegacyInput)(input), 2,
"input size must be batchsize x nnz x 2");
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
"gradWeight size wrong");
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
"gradBias size wrong");
THArgCheck(THTensor_(isContiguous)(gradOutput), 3,
"gradOutput must be contiguous");
long batchSize = THTensor_(size)(input, 0);
long nnz = THTensor_(size)(input, 1);
THTensor_(resize2d)(gradOutput, batchSize, outDim);
// gradWeight += gradOutput * input
#pragma omp parallel for private(h, i) schedule(static) if (\
batchSize * nnz * outDim > 10000)
for (i = 0; i < nnz; i++) {
for (h = 0; h < batchSize; h++) {
real val = scale * THNN_(get3d)(input, h, i, 1);
if (val == 0) {
continue;
}
long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1;
if (offset >= 0 && offset < inDim) {
THBlas_(axpy)(outDim,
val,
ROW_PTR2(gradOutput, h), gradOutput->stride[1],
COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
} else {
THError(
"index out of bounds. accGradParameters: %ld not between 1 and %ld",
offset + 1,
inDim);
}
}
}
// gradBias += gradOutput
THTensor* gradOutput_row = THTensor_(new)();
for (h = 0; h < batchSize; h++) {
THTensor_(select)(gradOutput_row, gradOutput, 0, h);
THTensor_(cadd)(gradBias, gradBias, scale, gradOutput_row);
}
THTensor_(free)(gradOutput_row);
if (weightDecay != 0) {
THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
}
}
void THNN_(SparseLinear_updateParameters)(
THNNState *state,
THTensor *weight,
THTensor *bias,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *lastInput,
real learningRate)
{
long h, i;
long outDim = weight->size[0];
long inDim = weight->size[1];
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
"gradWeight size wrong");
THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong");
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
THArgCheck(THNN_(checkInput)(lastInput), 6,
"input must be in coo format, nnz x 3");
long nnz = THTensor_(size)(lastInput, 0);
// collect unique offsets of non-0 val in input
THTensor* offsets = THTensor_(newWithSize1d)(nnz);
long cnt = 0;
for (i = 0; i < nnz; i++) {
real val = THNN_(get2d)(lastInput, i, 2);
if (val == 0) {
continue;
}
long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1;
if (offset >= 0 && offset < inDim) {
THNN_(set1d)(offsets, cnt++, offset);
} else {
THError(
"index out of bounds. updateParameters: %ld not between 1 and %ld",
offset + 1,
inDim);
}
}
if (cnt == 0) {
THTensor_(free)(offsets);
return;
}
THTensor_(resize1d)(offsets, cnt);
THTensor* uniqueOffsets = THTensor_(new)();
THLongTensor* ri = THLongTensor_new();
THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0);
THLongTensor_free(ri);
THTensor_(free)(offsets);
cnt = 1;
real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets);
for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) {
if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) {
uniqueOffsets_p[cnt++] = uniqueOffsets_p[i];
}
}
THTensor_(resize1d)(uniqueOffsets, cnt);
// weight += -learningRate * gradWeight
THTensor_(cadd)(bias, bias, -learningRate, gradBias);
#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000)
for (i = 0; i < cnt; i++) {
long offset = (long)uniqueOffsets_p[i];
THBlas_(axpy)(outDim,
-learningRate,
COL_PTR2(gradWeight, offset), gradWeight->stride[0],
COL_PTR2(weight, offset), weight->stride[0]);
}
THTensor_(free)(uniqueOffsets);
}
void THNN_(SparseLinear_legacyUpdateParameters)(
THNNState *state,
THTensor *weight,
THTensor *bias,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *lastInput,
real learningRate)
{
long h, i;
long outDim = weight->size[0];
long inDim = weight->size[1];
THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
"gradWeight size wrong");
THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong");
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
THArgCheck(THNN_(checkLegacyInput)(lastInput), 6,
"input size must be batchsize x nnz x 2");
long batchSize = THTensor_(size)(lastInput, 0);
long nnz = THTensor_(size)(lastInput, 1);
// collect unique offsets of non-0 val in input
THTensor* offsets = THTensor_(newWithSize1d)(batchSize * nnz);
long cnt = 0;
for (h = 0; h < batchSize; h++) {
for (i = 0; i < nnz; i++) {
real val = THNN_(get3d)(lastInput, h, i, 1);
if (val == 0 ) {
continue;
}
long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
if (offset >= 0 && offset < inDim) {
THNN_(set1d)(offsets, cnt++, offset);
} else {
THError(
"index out of bounds. updateParameters: %ld not between 1 and %ld",
offset + 1,
inDim);
}
}
}
if (cnt == 0) {
THTensor_(free)(offsets);
return;
}
THTensor_(resize1d)(offsets, cnt);
THTensor* uniqueOffsets = THTensor_(new)();
THLongTensor* ri = THLongTensor_new();
THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0);
THLongTensor_free(ri);
THTensor_(free)(offsets);
cnt = 1;
real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets);
for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) {
if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) {
uniqueOffsets_p[cnt++] = uniqueOffsets_p[i];
}
}
THTensor_(resize1d)(uniqueOffsets, cnt);
// weight += -learningRate * gradWeight
THTensor_(cadd)(bias, bias, -learningRate, gradBias);
#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000)
for (i = 0; i < cnt; i++) {
long offset = (long)uniqueOffsets_p[i];
THBlas_(axpy)(outDim,
-learningRate,
COL_PTR2(gradWeight, offset), gradWeight->stride[0],
COL_PTR2(weight, offset), weight->stride[0]);
}
THTensor_(free)(uniqueOffsets);
}
void THNN_(SparseLinear_zeroGradParameters)(
THNNState *state,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *lastInput)
{
long h, i, j;
long outDim = gradWeight->size[0];
long inDim = gradWeight->size[1];
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
THArgCheck(THNN_(checkInput)(lastInput), 4,
"input must be in coo format, nnz x 3");
THTensor_(zero)(gradBias);
long nnz = THTensor_(size)(lastInput, 0);
#pragma omp parallel for private(i, j) schedule(static) if ( \
nnz * outDim > 10000)
for (i = 0; i < nnz; i++) {
if (THNN_(get2d)(lastInput, i, 2) == 0 ) {
continue;
}
long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1;
if (offset >= 0 && offset < inDim) {
real* pGradWeight = COL_PTR2(gradWeight, offset);
if (gradWeight->stride[0] == 1) {
THVector_(fill)(pGradWeight, 0, outDim);
} else {
long stride = gradWeight->stride[0];
for (j = 0; j < outDim; ++j) {
pGradWeight[j * stride] = 0;
}
}
} else {
THError(
"index out of bounds. zeroGradParameters: %ld not between 1 and %ld",
offset + 1,
inDim);
}
}
}
void THNN_(SparseLinear_legacyZeroGradParameters)(
THNNState *state,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *lastInput)
{
long h, i, j;
long outDim = gradWeight->size[0];
long inDim = gradWeight->size[1];
THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
THArgCheck(THNN_(checkLegacyInput)(lastInput), 4,
"input size must be batchsize x nnz x 2");
THTensor_(zero)(gradBias);
long batchSize = THTensor_(size)(lastInput, 0);
long nnz = THTensor_(size)(lastInput, 1);
#pragma omp parallel for private(h, i, j) schedule(static) if ( \
batchSize > 1 && batchSize * nnz * outDim > 10000)
for (h = 0; h < batchSize; h++) {
for (i = 0; i < nnz; i++) {
if (THNN_(get3d)(lastInput, h, i, 1) == 0 ) {
continue;
}
long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
if (offset >= 0 && offset < inDim) {
real* pGradWeight = COL_PTR2(gradWeight, offset);
if (gradWeight->stride[0] == 1) {
THVector_(fill)(pGradWeight, 0, outDim);
} else {
long stride = gradWeight->stride[0];
for (j = 0; j < outDim; ++j) {
pGradWeight[j * stride] = 0;
}
}
} else {
THError(
"index out of bounds. zeroGradParameters: %ld not between 1 and %ld",
offset + 1,
inDim);
}
}
}
}
#undef ROW_PTR2
#undef COL_PTR2
#endif
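/*
 * Illustration, not part of the diff above: the non-legacy SparseLinear entry
 * points take `input` in COO form, an nnz x 3 tensor whose rows are
 * [sampleIndex, featureIndex, value] with 1-based indices (hence the "- 1"
 * conversions above). A hedged sketch of the same y = W*x + b accumulation
 * over hypothetical plain arrays, W stored row-major as outDim x inDim:
 */
static void sparse_linear_sketch(const double (*coo)[3], long nnz,
                                 const double *W, const double *b, double *y,
                                 long outDim, long inDim, long batchSize)
{
  long i, o, h;
  for (h = 0; h < batchSize; h++)
    for (o = 0; o < outDim; o++)
      y[h * outDim + o] = b[o];            /* start every row from the bias */
  for (i = 0; i < nnz; i++) {
    long sample  = (long)coo[i][0] - 1;    /* 1-based -> 0-based */
    long feature = (long)coo[i][1] - 1;
    double val   = coo[i][2];
    if (val == 0) continue;                /* explicit zeros contribute nothing */
    for (o = 0; o < outDim; o++)           /* y[sample] += val * W[:, feature] */
      y[sample * outDim + o] += val * W[o * inDim + feature];
  }
}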

View File

@ -0,0 +1,274 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialAdaptiveMaxPooling.c"
#else
static void THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
real *input_p,
real *output_p,
real *indx_p,
real *indy_p,
long nslices,
long iwidth,
long iheight,
long owidth,
long oheight,
long stridew,
long strideh,
long strided)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
/* loop over output */
long i, j;
for(i = 0; i < oheight; i++)
{
int y_start = (int)floor((float)i / oheight * iheight);
int y_end = (int)ceil((float)(i + 1) / oheight * iheight);
int kH = y_end-y_start;
for(j = 0; j < owidth; j++)
{
int x_start = (int)floor((float)j / owidth * iwidth);
int x_end = (int)ceil((float)(j + 1) / owidth * iwidth);
int kW = x_end-x_start;
/* local pointers */
real *ip = input_p + k*strided + y_start*strideh + x_start*stridew;
real *op = output_p + k*owidth*oheight + i*owidth + j;
real *indyp = indy_p + k*owidth*oheight + i*owidth + j;
real *indxp = indx_p + k*owidth*oheight + i*owidth + j;
/* compute local max: */
long maxindex = -1;
real maxval = -FLT_MAX;
long tcntr = 0;
int x,y;
for(y = 0; y < kH; y++)
{
for(x = 0; x < kW; x++)
{
real val = *(ip + y*strideh + x*stridew);
if (val > maxval)
{
maxval = val;
maxindex = tcntr;
}
tcntr++;
}
}
/* set output to local max */
*op = maxval;
/* store location of max (x,y) */
*indyp = (int)(maxindex / kW) + TH_INDEX_BASE;
*indxp = (maxindex % kW) + TH_INDEX_BASE;
}
}
}
}
void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *indices,
int owidth,
int oheight)
{
int dimw = 2;
int dimh = 1;
long nbatch = 1;
long nslices;
long iheight;
long iwidth;
long istride_d;
long istride_h;
long istride_w;
long istride_b;
real *input_data;
real *output_data;
real *indices_data;
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
if (input->nDimension == 4)
{
istride_b = input->stride[0];
nbatch = input->size[0];
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
/* strides */
istride_d = input->stride[dimh-1];
istride_h = input->stride[dimh];
istride_w = input->stride[dimw];
/* resize output */
if (input->nDimension == 3)
{
THTensor_(resize3d)(output, nslices, oheight, owidth);
/* indices will contain i,j locations for each output point */
THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
indices_data+nslices*owidth*oheight, indices_data,
nslices,
iwidth, iheight,
owidth, oheight,
istride_w,istride_h,
istride_d);
}
else
{
long p;
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
/* indices will contain i,j locations for each output point */
THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
nslices,
iwidth, iheight,
owidth, oheight,
istride_w,istride_h,
istride_d);
}
}
}
static void THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
real *gradInput_p,
real *gradOutput_p,
real *indx_p,
real *indy_p,
long nslices,
long iwidth,
long iheight,
long owidth,
long oheight)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
real *indx_p_k = indx_p + k*owidth*oheight;
real *indy_p_k = indy_p + k*owidth*oheight;
/* calculate max points */
long i, j;
for(i = 0; i < oheight; i++)
{
int y_start = (int)floor((float) i / oheight * iheight);
for(j = 0; j < owidth; j++)
{
int x_start = (int)floor((float) j / owidth * iwidth);
/* retrieve position of max */
long maxi = indy_p_k[i*owidth + j] - TH_INDEX_BASE + y_start;
long maxj = indx_p_k[i*owidth + j] - TH_INDEX_BASE + x_start;
/* update gradient */
gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j];
}
}
}
}
void THNN_(SpatialAdaptiveMaxPooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *indices)
{
int dimw = 2;
int dimh = 1;
long nbatch = 1;
int nslices;
int iheight;
int iwidth;
int oheight;
int owidth;
real *gradInput_data;
real *gradOutput_data;
real *indices_data;
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
oheight = gradOutput->size[dimh];
owidth = gradOutput->size[dimw];
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
indices_data = THTensor_(data)(indices);
/* backprop */
if (input->nDimension == 3)
{
THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
indices_data+nslices*owidth*oheight, indices_data,
nslices,
iwidth, iheight,
owidth, oheight);
}
else
{
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
nslices,
iwidth, iheight,
owidth, oheight);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif
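/*
 * Illustration, not part of the diff above: adaptive pooling derives each
 * output cell's window from the size ratio alone -- start = floor(i*in/out),
 * end = ceil((i+1)*in/out) -- so any input extent maps onto the requested
 * output extent, with overlapping windows where the ratio is fractional.
 * A hedged sketch that prints the windows; names are hypothetical.
 */
#include <math.h>
#include <stdio.h>
static void print_adaptive_windows_sketch(int isize, int osize)
{
  int i;
  for (i = 0; i < osize; i++) {
    int start = (int)floor((float)i / osize * isize);
    int end   = (int)ceil((float)(i + 1) / osize * isize);
    printf("output %d <- input [%d, %d)\n", i, start, end);
  }
}
/* print_adaptive_windows_sketch(10, 4) gives [0,3) [2,5) [5,8) [7,10) */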

View File

@ -0,0 +1,258 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialAveragePooling.c"
#else
void THNN_(SpatialAveragePooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
bool ceil_mode,
bool count_include_pad)
{
real *output_data;
real *input_data;
int dimw = 2;
int dimh = 1;
int dimc = 0;
long nbatch = 1;
long inputWidth;
long inputHeight;
long outputWidth;
long outputHeight;
long nInputPlane; // number of channels (or colors)
long k;
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, "pad should be at most half of the kernel size");
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
dimc++;
}
inputWidth = input->size[dimw];
inputHeight = input->size[dimh];
nInputPlane = input->size[dimc];
if(ceil_mode)
{
outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
}
else
{
outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
}
if (padW || padH)
{
// ensure that the last pooling starts inside the image
// needed to avoid problems in ceil mode
if ((outputHeight - 1)*dH >= inputHeight + padH)
--outputHeight;
if ((outputWidth - 1)*dW >= inputWidth + padW)
--outputWidth;
}
THArgCheck(inputWidth >= kW - 2 * padW && inputHeight >= kH - 2 * padH, 2, "input image smaller than kernel size");
if (input->nDimension == 3)
THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
else
THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);
input = THTensor_(newContiguous)(input);
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
long p;
for(p = 0; p < nbatch; p++)
{
long xx, yy;
/* For all output pixels... */
real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
long i;
for(i = 0; i < outputWidth*outputHeight; i++)
ptr_output[i] = 0;
for(yy = 0; yy < outputHeight; yy++)
{
for(xx = 0; xx < outputWidth; xx++)
{
/* Compute the mean of the input image... */
long hstart = yy * dH - padH;
long wstart = xx * dW - padW;
long hend = fminf(hstart + kH, inputHeight + padH);
long wend = fminf(wstart + kW, inputWidth + padW);
int pool_size = (hend - hstart) * (wend - wstart);
hstart = fmaxf(hstart, 0);
wstart = fmaxf(wstart, 0);
hend = fminf(hend, inputHeight);
wend = fminf(wend, inputWidth);
real sum = 0;
int divide_factor;
if(count_include_pad)
divide_factor = pool_size;
else
divide_factor = (hend - hstart) * (wend - wstart);
long kx, ky;
for(ky = hstart; ky < hend; ky++)
{
for(kx = wstart; kx < wend; kx++)
sum += ptr_input[ky*inputWidth + kx];
}
/* Update output */
*ptr_output++ += sum/divide_factor;
}
}
}
}
THTensor_(free)(input);
}
void THNN_(SpatialAveragePooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
bool ceil_mode,
bool count_include_pad)
{
int dimw = 2;
int dimh = 1;
int dimc = 0;
long nbatch = 1;
long inputWidth;
long inputHeight;
long outputWidth;
long outputHeight;
long nInputPlane; // number of channels (or colors)
real *gradOutput_data;
real *gradInput_data;
long k;
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
dimc++;
}
inputWidth = input->size[dimw];
inputHeight = input->size[dimh];
nInputPlane = input->size[dimc];
if(ceil_mode)
{
outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
}
else
{
outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
}
if (padW || padH)
{
// ensure that the last pooling starts inside the image
// needed to avoid problems in ceil mode
if ((outputHeight - 1)*dH >= inputHeight + padH)
--outputHeight;
if ((outputWidth - 1)*dW >= inputWidth + padW)
--outputWidth;
}
THTensor_(resizeAs)(gradInput, input);
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous");
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
long p;
for(p = 0; p < nbatch; p++)
{
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
long xx, yy;
real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
long i;
/* zero this gradInput plane before accumulating gradients into it */
for(i = 0; i < inputWidth*inputHeight; i++)
ptr_gradInput[i] = 0.0;
for(yy = 0; yy < outputHeight; yy++)
{
for(xx = 0; xx < outputWidth; xx++)
{
long hstart = yy * dH - padH;
long wstart = xx * dW - padW;
long hend = fminf(hstart + kH, inputHeight + padH);
long wend = fminf(wstart + kW, inputWidth + padW);
int pool_size = (hend - hstart) * (wend - wstart);
hstart = fmaxf(hstart, 0);
wstart = fmaxf(wstart, 0);
hend = fminf(hend, inputHeight);
wend = fminf(wend, inputWidth);
real z = *ptr_gradOutput++;
int divide_factor;
if(count_include_pad)
divide_factor = pool_size;
else
divide_factor = (hend - hstart) * (wend - wstart);
long kx, ky;
for(ky = hstart ; ky < hend; ky++)
{
for(kx = wstart; kx < wend; kx++)
ptr_gradInput[ky*inputWidth + kx] += z/divide_factor;
}
}
}
}
}
THTensor_(free)(input);
THTensor_(free)(gradOutput);
}
#endif
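/*
 * Illustration, not part of the diff above: the output extent used by both
 * kernels is floor((in + 2*pad - k) / d) + 1 (ceil() in ceil_mode), clipped so
 * the last window still starts inside the image; `count_include_pad` then
 * decides whether each window's sum is divided by the full window count or
 * only by the cells that fell inside the image. A hedged sketch of the rule:
 */
#include <math.h>
static long pooled_size_sketch(long in, int k, int d, int pad, int ceil_mode)
{
  long out = ceil_mode
    ? (long)ceil((float)(in - k + 2*pad) / d) + 1
    : (long)floor((float)(in - k + 2*pad) / d) + 1;
  if (pad && (out - 1) * d >= in + pad)
    --out;  /* last window must start inside the image, as in the code above */
  return out;
}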

View File

@ -0,0 +1,128 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialClassNLLCriterion.c"
#else
#define INITIAL_CHECK \
THArgCheck(THIndexTensor_(nDimension)(target) == 3, 3, \
"only batches of spatial targets supported (3D tensors)"); \
THArgCheck(THTensor_(nDimension)(input) == 4, 2, \
"only batches of spatial inputs supported (4D tensors)"); \
if (weights && THTensor_(nElement)(weights) != THTensor_(size)(input, 1)) { \
THError("weight tensor should be defined either for all or no classes"); \
} \
\
{ \
long input0 = THTensor_(size)(input, 0); \
long input1 = THTensor_(size)(input, 1); \
long input2 = THTensor_(size)(input, 2); \
long input3 = THTensor_(size)(input, 3); \
long target0 = THIndexTensor_(size)(target, 0); \
long target1 = THIndexTensor_(size)(target, 1); \
long target2 = THIndexTensor_(size)(target, 2); \
THAssertMsg(input0 == target0 && input2 == target1 && input3 == target2, \
"size mismatch (got input: %ldx%ldx%ldx%ld, target: %ldx%ldx%ld)", \
input0, input1, input2, input3, target0, target1, target2); \
}
void THNN_(SpatialClassNLLCriterion_updateOutput)(
THNNState *state,
THTensor *input,
THIndexTensor *target,
THTensor *output,
bool sizeAverage,
THTensor *weights,
THTensor *total_weight)
{
INITIAL_CHECK;
input = THTensor_(newContiguous)(input);
target = THIndexTensor_(newContiguous)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
real *input_data = THTensor_(data)(input);
THIndex_t *target_data = THIndexTensor_(data)(target);
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
real *output_data = THTensor_(data)(output);
real *total_weight_data = THTensor_(data)(total_weight);
long batch_size = THTensor_(size)(input, 0);
long n_classes = THTensor_(size)(input, 1);
long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
long sample_size = map_size * n_classes;
real total_weight_acc = 0;
real output_acc = 0;
for (int b = 0; b < batch_size; b++) {
for (int elem = 0; elem < map_size; elem++) {
int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
real cur_weight = weights ? weights_data[cur_target] : 1.0f;
total_weight_acc += cur_weight;
output_acc -= input_data[b * sample_size + cur_target * map_size + elem] * cur_weight;
}
}
*total_weight_data = total_weight_acc;
*output_data = output_acc;
if (sizeAverage && *total_weight_data)
*output_data /= *total_weight_data;
THTensor_(free)(input);
THIndexTensor_(free)(target);
if (weights)
THTensor_(free)(weights);
}
void THNN_(SpatialClassNLLCriterion_updateGradInput)(
THNNState *state,
THTensor *input,
THIndexTensor *target,
THTensor *gradInput,
bool sizeAverage,
THTensor *weights,
THTensor *total_weight)
{
INITIAL_CHECK;
THArgCheck(THTensor_(isContiguous)(gradInput), 4,
"gradInput must be contiguous");
real *total_weight_data = THTensor_(data)(total_weight);
if (*total_weight_data <= 0)
return;
target = THIndexTensor_(newContiguous)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
THIndex_t *target_data = THIndexTensor_(data)(target);
real *weights_data = weights ? THTensor_(data)(weights) : NULL;
real *gradInput_data = THTensor_(data)(gradInput);
long batch_size = THTensor_(size)(input, 0);
long n_classes = THTensor_(size)(input, 1);
long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
long sample_size = map_size * n_classes;
real normalize = sizeAverage ? *total_weight_data : 1.0f;
int b;
#pragma omp parallel for
for (b = 0; b < batch_size; b++) {
int elem;
for (elem = 0; elem < map_size; elem++) {
int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
gradInput_data[b * sample_size + cur_target * map_size + elem] =
-(weights ? weights_data[cur_target] : 1.0f) / normalize;
}
}
THIndexTensor_(free)(target);
if (weights)
THTensor_(free)(weights);
}
#undef INITIAL_CHECK
#endif
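/*
 * Illustration, not part of the diff above: per pixel the criterion reads the
 * (log-)probability of the target class and accumulates
 * loss -= w[target] * input[b][target][y][x], normalizing by the summed class
 * weights when sizeAverage is set. A hedged sketch of one accumulation step
 * over hypothetical arrays, with input_b laid out class-major:
 */
static void spatial_nll_step_sketch(const double *input_b, long n_classes,
                                    long map_size, long pixel, long target,
                                    const double *weights /* may be NULL */,
                                    double *loss, double *total_weight)
{
  double w;
  if (target < 0 || target >= n_classes)
    return;                                /* the real code asserts instead */
  w = weights ? weights[target] : 1.0;
  *total_weight += w;
  *loss -= w * input_b[target * map_size + pixel];
}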

View File

@ -0,0 +1,241 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialConvolutionLocal.c"
#else
static void THNN_(SpatialConvolutionLocal_updateOutput_frame)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput,
int kW, int kH, int dW, int dH, int padW, int padH,
long nInputPlane, long inputWidth, long inputHeight,
long nOutputPlane, long outputWidth, long outputHeight)
{
long i;
THTensor *output3d, *finput3d;
THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
THTensor_(copy)(output, bias);
output3d = THTensor_(newWithStorage3d)(output->storage, output->storageOffset,
outputHeight*outputWidth, 1,
nOutputPlane, outputHeight*outputWidth,
1, nOutputPlane*outputHeight*outputWidth);
finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset,
outputHeight*outputWidth, 1,
kW*kH*nInputPlane, outputHeight*outputWidth,
1, kW*kH*nInputPlane*outputHeight*outputWidth);
// weight: oH*oW x nOutputPlane x nInputPlane*kH*kW
// finput3d: oH*oW x nInputPlane*kH*kW x 1
THTensor_(baddbmm)(output3d, 1.0, output3d, 1.0, weight, finput3d);
// output3d: oH*oW x nOutputPlane x 1
THTensor_(free)(output3d);
THTensor_(free)(finput3d);
}
void THNN_(SpatialConvolutionLocal_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *finput,
THTensor *fgradInput,
int kW, int kH,
int dW, int dH,
int padW, int padH,
long inputWidth, long inputHeight,
long outputWidth, long outputHeight)
{
long nInputPlane = THTensor_(size)(weight,2)/(kW*kH);
long nOutputPlane = THTensor_(size)(weight,1);
if(input->nDimension == 3)
{
THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
THNN_(SpatialConvolutionLocal_updateOutput_frame)(input, output, weight, bias, finput,
kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
}
else
{
long T = input->size[0];
long t;
THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);
#pragma omp parallel for private(t)
for(t = 0; t < T; t++)
{
THTensor *input_t = THTensor_(newSelect)(input, 0, t);
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
THNN_(SpatialConvolutionLocal_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
THTensor_(free)(input_t);
THTensor_(free)(output_t);
THTensor_(free)(finput_t);
}
}
}
static void THNN_(SpatialConvolutionLocal_updateGradInput_frame)(THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput,
int kW, int kH, int dW, int dH, int padW, int padH,
long nInputPlane, long inputWidth, long inputHeight,
long nOutputPlane, long outputWidth, long outputHeight)
{
THTensor *gradOutput3d, *fgradInput3d;
gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
outputHeight*outputWidth, 1,
nOutputPlane, outputHeight*outputWidth,
1, nOutputPlane*outputHeight*outputWidth);
fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
outputHeight*outputWidth, 1,
kW*kH*nInputPlane, outputHeight*outputWidth,
1, kW*kH*nInputPlane*outputHeight*outputWidth);
// weight: oH*oW x nInputPlane*kH*kW x nOutputPlane
// gradOutput3d: oH*oW x nOutputPlane x 1
THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d);
// fgradInput3d: oH*oW x nInputPlane*kH*kW x 1
THTensor_(free)(gradOutput3d);
THTensor_(free)(fgradInput3d);
THTensor_(zero)(gradInput);
THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
}
void THNN_(SpatialConvolutionLocal_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *finput,
THTensor *fgradInput,
int kW, int kH,
int dW, int dH,
int padW, int padH,
long inputWidth, long inputHeight,
long outputWidth, long outputHeight)
{
long nInputPlane = THTensor_(size)(weight,2)/(kW*kH);
long nOutputPlane = THTensor_(size)(weight,1);
THTensor_(resizeAs)(gradInput, input);
THTensor_(resizeAs)(fgradInput, finput);
THTensor_(transpose)(weight, weight, 1, 2);
if(input->nDimension == 3)
{
THNN_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
}
else
{
long T = input->size[0];
long t;
#pragma omp parallel for private(t)
for(t = 0; t < T; t++)
{
THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
THNN_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
THTensor_(free)(gradInput_t);
THTensor_(free)(gradOutput_t);
THTensor_(free)(fgradInput_t);
}
}
THTensor_(transpose)(weight, weight, 1, 2);
}
static void THNN_(SpatialConvolutionLocal_accGradParameters_frame)(THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, real scale,
int kW, int kH, int dW, int dH, int padW, int padH,
long nInputPlane, long inputWidth, long inputHeight,
long nOutputPlane, long outputWidth, long outputHeight)
{
THTensor *gradOutput3d, *finput3d;
gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
outputHeight*outputWidth, 1,
nOutputPlane, outputHeight*outputWidth,
1, nOutputPlane*outputHeight*outputWidth);
finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset,
outputHeight*outputWidth, 1,
1, kW*kH*nInputPlane*outputHeight*outputWidth,
kW*kH*nInputPlane, outputHeight*outputWidth);
// gradOutput3d: oH*oW x nOutputPlane x 1
// finput3d: oH*oW x 1 x kW*kH*nInputPlane
THTensor_(baddbmm)(gradWeight, 1.0, gradWeight, scale, gradOutput3d, finput3d);
// gradWeight: oH*oW x nOutputPlane x kW*kH*nInputPlane
THTensor_(cadd)(gradBias, gradBias, scale, gradOutput);
THTensor_(free)(gradOutput3d);
THTensor_(free)(finput3d);
}
void THNN_(SpatialConvolutionLocal_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput,
THTensor *fgradInput,
int kW, int kH,
int dW, int dH,
int padW, int padH,
long inputWidth, long inputHeight,
long outputWidth, long outputHeight,
real scale)
{
long nInputPlane = THTensor_(size)(gradWeight,2)/(kW*kH);
long nOutputPlane = THTensor_(size)(gradWeight,1);
if(input->nDimension == 3)
{
THNN_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale, kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
}
else
{
long T = input->size[0];
long t;
for(t = 0; t < T; t++)
{
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
THNN_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale, kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
THTensor_(free)(gradOutput_t);
THTensor_(free)(finput_t);
}
}
}
#endif
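/*
 * Illustration, not part of the diff above: a locally-connected layer keeps a
 * separate kernel per output location, which is why the batched product above
 * pairs a weight tensor of shape oH*oW x nOutputPlane x nInputPlane*kH*kW with
 * unfolded input columns of shape oH*oW x nInputPlane*kH*kW x 1. A hedged
 * sketch of the resulting parameter count:
 */
static long local_conv_weights_sketch(long nInputPlane, long nOutputPlane,
                                      int kW, int kH,
                                      long outputWidth, long outputHeight)
{
  /* every output pixel owns its own nOutputPlane x (nInputPlane*kH*kW) matrix */
  return outputHeight * outputWidth * nOutputPlane * nInputPlane * kH * kW;
}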

View File

@ -0,0 +1,284 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialConvolutionMM.c"
#else
static void THNN_(SpatialConvolutionMM_updateOutput_frame)(
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *finput,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
long nInputPlane,
long inputWidth,
long inputHeight,
long nOutputPlane,
long outputWidth,
long outputHeight)
{
long i;
THTensor *output2d;
THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
nOutputPlane, -1,
outputHeight*outputWidth, -1);
if (bias) {
for(i = 0; i < nOutputPlane; i++)
THVector_(fill)(output->storage->data+output->storageOffset+output->stride[0]*i, THTensor_(get1d)(bias, i), outputHeight*outputWidth);
} else {
THTensor_(zero)(output);
}
THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
THTensor_(free)(output2d);
}
void THNN_(SpatialConvolutionMM_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *finput,
THTensor *fgradInput,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH)
{
int dimf = 0;
int dimw = 2;
int dimh = 1;
long nInputPlane;
long inputWidth;
long inputHeight;
long nOutputPlane;
long outputWidth;
long outputHeight;
THArgCheck( input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
if (input->nDimension == 4) {
dimf++;
dimw++;
dimh++;
}
nInputPlane = input->size[dimf];
inputWidth = input->size[dimw];
inputHeight = input->size[dimh];
nOutputPlane = weight->size[0];
outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
if (outputWidth < 1 || outputHeight < 1)
THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",
nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
if (nInputPlane*kW*kH != weight->size[1])
THError("Wrong number of input channels! Input has %d channels, expected %d",nInputPlane,weight->size[1]/(kW*kH));
if(input->nDimension == 3)
{
THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
THNN_(SpatialConvolutionMM_updateOutput_frame)(input, output, weight, bias, finput,
kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
}
else
{
long T = input->size[0];
long t;
THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);
#pragma omp parallel for private(t)
for(t = 0; t < T; t++)
{
THTensor *input_t = THTensor_(newSelect)(input, 0, t);
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
THNN_(SpatialConvolutionMM_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
nOutputPlane, outputWidth, outputHeight);
THTensor_(free)(input_t);
THTensor_(free)(output_t);
THTensor_(free)(finput_t);
}
}
}
static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(
THTensor *gradInput,
THTensor *gradOutput,
THTensor *weight,
THTensor *fgradInput,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH)
{
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
gradOutput->size[0], -1,
gradOutput->size[1]*gradOutput->size[2], -1);
THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
THTensor_(free)(gradOutput2d);
THTensor_(zero)(gradInput);
THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH, gradInput->size[0], gradInput->size[2], gradInput->size[1], gradOutput->size[2], gradOutput->size[1]);
}
void THNN_(SpatialConvolutionMM_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *finput,
THTensor *fgradInput,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH)
{
long nOutputPlane = weight->size[0];
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");
THTensor_(resizeAs)(gradInput, input);
THTensor_(resizeAs)(fgradInput, finput);
// depending on the BLAS library, fgradInput (result tensor) might
// be left uninitialized on zero alpha, which might lead to weird behavior
// hence, to be safe, zero it
THTensor_(zero)(fgradInput);
THTensor_(transpose)(weight, weight, 0, 1);
if(input->nDimension == 3)
{
THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
}
else
{
long T = input->size[0];
long t;
#pragma omp parallel for private(t)
for(t = 0; t < T; t++)
{
THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);
THTensor_(free)(gradInput_t);
THTensor_(free)(gradOutput_t);
THTensor_(free)(fgradInput_t);
}
}
THTensor_(transpose)(weight, weight, 0, 1);
}
static void THNN_(SpatialConvolutionMM_accGradParameters_frame)(
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput,
real scale)
{
long i;
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
gradOutput->size[0], -1,
gradOutput->size[1]*gradOutput->size[2], -1);
THTensor_(transpose)(finput, finput, 0, 1);
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
THTensor_(transpose)(finput, finput, 0, 1);
if (gradBias) {
for(i = 0; i < gradBias->size[0]; i++)
{
long k;
real sum = 0;
real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
for(k = 0; k < gradOutput2d->size[1]; k++)
sum += data[k];
(gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum;
}
}
THTensor_(free)(gradOutput2d);
}
void THNN_(SpatialConvolutionMM_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput,
THTensor *fgradInput,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
real scale)
{
long nOutputPlane = gradWeight->size[0];
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
if(input->nDimension == 3)
{
THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
}
else
{
long T = input->size[0];
long t;
for(t = 0; t < T; t++)
{
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
THTensor_(free)(gradOutput_t);
THTensor_(free)(finput_t);
}
}
}
#endif
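/*
 * Illustration, not part of the diff above: the MM variant lowers convolution
 * to matrix multiplication -- unfolded_copy lays every kW x kH input patch out
 * as a column of finput (nInputPlane*kH*kW x oH*oW), so one GEMM against the
 * nOutputPlane x nInputPlane*kH*kW weight matrix produces all output pixels.
 * A hedged standalone sketch of the patch-to-column indexing ("im2col"):
 */
static void im2col_sketch(const double *im, double *col,
                          int channels, int height, int width,
                          int kH, int kW, int dH, int dW, int padH, int padW,
                          int outH, int outW)
{
  int c, kh, kw, oh, ow;
  for (c = 0; c < channels; c++)
    for (kh = 0; kh < kH; kh++)
      for (kw = 0; kw < kW; kw++)
        for (oh = 0; oh < outH; oh++)
          for (ow = 0; ow < outW; ow++) {
            int ih = oh * dH - padH + kh;   /* source row; may land in padding */
            int iw = ow * dW - padW + kw;   /* source col; may land in padding */
            long row = ((long)c * kH + kh) * kW + kw;
            col[row * outH * outW + (long)oh * outW + ow] =
              (ih >= 0 && ih < height && iw >= 0 && iw < width)
                ? im[((long)c * height + ih) * width + iw]
                : 0;                        /* zero padding */
          }
}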

View File

@ -0,0 +1,259 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c"
#else
void THNN_(SpatialConvolutionMap_updateOutput)(
THNNState *state, THTensor *input, THTensor *output, THTensor *weight, THTensor *bias,
THTensor *connTable, int nInputPlane, int nOutputPlane,
int dW, int dH)
{
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 4,
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
real *weight_data = THTensor_(data)(weight);
real *bias_data = THTensor_(data)(bias);
real *connTable_data = THTensor_(data)(connTable);
int dimw = 2;
int dimh = 1;
int dimc = 0;
long nbatch = 1;
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimc++;
dimw++;
dimh++;
}
const long kH = weight->size[1];
const long kW = weight->size[2];
THArgCheck(input->size[dimc] >= nInputPlane, 2, "invalid number of input planes");
THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size");
const long input_w = input->size[dimw];
const long input_h = input->size[dimh];
const long output_w = (input_w - kW) / dW + 1;
const long output_h = (input_h - kH) / dH + 1;
if (input->nDimension == 3)
THTensor_(resize3d)(output, nOutputPlane, output_h, output_w);
else
THTensor_(resize4d)(output, input->size[0], nOutputPlane, output_h, output_w);
/* contiguous */
input = THTensor_(newContiguous)(input);
output = THTensor_(newContiguous)(output);
/* get raw pointers */
real *input_data = THTensor_(data)(input);
real *output_data = THTensor_(data)(output);
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nOutputPlane; p++)
{
long m;
for (m = 0; m < nbatch; m++)
{
/* add bias */
real *ptr_output = output_data + p*output_w*output_h + m*nOutputPlane*output_w*output_h;
long j, k;
real z = bias_data[p];
for (j = 0; j < output_h*output_w; j++)
ptr_output[j] = z;
/* convolve all maps */
int nweight = connTable->size[0];
for (k = 0; k < nweight; k++)
{
/* get offsets for input/output */
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (o == p)
{
THTensor_(validXCorr2Dptr)(
output_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h,
1.0,
input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w,
weight_data + k*kW*kH,
kH, kW,
dH, dW
);
}
}
}
}
/* clean up */
THTensor_(free)(input);
THTensor_(free)(output);
}
void THNN_(SpatialConvolutionMap_updateGradInput)(
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *weight, THTensor *bias,
THTensor *connTable, int nInputPlane, int nOutputPlane,
int dW, int dH)
{
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 5,
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
real *weight_data = THTensor_(data)(weight);
real *connTable_data = THTensor_(data)(connTable);
/* and dims */
int dimw = 2;
int dimh = 1;
long nbatch = 1;
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
}
const long input_h = input->size[dimh];
const long input_w = input->size[dimw];
const long output_h = gradOutput->size[dimh];
const long output_w = gradOutput->size[dimw];
const long kH = weight->size[1];
const long kW = weight->size[2];
/* contiguous */
gradInput = THTensor_(newContiguous)(gradInput);
gradOutput = THTensor_(newContiguous)(gradOutput);
/* Resize/Zero */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
/* get raw pointers */
real *gradInput_data = THTensor_(data)(gradInput);
real *gradOutput_data = THTensor_(data)(gradOutput);
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nInputPlane; p++)
{
long m;
for (m = 0; m < nbatch; m++)
{
long k;
/* backward all */
int nkernel = connTable->size[0];
for (k = 0; k < nkernel; k++)
{
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (i == p)
{
/* gradient to input */
THTensor_(fullConv2Dptr)(
gradInput_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, 1.0,
gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h, output_h, output_w,
weight_data + k*kW*kH, kH, kW, dH, dW
);
}
}
}
}
/* clean up */
THTensor_(free)(gradInput);
THTensor_(free)(gradOutput);
}
void THNN_(SpatialConvolutionMap_accGradParameters)(
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
THTensor *connTable, int nInputPlane, int nOutputPlane,
int dW, int dH, real scale)
{
THArgCheck(
gradWeight != NULL && gradWeight->nDimension == 3
&& connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
"3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
real *gradWeight_data = THTensor_(data)(gradWeight);
real *gradBias_data = THTensor_(data)(gradBias);
/* and dims */
int dimw = 2;
int dimh = 1;
long nbatch = 1;
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
}
const long input_h = input->size[dimh];
const long input_w = input->size[dimw];
const long output_h = gradOutput->size[dimh];
const long output_w = gradOutput->size[dimw];
const long kH = gradWeight->size[1];
const long kW = gradWeight->size[2];
/* contiguous */
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
/* get raw pointers */
real *input_data = THTensor_(data)(input);
real *gradOutput_data = THTensor_(data)(gradOutput);
long k;
/* gradients wrt bias */
#pragma omp parallel for private(k)
for (k = 0; k < nOutputPlane; k++)
{
long m;
for (m = 0; m < nbatch; m++)
{
real *ptr_gradOutput = gradOutput_data + k*output_w*output_h + m*nOutputPlane*output_w*output_h;
long l;
for (l = 0; l < output_h*output_w; l++)
gradBias_data[k] += scale*ptr_gradOutput[l];
}
}
/* gradients wrt weight */
const int nkernel = connTable->size[0];
#pragma omp parallel for private(k)
for (k = 0; k < nkernel; k++)
{
long m;
for (m = 0; m < nbatch; m++)
{
int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
/* gradient to kernel */
THTensor_(validXCorr2DRevptr)(
gradWeight_data + k*kW*kH,
scale,
input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w,
gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h , output_h, output_w,
dH, dW
);
}
}
/* clean up */
THTensor_(free)(input);
THTensor_(free)(gradOutput);
}
#endif
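/*
 * Illustration, not part of the diff above: `connTable` is an nkernel x 2
 * table of 1-based [inputPlane, outputPlane] pairs -- kernel k correlates
 * input map connTable[k][0] into output map connTable[k][1], which is how
 * partial connectivity (as in LeNet-style maps) is expressed. A hedged sketch
 * that builds the fully-connected table for nIn x nOut maps:
 */
static void full_conn_table_sketch(double *connTable /* (nIn*nOut) x 2 */,
                                   int nIn, int nOut)
{
  int i, o, k = 0;
  for (i = 0; i < nIn; i++)
    for (o = 0; o < nOut; o++) {
      connTable[k*2 + 0] = i + 1;  /* 1-based input plane */
      connTable[k*2 + 1] = o + 1;  /* 1-based output plane */
      k++;
    }
}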

View File

@ -0,0 +1,339 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialDilatedConvolution.c"
#else
void THNN_(SpatialDilatedConvolution_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *columns,
THTensor *ones,
int kW, int kH,
int dW, int dH,
int padW, int padH,
int dilationW, int dilationH)
{
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
THArgCheck(weight->nDimension == 4, 4, "weight tensor must be 4D (nOutputPlane,nInputPlane,kH,kW)");
THArgCheck(!bias || weight->size[0] == bias->size[0], 4, "nOutputPlane mismatch in weight and bias");
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
// Params:
int nInputPlane = weight->size[1];
int nOutputPlane = weight->size[0];
int batch = 1;
if (input->nDimension == 3) {
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
// Force batch
batch = 0;
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
} else {
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
}
long inputWidth = input->size[3];
long inputHeight = input->size[2];
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
if (outputWidth < 1 || outputHeight < 1)
THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",
nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
// Batch size + input planes
long batchSize = input->size[0];
// Resize output
THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
THTensor_(zero)(output);
// Resize temporary columns
THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
// Define a buffer of ones, for bias accumulation
// Note: this buffer can be shared with other modules, it only ever gets increased,
// and always contains ones.
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
// Resize plane and fill with ones...
THTensor_(resize2d)(ones, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *output_n = THTensor_(new)();
// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt ++) {
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(output_n, output, 0, elt);
// Do Bias first:
// M,N,K are dims of matrix A and B
long m_ = nOutputPlane;
long n_ = outputHeight * outputWidth;
long k_ = 1;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
if (bias) {
THBlas_(gemm)(
't', 'n',
n_, m_, k_,
1,
THTensor_(data)(ones), k_,
THTensor_(data)(bias), k_,
0,
THTensor_(data)(output_n), n_
);
} else {
THTensor_(zero)(output_n);
}
// Extract columns:
THNN_(im2col)(
THTensor_(data)(input_n),
nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
dilationH, dilationW,
THTensor_(data)(columns)
);
// M,N,K are dims of matrix A and B
long m = nOutputPlane;
long n = columns->size[1];
long k = nInputPlane*kH*kW;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 'n',
n, m, k,
1,
THTensor_(data)(columns), n,
THTensor_(data)(weight), k,
1,
THTensor_(data)(output_n), n
);
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(output_n);
// Resize output
if (batch == 0) {
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
}
}
void THNN_(SpatialDilatedConvolution_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *gradColumns,
int kW, int kH,
int dW, int dH,
int padW, int padH,
int dilationW, int dilationH)
{
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
THArgCheck(weight->nDimension == 4, 4, "weight tensor must be 4D (nOutputPlane,nInputPlane,kH,kW)");
THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");
// Params
int nInputPlane = weight->size[1];
int nOutputPlane = weight->size[0];
int batch = 1;
if (input->nDimension == 3) {
// Force batch
batch = 0;
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
}
long inputWidth = input->size[3];
long inputHeight = input->size[2];
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
// Batch size + input planes
long batchSize = input->size[0];
// Resize gradInput
THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nInputPlane*kW*kH, outputHeight*outputWidth);
THTensor_(zero)(gradColumns);
// Helpers
THTensor *gradInput_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt++) {
// Matrix multiply per sample:
THTensor_(select)(gradInput_n, gradInput, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// M,N,K are dims of matrix A and B
long m = nInputPlane*kW*kH;
long n = gradColumns->size[1];
long k = nOutputPlane;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 't',
n, m, k,
1,
THTensor_(data)(gradOutput_n), n,
THTensor_(data)(weight), m,
0,
THTensor_(data)(gradColumns), n
);
// Unpack columns back into input:
THNN_(col2im)(
THTensor_(data)(gradColumns),
nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
dilationH, dilationW,
THTensor_(data)(gradInput_n)
);
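// col2im is the adjoint of im2col: gradients of overlapping receptive fields
// are summed back into gradInput at the positions they were read from.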
}
// Free
THTensor_(free)(gradInput_n);
THTensor_(free)(gradOutput_n);
// Restore non-batch sizes
if (batch == 0) {
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
}
}
void THNN_(SpatialDilatedConvolution_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *columns,
THTensor *ones,
int kW, int kH,
int dW, int dH,
int padW, int padH,
int dilationW, int dilationH,
real scale)
{
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
THArgCheck(gradWeight->nDimension == 4, 4, "gradWeight tensor must be 4D (nOutputPlane,nInputPlane,kH,kW)");
THArgCheck(!gradBias || gradWeight->size[0] == gradBias->size[0], 4, "nOutputPlane mismatch in gradWeight and gradBias");
THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
// Params
int nInputPlane = gradWeight->size[1];
int nOutputPlane = gradWeight->size[0];
int batch = 1;
if (input->nDimension == 3) {
// Force batch
batch = 0;
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
}
long inputWidth = input->size[3];
long inputHeight = input->size[2];
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
// Batch size + input planes
long batchSize = input->size[0];
// Define a buffer of ones, for bias accumulation
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
// Resize plane and fill with ones...
THTensor_(resize2d)(ones, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Resize temporary columns
THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt++) {
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// Extract columns:
THNN_(im2col)(
THTensor_(data)(input_n),
nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
dilationH, dilationW,
THTensor_(data)(columns)
);
// M,N,K are dims of matrix A and B
long m = nOutputPlane;
long n = nInputPlane*kW*kH;
long k = columns->size[1];
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
't', 'n',
n, m, k,
scale,
THTensor_(data)(columns), k,
THTensor_(data)(gradOutput_n), k,
1,
THTensor_(data)(gradWeight), n
);
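// Row-major, this accumulates gradWeight += scale * gradOutput_n * columns^T,
// correlating each output-gradient plane with the input patches that fed it.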
// Do Bias:
// M,N,K are dims of matrix A and B
long m_ = nOutputPlane;
long k_ = outputHeight * outputWidth;
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
if (gradBias) {
THBlas_(gemv)(
't',
k_, m_,
scale,
THTensor_(data)(gradOutput_n), k_,
THTensor_(data)(ones), 1,
1,
THTensor_(data)(gradBias), 1
);
}
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(gradOutput_n);
// Resize
if (batch == 0) {
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
}
}
#endif

251 torch/lib/THNN/generic/SpatialFractionalMaxPooling.c Normal file
View File

@ -0,0 +1,251 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialFractionalMaxPooling.c"
#else
static long* THNN_(SpatialFractionalMaxPooling_generateIntervals)(
real sample,
long inputSize,
long outputSize,
int poolSize) {
real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1);
long* sequence = (long*) THAlloc(sizeof(long) * outputSize);
long i;
for (i = 0; i < outputSize - 1; ++i) {
sequence[i] =
(long) ((i + sample) * alpha) - (long) (sample * alpha);
}
sequence[outputSize - 1] = inputSize - poolSize;
return sequence;
}
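/* Example: inputSize=9, outputSize=4, poolSize=2 gives alpha=7/3; for
 * sample u=0.5 the window starts are 0,2,4 and the last window is pinned at
 * inputSize-poolSize=7, so size-2 windows tile 9 inputs into 4 outputs. */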
static void THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
real* input,
real* output,
real* indices,
real* randomSamples,
long numPlanes,
long inputW, long inputH,
long outputW, long outputH,
int poolSizeW, int poolSizeH) {
long plane;
#pragma omp parallel for private(plane)
for (plane = 0; plane < numPlanes; ++plane) {
/* each plane contains 2 random samples, one for W and one for H */
real* randomSamplesForPlane = randomSamples + plane * 2;
/* Generate interval sequence */
long* sequenceW =
THNN_(SpatialFractionalMaxPooling_generateIntervals)(
randomSamplesForPlane[0], inputW, outputW, poolSizeW);
long* sequenceH =
THNN_(SpatialFractionalMaxPooling_generateIntervals)(
randomSamplesForPlane[1], inputH, outputH, poolSizeH);
/* loop over output */
long h, w;
real* inputForPlane = input + plane * inputW * inputH;
real* outputForPlane = output + plane * outputW * outputH;
real* indicesForPlane = indices + plane * outputW * outputH;
for (h = 0; h < outputH; ++h) {
long inputHStart = sequenceH[h];
for (w = 0; w < outputW; ++w) {
long inputWStart = sequenceW[w];
real maxVal = -THInf;
long maxIndex = -1;
long h2, w2;
for (h2 = inputHStart; h2 < inputHStart + poolSizeH; ++h2) {
for (w2 = inputWStart; w2 < inputWStart + poolSizeW; ++w2) {
THAssert(h2 >= 0 && h2 < inputH);
THAssert(w2 >= 0 && w2 < inputW);
long planeIndex = h2 * inputW + w2;
real val = inputForPlane[planeIndex];
if (val > maxVal) {
maxVal = val;
maxIndex = planeIndex;
}
}
}
THAssert(maxVal != -THInf);
THAssert(maxIndex != -1);
outputForPlane[h * outputW + w] = maxVal;
/* +1 to lua index */
indicesForPlane[h * outputW + w] = (real) maxIndex + TH_INDEX_BASE;
}
}
THFree(sequenceW);
THFree(sequenceH);
}
}
void THNN_(SpatialFractionalMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
int outputW, int outputH,
int poolSizeW, int poolSizeH,
THTensor *indices,
THTensor *randomSamples) {
long numBatch = 1;
int planeDim = 0;
int heightDim = 1;
int widthDim = 2;
long numInputDims = THTensor_(nDimension)(input);
THArgCheck(numInputDims == 3 || numInputDims == 4, 2,
"3D or 4D (batch mode) tensor expected");
if (numInputDims == 4) {
numBatch = THTensor_(size)(input, 0);
planeDim++;
heightDim++;
widthDim++;
}
/* sizes */
long numPlanes = THTensor_(size)(input, planeDim);
long inputH = THTensor_(size)(input, heightDim);
long inputW = THTensor_(size)(input, widthDim);
THArgCheck(outputH + poolSizeH - 1 < inputH, 7,
"poolSizeH too large relative to input height");
THArgCheck(outputW + poolSizeW - 1 < inputW, 6,
"poolSizeW too large relative to input width");
/* get contiguous input */
input = THTensor_(newContiguous)(input);
if (numInputDims == 3) {
/* resize output */
THTensor_(resize3d)(output, numPlanes, outputH, outputW);
/* indices will contain the locations for each output point */
THTensor_(resize3d)(indices, numPlanes, outputH, outputW);
THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
THTensor_(data)(input),
THTensor_(data)(output),
THTensor_(data)(indices),
THTensor_(data)(randomSamples),
numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
} else {
THTensor_(resize4d)(output, numBatch, numPlanes, outputH, outputW);
/* indices will contain the locations for each output point */
THTensor_(resize4d)(indices, numBatch, numPlanes, outputH, outputW);
long batch;
#pragma omp parallel for private(batch)
for (batch = 0; batch < numBatch; ++batch) {
THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
THTensor_(data)(input) + batch * numPlanes * inputH * inputW,
THTensor_(data)(output) + batch * numPlanes * outputH * outputW,
THTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
THTensor_(data)(randomSamples) + batch * numPlanes * 2,
numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
real* gradInput,
real* gradOutput,
real* indices,
long numPlanes,
long inputW, long inputH,
long outputW, long outputH) {
long plane;
#pragma omp parallel for private(plane)
for (plane = 0; plane < numPlanes; plane++) {
real* gradInputForPlane = gradInput + plane * inputW * inputH;
real* gradOutputForPlane = gradOutput + plane * outputW * outputH;
real* indicesForPlane = indices + plane * outputW * outputH;
long h, w;
for (h = 0; h < outputH; ++h) {
for (w = 0; w < outputW; ++w) {
long outputIndex = h * outputW + w;
long index = indicesForPlane[outputIndex] - TH_INDEX_BASE;
THAssert(index >= 0 && index < inputW * inputH);
gradInputForPlane[index] += gradOutputForPlane[outputIndex];
}
}
}
}
void THNN_(SpatialFractionalMaxPooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int outputW, int outputH,
int poolSizeW, int poolSizeH,
THTensor *indices) {
long numBatch = 1;
int planeDim = 0;
int heightDim = 1;
int widthDim = 2;
long numInputDims = THTensor_(nDimension)(input);
if (numInputDims == 4) {
numBatch = THTensor_(size)(input, 0);
planeDim = 1;
heightDim++;
widthDim++;
}
/* sizes */
long numPlanes = THTensor_(size)(input, planeDim);
long inputH = THTensor_(size)(input, heightDim);
long inputW = THTensor_(size)(input, widthDim);
THArgCheck(outputW == THTensor_(size)(gradOutput, widthDim), 3,
"gradOutput width unexpected");
THArgCheck(outputH == THTensor_(size)(gradOutput, heightDim), 3,
"gradOutput height unexpected");
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
/* backprop */
if (numInputDims == 3) {
THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
THTensor_(data)(gradInput),
THTensor_(data)(gradOutput),
THTensor_(data)(indices),
numPlanes, inputW, inputH, outputW, outputH);
} else {
long batch;
#pragma omp parallel for private(batch)
for (batch = 0; batch < numBatch; ++batch) {
THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
THTensor_(data)(gradInput) + batch * numPlanes * inputH * inputW,
THTensor_(data)(gradOutput) + batch * numPlanes * outputH * outputW,
THTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
numPlanes, inputW, inputH, outputW, outputH);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif

385 torch/lib/THNN/generic/SpatialFullConvolution.c Normal file
View File

@ -0,0 +1,385 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialFullConvolution.c"
#else
static void THNN_(im2col)(const real* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
real* data_col) {
const int height_col = (height + 2 * pad_h -
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_col = (width + 2 * pad_w -
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
const int channels_col = channels * kernel_h * kernel_w;
for (int c_col = 0; c_col < channels_col; ++c_col) {
int w_offset = c_col % kernel_w;
int h_offset = (c_col / kernel_w) % kernel_h;
int c_im = c_col / kernel_h / kernel_w;
for (int h_col = 0; h_col < height_col; ++h_col) {
for (int w_col = 0; w_col < width_col; ++w_col) {
int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
data_col[(c_col * height_col + h_col) * width_col + w_col] =
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
data_im[(c_im * height + h_im) * width + w_im] : 0;
}
}
}
}
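// im2col linearizes each kernel-sized patch into one column of data_col,
// writing zeros for taps that fall in the padding, so convolution becomes
// a GEMM over the column matrix.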
static void THNN_(col2im)(const real* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
real* data_im) {
memset(data_im, 0, sizeof(real) * height * width * channels);
const int height_col = (height + 2 * pad_h -
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_col = (width + 2 * pad_w -
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
const int channels_col = channels * kernel_h * kernel_w;
for (int c_col = 0; c_col < channels_col; ++c_col) {
int w_offset = c_col % kernel_w;
int h_offset = (c_col / kernel_w) % kernel_h;
int c_im = c_col / kernel_h / kernel_w;
for (int h_col = 0; h_col < height_col; ++h_col) {
for (int w_col = 0; w_col < width_col; ++w_col) {
int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
data_im[(c_im * height + h_im) * width + w_im] +=
data_col[(c_col * height_col + h_col) * width_col + w_col];
}
}
}
}
void THNN_(SpatialFullConvolution_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *columns,
THTensor *ones,
int kW, int kH,
int dW, int dH,
int padW, int padH,
int adjW, int adjH)
{
int nInputPlane = THTensor_(size)(weight,0);
int nOutputPlane = THTensor_(size)(weight,1);
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
int batch = 1;
if (input->nDimension == 3) {
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
// Force batch
batch = 0;
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
} else {
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
}
long inputWidth = input->size[3];
long inputHeight = input->size[2];
long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
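// Transposed-convolution sizing: e.g. inputWidth=16, dW=2, padW=1, kW=4,
// adjW=0 gives (16-1)*2 - 2*1 + 4 + 0 = 32; adjW/adjH disambiguate among the
// output sizes that a forward convolution would map onto the same input size.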
// Batch size + input planes
long batchSize = input->size[0];
// Resize output
THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
// Resize temporary columns
THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
THTensor_(zero)(columns);
// Define a buffer of ones, for bias accumulation
// Note: this buffer can be shared with other modules; it is only ever enlarged
// and always contains ones.
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
// Resize plane and fill with ones...
THTensor_(resize2d)(ones, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *output_n = THTensor_(new)();
int elt;
// For each elt in batch, do:
for (elt = 0; elt < batchSize; elt++) {
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(output_n, output, 0, elt);
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
long m = weight->size[1] * weight->size[2] * weight->size[3];
long n = columns->size[1];
long k = weight->size[0];
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 't',
n, m, k,
1,
THTensor_(data)(input_n), n,
THTensor_(data)(weight), m,
0,
THTensor_(data)(columns), n
);
// Unpack columns back into input:
THNN_(col2im)(
THTensor_(data)(columns),
nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
1, 1,
THTensor_(data)(output_n)
);
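// A full convolution runs the ordinary convolution's backward pass forward:
// the GEMM expands each input frame into nOutputPlane*kW*kH columns and
// col2im scatter-adds them into the larger output plane.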
// Do Bias after:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
long m_ = nOutputPlane;
long n_ = outputHeight * outputWidth;
long k_ = 1;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
if (bias) {
THBlas_(gemm)(
't', 'n',
n_, m_, k_,
1,
THTensor_(data)(ones), k_,
THTensor_(data)(bias), k_,
1,
THTensor_(data)(output_n), n_
);
}
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(output_n);
// Resize output
if (batch == 0) {
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
}
}
void THNN_(SpatialFullConvolution_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *gradColumns,
int kW, int kH,
int dW, int dH,
int padW, int padH,
int adjW, int adjH)
{
int nInputPlane = THTensor_(size)(weight,0);
int nOutputPlane = THTensor_(size)(weight,1);
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
int batch = 1;
if (input->nDimension == 3) {
// Force batch
batch = 0;
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
}
long inputWidth = input->size[3];
long inputHeight = input->size[2];
long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
// Batch size + input planes
long batchSize = input->size[0];
// Resize gradInput
THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
THTensor_(zero)(gradInput);
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);
// Helpers
THTensor *gradInput_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
int elt;
// For each elt in batch, do:
for (elt = 0; elt < batchSize; elt++) {
// Matrix multiply per sample:
THTensor_(select)(gradInput_n, gradInput, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// Extract columns:
THNN_(im2col)(
THTensor_(data)(gradOutput_n),
nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
1, 1,
THTensor_(data)(gradColumns)
);
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
long m = weight->size[0];
long n = gradColumns->size[1];
long k = weight->size[1] * weight->size[2] * weight->size[3];
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 'n',
n, m, k,
1,
THTensor_(data)(gradColumns), n,
THTensor_(data)(weight), k,
0,
THTensor_(data)(gradInput_n), n
);
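// Symmetrically, the input gradient of a full convolution is an ordinary
// convolution of gradOutput with weight: im2col on gradOutput, then one
// GEMM per frame.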
}
// Free
THTensor_(free)(gradInput_n);
THTensor_(free)(gradOutput_n);
// Restore non-batch sizes
if (batch == 0) {
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
}
}
void THNN_(SpatialFullConvolution_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *columns,
THTensor *ones,
int kW, int kH,
int dW, int dH,
int padW, int padH,
int adjW, int adjH,
real scale)
{
int nInputPlane = THTensor_(size)(gradWeight,0);
int nOutputPlane = THTensor_(size)(gradWeight,1);
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");
int batch = 1;
if (input->nDimension == 3) {
// Force batch
batch = 0;
THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
}
long inputWidth = input->size[3];
long inputHeight = input->size[2];
long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
// Batch size + input planes
long batchSize = input->size[0];
// Define a buffer of ones, for bias accumulation
if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
// Resize plane and fill with ones...
THTensor_(resize2d)(ones, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Resize temporary columns
THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
int elt;
// For each elt in batch, do:
for (elt = 0; elt < batchSize; elt++) {
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// Extract columns:
THNN_(im2col)(
THTensor_(data)(gradOutput_n),
nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
1, 1,
THTensor_(data)(columns)
);
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
long n = columns->size[0]; // nOutputPlane * kh * kw
long m = input_n->size[0]; // nInputPlane
long k = columns->size[1]; // inputHeight * inputWidth
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
't', 'n',
n, m, k,
scale,
THTensor_(data)(columns), k,
THTensor_(data)(input_n), k,
1,
THTensor_(data)(gradWeight), n
);
// Do Bias:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
long m_ = nOutputPlane;
long k_ = outputHeight * outputWidth;
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
if (gradBias) {
THBlas_(gemv)(
't',
k_, m_,
scale,
THTensor_(data)(gradOutput_n), k_,
THTensor_(data)(ones), 1,
1,
THTensor_(data)(gradBias), 1
);
}
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(gradOutput_n);
// Resize
if (batch == 0) {
THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
}
}
#endif

212 torch/lib/THNN/generic/SpatialFullConvolutionMap.c Normal file
View File

@ -0,0 +1,212 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialFullConvolutionMap.c"
#else
void THNN_(SpatialFullConvolutionMap_updateOutput)(
THNNState *state, THTensor *input, THTensor *output_, THTensor *weight, THTensor *bias,
THTensor *connTable, int nInputPlane, int nOutputPlane,
int dW, int dH)
{
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 4,
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
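/* connTable is an nkernel x 2 table of (input plane, output plane) pairs,
 * offset by TH_INDEX_BASE; kernel k maps plane connTable[k][0] to plane
 * connTable[k][1], allowing sparse, hand-specified connectivity. */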
const int kH = (int)weight->size[1];
const int kW = (int)weight->size[2];
THArgCheck(input != NULL && input->nDimension == 3, 2, "3D tensor expected");
THArgCheck(input->size[0] >= nInputPlane, 2, "invalid number of input planes");
THTensor_(resize3d)(
output_, nOutputPlane,
(input->size[1] - 1) * dH + kH,
(input->size[2] - 1) * dW + kW
);
/* contiguous */
input = THTensor_(newContiguous)(input);
THTensor* output = THTensor_(newContiguous)(output_);
/* get raw pointers */
real *input_data = THTensor_(data)(input);
real *output_data = THTensor_(data)(output);
real *weight_data = THTensor_(data)(weight);
real *bias_data = THTensor_(data)(bias);
real *connTable_data = THTensor_(data)(connTable);
/* and dims */
const long input_h = input->size[1];
const long input_w = input->size[2];
const long output_h = output->size[1];
const long output_w = output->size[2];
const long weight_h = weight->size[1];
const long weight_w = weight->size[2];
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nOutputPlane; p++)
{
/* add bias */
real *ptr_output = output_data + p*output_w*output_h;
long j;
int nweight;
long k;
for (j = 0; j < output_h*output_w; j++)
ptr_output[j] = bias_data[p];
/* convolve all maps */
nweight = connTable->size[0];
for (k = 0; k < nweight; k++)
{
/* get offsets for input/output */
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (o == p)
{
THTensor_(fullConv2Dptr)(
output_data + o*output_w*output_h,
1.0,
input_data + i*input_w*input_h, input_h, input_w,
weight_data + k*weight_w*weight_h, weight_h, weight_w,
dH, dW
);
}
}
}
/* clean up */
THTensor_(free)(input);
THTensor_(freeCopyTo)(output, output_);
}
void THNN_(SpatialFullConvolutionMap_updateGradInput)(
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput_, THTensor *weight, THTensor *bias,
THTensor *connTable, int nInputPlane, int nOutputPlane,
int dW, int dH)
{
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 5,
"3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
/* contiguous */
THTensor* gradInput = THTensor_(newContiguous)(gradInput_);
gradOutput = THTensor_(newContiguous)(gradOutput);
/* Resize/Zero */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
/* get raw pointers */
real *gradInput_data = THTensor_(data)(gradInput);
real *gradOutput_data = THTensor_(data)(gradOutput);
real *weight_data = THTensor_(data)(weight);
real *connTable_data = THTensor_(data)(connTable);
/* and dims */
const long input_h = input->size[1];
const long input_w = input->size[2];
const long output_h = gradOutput->size[1];
const long output_w = gradOutput->size[2];
const long kH = weight->size[1];
const long kW = weight->size[2];
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nInputPlane; p++)
{
long k;
/* backward all */
int nkernel = connTable->size[0];
for (k = 0; k < nkernel; k++)
{
int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (i == p)
{
/* gradient to input */
THTensor_(validXCorr2Dptr)(
gradInput_data + i*input_w*input_h,
1.0,
gradOutput_data + o*output_w*output_h, output_h, output_w,
weight_data + k*kW*kH, kH, kW,
dH, dW
);
}
}
}
/* clean up */
THTensor_(freeCopyTo)(gradInput, gradInput_);
THTensor_(free)(gradOutput);
}
void THNN_(SpatialFullConvolutionMap_accGradParameters)(
THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
THTensor *connTable, int nInputPlane, int nOutputPlane,
int dW, int dH, real scale)
{
THArgCheck(
gradWeight != NULL && gradWeight->nDimension == 3
&& connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
"3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
/* contiguous */
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
/* get raw pointers */
real *input_data = THTensor_(data)(input);
real *gradOutput_data = THTensor_(data)(gradOutput);
real *gradWeight_data = THTensor_(data)(gradWeight);
real *gradBias_data = THTensor_(data)(gradBias);
/* and dims */
const long input_h = input->size[1];
const long input_w = input->size[2];
const long output_h = gradOutput->size[1];
const long output_w = gradOutput->size[2];
const long weight_h = gradWeight->size[1];
const long weight_w = gradWeight->size[2];
/* gradients wrt bias */
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nOutputPlane; k++)
{
real *ptr_gradOutput = gradOutput_data + k*output_w*output_h;
long l;
for (l = 0; l < output_h*output_w; l++)
gradBias_data[k] += scale*ptr_gradOutput[l];
}
/* gradients wrt weight */
int nkernel = connTable->size[0];
#pragma omp parallel for private(k)
for (k = 0; k < nkernel; k++)
{
int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
/* gradient to kernel */
THTensor_(validXCorr2DRevptr)(
gradWeight_data + k*weight_w*weight_h,
scale,
gradOutput_data + o*output_w*output_h, output_h, output_w,
input_data + i*input_w*input_h, input_h, input_w,
dH, dW
);
}
/* clean up */
THTensor_(free)(input);
THTensor_(free)(gradOutput);
}
#endif

300 torch/lib/THNN/generic/SpatialMaxPooling.c Normal file
View File

@ -0,0 +1,300 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c"
#else
static void THNN_(SpatialMaxPooling_updateOutput_frame)(
real *input_p,
real *output_p,
real *ind_p,
long nslices,
long iwidth,
long iheight,
long owidth,
long oheight,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
/* loop over output */
long i, j;
real *ip = input_p + k*iwidth*iheight;
for(i = 0; i < oheight; i++)
{
for(j = 0; j < owidth; j++)
{
long hstart = i * dH - padH;
long wstart = j * dW - padW;
/* clip the pooling window to the image; integer comparisons avoid the
precision loss of fminf/fmaxf on long values */
long hend = hstart + kH < iheight ? hstart + kH : iheight;
long wend = wstart + kW < iwidth ? wstart + kW : iwidth;
hstart = hstart > 0 ? hstart : 0;
wstart = wstart > 0 ? wstart : 0;
/* local pointers */
real *op = output_p + k*owidth*oheight + i*owidth + j;
real *indp = ind_p + k*owidth*oheight + i*owidth + j;
/* compute local max: */
long maxindex = -1;
real maxval = -THInf;
long tcntr = 0;
long x,y;
for(y = hstart; y < hend; y++)
{
for(x = wstart; x < wend; x++)
{
tcntr = y*iwidth + x;
real val = *(ip + tcntr);
if (val > maxval)
{
maxval = val;
maxindex = tcntr;
}
}
}
/* set output to local max */
*op = maxval;
/* store location of max */
*indp = maxindex + TH_INDEX_BASE;
}
}
}
}
void THNN_(SpatialMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *indices,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
bool ceil_mode)
{
int dimw = 2;
int dimh = 1;
long nbatch = 1;
long nslices;
long iheight;
long iwidth;
long oheight;
long owidth;
real *input_data;
real *output_data;
real *indices_data;
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
}
THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2, "input image smaller than kernel size");
THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, "pad must be at most half of the kernel size");
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
if (ceil_mode)
{
oheight = (long)(ceil((float)(iheight - kH + 2*padH) / dH)) + 1;
owidth = (long)(ceil((float)(iwidth - kW + 2*padW) / dW)) + 1;
}
else
{
oheight = (long)(floor((float)(iheight - kH + 2*padH) / dH)) + 1;
owidth = (long)(floor((float)(iwidth - kW + 2*padW) / dW)) + 1;
}
if (padW || padH)
{
// ensure that the last pooling starts inside the image
if ((oheight - 1)*dH >= iheight + padH)
--oheight;
if ((owidth - 1)*dW >= iwidth + padW)
--owidth;
}
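/* Example: iwidth=6, kW=3, dW=2, padW=0 gives floor((6-3)/2)+1 = 2 outputs in
floor mode but ceil((6-3)/2)+1 = 3 in ceil mode, the extra window being
clipped at the image edge; the check above drops windows that would start
entirely inside the padding. */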
/* get contiguous input */
input = THTensor_(newContiguous)(input);
/* resize output */
if (input->nDimension == 3)
{
THTensor_(resize3d)(output, nslices, oheight, owidth);
/* indices will contain the locations for each output point */
THTensor_(resize3d)(indices, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
THNN_(SpatialMaxPooling_updateOutput_frame)(input_data, output_data,
indices_data,
nslices,
iwidth, iheight,
owidth, oheight,
kW, kH, dW, dH,
padW, padH);
}
else
{
long p;
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
/* indices will contain the locations for each output point */
THTensor_(resize4d)(indices, nbatch, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialMaxPooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight,
indices_data+p*nslices*owidth*oheight,
nslices,
iwidth, iheight,
owidth, oheight,
kW, kH, dW, dH,
padW, padH);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(SpatialMaxPooling_updateGradInput_frame)(
real *gradInput_p,
real *gradOutput_p,
real *ind_p,
long nslices,
long iwidth,
long iheight,
long owidth,
long oheight,
int dW,
int dH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
real *ind_p_k = ind_p + k*owidth*oheight;
/* calculate max points */
long i, j;
for(i = 0; i < oheight; i++)
{
for(j = 0; j < owidth; j++)
{
/* retrieve position of max */
long maxp = ind_p_k[i*owidth + j] - TH_INDEX_BASE;
/* update gradient */
gradInput_p_k[maxp] += gradOutput_p_k[i*owidth + j];
}
}
}
}
void THNN_(SpatialMaxPooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *indices,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
bool ceil_mode)
{
int dimw = 2;
int dimh = 1;
long nbatch = 1;
int nslices;
int iheight;
int iwidth;
int oheight;
int owidth;
real *gradInput_data;
real *gradOutput_data;
real *indices_data;
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
oheight = gradOutput->size[dimh];
owidth = gradOutput->size[dimw];
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
indices_data = THTensor_(data)(indices);
/* backprop */
if (input->nDimension == 3)
{
THNN_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
indices_data,
nslices,
iwidth, iheight,
owidth, oheight,
dW, dH);
}
else
{
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
indices_data+p*nslices*owidth*oheight,
nslices,
iwidth, iheight,
owidth, oheight,
dW, dH);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif

223 torch/lib/THNN/generic/SpatialMaxUnpooling.c Normal file
View File

@ -0,0 +1,223 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialMaxUnpooling.c"
#else
static void THNN_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *output_p,
real *ind_p,
long nslices,
long iwidth, long iheight,
long owidth, long oheight)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
real *output_p_k = output_p + k*owidth*oheight;
real *input_p_k = input_p + k*iwidth*iheight;
real *ind_p_k = ind_p + k*iwidth*iheight;
long i, j, maxp;
for(i = 0; i < iheight; i++)
{
for(j = 0; j < iwidth; j++)
{
maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
if(maxp<0 || maxp>=owidth*oheight){
THError("invalid max index %d, owidth= %d, oheight= %d",maxp,owidth,oheight);
}
output_p_k[maxp] = input_p_k[i*iwidth + j]; /* update output */
}
}
}
}
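/* Unpooling is a scatter: the output starts at zero and each input value is
 * written to the location recorded in indices by the paired max-pooling. */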
void THNN_(SpatialMaxUnpooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *indices,
int owidth, int oheight)
{
int dimw = 2;
int dimh = 1;
int nbatch = 1;
int nslices;
int iheight;
int iwidth;
real *input_data;
real *output_data;
real *indices_data;
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
if (!THTensor_(isSameSizeAs)(input, indices)){
THError("Invalid input size w.r.t current indices size");
}
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
/* get contiguous input and indices */
input = THTensor_(newContiguous)(input);
indices = THTensor_(newContiguous)(indices);
/* resize output */
if (input->nDimension == 3)
{
THTensor_(resize3d)(output, nslices, oheight, owidth);
THTensor_(zero)(output);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
indices_data,
nslices,
iwidth, iheight,
owidth, oheight);
}
else
{
long p;
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
THTensor_(zero)(output);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight,
indices_data+p*nslices*iwidth*iheight,
nslices,
iwidth, iheight,
owidth, oheight);
}
}
/* cleanup */
THTensor_(free)(input);
THTensor_(free)(indices);
}
static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
real *ind_p,
long nslices,
long iwidth, long iheight,
long owidth, long oheight)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
real *ind_p_k = ind_p + k*iwidth*iheight;
long i, j, maxp;
for(i = 0; i < iheight; i++)
{
for(j = 0; j < iwidth; j++)
{
maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
if(maxp<0 || maxp>=owidth*oheight){
THError("invalid max index %d, owidth= %d, oheight= %d",maxp,owidth,oheight);
}
gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */
}
}
}
}
void THNN_(SpatialMaxUnpooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *indices,
int owidth, int oheight)
{
int dimw = 2;
int dimh = 1;
int nbatch = 1;
int nslices;
int iheight;
int iwidth;
real *gradInput_data;
real *gradOutput_data;
real *indices_data;
if (!THTensor_(isSameSizeAs)(input, indices)){
THError("Invalid input size w.r.t current indices size");
}
/* get contiguous gradOutput and indices */
gradOutput = THTensor_(newContiguous)(gradOutput);
indices = THTensor_(newContiguous)(indices);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
if(owidth!=gradOutput->size[dimw] || oheight!=gradOutput->size[dimh]){
THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d", oheight, owidth,gradOutput->size[dimh],gradOutput->size[dimw]);
}
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
indices_data = THTensor_(data)(indices);
/* backprop */
if (input->nDimension == 3)
{
THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
indices_data,
nslices,
iwidth, iheight,
owidth, oheight);
}
else
{
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
indices_data+p*nslices*iwidth*iheight,
nslices,
iwidth, iheight,
owidth, oheight);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
THTensor_(free)(indices);
}
#endif

255 torch/lib/THNN/generic/SpatialReflectionPadding.c Normal file
View File

@ -0,0 +1,255 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialReflectionPadding.c"
#else
static void THNN_(SpatialReflectionPadding_updateOutput_frame)(
real *input_p, real *output_p,
long nslices,
long iwidth, long iheight,
long owidth, long oheight,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int iStartX = fmax(0, -pad_l);
int iStartY = fmax(0, -pad_t);
int oStartX = fmax(0, pad_l);
int oStartY = fmax(0, pad_t);
long k, ip_x, ip_y;
#pragma omp parallel for private(k, ip_x, ip_y)
for (k = 0; k < nslices; k++)
{
long i, j;
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pad_l) {
ip_x = pad_l * 2 - j;
} else if (j >= pad_l && j < iwidth + pad_l) {
ip_x = j;
} else {
ip_x = (iwidth + pad_l - 1) * 2 - j;
}
ip_x = ip_x - oStartX + iStartX;
if (i < pad_t) {
ip_y = pad_t * 2 - i;
} else if (i >= pad_t && i < iheight + pad_t) {
ip_y = i;
} else {
ip_y = (iheight + pad_t - 1) * 2 - i;
}
ip_y = ip_y - oStartY + iStartY;
real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
*dest_p = *src_p;
}
}
}
}
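/* Reflection mirrors without repeating the border pixel: with pad_l=2 and
 * iwidth=5, output columns 0..8 read input columns 2,1,0,1,2,3,4,3,2. */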
void THNN_(SpatialReflectionPadding_updateOutput)(THNNState *state,
THTensor *input,
THTensor *output,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int dimw = 2;
int dimh = 1;
int dimslices = 0;
long nbatch = 1;
long nslices;
long iheight;
long iwidth;
long oheight;
long owidth;
real *input_data;
real *output_data;
THArgCheck(input->nDimension == 3 ||
input->nDimension == 4 , 2, "input must be 3 or 4-dimensional");
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
dimslices++;
}
/* sizes */
nslices = input->size[dimslices];
iheight = input->size[dimh];
iwidth = input->size[dimw];
oheight = iheight + pad_t + pad_b;
owidth = iwidth + pad_l + pad_r;
THArgCheck(owidth >= 1 && oheight >= 1, 2, "input is too small");
/* get contiguous input */
input = THTensor_(newContiguous)(input);
/* resize output */
if (input->nDimension == 3)
{
THTensor_(resize3d)(output, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
THNN_(SpatialReflectionPadding_updateOutput_frame)(input_data, output_data,
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
}
else
{
long p;
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialReflectionPadding_updateOutput_frame)(
input_data+p*nslices*iwidth*iheight,
output_data+p*nslices*owidth*oheight,
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(SpatialReflectionPadding_updateGradInput_frame)(
real *ginput_p, real *goutput_p,
long nslices,
long iwidth, long iheight,
long owidth, long oheight,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int iStartX = fmax(0, -pad_l);
int iStartY = fmax(0, -pad_t);
int oStartX = fmax(0, pad_l);
int oStartY = fmax(0, pad_t);
long k, ip_x, ip_y;
#pragma omp parallel for private(k, ip_x, ip_y)
for (k = 0; k < nslices; k++)
{
long i, j;
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pad_l) {
ip_x = pad_l * 2 - j;
} else if (j >= pad_l && j < iwidth + pad_l) {
ip_x = j;
} else {
ip_x = (iwidth + pad_l - 1) * 2 - j;
}
ip_x = ip_x - oStartX + iStartX;
if (i < pad_t) {
ip_y = pad_t * 2 - i;
} else if (i >= pad_t && i < iheight + pad_t) {
ip_y = i;
} else {
ip_y = (iheight + pad_t - 1) * 2 - i;
}
ip_y = ip_y - oStartY + iStartY;
real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
*dest_p += *src_p;
}
}
}
}
void THNN_(SpatialReflectionPadding_updateGradInput)(THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int dimw = 2;
int dimh = 1;
int dimslices = 0;
long nbatch = 1;
long nslices;
long iheight;
long iwidth;
long oheight;
long owidth;
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
dimslices++;
}
/* sizes */
nslices = input->size[dimslices];
iheight = input->size[dimh];
iwidth = input->size[dimw];
oheight = iheight + pad_t + pad_b;
owidth = iwidth + pad_l + pad_r;
THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
"gradOutput width unexpected");
THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
"gradOutput height unexpected");
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
/* backprop */
if (input->nDimension == 3) {
THNN_(SpatialReflectionPadding_updateGradInput_frame)(
THTensor_(data)(gradInput),
THTensor_(data)(gradOutput),
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
} else {
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++) {
THNN_(SpatialReflectionPadding_updateGradInput_frame)(
THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif

254 torch/lib/THNN/generic/SpatialReplicationPadding.c Normal file
View File

@ -0,0 +1,254 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialReplicationPadding.c"
#else
static void THNN_(SpatialReplicationPadding_updateOutput_frame)(
real *input_p, real *output_p,
long nslices,
long iwidth, long iheight,
long owidth, long oheight,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int iStartX = fmax(0, -pad_l);
int iStartY = fmax(0, -pad_t);
int oStartX = fmax(0, pad_l);
int oStartY = fmax(0, pad_t);
long k, ip_x, ip_y;
#pragma omp parallel for private(k, ip_x, ip_y)
for (k = 0; k < nslices; k++)
{
long i, j;
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pad_l) {
ip_x = pad_l;
} else if (j >= pad_l && j < iwidth + pad_l) {
ip_x = j;
} else {
ip_x = iwidth + pad_l - 1;
}
ip_x = ip_x - oStartX + iStartX;
if (i < pad_t) {
ip_y = pad_t;
} else if (i >= pad_t && i < iheight + pad_t) {
ip_y = i;
} else {
ip_y = iheight + pad_t - 1;
}
ip_y = ip_y - oStartY + iStartY;
real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
*dest_p = *src_p;
}
}
}
}
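/* Replication clamps to the border pixel: with pad_l=2 and iwidth=5, output
 * columns 0..8 read input columns 0,0,0,1,2,3,4,4,4. */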
void THNN_(SpatialReplicationPadding_updateOutput)(THNNState *state,
THTensor *input,
THTensor *output,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int dimw = 2;
int dimh = 1;
int dimslices = 0;
long nbatch = 1;
long nslices;
long iheight;
long iwidth;
long oheight;
long owidth;
real *input_data;
real *output_data;
THArgCheck(input->nDimension == 3 || input->nDimension == 4,
2, "input must be 3 or 4-dimensional");
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
dimslices++;
}
/* sizes */
nslices = input->size[dimslices];
iheight = input->size[dimh];
iwidth = input->size[dimw];
oheight = iheight + pad_t + pad_b;
owidth = iwidth + pad_l + pad_r;
THArgCheck(owidth >= 1 && oheight >= 1, 2, "input is too small");
/* get contiguous input */
input = THTensor_(newContiguous)(input);
/* resize output */
if (input->nDimension == 3)
{
THTensor_(resize3d)(output, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
THNN_(SpatialReplicationPadding_updateOutput_frame)(input_data, output_data,
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
}
else
{
long p;
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(SpatialReplicationPadding_updateOutput_frame)(
input_data+p*nslices*iwidth*iheight,
output_data+p*nslices*owidth*oheight,
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(SpatialReplicationPadding_updateGradInput_frame)(
real *ginput_p, real *goutput_p,
long nslices,
long iwidth, long iheight,
long owidth, long oheight,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int iStartX = fmax(0, -pad_l);
int iStartY = fmax(0, -pad_t);
int oStartX = fmax(0, pad_l);
int oStartY = fmax(0, pad_t);
long k, ip_x, ip_y;
#pragma omp parallel for private(k, ip_x, ip_y)
for (k = 0; k < nslices; k++)
{
long i, j;
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pad_l) {
ip_x = pad_l;
} else if (j >= pad_l && j < iwidth + pad_l) {
ip_x = j;
} else {
ip_x = iwidth + pad_l - 1;
}
ip_x = ip_x - oStartX + iStartX;
if (i < pad_t) {
ip_y = pad_t;
} else if (i >= pad_t && i < iheight + pad_t) {
ip_y = i;
} else {
ip_y = iheight + pad_t - 1;
}
ip_y = ip_y - oStartY + iStartY;
real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
*dest_p += *src_p;
}
}
}
}
void THNN_(SpatialReplicationPadding_updateGradInput)(THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int pad_l, int pad_r,
int pad_t, int pad_b)
{
int dimw = 2;
int dimh = 1;
int dimslices = 0;
long nbatch = 1;
long nslices;
long iheight;
long iwidth;
long oheight;
long owidth;
if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
dimslices++;
}
/* sizes */
nslices = input->size[dimslices];
iheight = input->size[dimh];
iwidth = input->size[dimw];
oheight = iheight + pad_t + pad_b;
owidth = iwidth + pad_l + pad_r;
THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
"gradOutput width unexpected");
THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
"gradOutput height unexpected");
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
/* backprop */
if (input->nDimension == 3) {
THNN_(SpatialReplicationPadding_updateGradInput_frame)(
THTensor_(data)(gradInput),
THTensor_(data)(gradOutput),
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
} else {
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++) {
THNN_(SpatialReplicationPadding_updateGradInput_frame)(
THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
nslices,
iwidth, iheight,
owidth, oheight,
pad_l, pad_r,
pad_t, pad_b);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif

267 torch/lib/THNN/generic/SpatialSubSampling.c Normal file
View File

@ -0,0 +1,267 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialSubSampling.c"
#else
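/* SpatialSubSampling sums each kW x kH window (stepping by dW, dH) and applies
 * one learned gain and bias per plane: out = bias[k] + weight[k] * sum(window),
 * i.e. the classic LeNet-style subsampling layer: average pooling with a
 * trainable scale. */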
void THNN_(SpatialSubSampling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
int kW, int kH,
int dW, int dH)
{
real *weight_data = THTensor_(data)(weight);
real *bias_data = THTensor_(data)(bias);
real *output_data;
real *input_data;
int dimw = 2;
int dimh = 1;
long nbatch = 1;
long inputWidth;
long inputHeight;
long outputWidth;
long outputHeight;
int nInputPlane = THTensor_(size)(weight,0);
long k;
THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
inputWidth = input->size[dimw];
inputHeight = input->size[dimh];
outputWidth = (inputWidth - kW) / dW + 1;
outputHeight = (inputHeight - kH) / dH + 1;
THArgCheck(input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
THArgCheck(inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
if (input->nDimension == 3)
THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
else
THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);
input = THTensor_(newContiguous)(input);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
long p;
for(p = 0; p < nbatch; p++)
{
long xx, yy;
/* For all output pixels... */
real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
/* Get the good mask for (k,i) (k out, i in) */
real the_weight = weight_data[k];
/* Initialize to the bias */
real z = bias_data[k];
long i;
for(i = 0; i < outputWidth*outputHeight; i++)
ptr_output[i] = z;
for(yy = 0; yy < outputHeight; yy++)
{
for(xx = 0; xx < outputWidth; xx++)
{
/* Compute the mean of the input image... */
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
real sum = 0;
long kx, ky;
for(ky = 0; ky < kH; ky++)
{
for(kx = 0; kx < kW; kx++)
sum += ptr_input[kx];
ptr_input += inputWidth; /* next input line */
}
/* Update output */
*ptr_output++ += the_weight*sum;
}
}
}
}
THTensor_(free)(input);
}
void THNN_(SpatialSubSampling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
int kW, int kH,
int dW, int dH)
{
int dimw = 2;
int dimh = 1;
long nbatch = 1;
long inputWidth;
long inputHeight;
long outputWidth;
long outputHeight;
int nInputPlane = THTensor_(size)(weight,0);
real *weight_data;
real *gradOutput_data;
real *gradInput_data;
long k;
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
inputWidth = input->size[dimw];
inputHeight = input->size[dimh];
outputWidth = (inputWidth - kW) / dW + 1;
outputHeight = (inputHeight - kH) / dH + 1;
weight_data = THTensor_(data)(weight);
gradOutput_data = THTensor_(data)(gradOutput);
THTensor_(resizeAs)(gradInput, input);
gradInput_data = THTensor_(data)(gradInput);
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
long p;
for(p = 0; p < nbatch; p++)
{
real the_weight = weight_data[k];
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
long xx, yy;
real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
long i;
for(i=0; i<inputWidth*inputHeight; i++)
ptr_gi[i] = 0.0;
for(yy = 0; yy < outputHeight; yy++)
{
for(xx = 0; xx < outputWidth; xx++)
{
real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
real z = *ptr_gradOutput++ * the_weight;
long kx, ky;
for(ky = 0; ky < kH; ky++)
{
for(kx = 0; kx < kW; kx++)
ptr_gradInput[kx] += z;
ptr_gradInput += inputWidth;
}
}
}
}
}
}
void THNN_(SpatialSubSampling_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
int kW, int kH,
int dW, int dH,
real scale)
{
long nbatch = 1;
long dimw = 2;
long dimh = 1;
long inputWidth;
long inputHeight;
long outputWidth;
long outputHeight;
int nInputPlane = THTensor_(size)(gradWeight,0);
real *gradWeight_data;
real *gradBias_data;
real *gradOutput_data;
real *input_data;
long k;
if (input->nDimension == 4) {
dimw++;
dimh++;
nbatch = input->size[0];
}
inputWidth = input->size[dimw];
inputHeight = input->size[dimh];
outputWidth = (inputWidth - kW) / dW + 1;
outputHeight = (inputHeight - kH) / dH + 1;
gradWeight_data = THTensor_(data)(gradWeight);
gradBias_data = THTensor_(data)(gradBias);
gradOutput_data = THTensor_(data)(gradOutput);
input = THTensor_(newContiguous)(input);
input_data = THTensor_(data)(input);
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
long p;
for(p = 0; p < nbatch; p++)
{
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
real sum;
long xx, yy;
long i;
sum = 0;
for(i = 0; i < outputWidth*outputHeight; i++)
sum += ptr_gradOutput[i];
gradBias_data[k] += scale*sum;
sum = 0;
for(yy = 0; yy < outputHeight; yy++)
{
for(xx = 0; xx < outputWidth; xx++)
{
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
real z = *ptr_gradOutput++;
long kx, ky;
for(ky = 0; ky < kH; ky++)
{
for(kx = 0; kx < kW; kx++)
sum += z * ptr_input[kx];
ptr_input += inputWidth;
}
}
}
gradWeight_data[k] += scale*sum;
}
}
THTensor_(free)(input);
}
#endif

127 torch/lib/THNN/generic/SpatialUpSamplingBilinear.c Normal file
View File

@ -0,0 +1,127 @@
// Adapted from interp.cpp from Caffe util by Pauline Luc
// Originally developed by George Papandreou
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialUpSamplingBilinear.c"
#else
void THNN_(SpatialUpSamplingBilinear_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output){
input = THTensor_(newContiguous)(input);
// work on a contiguous copy; keep a handle so results are copied back below
THTensor *output_ = output;
output = THTensor_(newContiguous)(output);
THTensor_(zero)(output);
real *idata = THTensor_(data)(input);
real *odata = THTensor_(data)(output);
int channels = THTensor_(size)(input, 0) * THTensor_(size)(input, 1);
int height1 = THTensor_(size)(input, 2);
int width1 = THTensor_(size)(input, 3);
int height2 = THTensor_(size)(output, 2);
int width2 = THTensor_(size)(output, 3);
THAssert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
// special case: just copy
if (height1 == height2 && width1 == width2) {
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = w2;
const real* pos1 = &idata[h1 * width1 + w1];
real* pos2 = &odata[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
/* release the local contiguous references */
THTensor_(free)(input);
THTensor_(free)(output);
return;
}
const float rheight = (height2 > 1) ? (float)(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? (float)(width1 - 1) / (width2 - 1) : 0.f;
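// align-corners ratio: upsampling height 4 -> 7 gives rheight = 3/6 = 0.5, so output row h2 samples input row 0.5 * h2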
for (int h2 = 0; h2 < height2; ++h2) {
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const real h1lambda = h1r - h1;
const real h0lambda = (real)1. - h1lambda;
for (int w2 = 0; w2 < width2; ++w2) {
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const real w1lambda = w1r - w1;
const real w0lambda = (real)1. - w1lambda;
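// the four bilinear weights h{0,1}lambda * w{0,1}lambda are non-negative and sum to 1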
const real* pos1 = &idata[h1 * width1 + w1];
real* pos2 = &odata[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos2[0] = h0lambda * (w0lambda * pos1[0]+ w1lambda * pos1[w1p])
+ h1lambda * (w0lambda * pos1[h1p * width1]
+ w1lambda * pos1[h1p * width1 + w1p]);
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
THTensor_(free)(input);
THTensor_(free)(output);
}
void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
THNNState *state,
THTensor *gradOutput,
THTensor *gradInput){
gradInput = THTensor_(newContiguous)(gradInput);
gradOutput = THTensor_(newContiguous)(gradOutput);
THTensor_(zero)(gradInput);
real *data1 = THTensor_(data)(gradInput);
real *data2 = THTensor_(data)(gradOutput);
int channels = THTensor_(size)(gradInput, 0) * THTensor_(size)(gradInput, 1);
int height1 = THTensor_(size)(gradInput, 2);
int width1 = THTensor_(size)(gradInput, 3);
int height2 = THTensor_(size)(gradOutput, 2);
int width2 = THTensor_(size)(gradOutput, 3);
THAssert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
// special case: same-size matching grids
if (height1 == height2 && width1 == width2) {
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = w2;
real* pos1 = &data1[h1 * width1 + w1];
const real* pos2 = &data2[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
/* release the local contiguous references */
THTensor_(free)(gradOutput);
THTensor_(free)(gradInput);
return;
}
const float rheight = (height2 > 1) ? (float)(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? (float)(width1 - 1) / (width2 - 1) : 0.f;
for (int h2 = 0; h2 < height2; ++h2) {
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const real h1lambda = h1r - h1;
const real h0lambda = (real)1. - h1lambda;
for (int w2 = 0; w2 < width2; ++w2) {
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const real w1lambda = w1r - w1;
const real w0lambda = (real)1. - w1lambda;
real* pos1 = &data1[h1 * width1 + w1];
const real* pos2 = &data2[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos1[0] += h0lambda * w0lambda * pos2[0];
pos1[w1p] += h0lambda * w1lambda * pos2[0];
pos1[h1p * width1] += h1lambda * w0lambda * pos2[0];
pos1[h1p * width1 + w1p] += h1lambda * w1lambda * pos2[0];
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
THTensor_(free)(gradOutput);
THTensor_(free)(gradInput);
}
#endif

View File

@ -0,0 +1,143 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialUpSamplingNearest.c"
#else
void THNN_(SpatialUpSamplingNearest_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
int scale_factor)
{
int dW = scale_factor;
int dH = scale_factor;
int xDim = input->nDimension-2;
int yDim = input->nDimension-1;
// dims
int idim = input->nDimension; // Guaranteed to be between 3 and 5
int osz0 = output->size[0];
int osz1 = output->size[1];
int osz2 = output->size[2];
int osz3 = 1;
if (idim > 3) {
osz3 = output->size[3];
}
// get strides
long *is = input->stride;
long *os = output->stride;
// get raw pointers
real *pin = THTensor_(data)(input);
real *pout = THTensor_(data)(output);
// perform the upsampling
int i0, i1, i2, i3, isrc, idst;
int iout[4]; // Output indices
int iin[4]; // Input indices
for (i0 = 0; i0 < osz0; i0++) {
iout[0] = i0;
iin[0] = i0;
for (i1 = 0; i1 < osz1; i1++) {
iout[1] = i1;
iin[1] = i1;
for (i2 = 0; i2 < osz2; i2++) {
iout[2] = i2;
iin[2] = i2;
for (i3 = 0; i3 < osz3; i3++) {
iout[3] = i3;
iin[3] = i3;
// set the indices for the upsampled dimensions
iin[xDim] = iout[xDim] / dW;
iin[yDim] = iout[yDim] / dH;
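// nearest-neighbour rule: output pixel (x, y) copies input pixel (x / dW, y / dH), integer division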
idst = i0*os[0] + i1*os[1] + i2*os[2];
isrc = iin[0]*is[0] + iin[1]*is[1] + iin[2]*is[2];
if (idim > 3) {
idst += i3*os[3];
isrc += iin[3]*is[3];
}
pout[idst] = pin[isrc];
}
}
}
}
}
void THNN_(SpatialUpSamplingNearest_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int scale_factor)
{
int dW = scale_factor;
int dH = scale_factor;
int xDim = gradInput->nDimension-2;
int yDim = gradInput->nDimension-1;
// dims
int idim = gradInput->nDimension; // Guaranteed to be between 3 and 5
int isz0 = gradInput->size[0];
int isz1 = gradInput->size[1];
int isz2 = gradInput->size[2];
int isz3 = 1;
if (idim > 3) {
isz3 = gradInput->size[3];
}
// get strides
long *is = gradInput->stride;
long *os = gradOutput->stride;
// get raw pointers
real *pin = THTensor_(data)(gradInput);
real *pout = THTensor_(data)(gradOutput);
// perform the upsampling
int i0, i1, i2, i3, isrc, idst, x, y;
int iin[4]; // Input indices
int iout[4]; // Output indices
THTensor_(zero)(gradInput);
for (i0 = 0; i0 < isz0; i0++) {
iin[0] = i0;
iout[0] = i0;
for (i1 = 0; i1 < isz1; i1++) {
iin[1] = i1;
iout[1] = i1;
for (i2 = 0; i2 < isz2; i2++) {
iin[2] = i2;
iout[2] = i2;
for (i3 = 0; i3 < isz3; i3++) {
iin[3] = i3;
iout[3] = i3;
idst = i0*is[0] + i1*is[1] + i2*is[2];
if (idim > 3) {
idst += i3*is[3];
}
// Now accumulate the gradients from gradOutput
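// (each input cell owns a dW x dH block of output pixels, so dW*dH gradient values are summed into it)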
for (y = 0; y < dH; y++) {
for (x = 0; x < dW; x++) {
iout[xDim] = dW * iin[xDim] + x;
iout[yDim] = dH * iin[yDim] + y;
isrc = iout[0]*os[0] + iout[1]*os[1] + iout[2]*os[2];
if (idim > 3) {
isrc += iout[3]*os[3];
}
pin[idst] += pout[isrc];
}
}
}
}
}
}
}
#endif

View File

@ -0,0 +1,50 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Sqrt.c"
#else
void THNN_(Sqrt_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real eps)
{
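/* note: eps is unused by this CPU implementation */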
THTensor_(resizeAs)(output, input);
THTensor_(sqrt)(output, input);
}
void THNN_(Sqrt_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output)
{
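/* d/dx sqrt(x) = 1/(2*sqrt(x)) = 1/(2*output); output == 0 is guarded to avoid dividing by zero */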
THTensor_(resizeAs)(gradInput, input);
if (output->nDimension == 1 ||
!THTensor_(isContiguous)(output) ||
!THTensor_(isContiguous)(gradOutput) ||
!THTensor_(isContiguous)(gradInput))
{
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
*gradInput_data = (*output_data == 0.0) ? 0.0 : (0.5 * (*gradOutput_data / *output_data));
);
}
else
{
real *gradOutput_data = THTensor_(data)(gradOutput);
real *gradInput_data = THTensor_(data)(gradInput);
real *output_data = THTensor_(data)(output);
long i;
#pragma omp parallel for private(i)
for(i = 0; i < THTensor_(nElement)(output); i++)
{
if (output_data[i] == 0.0)
gradInput_data[i] = 0.0;
else
gradInput_data[i] = 0.5 * (gradOutput_data[i] / output_data[i]);
}
}
}
#endif

View File

@ -0,0 +1,58 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Square.c"
#else
void THNN_(Square_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
THTensor_(resizeAs)(output, input);
if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
{
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = (*input_data) * (*input_data);
);
}
else
{
real *output_data = THTensor_(data)(output);
real *input_data = THTensor_(data)(input);
long i;
#pragma omp parallel for private(i)
for (i = 0; i < THTensor_(nElement)(input); i++)
output_data[i] = input_data[i]*input_data[i];
}
}
void THNN_(Square_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput)
{
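/* d/dx x^2 = 2x, hence gradInput = 2 * gradOutput * input */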
THTensor_(resizeAs)(gradInput, input);
if (input->nDimension == 1 ||
!THTensor_(isContiguous)(input) ||
!THTensor_(isContiguous)(gradOutput) ||
!THTensor_(isContiguous)(gradInput))
{
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
*gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);
);
}
else
{
real *gradOutput_data = THTensor_(data)(gradOutput);
real *gradInput_data = THTensor_(data)(gradInput);
real *input_data = THTensor_(data)(input);
long i;
#pragma omp parallel for private(i)
for (i = 0; i < THTensor_(nElement)(gradInput); i++)
gradInput_data[i] = 2.0 * gradOutput_data[i] * input_data[i];
}
}
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,49 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Tanh.c"
#else
void THNN_(Tanh_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output)
{
THTensor_(resizeAs)(output, input);
THTensor_(tanh)(output, input);
}
void THNN_(Tanh_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *output)
{
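/* d/dx tanh(x) = 1 - tanh(x)^2, evaluated from the saved output rather than recomputing tanh */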
THTensor_(resizeAs)(gradInput, output);
if (output->nDimension == 1 ||
!THTensor_(isContiguous)(output) ||
!THTensor_(isContiguous)(gradOutput) ||
!THTensor_(isContiguous)(gradInput))
{
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
real z = *output_data;
*gradInput_data = *gradOutput_data * (1. - z*z);
);
}
else
{
real* ptr_gradOutput = THTensor_(data)(gradOutput);
real* ptr_gradInput = THTensor_(data)(gradInput);
real* ptr_output = THTensor_(data)(output);
long i;
#pragma omp parallel for private(i)
for (i = 0; i < THTensor_(nElement)(gradInput); i++)
{
real z = ptr_output[i];
ptr_gradInput[i] = ptr_gradOutput[i] * (1. - z*z);
}
}
}
#endif

View File

@ -0,0 +1,349 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalConvolution.c"
#else
void THNN_(TemporalConvolution_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
int kW,
int dW,
int inputFrameSize,
int outputFrameSize)
{
THTensor *outputWindow, *inputWindow;
int nInputFrame, nOutputFrame;
long k, i;
int dimS = 0; // sequence dimension
int dimF = 1; // feature dimension
THArgCheck(input->nDimension == 2 || input->nDimension == 3, 2, "2D or 3D (batch mode) tensor expected");
if (input->nDimension == 3)
{
dimS = 1;
dimF = 2;
}
THArgCheck(input->size[dimF] == inputFrameSize, 2, "invalid input frame size");
THArgCheck(input->size[dimS] >= kW, 2, "input sequence smaller than kernel size");
input = THTensor_(newContiguous)(input);
outputWindow = THTensor_(new)();
inputWindow = THTensor_(new)();
nInputFrame = input->size[dimS];
nOutputFrame = (nInputFrame - kW) / dW + 1;
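/* nOutputFrame = (nInputFrame - kW)/dW + 1, e.g. 10 frames with kW = 3, dW = 2 give 4 output frames */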
if (input->nDimension == 2)
{
THTensor_(resize2d)(output,
nOutputFrame,
outputFrameSize);
/* bias first */
for(k = 0; k < nOutputFrame; k++)
{
THTensor_(select)(outputWindow, output, 0, k);
THTensor_(copy)(outputWindow, bias);
}
/* ouch: pack as many output frames as possible into each matrix multiply */
for(k = 0; nOutputFrame > 0; k++)
{
long outputFrameStride = (kW-1)/dW+1;
long inputFrameStride = outputFrameStride*dW;
long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
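/* frames k, k+outputFrameStride, ... read disjoint input windows, so all nFrame of them are handled by the single addmm below */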
nOutputFrame -= nFrame;
THTensor_(setStorage2d)(inputWindow, input->storage,
input->storageOffset+k*dW*input->size[1],
nFrame, inputFrameStride*input->size[1],
kW*input->size[1], 1);
THTensor_(setStorage2d)(outputWindow, output->storage,
output->storageOffset + k*output->size[1],
nFrame, outputFrameStride*output->size[1],
output->size[1], 1);
THTensor_(transpose)(weight, NULL, 0, 1);
THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight);
THTensor_(transpose)(weight, NULL, 0, 1);
}
}
else
{
THTensor *outputSample = THTensor_(new)();
THTensor *inputSample = THTensor_(new)();
int nBatchFrame = input->size[0];
THTensor_(resize3d)(output,
nBatchFrame,
nOutputFrame,
outputFrameSize);
for(i = 0; i < nBatchFrame; i++)
{
THTensor_(select)(outputSample, output, 0, i);
THTensor_(select)(inputSample, input, 0, i);
long nOutputSampleFrame = nOutputFrame;
/* bias first */
for(k = 0; k < nOutputFrame; k++)
{
THTensor_(select)(outputWindow, outputSample, 0, k);
THTensor_(copy)(outputWindow, bias);
}
/* ouch */
for(k = 0; nOutputSampleFrame > 0; k++)
{
long outputFrameStride = (kW-1)/dW+1;
long inputFrameStride = outputFrameStride*dW;
long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
nOutputSampleFrame -= nFrame;
THTensor_(setStorage2d)(inputWindow, inputSample->storage,
inputSample->storageOffset+k*dW*inputSample->size[1],
nFrame, inputFrameStride*inputSample->size[1],
kW*inputSample->size[1], 1);
THTensor_(setStorage2d)(outputWindow, outputSample->storage,
outputSample->storageOffset + k*outputSample->size[1],
nFrame, outputFrameStride*outputSample->size[1],
outputSample->size[1], 1);
THTensor_(transpose)(weight, NULL, 0, 1);
THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight);
THTensor_(transpose)(weight, NULL, 0, 1);
}
}
THTensor_(free)(outputSample);
THTensor_(free)(inputSample);
}
THTensor_(free)(outputWindow);
THTensor_(free)(inputWindow);
THTensor_(free)(input);
}
void THNN_(TemporalConvolution_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
int kW,
int dW)
{
long nInputFrame;
long nOutputFrame;
THTensor *gradOutputWindow;
THTensor *gradInputWindow;
long k, i;
int dimS = 0; // sequence dimension
int dimF = 1; // feature dimension
if (gradOutput->nDimension == 3)
{
dimS = 1;
dimF = 2;
}
nInputFrame = input->size[dimS];
nOutputFrame = gradOutput->size[dimS];
gradOutputWindow = THTensor_(new)();
gradInputWindow = THTensor_(new)();
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (gradOutput->nDimension == 2)
{
/* ouch */
for(k = 0; nOutputFrame > 0; k++)
{
long outputFrameStride = (kW-1)/dW+1;
long inputFrameStride = outputFrameStride*dW;
long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
nOutputFrame -= nFrame;
THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
gradOutput->storageOffset + k*gradOutput->size[1],
nFrame, outputFrameStride*gradOutput->size[1],
gradOutput->size[1], 1);
THTensor_(setStorage2d)(gradInputWindow, gradInput->storage,
gradInput->storageOffset+k*dW*gradInput->size[1],
nFrame, inputFrameStride*gradInput->size[1],
kW*gradInput->size[1], 1);
THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
}
}
else
{
THTensor *gradOutputSample = THTensor_(new)();
THTensor *gradInputSample = THTensor_(new)();
int nBatchFrame = input->size[0];
for(i = 0; i < nBatchFrame; i++)
{
THTensor_(select)(gradOutputSample, gradOutput, 0, i);
THTensor_(select)(gradInputSample, gradInput, 0, i);
long nOutputSampleFrame = nOutputFrame;
/* ouch */
for(k = 0; nOutputSampleFrame > 0; k++)
{
long outputFrameStride = (kW-1)/dW+1;
long inputFrameStride = outputFrameStride*dW;
long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
nOutputSampleFrame -= nFrame;
THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage,
gradOutputSample->storageOffset + k*gradOutputSample->size[1],
nFrame, outputFrameStride*gradOutputSample->size[1],
gradOutputSample->size[1], 1);
THTensor_(setStorage2d)(gradInputWindow, gradInputSample->storage,
gradInputSample->storageOffset+k*dW*gradInputSample->size[1],
nFrame, inputFrameStride*gradInputSample->size[1],
kW*gradInputSample->size[1], 1);
THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
}
}
THTensor_(free)(gradOutputSample);
THTensor_(free)(gradInputSample);
}
THTensor_(free)(gradOutputWindow);
THTensor_(free)(gradInputWindow);
}
void THNN_(TemporalConvolution_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
int kW,
int dW,
real scale)
{
long nInputFrame;
long nOutputFrame;
THTensor *gradOutputWindow;
THTensor *inputWindow;
long k, i;
int dimS = 0; // sequence dimension
int dimF = 1; // feature dimension
if (gradOutput->nDimension == 3)
{
dimS = 1;
dimF = 2;
}
nInputFrame = input->size[dimS];
nOutputFrame = gradOutput->size[dimS];
input = THTensor_(newContiguous)(input);
gradOutputWindow = THTensor_(new)();
inputWindow = THTensor_(new)();
if (input->nDimension == 2)
{
/* bias first */
for(k = 0; k < nOutputFrame; k++)
{
THTensor_(select)(gradOutputWindow, gradOutput, 0, k);
THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
}
/* ouch */
for(k = 0; nOutputFrame > 0; k++)
{
long outputFrameStride = (kW-1)/dW+1;
long inputFrameStride = outputFrameStride*dW;
long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
nOutputFrame -= nFrame;
THTensor_(setStorage2d)(inputWindow, input->storage,
input->storageOffset+k*dW*input->size[1],
nFrame, inputFrameStride*input->size[1],
kW*input->size[1], 1);
THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
gradOutput->storageOffset + k*gradOutput->size[1],
nFrame, outputFrameStride*gradOutput->size[1],
gradOutput->size[1], 1);
THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow);
THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
}
}
else
{
THTensor *gradOutputSample = THTensor_(new)();
THTensor *inputSample = THTensor_(new)();
int nBatchFrame = input->size[0];
for(i = 0; i < nBatchFrame; i++)
{
THTensor_(select)(gradOutputSample, gradOutput, 0, i);
THTensor_(select)(inputSample, input, 0, i);
long nOutputSampleFrame = nOutputFrame;
/* bias first */
for(k = 0; k < nOutputFrame; k++)
{
THTensor_(select)(gradOutputWindow, gradOutputSample, 0, k);
THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
}
/* ouch */
for(k = 0; nOutputSampleFrame > 0; k++)
{
long outputFrameStride = (kW-1)/dW+1;
long inputFrameStride = outputFrameStride*dW;
long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
nOutputSampleFrame -= nFrame;
THTensor_(setStorage2d)(inputWindow, inputSample->storage,
inputSample->storageOffset+k*dW*inputSample->size[1],
nFrame, inputFrameStride*inputSample->size[1],
kW*inputSample->size[1], 1);
THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage,
gradOutputSample->storageOffset + k*gradOutputSample->size[1],
nFrame, outputFrameStride*gradOutputSample->size[1],
gradOutputSample->size[1], 1);
THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow);
THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
}
}
THTensor_(free)(gradOutputSample);
THTensor_(free)(inputSample);
}
THTensor_(free)(gradOutputWindow);
THTensor_(free)(inputWindow);
THTensor_(free)(input);
}
#endif

View File

@ -0,0 +1,235 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalMaxPooling.c"
#else
void THNN_(TemporalMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *indices,
int kW,
int dW)
{
long niframe;
long framesize;
long noframe;
real *input_data;
real *output_data;
real *indices_data;
long t, y;
int dimS = 0; // sequence dimension
int dimF = 1; // feature dimension
THArgCheck(input->nDimension == 2 || input->nDimension == 3, 2, "2D or 3D (batch mode) tensor expected");
if (input->nDimension == 3)
{
dimS = 1;
dimF = 2;
}
THArgCheck(input->size[dimS] >= kW, 2, "input sequence smaller than kernel size");
/* sizes */
niframe = input->size[dimS];
framesize = input->size[dimF];
noframe = (niframe - kW) / dW + 1;
/* get contiguous input */
input = THTensor_(newContiguous)(input);
if (input->nDimension == 2)
{
/* resize output */
THTensor_(resize2d)(output, noframe, framesize);
/* indices will contain index locations for each output point */
THTensor_(resize2d)(indices, noframe, framesize);
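/* the argmax position within each window is stored as a real and cast back to long in updateGradInput */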
/* get raw pointers */
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
for(t = 0; t < noframe; t++)
{
real *ip = input_data + t*framesize*dW;
real *op = output_data + t*framesize;
real *xp = indices_data + t*framesize;
#pragma omp parallel for private(y)
for(y = 0; y < framesize; y++)
{
/* compute local max: */
long maxindex = -1;
real maxval = -THInf;
long x;
for(x = 0; x < kW; x++)
{
real val = ip[x*framesize+y];
if (val > maxval)
{
maxval = val;
maxindex = x;
}
}
/* set output to local max */
op[y] = maxval;
xp[y] = (real)maxindex;
}
}
}
else
{
/* number of batch frames */
long nbframe = input->size[0];
long i;
/* resize output */
THTensor_(resize3d)(output, nbframe, noframe, framesize);
/* indices will contain index locations for each output point */
THTensor_(resize3d)(indices, nbframe, noframe, framesize);
/* get raw pointers */
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
for(i = 0; i < nbframe; i++)
{
real *inputSample_data = input_data + i*niframe*framesize;
real *outputSample_data = output_data + i*noframe*framesize;
real *indicesSample_data = indices_data + i*noframe*framesize;
for(t = 0; t < noframe; t++)
{
real *ip = inputSample_data + t*framesize*dW;
real *op = outputSample_data + t*framesize;
real *xp = indicesSample_data + t*framesize;
#pragma omp parallel for private(y)
for(y = 0; y < framesize; y++)
{
/* compute local max: */
long maxindex = -1;
real maxval = -THInf;
long x;
for(x = 0; x < kW; x++)
{
real val = ip[x*framesize+y];
if (val > maxval)
{
maxval = val;
maxindex = x;
}
}
/* set output to local max */
op[y] = maxval;
xp[y] = (real)maxindex;
}
}
}
}
/* cleanup */
THTensor_(free)(input);
}
void THNN_(TemporalMaxPooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *indices,
int kW,
int dW)
{
long niframe;
long noframe;
long framesize;
real *gradInput_data;
real *gradOutput_data;
real *indices_data;
long t, y;
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize and zero */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
int dimS = 0; // sequence dimension
int dimF = 1; // feature dimension
if (input->nDimension == 3)
{
dimS = 1;
dimF = 2;
}
/* sizes */
niframe = input->size[dimS];
noframe = gradOutput->size[dimS];
framesize = gradOutput->size[dimF];
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
indices_data = THTensor_(data)(indices);
if (input->nDimension == 2)
{
for(t = 0; t < noframe; t++)
{
real *gip = gradInput_data + t*framesize*dW;
real *gop = gradOutput_data + t*framesize;
real *xp = indices_data + t*framesize;
#pragma omp parallel for private(y)
for(y = 0; y < framesize; y++)
{
/* compute local max: */
long maxindex = (long)xp[y];
gip[maxindex*framesize+y] += gop[y];
}
}
}
else
{
/* number of batch frames */
long nbframe = input->size[0];
long i;
for(i = 0; i < nbframe; i++)
{
real *gradInputSample_data = gradInput_data + i*niframe*framesize;
real *gradOutputSample_data = gradOutput_data + i*noframe*framesize;
real *indicesSample_data = indices_data + i*noframe*framesize;
for(t = 0; t < noframe; t++)
{
real *gip = gradInputSample_data + t*framesize*dW;
real *gop = gradOutputSample_data + t*framesize;
real *xp = indicesSample_data + t*framesize;
#pragma omp parallel for private(y)
for(y = 0; y < framesize; y++)
{
/* compute local max: */
long maxindex = (long)xp[y];
gip[maxindex*framesize+y] += gop[y];
}
}
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif

View File

@ -0,0 +1,116 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalSubSampling.c"
#else
void THNN_(TemporalSubSampling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
int kW,
int dW,
int inputFrameSize)
{
THTensor *outputFrame, *inputWindow;
int nInputFrame, nOutputFrame;
long k;
THArgCheck( input->nDimension == 2, 2, "2D tensor expected");
THArgCheck( input->size[1] == inputFrameSize, 2, "invalid input frame size");
THArgCheck( input->size[0] >= kW, 2, "input sequence smaller than kernel size");
outputFrame = THTensor_(new)();
inputWindow = THTensor_(new)();
nInputFrame = input->size[0];
nOutputFrame = (nInputFrame - kW) / dW + 1;
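/* each output frame is an elementwise affine pool: output[k] = weight .* (input[k*dW] + ... + input[k*dW + kW - 1]) + bias */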
THTensor_(resize2d)(output,
nOutputFrame,
inputFrameSize);
for(k = 0; k < nOutputFrame; k++)
{
THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
THTensor_(select)(outputFrame, output, 0, k);
THTensor_(sum)(outputFrame, inputWindow, 0);
THTensor_(cmul)(outputFrame, outputFrame, weight);
THTensor_(cadd)(outputFrame, outputFrame, 1, bias);
}
THTensor_(free)(outputFrame);
THTensor_(free)(inputWindow);
}
void THNN_(TemporalSubSampling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
int kW,
int dW)
{
THTensor *gradOutputFrame;
THTensor *gradInputWindow, *buffer, *kwunit;
long k;
gradOutputFrame = THTensor_(new)();
gradInputWindow = THTensor_(new)();
buffer = THTensor_(new)();
kwunit = THTensor_(newWithSize1d)(kW);
THTensor_(fill)(kwunit, 1);
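/* addr adds the outer product kwunit x (weight .* gradOutput[k]); with kwunit all ones this replicates the row into each of the kW frames of the window */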
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
for(k = 0; k < gradOutput->size[0]; k++)
{
THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW);
THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
THTensor_(cmul)(buffer, weight, gradOutputFrame);
THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer);
}
THTensor_(free)(gradOutputFrame);
THTensor_(free)(gradInputWindow);
THTensor_(free)(buffer);
THTensor_(free)(kwunit);
}
void THNN_(TemporalSubSampling_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
int kW,
int dW,
real scale)
{
THTensor *gradOutputFrame;
THTensor *inputWindow, *buffer;
long k;
gradOutputFrame = THTensor_(new)();
inputWindow = THTensor_(new)();
buffer = THTensor_(new)();
for(k = 0; k < gradOutput->size[0]; k++)
{
THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
THTensor_(sum)(buffer, inputWindow, 0);
THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame);
THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame);
}
THTensor_(free)(gradOutputFrame);
THTensor_(free)(inputWindow);
THTensor_(free)(buffer);
}
#endif

View File

@ -0,0 +1,58 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Threshold.c"
#else
void THNN_(Threshold_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
real threshold,
real val,
bool inplace)
{
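/* out = x if x > threshold, else val; the in-place path overwrites input and makes output an alias of it */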
if (inplace)
{
TH_TENSOR_APPLY(real, input,
if (*input_data <= threshold)
*input_data = val;
);
THTensor_(set)(output, input);
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = (*input_data > threshold) ? *input_data : val;
);
}
}
void THNN_(Threshold_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
real threshold,
bool inplace)
{
if (inplace)
{
TH_TENSOR_APPLY2(real, gradOutput, real, input,
if ((*input_data) <= threshold)
*gradOutput_data = 0;
);
THTensor_(set)(gradInput, gradOutput);
}
else
{
THTensor_(resizeAs)(gradInput, input);
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
if ((*input_data) > threshold)
*gradInput_data = *gradOutput_data;
else
*gradInput_data = 0;
);
}
}
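/* Illustrative usage sketch (not part of the library): with real = float the
macro expands to THNN_FloatThreshold_updateOutput, and ReLU is the special
case threshold = 0, val = 0:
THNN_FloatThreshold_updateOutput(NULL, input, output, 0, 0, false);
*/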
#endif

View File

@ -0,0 +1,309 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c"
#else
static void THNN_(VolumetricAveragePooling_updateOutput_frame)(
real *input_p,
real *output_p,
long nslices,
long itime,
long iwidth,
long iheight,
long otime,
long owidth,
long oheight,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
/* loop over output */
long i, j, ti;
for (ti = 0; ti < otime; ti++)
{
for (i = 0; i < oheight; i++)
{
for (j = 0; j < owidth; j++)
{
/* local pointers */
real *ip = input_p + k * itime * iwidth * iheight
+ ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
real *op = output_p + k * otime * owidth * oheight
+ ti * owidth * oheight + i * owidth + j;
/* compute local sum: */
real sum = 0.0;
int x, y, z;
for (z=0; z < kT; z++)
{
for (y = 0; y < kH; y++)
{
for (x = 0; x < kW; x++)
{
sum += *(ip + z * iwidth * iheight + y * iwidth + x);
}
}
}
/* set output to local average */
*op = sum / (kT * kW * kH);
}
}
}
}
}
void THNN_(VolumetricAveragePooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH)
{
long nslices;
long itime;
long iheight;
long iwidth;
long otime;
long oheight;
long owidth;
real *input_data;
real *output_data;
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D (batch-mode) tensor expected"
);
int dimN = 0;
int dimt = 1;
int dimh = 2;
int dimw = 3;
if (input->nDimension == 5)
{
dimN++;
dimt++;
dimh++;
dimw++;
}
THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
"input image smaller than kernel size"
);
/* sizes */
nslices = input->size[dimN];
itime = input->size[dimt];
iheight = input->size[dimh];
iwidth = input->size[dimw];
otime = (itime - kT) / dT + 1;
oheight = (iheight - kH) / dH + 1;
owidth = (iwidth - kW) / dW + 1;
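/* e.g. itime = 8 with kT = 2, dT = 2 gives otime = (8 - 2)/2 + 1 = 4 */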
/* get contiguous input */
input = THTensor_(newContiguous)(input);
if (input->nDimension == 4) /* non-batch mode */
{
/* resize output */
THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
THNN_(VolumetricAveragePooling_updateOutput_frame)(
input_data, output_data, nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
kT, kW, kH,
dT, dW, dH
);
}
else /* batch mode */
{
long p;
long nBatch = input->size[0];
long istride = nslices * itime * iwidth * iheight;
long ostride = nslices * otime * owidth * oheight;
/* resize output */
THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(p)
for (p=0; p < nBatch; p++)
{
THNN_(VolumetricAveragePooling_updateOutput_frame)(
input_data + p * istride, output_data + p * ostride, nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
kT, kW, kH,
dT, dW, dH
);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(VolumetricAveragePooling_updateGradInput_frame)(
real *gradInput_p,
real *gradOutput_p,
long nslices,
long itime,
long iwidth,
long iheight,
long otime,
long owidth,
long oheight,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
/* loop over output */
long i, j, ti;
for (ti = 0; ti < otime; ti++)
{
for (i = 0; i < oheight; i++)
{
for (j = 0; j < owidth; j++)
{
/* local pointers */
real *ip = gradInput_p + k * itime * iwidth * iheight
+ ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
real *op = gradOutput_p + k * otime * owidth * oheight
+ ti * owidth * oheight + i * owidth + j;
/* scatter gradients out to footprint: */
real val = *op / (kT * kW * kH);
int x,y,z;
for (z=0; z < kT; z++)
{
for (y = 0; y < kH; y++)
{
for (x = 0; x < kW; x++)
{
*(ip + z * iwidth * iheight + y * iwidth + x) += val;
}
}
}
}
}
}
}
}
void THNN_(VolumetricAveragePooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH)
{
int nslices;
int itime;
int iheight;
int iwidth;
int otime;
int oheight;
int owidth;
real *gradInput_data;
real *gradOutput_data;
int dimN = 0;
int dimt = 1;
int dimh = 2;
int dimw = 3;
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (input->nDimension == 5)
{
dimN++;
dimt++;
dimh++;
dimw++;
}
/* sizes */
nslices = input->size[dimN];
itime = input->size[dimt];
iheight = input->size[dimh];
iwidth = input->size[dimw];
otime = gradOutput->size[dimt];
oheight = gradOutput->size[dimh];
owidth = gradOutput->size[dimw];
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
/* backprop */
if (input->nDimension == 4) /* non-batch mode*/
{
THNN_(VolumetricAveragePooling_updateGradInput_frame)(
gradInput_data, gradOutput_data, nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
kT, kW, kH,
dT, dW, dH
);
}
else /* batch mode */
{
long p;
long nBatch = input->size[0];
long istride = nslices * itime * iwidth * iheight;
long ostride = nslices * otime * owidth * oheight;
#pragma omp parallel for private(p)
for (p = 0; p < nBatch; p++)
{
THNN_(VolumetricAveragePooling_updateGradInput_frame)(
gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
kT, kW, kH,
dT, dW, dH
);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif

View File

@ -0,0 +1,247 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricConvolution.c"
#else
void THNN_(VolumetricConvolution_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *finput, // only used by cuda impl
THTensor *fgradInput, // only used by cuda impl
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D (batch-mode) tensor expected"
);
int dimt = 1;
int dimh = 2;
int dimw = 3;
if (input->nDimension == 5)
{
dimt++;
dimh++;
dimw++;
}
long nOutputPlane = weight->size[0];
long kT = weight->size[2];
long kH = weight->size[3];
long kW = weight->size[4];
long inputDepth = input->size[dimt];
long inputHeight = input->size[dimh];
long inputWidth = input->size[dimw];
long outputDepth = (inputDepth - kT) / dT + 1;
long outputWidth = (inputWidth - kW) / dW + 1;
long outputHeight = (inputHeight - kH) / dH + 1;
THTensor *outn = THTensor_(new)();
long i, j;
if (input->nDimension == 4) /* non-batch mode */
{
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
/* add bias */
for (i = 0; i < bias->size[0]; i++)
{
THTensor_(select)(outn, output, 0, i);
THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
}
/* do convolutions */
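/* "V" requests a valid (no padding) convolution, "X" cross-correlation, i.e. the kernels are not flipped */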
THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X");
}
else /* batch mode */
{
long nBatch = input->size[0];
THTensor_(resize5d)(output, nBatch, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor *inb = THTensor_(new)();
THTensor *outb = THTensor_(new)();
/* loop over batches */
for (j = 0; j < nBatch; j++)
{
THTensor_(select)(inb, input, 0, j);
THTensor_(select)(outb, output, 0, j);
/* add bias */
for (i = 0; i < bias->size[0]; i++)
{
THTensor_(select)(outn, outb, 0, i);
THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
}
/* do convolutions */
THTensor_(conv3Dmv)(outb, 1.0, 1.0, inb, weight, dT, dH, dW, "V", "X");
}
THTensor_(free)(inb);
THTensor_(free)(outb);
}
THTensor_(free)(outn);
}
void THNN_(VolumetricConvolution_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *finput, // only used by cuda impl
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version
THArgCheck(weight->nDimension == 5, 4,
"5D weight tensor is expected (nOutputPlane x nInputPlane x kT x kH x kW)"
);
int nOutputPlane = (int)weight->size[0];
THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3,
"4D or 5D (batch-mode) tensor expected"
);
int dimPlane = 0;
if (gradOutput->nDimension == 5)
{
dimPlane++;
}
THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1,
"Number of output features is not equal to nOutputPlane"
);
/* gradient to input */
THTensor *tweight = THTensor_(newTranspose)(weight, 0, 1);
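/* the backward pass is a full ("F") true convolution ("C") with the transposed kernels, spreading each gradOutput value back over its receptive field */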
if (gradOutput->nDimension == 4) /* non-batch mode */
{
THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C");
}
else /* batch mode */
{
long nBatch = gradOutput->size[0];
THTensor *ginpb = THTensor_(new)();
THTensor *goutb = THTensor_(new)();
long j;
THTensor_(resize5d)(gradInput,
input->size[0], input->size[1], input->size[2], input->size[3], input->size[4]
);
/* loop over batches */
for (j = 0; j < nBatch; j++)
{
THTensor_(select)(ginpb, gradInput, 0, j);
THTensor_(select)(goutb, gradOutput, 0, j);
THTensor_(conv3Dmv)(ginpb, 0.0, 1.0, goutb, tweight, dT, dH, dW, "F", "C");
}
THTensor_(free)(ginpb);
THTensor_(free)(goutb);
}
THTensor_(free)(tweight);
}
void THNN_(VolumetricConvolution_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput, // only used by cuda impl
THTensor *fgradInput, // only used by cuda impl
int dT,
int dW,
int dH,
int pT,
int pW,
int pH,
real scale)
{
THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version
THArgCheck(gradWeight->nDimension == 5, 4,
"5D gradWeight tensor is expected (nOutputPlane x nInputPlane x kT x kH x kW)"
);
int nOutputPlane = (int)gradWeight->size[0];
THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
"gradBias tensor has wrong size"
);
long k;
real *gradBias_data;
THTensor *gradOutSlice;
int dimPlane = 0;
if (gradOutput->nDimension == 5)
{
dimPlane++;
}
THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1,
"Number of output features is not equal to nOutputPlane"
);
if (gradOutput->nDimension == 4) /* non-batch mode */
{
/* gradient to bias */
gradBias_data = THTensor_(data)(gradBias);
gradOutSlice = THTensor_(new)();
for (k = 0; k < nOutputPlane; k++)
{
THTensor_(select)(gradOutSlice, gradOutput, 0, k);
gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
}
THTensor_(free)(gradOutSlice);
/* gradient to kernels */
THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW);
}
else /* batch mode */
{
long nBatch = gradOutput->size[0];
THTensor *inpb = THTensor_(new)();
THTensor *goutb = THTensor_(new)();
long j;
/* loop over batches */
for (j = 0; j < nBatch; j++)
{
THTensor_(select)(inpb, input, 0, j);
THTensor_(select)(goutb, gradOutput, 0, j);
/* gradient to bias */
gradBias_data = THTensor_(data)(gradBias);
gradOutSlice = THTensor_(new)();
for (k = 0; k < nOutputPlane; k++)
{
THTensor_(select)(gradOutSlice, goutb, 0, k);
gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
}
THTensor_(free)(gradOutSlice);
/* gradient to kernels */
THTensor_(conv3DRevger)(gradWeight, 1.0, scale, inpb, goutb, dT, dH, dW);
}
THTensor_(free)(inpb);
THTensor_(free)(goutb);
}
}
#endif

View File

@ -0,0 +1,518 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricConvolutionMM.c"
#else
/* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */
static void THNN_(unfolded_acc_vol)(
THTensor *finput,
THTensor *input,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH,
int nInputPlane,
int inputDepth,
int inputWidth,
int inputHeight,
int outputDepth,
int outputWidth,
int outputHeight)
{
int nip;
real *input_data = THTensor_(data)(input);
real *finput_data = THTensor_(data)(finput);
//#pragma omp parallel for private(nip)
for (nip = 0; nip < nInputPlane; nip++)
{
int kt, kw, kh, t, y, x, it, ix, iy;
for (kt = 0; kt < kT; kt++)
{
for (kh = 0; kh < kH; kh++)
{
for (kw = 0; kw < kW; kw++)
{
real *src = finput_data
+ nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth)
+ kt * (kH*kW*outputDepth*outputHeight*outputWidth)
+ kh * (kW*outputDepth*outputHeight*outputWidth)
+ kw * (outputDepth*outputHeight*outputWidth);
real *dst = input_data + nip*(inputDepth*inputHeight*inputWidth);
if (pT > 0 || pH > 0 || pW > 0)
{
for (t = 0; t < outputDepth; t++)
{
it = t*dT - pT + kt;
for (y = 0; y < outputHeight; y++)
{
iy = y*dH - pH + kh;
for (x = 0; x < outputWidth; x++)
{
ix = x*dW - pW + kw;
if (it >= 0 && it < inputDepth && iy >= 0 && iy < inputHeight && ix >= 0 && ix < inputWidth)
{
THVector_(add)(dst+it*inputHeight*inputWidth+iy*inputWidth+ix, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1);
}
}
}
}
}
else
{
for (t = 0; t < outputDepth; t++)
{
it = t*dT + kt;
for (y = 0; y < outputHeight; y++)
{
iy = y*dH + kh;
for(x = 0; x < outputWidth; x++)
{
ix = x*dW + kw;
THVector_(add)(dst+it*inputHeight*inputWidth+iy*inputWidth+ix, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1);
}
}
}
}
}
}
}
}
}
static void THNN_(unfolded_copy_vol)(
THTensor *finput,
THTensor *input,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH,
int nInputPlane,
int inputDepth,
int inputWidth,
int inputHeight,
int outputDepth,
int outputWidth,
int outputHeight)
{
long k;
real *input_data = THTensor_(data)(input);
real *finput_data = THTensor_(data)(finput);
// #pragma omp parallel for private(k)
for (k = 0; k < nInputPlane*kT*kH*kW; k++)
{
int nip = k / (kT*kH*kW);
int rest = k % (kT*kH*kW);
int kt = rest / (kH*kW);
rest = rest % (kH*kW);
int kh = rest / kW;
int kw = rest % kW;
int t,x,y,it,ix,iy;
real *dst = finput_data
+ nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth)
+ kt * (kH*kW*outputDepth*outputHeight*outputWidth)
+ kh * (kW*outputDepth*outputHeight*outputWidth)
+ kw * (outputDepth*outputHeight*outputWidth);
real *src = input_data + nip*(inputDepth*inputHeight*inputWidth);
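/* vol2col layout: row (nip, kt, kh, kw) of finput holds, for every output voxel, the input value seen by that kernel tap */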
if (pT > 0 || pH > 0 || pW > 0)
{
for (t = 0; t < outputDepth; t++)
{
it = t*dT - pT + kt;
for (y = 0; y < outputHeight; y++)
{
iy = y*dH - pH + kh;
for (x = 0; x < outputWidth; x++)
{
ix = x*dW - pW + kw;
if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth)
memset(dst+t*outputHeight*outputWidth+y*outputWidth+x, 0, sizeof(real)*(1));
else
memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1));
}
}
}
}
else
{
for (t = 0; t < outputDepth; t++)
{
it = t*dT + kt;
for (y = 0; y < outputHeight; y++)
{
iy = y*dH + kh;
for(x = 0; x < outputWidth; x++)
{
ix = x*dW + kw;
memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1));
}
}
}
}
}
}
static void THNN_(VolumetricConvolutionMM_updateOutput_frame)(
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *finput,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH,
long nInputPlane,
long inputDepth,
long inputWidth,
long inputHeight,
long nOutputPlane,
long outputDepth,
long outputWidth,
long outputHeight)
{
long i;
THTensor *output2d;
THNN_(unfolded_copy_vol)(
finput, input,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH,
nInputPlane,
inputDepth, inputWidth, inputHeight,
outputDepth, outputWidth, outputHeight
);
output2d = THTensor_(newWithStorage2d)(
output->storage, output->storageOffset, nOutputPlane, -1,
outputDepth*outputHeight*outputWidth, -1
);
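/* with output viewed as nOutputPlane x (outputDepth*outputHeight*outputWidth), the convolution reduces to a single GEMM on top of the bias fill: output2d += weight * finput */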
for (i = 0; i < nOutputPlane; i++)
{
THVector_(fill)(
output->storage->data+output->storageOffset+output->stride[0]*i,
THTensor_(get1d)(bias, i),
outputDepth*outputHeight*outputWidth
);
}
THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
THTensor_(free)(output2d);
}
void THNN_(VolumetricConvolutionMM_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *finput,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
int dimf = 0;
int dimt = 1;
int dimh = 2;
int dimw = 3;
long nInputPlane;
long inputDepth;
long inputHeight;
long inputWidth;
long nOutputPlane;
long outputDepth;
long outputHeight;
long outputWidth;
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D(batch mode) tensor expected"
);
if (input->nDimension == 5)
{
dimf++;
dimt++;
dimh++;
dimw++;
}
nInputPlane = input->size[dimf];
inputDepth = input->size[dimt];
inputHeight = input->size[dimh];
inputWidth = input->size[dimw];
nOutputPlane = weight->size[0];
outputDepth = (inputDepth + 2*pT - kT) / dT + 1;
outputHeight = (inputHeight + 2*pH - kH) / dH + 1;
outputWidth = (inputWidth + 2*pW - kW) / dW + 1;
if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
{
THError(
"Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
nInputPlane, inputDepth, inputHeight, inputWidth,
nOutputPlane, outputDepth, outputHeight, outputWidth
);
}
if (input->nDimension == 4)
{
THTensor_(resize2d)(finput, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth);
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
THNN_(VolumetricConvolutionMM_updateOutput_frame)(
input, output, weight, bias, finput,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH,
nInputPlane, inputDepth, inputWidth, inputHeight,
nOutputPlane, outputDepth, outputWidth, outputHeight
);
}
else
{
long T = input->size[0];
long t;
THTensor_(resize3d)(finput, T, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth);
THTensor_(resize5d)(output, T, nOutputPlane, outputDepth, outputHeight, outputWidth);
// #pragma omp parallel for private(t)
for (t = 0; t < T; t++)
{
THTensor *input_t = THTensor_(newSelect)(input, 0, t);
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
THNN_(VolumetricConvolutionMM_updateOutput_frame)(
input_t, output_t, weight, bias, finput_t,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH,
nInputPlane, inputDepth, inputWidth, inputHeight,
nOutputPlane, outputDepth, outputWidth, outputHeight
);
THTensor_(free)(input_t);
THTensor_(free)(output_t);
THTensor_(free)(finput_t);
}
}
}
static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
THTensor *gradInput,
THTensor *gradOutput,
THTensor *weight,
THTensor *fgradInput,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
gradOutput->storage, gradOutput->storageOffset,
gradOutput->size[0], -1,
gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
);
THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
THTensor_(free)(gradOutput2d);
THTensor_(zero)(gradInput);
THNN_(unfolded_acc_vol)(
fgradInput, gradInput,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH,
gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2],
gradOutput->size[1], gradOutput->size[3], gradOutput->size[2]
);
}
void THNN_(VolumetricConvolutionMM_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *finput,
THTensor *fgradInput,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
// number of input/output planes and kernel size is indirectly defined by the weight tensor
THArgCheck(weight->nDimension == 2, 4,
"2D weight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))"
);
int nOutputPlane = (int)weight->size[0];
THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 1,
"Number of output features is not equal to nOutputPlane"
);
THTensor_(resizeAs)(gradInput, input);
THTensor_(resizeAs)(fgradInput, finput);
// depending on the BLAS library, fgradInput (result tensor) might
// be left uninitialized on zero alpha, which might lead to weird behavior
// hence, to be safe, zero it
THTensor_(zero)(fgradInput);
THTensor_(transpose)(weight, weight, 0, 1);
if (input->nDimension == 4)
{
THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
gradInput, gradOutput, weight, fgradInput,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH
);
}
else
{
long T = input->size[0];
long t;
//#pragma omp parallel for private(t)
for (t = 0; t < T; t++)
{
THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
gradInput_t, gradOutput_t, weight, fgradInput_t,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH
);
THTensor_(free)(gradInput_t);
THTensor_(free)(gradOutput_t);
THTensor_(free)(fgradInput_t);
}
}
THTensor_(transpose)(weight, weight, 0, 1);
}
static void THNN_(VolumetricConvolutionMM_accGradParameters_frame)(
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput,
real scale)
{
long i;
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
gradOutput->storage, gradOutput->storageOffset,
gradOutput->size[0], -1,
gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
);
THTensor_(transpose)(finput, finput, 0, 1);
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
THTensor_(transpose)(finput, finput, 0, 1);
for (i = 0; i < gradBias->size[0]; i++)
{
long k;
real sum = 0;
real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
for (k = 0; k < gradOutput2d->size[1]; k++)
sum += data[k];
(gradBias->storage->data + gradBias->storageOffset)[i] += scale * sum;
}
THTensor_(free)(gradOutput2d);
}
void THNN_(VolumetricConvolutionMM_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput,
real scale)
{
THArgCheck(gradWeight->nDimension == 2, 4,
"2D gradWeight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))"
);
int nOutputPlane = (int)gradWeight->size[0];
THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
"gradBias tensor has wrong size"
);
THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 3,
"Number of output features is not equal to nOutputPlane"
);
if (input->nDimension == 4) // non-batch mode
{
THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
}
else // batch mode
{
long T = input->size[0];
long t;
for (t = 0; t < T; t++)
{
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
THTensor_(free)(gradOutput_t);
THTensor_(free)(finput_t);
}
}
}
#endif

View File

@ -0,0 +1,356 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricDilatedConvolution.c"
#else
void THNN_(VolumetricDilatedConvolution_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias,
THTensor *columns,
THTensor *ones,
int kT, int kW, int kH,
int dT, int dW, int dH,
int padT, int padW, int padH,
int dilationT, int dilationW, int dilationH)
{
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected, but got: %d", input->nDimension);
THArgCheck(weight->nDimension == 5, 4, "weight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
THArgCheck(!bias || weight->size[0] == bias->size[0], 4, "nOutputPlane mismatch in weight and bias");
THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
// Params:
int nInputPlane = weight->size[1];
int nOutputPlane = weight->size[0];
int batch = 1;
if (input->nDimension == 4) {
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match. Expected: %d, got %d", nInputPlane, input->size[0]);
// Force batch
batch = 0;
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
} else {
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match. Expected: %d, got %ld", nInputPlane, input->size[1]);
}
long inputDepth = input->size[2];
long inputHeight = input->size[3];
long inputWidth = input->size[4];
long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
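// effective kernel extent per axis is dilation * (k - 1) + 1, e.g. kT = 3 with dilationT = 2 covers 5 frames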
if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);
// Batch size + input planes
long batchSize = input->size[0];
// Resize output
THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(zero)(output);
// Resize temporary columns
THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
// Define a buffer of ones, for bias accumulation
// Note: this buffer can be shared with other modules, it only ever gets increased,
// and always contains ones.
if (ones->nDimension != 3 ||
ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
// Resize plane and fill with ones...
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *output_n = THTensor_(new)();
// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt ++) {
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(output_n, output, 0, elt);
// Do Bias first:
// M,N,K are dims of matrix A and B
long m_ = nOutputPlane;
long n_ = outputDepth * outputHeight * outputWidth;
long k_ = 1;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
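// row-major C = A*B is obtained as column-major C^T = B^T * A^T, hence the swapped operand order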
if (bias) {
THBlas_(gemm)(
't', 'n',
n_, m_, k_,
1,
THTensor_(data)(ones), k_,
THTensor_(data)(bias), k_,
0,
THTensor_(data)(output_n), n_
);
} else {
THTensor_(zero)(output_n);
}
// Extract columns:
THNN_(vol2col)(
THTensor_(data)(input_n),
nInputPlane, inputDepth, inputHeight, inputWidth,
kT, kH, kW, padT, padH, padW, dT, dH, dW,
dilationT, dilationH, dilationW,
THTensor_(data)(columns)
);
// M,N,K are dims of matrix A and B
long m = nOutputPlane;
long n = columns->size[1];
long k = nInputPlane*kT*kH*kW;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 'n',
n, m, k,
1,
THTensor_(data)(columns), n,
THTensor_(data)(weight), k,
1,
THTensor_(data)(output_n), n
);
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(output_n);
// Resize output
if (batch == 0) {
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
}
}
void THNN_(VolumetricDilatedConvolution_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *gradColumns,
int kT, int kW, int kH,
int dT, int dW, int dH,
int padT, int padW, int padH,
int dilationT, int dilationW, int dilationH)
{
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected");
THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, "4D or 5D (batch mode) tensor is expected");
THArgCheck(weight->nDimension == 5, 4, "weight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
// Params
int nInputPlane = weight->size[1];
int nOutputPlane = weight->size[0];
int batch = 1;
if (input->nDimension == 4) {
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
// Force batch
batch = 0;
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
} else {
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
}
long inputDepth = input->size[2];
long inputWidth = input->size[4];
long inputHeight = input->size[3];
long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
// Batch size + input planes
long batchSize = input->size[0];
// Resize output
THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
THTensor_(zero)(gradColumns);
// Helpers
THTensor *gradInput_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt ++) {
// Matrix multiply per sample:
THTensor_(select)(gradInput_n, gradInput, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
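// Viewed row-major, the GEMM below computes
//   gradColumns (nInputPlane*kT*kH*kW x oD*oH*oW) = weight^T * gradOutput_n,
// and col2vol then scatter-adds the columns back into gradInput_n
// (overlapping receptive fields sum up).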
// M,N,K are dims of matrix A and B
long m = nInputPlane*kT*kW*kH;
long n = gradColumns->size[1];
long k = nOutputPlane;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 't',
n, m, k,
1,
THTensor_(data)(gradOutput_n), n,
THTensor_(data)(weight), m,
0,
THTensor_(data)(gradColumns), n
);
// Unpack columns back into input:
THNN_(col2vol)(
THTensor_(data)(gradColumns),
nInputPlane, inputDepth, inputHeight, inputWidth,
kT, kH, kW, padT, padH, padW, dT, dH, dW,
dilationT, dilationH, dilationW,
THTensor_(data)(gradInput_n)
);
}
// Free
THTensor_(free)(gradInput_n);
THTensor_(free)(gradOutput_n);
// Resize output
if (batch == 0) {
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
}
}
void THNN_(VolumetricDilatedConvolution_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *columns,
THTensor *ones,
int kT, int kW, int kH,
int dT, int dW, int dH,
int padT, int padW, int padH,
int dilationT, int dilationW, int dilationH,
real scale)
{
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected");
THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, "4D or 5D (batch mode) tensor is expected");
THArgCheck(gradWeight->nDimension == 5, 4, "gradWeight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
THArgCheck(!gradBias || gradWeight->size[0] == gradBias->size[0], 4, "nOutputPlane mismatch in gradWeight and gradBias");
// Params
int nInputPlane = gradWeight->size[1];
int nOutputPlane = gradWeight->size[0];
int batch = 1;
if (input->nDimension == 4) {
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
// Force batch
batch = 0;
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
} else {
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
}
long inputDepth = input->size[2];
long inputWidth = input->size[4];
long inputHeight = input->size[3];
long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
// Batch size + input planes
long batchSize = input->size[0];
// Define a buffer of ones, for bias accumulation
if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
// Resize plane and fill with ones...
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Resize temporary columns
THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt ++) {
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// Extract columns:
THNN_(vol2col)(
THTensor_(data)(input_n),
nInputPlane, inputDepth, inputHeight, inputWidth,
kT, kH, kW, padT, padH, padW, dT, dH, dW,
dilationT, dilationH, dilationW,
THTensor_(data)(columns)
);
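// Viewed row-major, the GEMM below accumulates
//   gradWeight (nOutputPlane x nInputPlane*kT*kH*kW) += scale * gradOutput_n * columns^T,
// summing the per-sample weight gradients over the batch (beta = 1).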
// M,N,K are dims of matrix A and B
long m = nOutputPlane;
long n = nInputPlane*kT*kW*kH;
long k = columns->size[1];
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
't', 'n',
n, m, k,
scale,
THTensor_(data)(columns), k,
THTensor_(data)(gradOutput_n), k,
1,
THTensor_(data)(gradWeight), n
);
// Do Bias:
// M,N,K are dims of matrix A and B
long m_ = nOutputPlane;
long k_ = outputDepth * outputHeight * outputWidth;
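// gradBias[p] += scale * (spatial sum of gradOutput_n plane p): multiplying
// the transposed column-major view of gradOutput_n by the ones vector
// reduces each plane to its spatial sum.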
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
if (gradBias) {
THBlas_(gemv)(
't',
k_, m_,
scale,
THTensor_(data)(gradOutput_n), k_,
THTensor_(data)(ones), 1,
1,
THTensor_(data)(gradBias), 1
);
}
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(gradOutput_n);
// Resize
if (batch == 0) {
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
}
}
#endif


@ -0,0 +1,469 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricFullConvolution.c"
#else
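// vol2col lays out every kT x kH x kW receptive field of a 3D volume as one
// column of a 2D buffer: row c of data_col corresponds to input channel
// c / (kT*kH*kW) at kernel offset (t_offset, h_offset, w_offset), and entry
// (t, h, w) of that row reads the voxel at (t*dT - pT + t_offset*dilationT, ...),
// or 0 where the window hangs over the padding. Convolution then reduces to
// a single GEMM against the flattened weights.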
static void THNN_(vol2col)(
const real *data_vol, const int channels,
const int depth, const int height, const int width,
const int kT, const int kH, const int kW,
const int pT, const int pH, const int pW,
const int dT, const int dH, const int dW,
const int dilationT, const int dilationH, const int dilationW,
real *data_col)
{
int c, t, h, w;
int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
int channels_col = channels * kT * kH * kW;
for (c = 0; c < channels_col; ++c)
{
int w_offset = c % kW;
int h_offset = (c / kW) % kH;
int t_offset = (c / kW / kH) % kT;
int c_vol = c / kT / kH / kW;
for (t = 0; t < depth_col; ++t)
{
for (h = 0; h < height_col; ++h)
{
for (w = 0; w < width_col; ++w)
{
int t_pad = t * dT - pT + t_offset * dilationT;
int h_pad = h * dH - pH + h_offset * dilationH;
int w_pad = w * dW - pW + w_offset * dilationW;
if (t_pad >= 0 && t_pad < depth &&
h_pad >= 0 && h_pad < height &&
w_pad >= 0 && w_pad < width)
data_col[((c * depth_col + t) * height_col + h) * width_col + w] =
data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad];
else
data_col[((c * depth_col + t) * height_col + h) * width_col + w] = 0;
}
}
}
}
}
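// col2vol is the adjoint of vol2col: it zeroes the volume and scatter-adds
// every column entry back to the voxel it was read from, so voxels covered
// by several overlapping windows accumulate the sum of their contributions.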
static void THNN_(col2vol)(
const real* data_col, const int channels,
const int depth, const int height, const int width,
const int kT, const int kH, const int kW,
const int pT, const int pH, const int pW,
const int dT, const int dH, const int dW,
const int dilationT, const int dilationH, const int dilationW,
real* data_vol)
{
int c, t, h, w;
memset(data_vol, 0, sizeof(real) * depth * height * width * channels);
int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
int channels_col = channels * kT * kH * kW;
for (c = 0; c < channels_col; ++c)
{
int w_offset = c % kW;
int h_offset = (c / kW) % kH;
int t_offset = (c / kW / kH) % kT;
int c_vol = c / kT / kH / kW;
for (t = 0; t < depth_col; ++t)
{
for (h = 0; h < height_col; ++h)
{
for (w = 0; w < width_col; ++w)
{
int t_pad = t * dT - pT + t_offset * dilationT;
int h_pad = h * dH - pH + h_offset * dilationH;
int w_pad = w * dW - pW + w_offset * dilationW;
if (t_pad >= 0 && t_pad < depth &&
h_pad >= 0 && h_pad < height &&
w_pad >= 0 && w_pad < width)
data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad] +=
data_col[((c * depth_col + t) * height_col + h) * width_col + w];
}
}
}
}
}
void THNN_(VolumetricFullConvolution_updateOutput)(
THNNState *state,
THTensor *input, // 4D or 5D (batch) tensor
THTensor *output,
THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
THTensor *bias,
THTensor *finput, // internal columns buffer
THTensor *fgradInput, // internal ones buffer
int dT, int dW, int dH, // stride of the convolution
int pT, int pW, int pH, // padding
int aT, int aW, int aH) // extra output adjustment
{
THTensor *columns = finput;
THTensor *ones = fgradInput;
// number of input & output planes and kernel size are indirectly defined by the weight tensor
THArgCheck(weight->nDimension == 5, 4,
"5D weight tensor is expected (nInputPlane x nOutputPlane x kT x kH x kW)"
);
const int nInputPlane = (int)weight->size[0];
const int nOutputPlane = (int)weight->size[1];
const int kT = (int)weight->size[2];
const int kH = (int)weight->size[3];
const int kW = (int)weight->size[4];
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D (batch mode) tensor is expected"
);
int batch = 1;
if (input->nDimension == 4)
{
THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane don't match");
// Force batch
batch = 0;
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
}
else
{
THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane don't match");
}
const long inputWidth = input->size[4];
const long inputHeight = input->size[3];
const long inputDepth = input->size[2];
const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
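// Transposed convolution inverts the usual size formula. For example,
// inputWidth = 4, dW = 2, pW = 1, kW = 3, aW = 1 gives
// outputWidth = (4 - 1)*2 - 2 + 3 + 1 = 8; the adjustment term selects among
// the output sizes that a forward strided convolution would have mapped to
// the same input width.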
// Batch size + input planes
const long batchSize = input->size[0];
// Resize output
THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
// Resize temporary columns
THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
THTensor_(zero)(columns);
// Define a buffer of ones, for bias accumulation
// Note: this buffer can be shared with other modules, it only ever gets increased,
// and always contains ones.
if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth)
{
// Resize plane and fill with ones...
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *output_n = THTensor_(new)();
int elt;
// For each elt in batch, do:
for (elt = 0; elt < batchSize; ++elt)
{
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(output_n, output, 0, elt);
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
const long m = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
const long n = columns->size[1];
const long k = weight->size[0];
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 't',
n, m, k,
1,
THTensor_(data)(input_n), n,
THTensor_(data)(weight), m,
0,
THTensor_(data)(columns), n
);
// Unpack columns back into input:
THNN_(col2vol)(
THTensor_(data)(columns),
nOutputPlane, outputDepth, outputHeight, outputWidth,
kT, kH, kW,
pT, pH, pW,
dT, dH, dW,
1, 1, 1,
THTensor_(data)(output_n)
);
// Do Bias after:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
const long m_ = nOutputPlane;
const long n_ = outputDepth * outputHeight * outputWidth;
const long k_ = 1;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
't', 'n',
n_, m_, k_,
1,
THTensor_(data)(ones), k_,
THTensor_(data)(bias), k_,
1,
THTensor_(data)(output_n), n_
);
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(output_n);
// Resize output
if (batch == 0)
{
THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
}
}
void THNN_(VolumetricFullConvolution_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *finput,
THTensor *fgradInput, // only used by cuda impl
int dT, int dW, int dH, // stride
int pT, int pW, int pH, // padding
int aT, int aW, int aH) // extra output adjustment
{
THTensor *gradColumns = finput;
// number of input & output planes and kernel size are indirectly defined by the weight tensor
THArgCheck(weight->nDimension == 5, 4,
"5D weight tensor is expected (nInputPlane x nOutputPlane x kT x kH x kW)"
);
const int nInputPlane = (int)weight->size[0];
const int nOutputPlane = (int)weight->size[1];
const int kT = (int)weight->size[2];
const int kH = (int)weight->size[3];
const int kW = (int)weight->size[4];
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D (batch mode) tensor is expected"
);
int batch = 1;
if (input->nDimension == 4)
{
// Force batch
batch = 0;
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
}
const long inputWidth = input->size[4];
const long inputHeight = input->size[3];
const long inputDepth = input->size[2];
const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
// Batch size + input planes
const long batchSize = input->size[0];
// Resize output
THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
THTensor_(zero)(gradInput);
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
// Helpers
THTensor *gradInput_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
int elt;
// For each elt in batch, do:
for (elt = 0; elt < batchSize; ++elt)
{
// Matrix multiply per sample:
THTensor_(select)(gradInput_n, gradInput, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// Extract columns:
THNN_(vol2col)(
THTensor_(data)(gradOutput_n),
nOutputPlane, outputDepth, outputHeight, outputWidth,
kT, kH, kW,
pT, pH, pW,
dT, dH, dW,
1, 1, 1,
THTensor_(data)(gradColumns)
);
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
const long m = weight->size[0];
const long n = gradColumns->size[1];
const long k = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
'n', 'n',
n, m, k,
1,
THTensor_(data)(gradColumns), n,
THTensor_(data)(weight), k,
0,
THTensor_(data)(gradInput_n), n
);
}
// Free
THTensor_(free)(gradInput_n);
THTensor_(free)(gradOutput_n);
// Resize output
if (batch == 0)
{
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
}
}
void THNN_(VolumetricFullConvolution_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
THTensor *gradBias,
THTensor *finput,
THTensor *fgradInput,
int dT, int dW, int dH, // stride
int pT, int pW, int pH, // padding
int aT, int aW, int aH, // extra output adjustment
real scale)
{
// number of input & output planes and kernel size are indirectly defined by the gradWeight tensor
THArgCheck(gradWeight->nDimension == 5, 4,
"5D gradWeight tensor is expected (nInputPlane x nOutputPlane x kT x kH x kW)"
);
int nInputPlane = (int)gradWeight->size[0];
int nOutputPlane = (int)gradWeight->size[1];
int kT = (int)gradWeight->size[2];
int kH = (int)gradWeight->size[3];
int kW = (int)gradWeight->size[4];
THTensor *columns = finput;
THTensor *ones = fgradInput;
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D (batch mode) tensor is expected"
);
int batch = 1;
if (input->nDimension == 4)
{
// Force batch
batch = 0;
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
}
const long inputWidth = input->size[4];
const long inputHeight = input->size[3];
const long inputDepth = input->size[2];
const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
// Batch size + input planes
const long batchSize = input->size[0];
// Define a buffer of ones, for bias accumulation
if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth)
{
// Resize plane and fill with ones...
THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
THTensor_(fill)(ones, 1);
}
// Resize temporary columns
THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
// Helpers
THTensor *input_n = THTensor_(new)();
THTensor *gradOutput_n = THTensor_(new)();
int elt;
// For each elt in batch, do:
for (elt = 0; elt < batchSize; ++elt)
{
// Matrix multiply per output:
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
// Extract columns:
THNN_(vol2col)(
THTensor_(data)(gradOutput_n), nOutputPlane,
outputDepth, outputHeight, outputWidth,
kT, kH, kW,
pT, pH, pW,
dT, dH, dW,
1, 1, 1,
THTensor_(data)(columns)
);
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
const long n = columns->size[0];   // nOutputPlane * kT * kH * kW
const long m = input_n->size[0];   // nInputPlane
const long k = columns->size[1];   // inputDepth * inputHeight * inputWidth
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
THBlas_(gemm)(
't', 'n',
n, m, k,
scale,
THTensor_(data)(columns), k,
THTensor_(data)(input_n), k,
1,
THTensor_(data)(gradWeight), n
);
// Do Bias:
// M,N,K are dims of matrix A and B
// (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
const long m_ = nOutputPlane;
const long k_ = outputDepth * outputHeight * outputWidth;
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
THBlas_(gemv)(
't',
k_, m_,
scale,
THTensor_(data)(gradOutput_n), k_,
THTensor_(data)(ones), 1,
1,
THTensor_(data)(gradBias), 1
);
}
// Free
THTensor_(free)(input_n);
THTensor_(free)(gradOutput_n);
// Resize
if (batch == 0)
{
THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
}
}
#endif


@ -0,0 +1,392 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
#else
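/* Each output voxel stores its max value in `output` and the argmax offsets
 * within the kT x kH x kW window in `indices`: the three offsets (mz, my, mx)
 * are packed into the first three bytes of one `real` (see the unsigned char
 * writes below), which updateGradInput unpacks to route gradients. This caps
 * kernel offsets at 255 per dimension.
 */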
static void THNN_(VolumetricMaxPooling_updateOutput_frame)(
real *input_p,
real *output_p,
real *indz_p,
long nslices,
long itime,
long iwidth,
long iheight,
long otime,
long owidth,
long oheight,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
/* loop over output */
long i, j, ti;
for (ti = 0; ti < otime; ti++)
{
for (i = 0; i < oheight; i++)
{
for (j = 0; j < owidth; j++)
{
/* local pointers */
long start_t = ti * dT - pT;
long start_h = i * dH - pH;
long start_w = j * dW - pW;
long kernel_t = fminf(kT, kT + start_t);
long kernel_h = fminf(kH, kH + start_h);
long kernel_w = fminf(kW, kW + start_w);
start_t = fmaxf(start_t, 0);
start_h = fmaxf(start_h, 0);
start_w = fmaxf(start_w, 0);
real *ip = input_p + k * itime * iwidth * iheight
+ start_t * iwidth * iheight + start_h * iwidth + start_w;
real *op = output_p + k * otime * owidth * oheight
+ ti * owidth * oheight + i * owidth + j;
real *indzp = indz_p + k * otime * owidth * oheight
+ ti * owidth * oheight + i * owidth + j;
/* compute local max: */
real maxval = -THInf;
int x, y, z;
int mx = 0, my = 0, mz = 0; /* initialized defensively so the packed indices are always defined */
for (z = 0; z < kernel_t; z++)
{
for (y = 0; y < kernel_h; y++)
{
for (x = 0; x < kernel_w; x++)
{
if ((start_t + z < itime) && (start_h + y < iheight) && (start_w + x < iwidth))
{
real val = *(ip + z * iwidth * iheight + y * iwidth + x);
if (val > maxval)
{
maxval = val;
// Store indices w.r.t the kernel dimension
mz = z + (kT - kernel_t);
my = y + (kH - kernel_h);
mx = x + (kW - kernel_w);
}
}
}
}
}
// set max values
((unsigned char*)(indzp))[0] = mz;
((unsigned char*)(indzp))[1] = my;
((unsigned char*)(indzp))[2] = mx;
((unsigned char*)(indzp))[3] = 0;
/* set output to local max */
*op = maxval;
}
}
}
}
}
void THNN_(VolumetricMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *indices,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH,
bool ceilMode)
{
long nslices;
long itime;
long iheight;
long iwidth;
long otime;
long oheight;
long owidth;
real *input_data;
real *output_data;
real *indices_data;
THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2,
"4D or 5D (batch-mode) tensor expected"
);
int dimN = 0;
int dimt = 1;
int dimh = 2;
int dimw = 3;
if (input->nDimension == 5)
{
dimN++;
dimt++;
dimh++;
dimw++;
}
THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
"input image smaller than kernel size"
);
THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
"pad should be smaller than half of kernel size"
);
/* sizes */
nslices = input->size[dimN];
itime = input->size[dimt];
iheight = input->size[dimh];
iwidth = input->size[dimw];
if (ceilMode)
{
otime = (int)(ceil((float)(itime - kT + 2 * pT) / dT) + 1);
oheight = (int)(ceil((float)(iheight - kH + 2 * pH) / dH) + 1);
owidth = (int)(ceil((float)(iwidth - kW + 2 * pW) / dW) + 1);
}
else
{
otime = (int)(floor((float)(itime - kT + 2 * pT) / dT) + 1);
oheight = (int)(floor((float)(iheight - kH + 2 * pH) / dH) + 1);
owidth = (int)(floor((float)(iwidth - kW + 2 * pW) / dW) + 1);
}
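/* ceilMode only differs when the last window would hang off the input:
   e.g. itime = 16, kT = 3, dT = 2, pT = 0 gives floor((16-3)/2)+1 = 7
   in floor mode but ceil((16-3)/2)+1 = 8 in ceil mode. */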
if (pT || pW || pH)
{
// ensure that the last pooling starts inside the image
if ((otime - 1)*dT >= itime + pT)
--otime;
if ((oheight - 1)*dH >= iheight + pH)
--oheight;
if ((owidth - 1)*dW >= iwidth + pW)
--owidth;
}
/* get contiguous input */
input = THTensor_(newContiguous)(input);
if (input->nDimension == 4) /* non-batch mode */
{
/* resize output */
THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
/* indices will contain ti,i,j uchar locations packed into float/double */
THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
THNN_(VolumetricMaxPooling_updateOutput_frame)(
input_data, output_data,
indices_data,
nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH
);
}
else /* batch mode */
{
long p;
long nBatch = input->size[0];
long istride = nslices * itime * iwidth * iheight;
long ostride = nslices * otime * owidth * oheight;
/* resize output */
THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
/* indices will contain ti,i,j locations for each output point */
THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
#pragma omp parallel for private(p)
for (p=0; p < nBatch; p++)
{
THNN_(VolumetricMaxPooling_updateOutput_frame)(
input_data + p * istride,
output_data + p * ostride,
indices_data + p * ostride,
nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH
);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(VolumetricMaxPooling_updateGradInput_frame)(
real *gradInput_p,
real *gradOutput_p,
real *indz_p,
long nslices,
long itime,
long iwidth,
long iheight,
long otime,
long owidth,
long oheight,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight;
real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
real *indz_p_k = indz_p + k * otime * owidth * oheight;
/* calculate max points */
long ti, i, j;
for (ti = 0; ti < otime; ti++)
{
for (i = 0; i < oheight; i++)
{
for (j = 0; j < owidth; j++)
{
/* retrieve position of max */
real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
long maxti = ((unsigned char*)(indzp))[0] + ti * dT - pT;
long maxi = ((unsigned char*)(indzp))[1] + i * dH - pH;
long maxj = ((unsigned char*)(indzp))[2] + j * dW - pW;
/* update gradient */
gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
}
}
}
}
}
void THNN_(VolumetricMaxPooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *indices,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
int nslices;
int itime;
int iheight;
int iwidth;
int otime;
int oheight;
int owidth;
real *gradInput_data;
real *gradOutput_data;
real *indices_data;
int dimN = 0;
int dimt = 1;
int dimh = 2;
int dimw = 3;
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (input->nDimension == 5)
{
dimN++;
dimt++;
dimh++;
dimw++;
}
/* sizes */
nslices = input->size[dimN];
itime = input->size[dimt];
iheight = input->size[dimh];
iwidth = input->size[dimw];
otime = gradOutput->size[dimt];
oheight = gradOutput->size[dimh];
owidth = gradOutput->size[dimw];
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
indices_data = THTensor_(data)(indices);
/* backprop */
if (input->nDimension == 4) /* non-batch mode*/
{
THNN_(VolumetricMaxPooling_updateGradInput_frame)(
gradInput_data, gradOutput_data,
indices_data,
nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
dT, dW, dH,
pT, pW, pH
);
}
else /* batch mode */
{
long p;
long nBatch = input->size[0];
long istride = nslices * itime * iwidth * iheight;
long ostride = nslices * otime * owidth * oheight;
#pragma omp parallel for private(p)
for (p = 0; p < nBatch; p++)
{
THNN_(VolumetricMaxPooling_updateGradInput_frame)(
gradInput_data + p * istride,
gradOutput_data + p * ostride,
indices_data + p * ostride,
nslices,
itime, iwidth, iheight,
otime, owidth, oheight,
dT, dW, dH,
pT, pW, pH
);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif


@ -0,0 +1,325 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricMaxUnpooling.c"
#else
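/* Max unpooling inverts max pooling: each input value is written to the
 * output location recorded during pooling, i.e. (ti*dT - pT + maxz,
 * i*dH - pH + maxy, j*dW - pW + maxx) with (maxz, maxy, maxx) unpacked from
 * the byte-packed indices tensor; all other output voxels stay zero.
 */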
static void THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
real *input_p,
real *output_p,
real *ind_p,
long nslices,
long iT,
long iW,
long iH,
long oT,
long oW,
long oH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
long ti, i, j, maxz, maxy, maxx;
for (ti = 0; ti < iT; ti++)
{
for (i = 0; i < iH; i++)
{
for (j = 0; j < iW; j++)
{
long start_t = ti * dT - pT;
long start_h = i * dH - pH;
long start_w = j * dW - pW;
//real *output_p_k = output_p + k*oT*oW*oH + ti*oW*oH*dT + i*oW*dH + j*dW;
real *input_p_k = input_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
real *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */
maxy = ((unsigned char*)(ind_p_k))[1];
maxx = ((unsigned char*)(ind_p_k))[2];
if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0 || start_t+maxz>=oT || start_h+maxy>=oH || start_w+maxx>=oW)
{
THError(
"invalid max index z= %ld, y= %ld, x= %ld, oT= %ld, oW= %ld, oH= %ld",
start_t+maxz, start_h+maxy, start_w+maxx, oT, oW, oH
);
}
output_p[k*oT*oW*oH + oH*oW*(start_t+maxz) + oW*(start_h+maxy) + (start_w+maxx)] = *input_p_k; /* update output */
}
}
}
}
}
void THNN_(VolumetricMaxUnpooling_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *indices,
int oT,
int oW,
int oH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
int dimw = 3;
int dimh = 2;
int dimt = 1;
int nbatch = 1;
int nslices;
int iT;
int iH;
int iW;
real *input_data;
real *output_data;
real *indices_data;
THArgCheck(input->nDimension == 4 || input->nDimension == 5 , 2,
"4D or 5D (batch mode) tensor expected"
);
if (!THTensor_(isSameSizeAs)(input, indices))
{
THError("Invalid input size w.r.t current indices size");
}
if (input->nDimension == 5)
{
nbatch = input->size[0];
dimt++;
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimt-1];
iT = input->size[dimt];
iH = input->size[dimh];
iW = input->size[dimw];
/* get contiguous input */
input = THTensor_(newContiguous)(input);
indices = THTensor_(newContiguous)(indices);
/* resize output */
if (input->nDimension == 4)
{
THTensor_(resize4d)(output, nslices, oT, oH, oW);
THTensor_(zero)(output);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
input_data, output_data,
indices_data,
nslices,
iT, iW, iH,
oT, oW, oH,
dT, dW, dH, pT, pW, pH
);
}
else
{
long p;
THTensor_(resize5d)(output, nbatch, nslices, oT, oH, oW);
THTensor_(zero)(output);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
input_data+p*nslices*iT*iW*iH,
output_data+p*nslices*oT*oW*oH,
indices_data+p*nslices*iT*iW*iH,
nslices,
iT, iW, iH,
oT, oW, oH,
dT, dW, dH,
pT, pW, pH
);
}
}
/* cleanup */
THTensor_(free)(input);
THTensor_(free)(indices);
}
static void THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
real *gradInput_p,
real *gradOutput_p,
real *ind_p,
long nslices,
long iT,
long iW,
long iH,
long oT,
long oW,
long oH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
long ti, i, j, maxz, maxy, maxx;
for (ti = 0; ti < iT; ti++)
{
for (i = 0; i < iH; i++)
{
for (j = 0; j < iW; j++)
{
long start_t = ti * dT - pT;
long start_h = i * dH - pH;
long start_w = j * dW - pW;
real *gradInput_p_k = gradInput_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
//real *gradOutput_p_k = gradOutput_p + k*oT*oW*oH + ti*oW*oH*dT + i*oW*dH + j*dW;
real *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */
maxy = ((unsigned char*)(ind_p_k))[1];
maxx = ((unsigned char*)(ind_p_k))[2];
if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0 || start_t+maxz>=oT || start_h+maxy>=oH || start_w+maxx>=oW)
{
THError(
"invalid max index z= %ld, y= %ld, x= %ld, oT= %ld, oW= %ld, oH= %ld",
start_t+maxz, start_h+maxy, start_w+maxx, oT, oW, oH
);
}
*gradInput_p_k = gradOutput_p[k*oT*oW*oH + oH*oW*(start_t+maxz) + oW*(start_h+maxy) + (start_w+maxx)]; /* update gradient */
}
}
}
}
}
void THNN_(VolumetricMaxUnpooling_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *indices,
int oT,
int oW,
int oH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
int dimw = 3;
int dimh = 2;
int dimt = 1;
int nbatch = 1;
int nslices;
int iT;
int iH;
int iW;
real *gradInput_data;
real *gradOutput_data;
real *indices_data;
if (!THTensor_(isSameSizeAs)(input, indices))
{
THError("Invalid input size w.r.t current indices size");
}
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
indices = THTensor_(newContiguous)(indices);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
if (input->nDimension == 5)
{
nbatch = input->size[0];
dimt++;
dimw++;
dimh++;
}
/* sizes */
nslices = input->size[dimt-1];
iT = input->size[dimt];
iH = input->size[dimh];
iW = input->size[dimw];
if (oT != gradOutput->size[dimt] || oW != gradOutput->size[dimw] || oH != gradOutput->size[dimh])
{
THError(
"Inconsistent gradOutput size. oT= %d, oH= %d, oW= %d, gradOutput: %ldx%ldx%ld",
oT, oH, oW, gradOutput->size[dimt], gradOutput->size[dimh], gradOutput->size[dimw]
);
}
/* get raw pointers */
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
indices_data = THTensor_(data)(indices);
/* backprop */
if (input->nDimension == 4)
{
THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
gradInput_data, gradOutput_data,
indices_data,
nslices,
iT, iW, iH,
oT, oW, oH,
dT, dW, dH,
pT, pW, pH
);
}
else
{
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
gradInput_data+p*nslices*iT*iW*iH,
gradOutput_data+p*nslices*oT*oW*oH,
indices_data+p*nslices*iT*iW*iH,
nslices,
iT, iW, iH,
oT, oW, oH,
dT, dW, dH,
pT, pW, pH
);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
THTensor_(free)(indices);
}
#endif


@ -0,0 +1,301 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c"
#else
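/* Replication padding clamps every output coordinate to the nearest valid
 * input coordinate, so border voxels are repeated. E.g. with pleft = 2 and
 * iwidth = 4, output columns 0..7 read input columns 0,0,0,1,2,3,3,3.
 */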
static void THNN_(VolumetricReplicationPadding_updateOutput_frame)(
real *input_p, real *output_p,
long nslices,
long iwidth, long iheight, long idepth,
long owidth, long oheight, long odepth,
int pleft, int pright,
int ptop, int pbottom,
int pfront, int pback)
{
int iStartX = fmax(0, -pleft);
int iStartY = fmax(0, -ptop);
int iStartZ = fmax(0, -pfront);
int oStartX = fmax(0, pleft);
int oStartY = fmax(0, ptop);
int oStartZ = fmax(0, pfront);
long k, ip_x, ip_y, ip_z;
#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
for (k = 0; k < nslices; k++) {
long i, j, z;
for (z = 0; z < odepth; z++) {
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pleft) {
ip_x = pleft;
} else if (j >= pleft && j < iwidth + pleft) {
ip_x = j;
} else {
ip_x = iwidth + pleft - 1;
}
ip_x = ip_x - oStartX + iStartX;
if (i < ptop) {
ip_y = ptop;
} else if (i >= ptop && i < iheight + ptop) {
ip_y = i;
} else {
ip_y = iheight + ptop - 1;
}
ip_y = ip_y - oStartY + iStartY;
if (z < pfront) {
ip_z = pfront;
} else if (z >= pfront && z < idepth + pfront) {
ip_z = z;
} else {
ip_z = idepth + pfront - 1;
}
ip_z = ip_z - oStartZ + iStartZ;
real *dest_p = output_p + k * owidth * oheight * odepth +
z * owidth * oheight + i * owidth + j;
real *src_p = input_p + k * iwidth * iheight * idepth +
ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
*dest_p = *src_p;
}
}
}
}
}
void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state,
THTensor *input,
THTensor *output,
int pleft, int pright,
int ptop, int pbottom,
int pfront, int pback)
{
int dimw = 3;
int dimh = 2;
int dimd = 1;
int dimslices = 0;
long nbatch = 1;
long nslices;
long idepth;
long iheight;
long iwidth;
long odepth;
long oheight;
long owidth;
real *input_data;
real *output_data;
THArgCheck(input->nDimension == 4 || input->nDimension == 5,
2, "input must be 4 or 5-dimensional");
if (input->nDimension == 5)
{
nbatch = input->size[0];
dimw++;
dimh++;
dimd++;
dimslices++;
}
/* sizes */
nslices = input->size[dimslices];
idepth = input->size[dimd];
iheight = input->size[dimh];
iwidth = input->size[dimw];
odepth = idepth + pfront + pback;
oheight = iheight + ptop + pbottom;
owidth = iwidth + pleft + pright;
THArgCheck(owidth >= 1 && oheight >= 1 && odepth >= 1, 2,
"input is too small");
/* get contiguous input */
input = THTensor_(newContiguous)(input);
/* resize output */
if (input->nDimension == 4)
{
THTensor_(resize4d)(output, nslices, odepth, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
THNN_(VolumetricReplicationPadding_updateOutput_frame)(
input_data, output_data, nslices, iwidth, iheight, idepth,
owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront,
pback);
}
else
{
long p;
THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
THNN_(VolumetricReplicationPadding_updateOutput_frame)(
input_data + p * nslices * iwidth * iheight * idepth,
output_data + p * nslices * owidth * oheight * odepth,
nslices,
iwidth, iheight, idepth,
owidth, oheight, odepth,
pleft, pright,
ptop, pbottom,
pfront, pback);
}
}
/* cleanup */
THTensor_(free)(input);
}
static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
real *ginput_p, real *goutput_p,
long nslices,
long iwidth, long iheight, long idepth,
long owidth, long oheight, long odepth,
int pleft, int pright,
int ptop, int pbottom,
int pfront, int pback)
{
int iStartX = fmax(0, -pleft);
int iStartY = fmax(0, -ptop);
int iStartZ = fmax(0, -pfront);
int oStartX = fmax(0, pleft);
int oStartY = fmax(0, ptop);
int oStartZ = fmax(0, pfront);
long k, ip_x, ip_y, ip_z;
#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
for (k = 0; k < nslices; k++) {
long i, j, z;
for (z = 0; z < odepth; z++) {
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pleft) {
ip_x = pleft;
} else if (j >= pleft && j < iwidth + pleft) {
ip_x = j;
} else {
ip_x = iwidth + pleft - 1;
}
ip_x = ip_x - oStartX + iStartX;
if (i < ptop) {
ip_y = ptop;
} else if (i >= ptop && i < iheight + ptop) {
ip_y = i;
} else {
ip_y = iheight + ptop - 1;
}
ip_y = ip_y - oStartY + iStartY;
if (z < pfront) {
ip_z = pfront;
} else if (z >= pfront && z < idepth + pfront) {
ip_z = z;
} else {
ip_z = idepth + pfront - 1;
}
ip_z = ip_z - oStartZ + iStartZ;
real *src_p = goutput_p + k * owidth * oheight * odepth +
z * owidth * oheight + i * owidth + j;
real *dest_p = ginput_p + k * iwidth * iheight * idepth +
ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
*dest_p += *src_p;
}
}
}
}
}
void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
int pleft, int pright,
int ptop, int pbottom,
int pfront, int pback)
{
int dimw = 3;
int dimh = 2;
int dimd = 1;
int dimslices = 0;
long nbatch = 1;
long nslices;
long idepth;
long iheight;
long iwidth;
long odepth;
long oheight;
long owidth;
if (input->nDimension == 5)
{
nbatch = input->size[0];
dimw++;
dimh++;
dimd++;
dimslices++;
}
/* sizes */
nslices = input->size[dimslices];
idepth = input->size[dimd];
iheight = input->size[dimh];
iwidth = input->size[dimw];
odepth = idepth + pfront + pback;
oheight = iheight + ptop + pbottom;
owidth = iwidth + pleft + pright;
THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
"gradOutput width unexpected");
THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
"gradOutput height unexpected");
THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3,
"gradOutput depth unexpected");
/* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
/* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
/* backprop */
if (input->nDimension == 4) {
THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
THTensor_(data)(gradInput),
THTensor_(data)(gradOutput),
nslices,
iwidth, iheight, idepth,
owidth, oheight, odepth,
pleft, pright,
ptop, pbottom,
pfront, pback);
} else {
long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++) {
THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth,
THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth,
nslices,
iwidth, iheight, idepth,
owidth, oheight, odepth,
pleft, pright,
ptop, pbottom,
pfront, pback);
}
}
/* cleanup */
THTensor_(free)(gradOutput);
}
#endif


@ -0,0 +1,158 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/unfold.c"
#else
#ifdef _WIN32
# include <windows.h>
#endif
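/* unfolded_copy is the 2D im2col used by the MM-based spatial convolutions:
 * finput gets one row per (channel, kh, kw) triple, each of length
 * outputHeight*outputWidth, so the convolution becomes a single GEMM.
 * unfolded_acc is its adjoint and scatter-adds rows back into the image. */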
/* note: due to write conflicts (overlapping windows accumulate into the same input element), this one cannot be parallelized as finely as unfolded_copy */
void THNN_(unfolded_acc)(
THTensor *finput,
THTensor *input,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
int nInputPlane,
int inputWidth,
int inputHeight,
int outputWidth,
int outputHeight)
{
#ifdef _WIN32
LONG_PTR nip;
#else
size_t nip;
#endif
real *input_data = THTensor_(data)(input);
real *finput_data = THTensor_(data)(finput);
#pragma omp parallel for private(nip)
for(nip = 0; nip < nInputPlane; nip++)
{
size_t kw, kh, y, x;
long long ix = 0, iy = 0;
for(kh = 0; kh < kH; kh++)
{
for(kw = 0; kw < kW; kw++)
{
real *src = finput_data + nip*(kH*kW*outputHeight*outputWidth) + kh*(kW*outputHeight*outputWidth) + kw*(outputHeight*outputWidth);
real *dst = input_data + nip*(inputHeight*inputWidth);
if (padW > 0 || padH > 0) {
size_t lpad,rpad;
for(y = 0; y < outputHeight; y++) {
iy = (long long)(y*dH - padH + kh);
if (iy < 0 || iy >= inputHeight) {
} else {
if (dW==1){
ix = (long long)(0 - padW + kw);
lpad = fmaxf(0,(int)(padW-kw));
rpad = fmaxf(0,(int)(padW-(kW-kw-1)));
THVector_(add)(dst+(size_t)(iy*inputWidth+ix+lpad), src+(size_t)(y*outputWidth+lpad), 1, outputWidth - lpad - rpad); /* note: THVector_add could handle 1 value better */
}
else{
for (x=0; x<outputWidth; x++){
ix = (long long)(x*dW - padW + kw);
if (ix < 0 || ix >= inputWidth){
}else
THVector_(add)(dst+(size_t)(iy*inputWidth+ix), src+(size_t)(y*outputWidth+x), 1, 1);
}
}
}
}
} else {
for(y = 0; y < outputHeight; y++) {
iy = (long long)(y*dH + kh);
ix = (long long)(0 + kw);
if (dW == 1 )
THVector_(add)(dst+(size_t)(iy*inputWidth+ix), src+(size_t)(y*outputWidth), 1, outputWidth); /* note: THVector_add could handle 1 value better */
else{
for(x = 0; x < outputWidth; x++)
THVector_(add)(dst+(size_t)(iy*inputWidth+ix+x*dW), src+(size_t)(y*outputWidth+x), 1, 1);
}
}
}
}
}
}
}
void THNN_(unfolded_copy)(
THTensor *finput,
THTensor *input,
int kW,
int kH,
int dW,
int dH,
int padW,
int padH,
int nInputPlane,
int inputWidth,
int inputHeight,
int outputWidth,
int outputHeight)
{
long k;
real *input_data = THTensor_(data)(input);
real *finput_data = THTensor_(data)(finput);
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane*kH*kW; k++) {
size_t nip = k / (kH*kW);
size_t rest = k % (kH*kW);
size_t kh = rest / kW;
size_t kw = rest % kW;
size_t x,y;
long long ix,iy;
real *dst = finput_data + nip*(kH*kW*outputHeight*outputWidth) + kh*(kW*outputHeight*outputWidth) + kw*(outputHeight*outputWidth);
real *src = input_data + nip*(inputHeight*inputWidth);
if (padW > 0 || padH > 0) {
size_t lpad,rpad;
for(y = 0; y < outputHeight; y++) {
iy = (long long)(y*dH - padH + kh);
if (iy < 0 || iy >= inputHeight) {
memset(dst+y*outputWidth, 0, sizeof(real)*outputWidth);
} else {
if (dW==1){
ix = (long long)(0 - padW + kw);
lpad = fmaxf(0,(int)(padW-kw));
rpad = fmaxf(0,(int)(padW-(kW-kw-1)));
if (outputWidth-rpad-lpad <= 0) {
memset(dst+(size_t)(y*outputWidth), 0, sizeof(real)*outputWidth);
} else {
if (lpad > 0) memset(dst+y*outputWidth, 0, sizeof(real)*lpad);
memcpy(dst+(size_t)(y*outputWidth+lpad), src+(size_t)(iy*inputWidth+ix+lpad), sizeof(real)*(outputWidth-rpad-lpad));
if (rpad > 0) memset(dst+y*outputWidth + outputWidth - rpad, 0, sizeof(real)*rpad);
}
}
else{
for (x=0; x<outputWidth; x++){
ix = (long long)(x*dW - padW + kw);
if (ix < 0 || ix >= inputWidth)
memset(dst+(size_t)(y*outputWidth+x), 0, sizeof(real)*1);
else
memcpy(dst+(size_t)(y*outputWidth+x), src+(size_t)(iy*inputWidth+ix), sizeof(real)*(1));
}
}
}
}
} else {
for(y = 0; y < outputHeight; y++) {
iy = (long long)(y*dH + kh);
ix = (long long)(0 + kw);
if (dW == 1)
memcpy(dst+(size_t)(y*outputWidth), src+(size_t)(iy*inputWidth+ix), sizeof(real)*outputWidth);
else{
for (x=0; x<outputWidth; x++)
memcpy(dst+(size_t)(y*outputWidth+x), src+(size_t)(iy*inputWidth+ix+x*dW), sizeof(real)*(1));
}
}
}
}
}
#endif

182
torch/lib/THNN/init.c Normal file

@ -0,0 +1,182 @@
#include "TH.h"
#include "THNN.h"
#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)
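/* Each file under generic/ is written against the abstract `real` type;
   including THGenerateFloatTypes.h right after it compiles that file twice,
   once with real = float and once with real = double, producing the
   THNN_Float* and THNN_Double* entry points. */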
#include "generic/Abs.c"
#include "THGenerateFloatTypes.h"
#include "generic/AbsCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/ClassNLLCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialClassNLLCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/DistKLDivCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/ELU.c"
#include "THGenerateFloatTypes.h"
#include "generic/HardShrink.c"
#include "THGenerateFloatTypes.h"
#include "generic/HardTanh.c"
#include "THGenerateFloatTypes.h"
#include "generic/L1Cost.c"
#include "THGenerateFloatTypes.h"
#include "generic/LeakyReLU.c"
#include "THGenerateFloatTypes.h"
#include "generic/LogSigmoid.c"
#include "THGenerateFloatTypes.h"
#include "generic/LogSoftMax.c"
#include "THGenerateFloatTypes.h"
#include "generic/LookupTable.c"
#include "THGenerateFloatTypes.h"
#include "generic/MSECriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/MarginCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/SoftMarginCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/MultiLabelMarginCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/MultiMarginCriterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/PReLU.c"
#include "THGenerateFloatTypes.h"
#include "generic/RReLU.c"
#include "THGenerateFloatTypes.h"
#include "generic/Sigmoid.c"
#include "THGenerateFloatTypes.h"
#include "generic/SmoothL1Criterion.c"
#include "THGenerateFloatTypes.h"
#include "generic/SoftMax.c"
#include "THGenerateFloatTypes.h"
#include "generic/SoftPlus.c"
#include "THGenerateFloatTypes.h"
#include "generic/SoftShrink.c"
#include "THGenerateFloatTypes.h"
#include "generic/SparseLinear.c"
#include "THGenerateFloatTypes.h"
#include "generic/Sqrt.c"
#include "THGenerateFloatTypes.h"
#include "generic/Square.c"
#include "THGenerateFloatTypes.h"
#include "generic/Tanh.c"
#include "THGenerateFloatTypes.h"
#include "generic/Threshold.c"
#include "THGenerateFloatTypes.h"
#include "generic/TemporalConvolution.c"
#include "THGenerateFloatTypes.h"
#include "generic/TemporalSubSampling.c"
#include "THGenerateFloatTypes.h"
#include "generic/TemporalMaxPooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/BatchNormalization.c"
#include "THGenerateFloatTypes.h"
#include "generic/unfold.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialConvolutionMap.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialConvolutionMM.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialConvolutionLocal.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialFullConvolution.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialFullConvolutionMap.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialDilatedConvolution.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialAdaptiveMaxPooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialAveragePooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialFractionalMaxPooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialMaxPooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialMaxUnpooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialSubSampling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialUpSamplingNearest.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialUpSamplingBilinear.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricAveragePooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricConvolution.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricConvolutionMM.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricFullConvolution.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricDilatedConvolution.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricMaxPooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricMaxUnpooling.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialReflectionPadding.c"
#include "THGenerateFloatTypes.h"
#include "generic/SpatialReplicationPadding.c"
#include "THGenerateFloatTypes.h"
#include "generic/VolumetricReplicationPadding.c"
#include "THGenerateFloatTypes.h"