pytorch/torch/lib/THNN/generic/SoftMax.c
Adam Paszke 035eb28e18 Add 'torch/lib/THNN/' from commit '4fe7059a315d156ecd080ff7bd5b4fe3d3a9efad'
git-subtree-dir: torch/lib/THNN
git-subtree-mainline: c3f0c1e2e0
git-subtree-split: 4fe7059a31
2016-08-04 10:58:50 -07:00

150 lines
3.4 KiB
C

#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftMax.c"
#else
void THNN_(SoftMax_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output)
{
  /* Softmax over the feature dimension.
   *
   * The tensor is viewed as [outer][channels][inner]:
   *   1D: (dim)          -> outer=1,       channels=size[0], inner=1
   *   2D: (batch, dim)   -> outer=size[0], channels=size[1], inner=1
   *   3D: (dim, h, w)    -> outer=1,       channels=size[0], inner=h*w
   *   4D: (b, dim, h, w) -> outer=size[0], channels=size[1], inner=h*w
   *
   * For each of the outer*inner "fibers" we compute a numerically
   * stable softmax: subtract the fiber max before exponentiating so
   * exp() never overflows, accumulate the sum in accreal, then
   * normalize in place.
   */
  real *in_data, *out_data;
  long outer = 0, channels = 0, inner = 0;
  long i;

  switch (input->nDimension)
  {
    case 1:
      outer    = 1;
      channels = input->size[0];
      inner    = 1;
      break;
    case 2:
      outer    = input->size[0];
      channels = input->size[1];
      inner    = 1;
      break;
    case 3:
      outer    = 1;
      channels = input->size[0];
      inner    = input->size[1]*input->size[2];
      break;
    case 4:
      outer    = input->size[0];
      channels = input->size[1];
      inner    = input->size[2]*input->size[3];
      break;
    default:
      THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
  }

  /* Work on a contiguous copy (no-op refcount bump if already contiguous);
   * freed at the end of this function. */
  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);
  in_data  = THTensor_(data)(input);
  out_data = THTensor_(data)(output);

#pragma omp parallel for private(i)
  for (i = 0; i < outer*inner; i++)
  {
    /* Start offset of this fiber; consecutive channel elements are
     * `inner` apart in memory. */
    long base = (i/inner)*channels*inner + i % inner;
    real *in_ptr  = in_data + base;
    real *out_ptr = out_data + base;
    real maxval = -THInf;
    accreal total;
    long c;

    for (c = 0; c < channels; c++)
    {
      if (in_ptr[c*inner] >= maxval)
        maxval = in_ptr[c*inner];
    }

    total = 0;
    for (c = 0; c < channels; c++)
    {
      real e = exp(in_ptr[c*inner] - maxval);
      out_ptr[c*inner] = e;
      total += e;
    }

    for (c = 0; c < channels; c++)
      out_ptr[c*inner] *= 1/total;
  }

  THTensor_(free)(input);
}
void THNN_(SoftMax_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output)
{
  /* Backward pass of softmax.
   *
   * Given y = softmax(x) (the forward `output`) and dL/dy (`gradOutput`),
   * computes for each fiber:
   *   dL/dx_i = y_i * (dL/dy_i - sum_j(dL/dy_j * y_j))
   * The dot product sum_j(dL/dy_j * y_j) is accumulated in accreal.
   *
   * Shapes are taken from `output` and interpreted exactly as in
   * updateOutput: [outer][channels][inner].
   */
  real *gin_data, *gout_data, *out_data;
  long outer = 0, channels = 0, inner = 0;
  long i;

  switch (output->nDimension)
  {
    case 1:
      outer    = 1;
      channels = output->size[0];
      inner    = 1;
      break;
    case 2:
      outer    = output->size[0];
      channels = output->size[1];
      inner    = 1;
      break;
    case 3:
      outer    = 1;
      channels = output->size[0];
      inner    = output->size[1]*output->size[2];
      break;
    case 4:
      outer    = output->size[0];
      channels = output->size[1];
      inner    = output->size[2]*output->size[3];
      break;
    default:
      THError("1D, 2D, 3D or 4D tensor expected");
  }

  /* Contiguous views of the inputs; both freed before returning. */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  output     = THTensor_(newContiguous)(output);

  THTensor_(resizeAs)(gradInput, output);
  gin_data  = THTensor_(data)(gradInput);
  out_data  = THTensor_(data)(output);
  gout_data = THTensor_(data)(gradOutput);

#pragma omp parallel for private(i)
  for (i = 0; i < outer*inner; i++)
  {
    /* Start offset of this fiber; channel stride in memory is `inner`. */
    long base = (i/inner)*channels*inner + i % inner;
    real *gin_ptr  = gin_data + base;
    real *out_ptr  = out_data + base;
    real *gout_ptr = gout_data + base;
    accreal dot = 0;
    long c;

    for (c = 0; c < channels; c++)
      dot += (accreal)gout_ptr[c*inner] * out_ptr[c*inner];

    for (c = 0; c < channels; c++)
      gin_ptr[c*inner] = out_ptr[c*inner] * (gout_ptr[c*inner] - dot);
  }

  THTensor_(free)(gradOutput);
  THTensor_(free)(output);
}
#endif