remove old docs

This commit is contained in:
Soumith Chintala 2017-01-16 15:06:08 -05:00
parent 38967568ca
commit af110d37f2
43 changed files with 1 additions and 6210 deletions

View File

@ -50,7 +50,7 @@ Elaborating further:
If you use numpy, then you have used Tensors (a.k.a ndarray).
![tensor_illustration](docs/image/tensor_illustration.png)
![tensor_illustration](docs/source/_static/img/tensor_illustration.png)
PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerate
compute by a huge amount.

View File

@ -1,534 +0,0 @@
#! /usr/bin/env python
# encoding: utf-8
"""
Very lightweight docstring to Markdown converter. Modified for use in pytorch
### License
Copyright © 2013 Thomas Gläßle <t_glaessle@gmx.de>
This work is free. You can redistribute it and/or modify it under the
terms of the Do What The Fuck You Want To Public License, Version 2, as
published by Sam Hocevar. See the COPYING file for more details.
This program is free software. It comes without any warranty, to the
extent permitted by applicable law.
### Description
Little convenience tool to extract docstrings from a module or class and
convert them to GitHub Flavoured Markdown:
https://help.github.com/articles/github-flavored-markdown
Its purpose is to quickly generate `README.md` files for small projects.
### API
The interface consists of the following functions:
- `doctrim(docstring)`
- `doc2md(docstring, title)`
You can run this script from the command line like:
$ doc2md.py [-a] [--no-toc] [-t title] module-name [class-name] > README.md
### Limitations
At the moment this is suited only for a very specific use case. It is
hardly foreseeable whether I will decide to improve on it in the near future.
"""
import re
import sys
import inspect
# Public API of this module.
__all__ = ['doctrim', 'doc2md']
# doctrim strips the uniform indentation from a docstring (PEP 257 style);
# inspect.cleandoc already implements exactly that.
doctrim = inspect.cleandoc
def unindent(lines):
    """
    Strip the indentation shared by every non-empty line.

    Unlike ``doctrim`` the first line gets no special treatment.  Lists
    with no non-empty lines are returned unchanged.
    """
    nonblank = [ln for ln in lines if ln]
    if not nonblank:
        return lines
    margin = min(len(ln) - len(ln.lstrip()) for ln in nonblank)
    return [ln[margin:] for ln in lines]
def escape_markdown(line):
    """
    Backslash-escape every Markdown special character in *line*.

    Bug fix: the original replaced ``\\`` AFTER inserting the bracket/paren
    escapes, so e.g. ``[`` became ``\\[`` (the freshly inserted backslash
    was itself doubled).  Backslashes must be escaped first.
    """
    # Escape pre-existing backslashes before inserting any new ones.
    line = line.replace('\\', '\\\\')
    for ch in '[](){}`*_#+-.!':
        line = line.replace(ch, '\\' + ch)
    return line
def code_block(lines, language=''):
    """
    Wrap *lines* in a fenced Markdown code block tagged with *language*.
    """
    fence = '```'
    return [fence + language, *lines, fence]
def doctest2md(lines):
    """
    Convert a doctest to a syntax-highlightable code listing.

    After unindenting, if every line is a doctest prompt line the
    ``>>> `` / ``... `` prefixes are stripped; otherwise the unindented
    lines are returned as-is.
    """
    lines = unindent(lines)
    prompts = ('>>> ', '... ')
    bare_prompts = ('>>>', '...')
    pure_doctest = all(
        line.startswith(prompts) or line in bare_prompts
        for line in lines
    )
    if pure_doctest:
        return [line[4:] for line in lines]
    return lines
def doc_code_block(lines, language):
    """
    Fence *lines* as a code block; Python doctests get their prompts stripped.
    """
    body = doctest2md(lines) if language == 'python' else lines
    return code_block(body, language)
# Matches the start of an 'Args:' section header.  Raw string: the plain
# literal relied on the invalid escape '\s', which is a SyntaxWarning on
# modern Python.
_args_section = re.compile(r'^\s*Args:\s*')
def is_args_check(line):
    """Return a match object if *line* opens an Args: section, else None."""
    return _args_section.match(line)
def args_block(lines):
    """
    Render the collected lines of an ``Args:`` section as a Markdown table.

    Each line must look like ``name: description [Default: value]``.
    Fix: the old ``assert matches != None`` could never fail because
    ``re.findall`` always returns a list; assert non-emptiness instead.
    """
    out = ['']
    out += ['Parameter | Default | Description']
    out += ['--------- | ------- | -----------']
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        assert matches, 'unparseable Args line: %r' % line
        name, description, _, default = matches[0]
        out += [name + ' | ' + default + ' | ' + description]
    return out
# Inputs
_inputs_section = re.compile('^\s*Inputs:\s*(.*)\s*')
def is_inputs_check(line):
return _inputs_section.match(line)
def inputs_block(lines):
    """
    Render the collected lines of an ``Inputs:`` section as a Markdown table.

    Fix: ``re.findall`` never returns None, so the old ``!= None`` assert
    was a no-op; assert the list is non-empty instead.
    """
    out = ['']
    out += ['Parameter | Default | Description']
    out += ['--------- | ------- | -----------']
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        assert matches, 'unparseable Inputs line: %r' % line
        name, description, _, default = matches[0]
        out += [name + ' | ' + default + ' | ' + description]
    return out
# Outputs
_outputs_section = re.compile('^\s*Outputs:\s*(.*)\s*')
def is_outputs_check(line):
return _outputs_section.match(line)
def outputs_block(lines):
    """
    Render the collected lines of an ``Outputs:`` section as a Markdown table.

    Fixes: the always-true ``assert matches != None`` now checks
    non-emptiness, and the unused ``default`` extraction is dropped
    (outputs have no Default column).
    """
    out = ['']
    out += ['Parameter | Description']
    out += ['--------- | -----------']
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        assert matches, 'unparseable Outputs line: %r' % line
        name, description = matches[0][0], matches[0][1]
        out += [name + ' | ' + description]
    return out
# Members
_members_section = re.compile('^\s*Members:\s*(.*)\s*')
def is_members_check(line):
return _members_section.match(line)
def members_block(lines):
    """
    Render the collected lines of a ``Members:`` section as a Markdown table.

    Fixes: the always-true ``assert matches != None`` now checks
    non-emptiness, and the unused ``default`` extraction is dropped
    (members have no Default column).
    """
    out = ['']
    out += ['Parameter | Description']
    out += ['--------- | -----------']
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        assert matches, 'unparseable Members line: %r' % line
        name, description = matches[0][0], matches[0][1]
        out += [name + ' | ' + description]
    return out
# Matches the start of a 'Returns:' section header (raw string avoids the
# invalid '\s' escape warning).
_returns_section = re.compile(r'^\s*Returns:\s*')
def is_returns_check(line):
    """Return a match object if *line* opens a Returns: section, else None."""
    return _returns_section.match(line)
# Matches an 'Image:' directive line (raw string avoids the invalid '\s'
# escape warning).
_image_section = re.compile(r'^\s*Image:\s*')
def is_image_check(line):
    """Return a match object if *line* is an Image: directive, else None."""
    return _image_section.match(line)
# Matches 'Examples:' headers.  The 'Returns:' alternative looks vestigial:
# in the _doc2md dispatch is_returns_check runs first, so in practice this
# only fires for 'Examples:'.  Raw string avoids the invalid '\s' escape.
_example_section = re.compile(r'^\s*Returns:\s*|^\s*Examples:\s*')
def is_example_check(line):
    """Return a match object if *line* opens an Examples: section, else None."""
    return _example_section.match(line)
# Matches 'Input Shape:' headers (the 'Returns:' alternative is shadowed by
# the earlier is_returns_check branch in _doc2md).  Raw string avoids the
# invalid '\s' escape warning.
_inputshape_section = re.compile(r'^\s*Returns:\s*|^\s*Input Shape:\s*')
def is_inputshape_check(line):
    """Return a match object if *line* opens an Input Shape: section."""
    return _inputshape_section.match(line)
# Matches 'Output Shape:' headers (the 'Returns:' alternative is shadowed by
# the earlier is_returns_check branch in _doc2md).  Raw string avoids the
# invalid '\s' escape warning.
_outputshape_section = re.compile(r'^\s*Returns:\s*|^\s*Output Shape:\s*')
def is_outputshape_check(line):
    """Return a match object if *line* opens an Output Shape: section."""
    return _outputshape_section.match(line)
###############################################
# A Markdown heading: one or more '#' followed by a space.
_reg_section = re.compile('^#+ ')
def is_heading(line):
    """Return a match object if *line* is a Markdown heading, else None."""
    return _reg_section.match(line)
def get_heading(line):
    """
    Split a Markdown heading line into a ``(level, title)`` pair.

    *line* must satisfy is_heading().
    """
    assert is_heading(line)
    hashes, _, title = line.partition(' ')
    return len(hashes), title
def make_heading(level, title):
    """Render a Markdown heading of at least level 1."""
    depth = max(level, 1)
    return '{} {}'.format('#' * depth, title)
def find_sections(lines):
    """
    Collect a ``(level, title)`` pair for every Markdown heading in *lines*.
    """
    return [get_heading(line) for line in lines if is_heading(line)]
def make_toc(sections):
    """
    Build a Markdown table of contents from ``(level, title)`` pairs.

    Entries are indented one space per level below the shallowest heading;
    anchors are GitHub-style (lowercased, spaces to dashes, '?' removed).
    """
    if not sections:
        return []
    top = min(level for level, _ in sections)
    entries = []
    for level, title in sections:
        anchor = title.lower().replace(' ', '-').replace('?', '')
        entries.append('%s- [%s](#%s)' % (' ' * (level - top), title, anchor))
    return entries
def _doc2md(lines, shiftlevel=0):
    """
    Core line-by-line docstring-to-markdown converter.

    Walks *lines* as a small state machine: section markers (Args:,
    Inputs:, Outputs:, Members:, Returns:, Input Shape:, Output Shape:,
    Image:), doctest snippets ('>>> ') and fenced ``` blocks each flip a
    flag and start collecting lines; a blank line or the next marker calls
    reset(), which flushes the collected lines as a markdown table or code
    fence.  Headings are shifted down by *shiftlevel*.

    State lives as attributes on the function object itself
    (_doc2md.md, _doc2md.code, _doc2md.is_*) so the nested reset() helper
    can mutate it without nonlocal declarations.
    """
    _doc2md.md = []
    _doc2md.is_code = False          # inside a '>>> ' doctest snippet
    _doc2md.is_code_block = False    # inside a ``` fenced code block
    _doc2md.is_args = False          # inside an 'Args:' section
    _doc2md.is_inputs = False        # inside an 'Inputs:' section
    _doc2md.is_outputs = False       # inside an 'Outputs:' section
    _doc2md.is_members = False       # inside a 'Members:' section
    _doc2md.is_returns = False       # inside a 'Returns:' section
    _doc2md.is_inputshape = False    # never set True below; cleared in reset()
    _doc2md.is_outputshape = False   # never set True below; cleared in reset()
    _doc2md.code = []
    def reset():
        # Flush whichever section is currently being collected.  The
        # collection lists (code, code_block, args, ...) are locals of
        # _doc2md assigned in the loop below; each exists whenever its
        # flag is set.
        if _doc2md.is_code:
            _doc2md.is_code = False
            _doc2md.code += doc_code_block(code, 'python')
            _doc2md.code += ['']
        if _doc2md.is_code_block:
            _doc2md.is_code_block = False
            # NOTE: the local list 'code_block' shadows the module-level
            # code_block() helper inside this function.
            _doc2md.code += doc_code_block(code_block, 'python')
            _doc2md.code += ['']
        if _doc2md.is_args:
            _doc2md.is_args = False
            _doc2md.md += args_block(args)
        if _doc2md.is_inputs:
            _doc2md.is_inputs = False
            _doc2md.md += inputs_block(inputs)
        if _doc2md.is_outputs:
            _doc2md.is_outputs = False
            _doc2md.md += outputs_block(outputs)
        if _doc2md.is_members:
            _doc2md.is_members = False
            _doc2md.md += members_block(members)
        if _doc2md.is_returns:
            _doc2md.is_returns = False
            _doc2md.md += returns
        _doc2md.is_inputshape = False
        _doc2md.is_outputshape = False
    for line in lines:
        trimmed = line.lstrip()
        if is_args_check(line):
            reset()
            _doc2md.is_args = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Constructor Arguments']
            args = []
        elif is_inputs_check(line):
            reset()
            _doc2md.is_inputs = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Inputs']
            inputs = []
        elif is_outputs_check(line):
            reset()
            _doc2md.is_outputs = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Outputs']
            outputs = []
        elif is_members_check(line):
            reset()
            _doc2md.is_members = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Members']
            members = []
        elif is_returns_check(line):
            reset()
            _doc2md.is_returns = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Returns']
            returns = []
        elif is_example_check(line):
            # 'Examples:' header lines are dropped; the snippet below them
            # is picked up by the '>>> ' branch.
            reset()
        elif is_inputshape_check(line):
            reset()
            # Remember the (shape, description) pair; the table is emitted
            # when the matching 'Output Shape:' line arrives below.
            inputshape = re.findall(r'\s*Input\sShape:\s*(.*)\s*:\s*(.*)\s*$', line)[0]
        elif is_outputshape_check(line):
            reset()
            # Assumes 'Input Shape:' always precedes 'Output Shape:' in the
            # docstring; otherwise 'inputshape' is unbound here.
            outputshape = re.findall(r'\s*Output\sShape:\s*(.*)\s*:\s*(.*)\s*$', line)[0]
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Expected Shape']
            _doc2md.md += ['       | Shape | Description ']
            _doc2md.md += ['------ | ----- | ------------']
            _doc2md.md += [' input | ' + inputshape[0] + ' | ' + inputshape[1]]
            _doc2md.md += ['output | ' + outputshape[0] + ' | ' + outputshape[1]]
        elif is_image_check(line):
            reset()
            _doc2md.md += ['']
            filename = re.findall(r'\s*Image:\s*(.*?)\s*$', line)
            _doc2md.md += ['<img src="image/' + filename[0] + '" >']
        elif _doc2md.is_code == False and trimmed.startswith('>>> '):
            reset()
            _doc2md.is_code = True
            code = [line]
        elif _doc2md.is_code_block == False and trimmed.startswith('```'):
            reset()
            _doc2md.is_code_block = True
            code_block = []
        elif _doc2md.is_code_block == True and trimmed.startswith('```'):
            # end of code block
            reset()
        elif _doc2md.is_code_block:
            if line:
                code_block.append(line)
            else:
                reset()
        elif shiftlevel != 0 and is_heading(line):
            reset()
            level, title = get_heading(line)
            _doc2md.md += [make_heading(level + shiftlevel, title)]
        elif _doc2md.is_args:
            if line:
                args.append(line)
            else:
                reset()
        elif _doc2md.is_inputs:
            if line:
                inputs.append(line)
            else:
                reset()
        elif _doc2md.is_outputs:
            if line:
                outputs.append(line)
            else:
                reset()
        elif _doc2md.is_members:
            if line:
                members.append(line)
            else:
                reset()
        elif _doc2md.is_returns:
            if line:
                returns.append(line)
            else:
                reset()
        elif _doc2md.is_code:
            if line:
                code.append(line)
            else:
                reset()
        else:
            # Ordinary prose line: flush any open section, pass through.
            reset()
            _doc2md.md += [line]
    reset()
    # Code fences were accumulated separately; prepend them to the prose.
    _doc2md.code += _doc2md.md
    return _doc2md.code
def doc2md(docstr, title, min_level=3, more_info=False, toc=True):
    """
    Render a single docstring as markdown.

    Headings are shifted down so the top-most heading sits at *min_level*;
    *title* becomes the document heading and the docstring's first line its
    lead paragraph.  Returns the joined markdown string, or the
    ``(lines, sections)`` pair when *more_info* is true.
    """
    lines = doctrim(docstr).split('\n')
    sections = find_sections(lines)
    level = (min(n for n, _ in sections) - 1) if sections else 1
    shiftlevel = 0
    if level < min_level:
        # Push every heading down so the document starts at min_level.
        shiftlevel = min_level - level
        level = min_level
        sections = [(n + shiftlevel, t) for n, t in sections]
    md = [make_heading(level, title), "", lines.pop(0), ""]
    if toc:
        md += make_toc(sections)
    md += _doc2md(lines, shiftlevel)
    return (md, sections) if more_info else "\n".join(md)
def mod2md(module, title, title_api_section, toc=True):
    """
    Generate markdown document from module, including API section.

    The module's own docstring becomes the main body; every public
    module member with a docstring is appended as an API entry
    (rendered via doc2md with more_info=True).
    """
    # Fall back to a single space so doctrim/split below never see None.
    docstr = module.__doc__ or " "
    text = doctrim(docstr)
    lines = text.split('\n')
    sections = find_sections(lines)
    if sections:
        level = min(n for n,t in sections) - 1
    else:
        level = 1
    api_md = []
    api_sec = []
    if title_api_section :
        # sections.append((level+1, title_api_section))
        for name, entry in iter(module.__dict__.items()):
            # NOTE(review): entry.__doc__ is truthy for most builtin values
            # too (e.g. an int constant inherits int's docstring), so this
            # may pick up more members than intended — confirm.
            if name[0] != '_' and entry.__doc__:
                #api_sec.append((level+1, name))
                #api_md += ['', '']
                if entry.__doc__:
                    md, sec = doc2md(entry.__doc__, name,
                        min_level=level+1, more_info=True, toc=False)
                    api_sec += sec
                    api_md += md
    sections += api_sec
    # headline
    md = [
        make_heading(level, title),
        "",
        lines.pop(0),
        ""
    ]
    # main sections
    if toc:
        md += make_toc(sections)
    md += _doc2md(lines)
    if toc:
        md += ['']
        md += make_toc(api_sec)
    # API entries are appended regardless of the toc flag.
    md += api_md
    return "\n".join(md)
def main(args=None):
    """
    Command-line entry point: convert a module's (or one entry's)
    docstrings to markdown and print the result to stdout.

    args: optional argv list for argparse (None means sys.argv[1:]).
    """
    # parse the program arguments
    import argparse
    parser = argparse.ArgumentParser(
        description='Convert docstrings to markdown.')
    parser.add_argument(
        'module', help='The module containing the docstring.')
    # 'entry' and '--all' are mutually exclusive: either document one
    # member or generate a full API section.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        'entry', nargs='?',
        help='Convert only docstring of this entry in module.')
    group.add_argument(
        '-a', '--all', dest='all', action='store_true',
        help='Create an API section with the contents of module.__all__.')
    parser.add_argument(
        '-t', '--title', dest='title',
        help='Document title (default is module name)')
    parser.add_argument(
        '--no-toc', dest='toc', action='store_false', default=True,
        help='Do not automatically generate the TOC')
    args = parser.parse_args(args)
    import importlib
    import inspect
    import os
    def add_path(*pathes):
        # Prepend each path to sys.path (reversed so the first argument
        # ends up frontmost), skipping duplicates.
        for path in reversed(pathes):
            if path not in sys.path:
                sys.path.insert(0, path)
    file = inspect.getfile(inspect.currentframe())
    # Make this script's directory and the current directory importable
    # so the target module can be found.
    add_path(os.path.realpath(os.path.abspath(os.path.dirname(file))))
    add_path(os.getcwd())
    mod_name = args.module
    # Accept a file path like 'foo.py' as well as a module name.
    if mod_name.endswith('.py'):
        mod_name = mod_name.rsplit('.py', 1)[0]
    title = args.title or mod_name.replace('_', '-')
    module = importlib.import_module(mod_name)
    if args.all:
        print(mod2md(module, title, 'API', toc=args.toc))
    else:
        if args.entry:
            docstr = module.__dict__[args.entry].__doc__ or ''
        else:
            docstr = module.__doc__ or ''
        print(doc2md(docstr, title, toc=args.toc))
# Script entry point: run the CLI when executed directly.
if __name__ == "__main__":
    main()

View File

@ -1,100 +0,0 @@
# Regenerate the per-category torch.nn markdown reference files.
# Each section seeds ../nn_<topic>.md with a "## Heading" line, then
# appends the markdown that doc2md.py extracts from each class docstring.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted: the unquoted expansion word-split on paths containing spaces.
pushd "$SCRIPT_DIR"

# Append the generated markdown for one torch.nn class ($1) to a file ($2).
# Replaces the repeated `echo X | xargs -I {} python ...` pipelines.
gen_class() {
    python doc2md.py torch.nn "$1" --title "$1" --no-toc >>"$2"
}

# module
#python doc2md.py torch.nn Module --title Module --no-toc >../nn_module.md

# containers
echo "## Containers" > ../nn_container.md
gen_class Container ../nn_container.md
gen_class Sequential ../nn_container.md

# convolution
echo "## Convolution Layers" > ../nn_convolution.md
for cls in Conv1d Conv2d ConvTranspose2d Conv3d ConvTranspose3d; do
    gen_class "$cls" ../nn_convolution.md
done

# pooling
echo "## Pooling Layers" > ../nn_pooling.md
for cls in MaxPool1d MaxPool2d MaxPool3d MaxUnpool2d MaxUnpool3d \
           AvgPool2d AvgPool3d FractionalMaxPool2d LPPool2d; do
    gen_class "$cls" ../nn_pooling.md
done

# activations
echo "## Non-linearities" > ../nn_activation.md
for cls in ReLU ReLU6 Threshold Hardtanh Sigmoid Tanh ELU LeakyReLU \
           LogSigmoid Softplus Softshrink PReLU Softsign Tanhshrink \
           Softmin Softmax Softmax2d LogSoftmax; do
    gen_class "$cls" ../nn_activation.md
done

# normalization
echo "## Normalization layers" > ../nn_normalization.md
for cls in BatchNorm1d BatchNorm2d BatchNorm3d; do
    gen_class "$cls" ../nn_normalization.md
done

# recurrentnet
echo "## Recurrent layers" > ../nn_recurrent.md
for cls in RNN LSTM GRU RNNCell LSTMCell GRUCell; do
    gen_class "$cls" ../nn_recurrent.md
done

# linear
echo "## Linear layers" > ../nn_linear.md
gen_class Linear ../nn_linear.md

# dropout
echo "## Dropout layers" > ../nn_dropout.md
for cls in Dropout Dropout2d Dropout3d; do
    gen_class "$cls" ../nn_dropout.md
done

# Sparse
echo "## Sparse layers" > ../nn_sparse.md
gen_class Embedding ../nn_sparse.md

# loss_functions
echo "## Loss functions" > ../nn_loss.md
for cls in L1Loss MSELoss CrossEntropyLoss NLLLoss NLLLoss2d KLDivLoss \
           BCELoss MarginRankingLoss HingeEmbeddingLoss MultiLabelMarginLoss \
           SmoothL1Loss SoftMarginLoss MultiLabelSoftMarginLoss \
           CosineEmbeddingLoss MultiMarginLoss; do
    gen_class "$cls" ../nn_loss.md
done

popd

View File

@ -1,143 +0,0 @@
import sys
from tools.cwrap import cwrap
from tools.cwrap.plugins import CWrapPlugin
from string import Template
import sys
import torch
from torch.autograd import Variable
def transform_defined_if(defined_if):
    """
    Translate a cwrap ``defined_if`` C-preprocessor guard string into the
    human-readable type list printed in the generated docs.

    ``None`` means the method is defined for every type (CPU and CUDA).
    Fix: compare against None with ``is``/``is not`` instead of ``!=``;
    the replacement pairs are data-driven instead of a chain of calls.
    """
    if defined_if is None:
        defined_if = "All Types (CPU and CUDA)"
    else:
        # Preprocessor guard -> display-name substitutions.
        substitutions = (
            ('defined(TH_REAL_IS_FLOAT)', 'Float'),
            ('defined(TH_REAL_IS_DOUBLE)', 'Double'),
            ('defined(TH_REAL_IS_BYTE)', 'Byte'),
            ('defined(TH_REAL_IS_CHAR)', 'Char'),
            ('defined(TH_REAL_IS_INT)', 'Int'),
            ('defined(TH_REAL_IS_LONG)', 'Long'),
            ('defined(NUMPY_TYPE_ENUM)',
                'Byte // Short // Int // Long // Float // Double'),
            ('CUDA_INT', 'Cuda_Int'),
            ('CUDA_LONG', 'Cuda_Long'),
            ('CUDA_FLOAT', 'Cuda_Float'),
            ('CUDA_DOUBLE', 'Cuda_Double'),
            ('CUDA_HALF', 'Cuda_Half'),
            ('!IS_CUDA', 'All CPU Types'),
        )
        for guard, pretty in substitutions:
            defined_if = defined_if.replace(guard, pretty)
    # '||' separators render as '//' in the markdown table.
    defined_if = defined_if.replace('||', '//')
    return defined_if
class DocGen(CWrapPlugin):
    """
    cwrap plugin that records every processed declaration and prints them
    as a markdown reference table for torch.Tensor methods.

    All get_* template hooks return empty templates: this plugin generates
    no wrapper code, it only harvests the declarations passed through
    process_declarations().
    """
    def __init__(self):
        # name -> declaration dict, filled in by process_declarations().
        self.declarations = {}
    def process_declarations(self, declarations):
        # Index by name; a later declaration with the same name overwrites
        # the earlier one.
        self.declarations.update({declaration['name']: declaration for declaration in declarations})
        # self.declarations += declarations
        return declarations
    def get_wrapper_template(self, declaration):
        return Template("")
    def get_type_check(self, arg, option):
        return Template("")
    def get_type_unpack(self, arg, option):
        return Template("")
    def get_return_wrapper(self, option):
        return Template("")
    def print_declarations(self):
        """Print the markdown reference for all recorded declarations.

        Assumes cwrap declaration dicts with 'options', 'arguments',
        'defined_if' and 'return' keys (as produced by tools.cwrap) —
        TODO confirm against the cwrap schema.
        """
        print("# torch.Tensor")
        for name, declarations in sorted(self.declarations.items()):
            # Skip the in-place variant ('foo_') when 'foo' also exists;
            # it is folded into the same row below.
            if name.endswith('_') and name[:-1] in self.declarations:
                continue
            if not name.endswith('_') and name + '_' in self.declarations:
                inplace = True
            else:
                inplace = False
            # Prefer the Python-facing name when the declaration has one.
            pname = declarations['options'][0].get('python_name', None)
            if pname != None:
                name = pname
            # Private methods are not documented.
            if name.startswith('_'):
                continue
            # START PRINTING MARKDOWN
            print("## " + name + " \n")
            print("| %-25s | %-8s | %-25s |" % ("Name", "Autograd", "defined if"))
            print("| " + ('-' * 28) + " | " + ('-' * 11) + " | "+ ('-' * 28) + " |")
            if inplace:
                sys.stdout.write("| %-25s" % (name + ' // ' + name + "_"))
            else:
                sys.stdout.write("| %-25s" % name)
            sys.stdout.write(' | ')
            # Autograd support is probed by checking whether a Variable
            # exposes the method.
            if hasattr(Variable(torch.randn(10)), name):
                sys.stdout.write(' %9s ' % 'yes') # + ' ' + name)
            else:
                sys.stdout.write(' %9s ' % 'no') # + ' ' + name)
            defined_if = declarations.get('defined_if', None)
            defined_if = transform_defined_if(defined_if)
            sys.stdout.write(' | ')
            sys.stdout.write(defined_if)
            sys.stdout.write(' |')
            sys.stdout.write('\n\n')
            #if inplace:
            #    print('Inplace Exists : True')
            #sys.stdout.write('Arguments : ')
            args = declarations['options'][0]['arguments']
            if len(args) == 0:
                print( '**No Arguments**\n' )
            else:
                print( '**Arguments**\n' )
                print("| %-15s | %-12s | %-15s |" % ("Name", "Type", "Default"))
                print("| " + ('-' * 18) + " | " + ('-' * 15) + " | "+ ('-' * 18) + " |")
                for arg in args:
                    type_ = arg['type']
                    # Generator arguments are implementation details; hide them.
                    if type_ == 'THGenerator*':
                        continue
                    # Map TH C types to their Python-facing tensor names.
                    if type_ == 'THTensor*':
                        type_ = 'Tensor'
                    if type_ == 'THIndexTensor*':
                        type_ = 'LongTensor'
                    if type_ == 'THBoolTensor*':
                        type_ = 'ByteTensor'
                    if type_ == 'THLongTensor*':
                        type_ = 'LongTensor'
                    if type_ == 'THLongStorage*':
                        type_ = 'LongStorage'
                    default = arg.get('default', None)
                    allocated = arg.get('allocate', None)
                    # No default and not auto-allocated -> caller must pass it.
                    if default == None and allocated == None:
                        default = " [required]"
                    elif allocated != None:
                        default = " [optional]"
                    else:
                        default = str(default)
                        # 're' is only needed on this branch; it is not
                        # imported at module scope.
                        import re
                        # Unwrap AS_REAL(x) macro defaults to plain x.
                        m = re.search('\s*AS_REAL\((.+)\)\s*', default)
                        if m:
                            default = m.group(1)
                        default = default
                    print('| %15s | %12s | %10s |' % (arg['name'], type_, default))
            # print( 'Options : ' )
            # print(declarations['options'][0])
            print('')
            if declarations['return']:
                return_ = declarations['return']
                if return_ == 'THTensor*':
                    return_ = 'Tensor'
                if return_ == 'void':
                    return_ = 'nothing'
                print( '**Returns : ' + return_ + '**')
            print('')
# Generate the torch.Tensor method docs: run cwrap over the TensorMethods
# declarations with the DocGen plugin attached, then print the result.
docs = DocGen()
cwrap('../../torch/csrc/generic/TensorMethods.cwrap', plugins=[docs])
docs.print_declarations()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

View File

@ -1,3 +0,0 @@
# torch.nn
Neural Networks in PyTorch

View File

@ -1,496 +0,0 @@
## Non-linearities
### ReLU
Applies the rectified linear unit function element-wise, ReLU(x) = max(0, x)
```python
m = nn.ReLU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/relu.png" >
### ReLU6
Applies the element-wise function ReLU6(x) = min( max(0,x), 6)
```python
m = nn.ReLU6()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/relu6.png" >
### Threshold
Thresholds each element of the input Tensor
```python
m = nn.Threshold(0.1, 20)
input = Variable(torch.randn(2))
print(input)
print(m(input))
```
Threshold is defined as:
y = x if x >= threshold
value if x < threshold
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
threshold | | The value to threshold at
value | | The value to replace with
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
Tensor of same dimension and shape as the input
### Hardtanh
Applies the HardTanh function element-wise
```python
m = nn.Hardtanh(-2, 2)
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
HardTanh is defined as:
f(x) = +1, if x > 1
f(x) = -1, if x < -1
f(x) = x, otherwise
The range of the linear region [-1, 1] can be adjusted
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
min_value | | minimum value of the linear region range
max_value | | maximum value of the linear region range
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/htanh.png" >
### Sigmoid
Applies the element-wise function sigmoid(x) = 1 / ( 1 + exp(-x))
```python
m = nn.Sigmoid()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/sigmoid.png" >
### Tanh
Applies element-wise, Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
```python
m = nn.Tanh()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/tanh.png" >
### ELU
Applies element-wise, ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1))
```python
m = nn.ELU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
alpha | 1.0 | the alpha value for the ELU formulation.
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/elu.png" >
### LeakyReLU
Applies element-wise, f(x) = max(0, x) + negative_slope * min(0, x)
```python
m = nn.LeakyReLU(0.1)
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
negative_slope | 1e-2 | Controls the angle of the negative slope.
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
### LogSigmoid
Applies element-wise LogSigmoid(x) = log( 1 / (1 + exp(-x_i)))
```python
m = nn.LogSigmoid()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/logsigmoid.png" >
### Softplus
Applies element-wise SoftPlus(x) = 1/beta * log(1 + exp(beta * x_i))
```python
m = nn.Softplus()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
SoftPlus is a smooth approximation to the ReLU function and can be used
to constrain the output of a machine to always be positive.
For numerical stability the implementation reverts to the linear function
for inputs above a certain value.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
beta | 1 | the beta value for the Softplus formulation.
threshold | 20 | values above this revert to a linear function.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/softplus.png" >
### Softshrink
Applies the soft shrinkage function elementwise
```python
m = nn.Softshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
SoftShrinkage operator is defined as:
f(x) = x-lambda, if x > lambda
f(x) = x+lambda, if x < -lambda
f(x) = 0, otherwise
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
lambd | 0.5 | the lambda value for the Softshrink formulation.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/sshrink.png" >
### PReLU
Applies element-wise the function PReLU(x) = max(0,x) + a * min(0,x)
```python
m = nn.PReLU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
Here "a" is a learnable parameter.
When called without arguments, nn.PReLU() uses a single parameter "a"
across all input channels. If called with nn.PReLU(nChannels), a separate
"a" is used for each input channel.
Note that weight decay should not be used when learning "a" for good
performance.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_parameters | 1 | number of "a" to learn.
init | 0.25 | the initial value of "a".
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/prelu.png" >
### Softsign
Applies element-wise, the function Softsign(x) = x / (1 + |x|)
```python
m = nn.Softsign()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/softsign.png" >
### Tanhshrink
Applies element-wise, Tanhshrink(x) = x - Tanh(x)
```python
m = nn.Tanhshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
### Softmin
Applies the Softmin function to an n-dimensional input Tensor
```python
m = nn.Softmin()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m(input))
```
rescaling them so that the elements of the n-dimensional output Tensor
lie in the range (0,1) and sum to 1
Softmin(x) = exp(-x_i - shift) / sum_j exp(-x_j - shift)
where shift = max_i (-x_i)
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input, with
values in the range [0, 1]
<img src="image/softmin.png" >
### Softmax
Applies the Softmax function to an n-dimensional input Tensor
```python
m = nn.Softmax()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m(input))
```
rescaling them so that the elements of the n-dimensional output Tensor
lie in the range (0,1) and sum to 1
Softmax is defined as f_i(x) = exp(x_i - shift) / sum_j exp(x_j - shift)
where shift = max_i x_i
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input with
values in the range [0, 1]
<img src="image/softmax.png" >
Notes:
Note that this module doesn't work directly with NLLLoss,
which expects the Log to be computed between the Softmax and itself.
Use LogSoftmax instead (it's faster).
### Softmax2d
Applies SoftMax over features to each spatial location
```python
m = nn.Softmax2d()
# you softmax over the 2nd dimension
input = autograd.Variable(torch.randn(2, 3, 12, 13))
print(input)
print(m(input))
```
When given an image of Channels x Height x Width, it will
apply Softmax to each location [Channels, h_i, w_j]
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , * , * ] | 4D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input with
values in the range [0, 1]
### LogSoftmax
Applies the Log(Softmax(x)) function to an n-dimensional input Tensor.
```python
m = nn.LogSoftmax()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m(input))
```
The LogSoftmax formulation can be simplified as
f_i(x) = log(1 / a * exp(x_i)) where a = sum_j exp(x_j) .
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input with
values in the range [-inf, 0)
<img src="image/logsoftmax.png" >

View File

@ -1,136 +0,0 @@
## Containers
### Container
This is the base container class for all neural networks you would define.
```python
# Example of using Container
class Net(nn.Container):
def __init__(self):
super(Net, self).__init__(
conv1 = nn.Conv2d(1, 20, 5),
relu = nn.ReLU()
)
def forward(self, input):
output = self.relu(self.conv1(x))
return output
model = Net()
```
```python
# one can add modules to the container after construction
model.add_module('pool1', nn.MaxPool2d(2, 2))
```
```python
```
```python
# .parameters()
```
```python
>>> for param in model.parameters():
>>> print(type(param.data), param.size())
<class 'torch.FloatTensor'> (20L,)
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
```
```python
```
```python
# .state_dict()
```
```python
>>> pdict = model.state_dict()
>>> print(pdict.keys())
['conv1.bias', 'conv1.weight']
```
```python
```
You will subclass your container from this class.
In the constructor you define the modules that you would want to use,
and in the "forward" function you use the constructed modules in
your operations.
To make it easier to understand, given is a small example.
One can also add new modules to a container after construction.
You can do this with the add_module function
or by assigning them as Container attributes.
#### one can also set modules as attributes of the container
model.conv1 = nn.Conv2d(12, 24, 3)
The container has some important additional methods:
**`[generator] parameters()`**
returns a generator over all learnable parameters in the container instance.
This can typically be passed to the optimizer API
**`[dict] state_dict()`**
returns a dictionary of learnable parameters of the Container.
For example: ['conv1.weight' : Parameter(torch.FloatTensor(20x1x5x5)),
'conv1.bias' : Parameter(torch.FloatTensor(20)),
]
**`load_state_dict(dict)`**
Given a parameter dict, sets the parameters of self to be the given dict.
It loads the parameters recursively.
Excessive or non-matching parameter names are ignored.
For example, the input dict has an entry 'conv44.weight', but
if the container does not have a module named 'conv44', then this entry is ignored.
**`children()`**
Returns a generator over all the children modules of self
**`train()`**
Sets the Container (and all its child modules) to training mode (for modules such as batchnorm, dropout etc.)
**`eval()`**
Sets the Container (and all its child modules) to evaluate mode (for modules such as batchnorm, dropout etc.)
**`apply(closure)`**
Applies the given closure to each parameter of the container.
**__Note: Apart from these, the container will define the base functions that it has derived from nn.Module __**
### Sequential
A sequential Container. It is derived from the base nn.Container class
```python
# Example of using Sequential
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
```
```python
```
Modules will be added to it in the order they are passed in the constructor.
Alternatively, an ordered dict of modules can also be passed in.
To make it easier to understand, given is a small example.
#### Example of using Sequential with OrderedDict
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))

View File

@ -1,236 +0,0 @@
## Convolution Layers
### Conv1d
Applies a 1D convolution over an input signal composed of several input
```python
The output value of the layer with input (b x iC x W) and output (b x oC x oW)
can be precisely described as:
output[b_i][oc_i][w_i] = bias[oc_i]
+ sum_iC sum_{ow = 0, oW-1} sum_{kw = 0 to kW-1}
weight[oc_i][ic_i][kw] * input[b_i][ic_i][stride_w * ow + kw]
```
```python
m = nn.Conv1d(16, 33, 3, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50))
output = m(input)
```
planes.
Note that depending of the size of your kernel, several (of the last)
columns of the input might be lost. It is up to the user
to add proper padding.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel.
stride | | the stride of the convolving kernel.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * ] | Input is minibatch x in_channels x iW
output | [ * , out_channels , * ] | Output shape is precisely minibatch x out_channels x floor((iW + 2*padW - kW) / dW + 1)
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_channels x in_channels x kW)
bias | the learnable bias of the module of shape (out_channels)
### Conv2d
Applies a 2D convolution over an input image composed of several input
```python
The output value of the layer with input (b x iC x H x W) and output (b x oC x oH x oW)
can be precisely described as:
output[b_i][oc_i][h_i][w_i] = bias[oc_i]
+ sum_iC sum_{oh = 0, oH-1} sum_{ow = 0, oW-1} sum_{kh = 0 to kH-1} sum_{kw = 0 to kW-1}
weight[oc_i][ic_i][kh][kw] * input[b_i][ic_i][stride_h * oh + kh][stride_w * ow + kw]
```
```python
# With square kernels and equal stride
m = nn.Conv2d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
# non-square kernels and unequal stride and with padding and dilation
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 100))
output = m(input)
```
planes.
Note that depending of the size of your kernel, several (of the last)
columns or rows of the input image might be lost. It is up to the user
to add proper padding in images.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number s or a tuple.
dilation | None | If given, will do dilated (or atrous) convolutions. Can be a single number s or a tuple.
bias | True | If set to False, the layer will not learn an additive bias.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * ] | Input is minibatch x in_channels x iH x iW
output | [ * , out_channels , * , * ] | Output shape is precisely minibatch x out_channels x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_channels x in_channels x kH x kW)
bias | the learnable bias of the module of shape (out_channels)
### ConvTranspose2d
Applies a 2D deconvolution operator over an input image composed of several input
```python
# With square kernels and equal stride
m = nn.ConvTranspose2d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
input = autograd.Variable(torch.randn(20, 16, 50, 100))
output = m(input)
# exact output size can be also specified as an argument
input = autograd.Variable(torch.randn(1, 16, 12, 12))
downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
h = downsample(input)
output = upsample(h, output_size=input.size())
```
planes.
The deconvolution operator multiplies each input value element-wise by a learnable kernel,
and sums over the outputs from all input feature planes.
This module can be seen as the exact reverse of the Conv2d module.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number or a tuple (sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number or a tuple.
output_padding | 0 | A zero-padding of 0 <= padding < stride that should be added to the output. Can be a single number or a tuple.
bias | True | If set to False, the layer will not learn an additive bias.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * ] | Input is minibatch x in_channels x iH x iW
output | [ * , out_channels , * , * ] | Output shape is minibatch x out_channels x (iH - 1) * sH - 2*padH + kH + output_paddingH x (iW - 1) * sW - 2*padW + kW + output_paddingW, or as specified in a second argument to the call.
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (in_channels x out_channels x kH x kW)
bias | the learnable bias of the module of shape (out_channels)
### Conv3d
Applies a 3D convolution over an input image composed of several input
```python
# With square kernels and equal stride
m = nn.Conv3d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.Conv3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(4, 2, 0))
input = autograd.Variable(torch.randn(20, 16, 10, 50, 100))
output = m(input)
```
planes.
Note that depending of the size of your kernel, several (of the last)
columns or rows of the input image might be lost. It is up to the user
to add proper padding in images.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number s or a tuple (st x sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number s or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * , * ] | Input is minibatch x in_channels x iT x iH x iW
output | [ * , out_channels , * , * , * ] | Output shape is precisely minibatch x out_channels x floor((iT + 2*padT - kT) / dT + 1) x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_channels x in_channels x kT x kH x kW)
bias | the learnable bias of the module of shape (out_channels)
### ConvTranspose3d
Applies a 3D deconvolution operator over an input image composed of several input
```python
# With square kernels and equal stride
m = nn.ConvTranspose3d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.ConvTranspose3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(0, 4, 2))
input = autograd.Variable(torch.randn(20, 16, 10, 50, 100))
output = m(input)
```
planes.
The deconvolution operator multiplies each input value element-wise by a learnable kernel,
and sums over the outputs from all input feature planes.
This module can be seen as the exact reverse of the Conv3d module.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number or a tuple (st x sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number or a tuple.
output_padding | 0 | A zero-padding of 0 <= padding < stride that should be added to the output. Can be a single number or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * , * ] | Input is minibatch x in_channels x iT x iH x iW
output | [ * , out_channels , * , * , * ] | Output shape is precisely minibatch x out_channels x (iT - 1) * sT - 2*padT + kT + output_paddingT x (iH - 1) * sH - 2*padH + kH + output_paddingH x (iW - 1) * sW - 2*padW + kW
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (in_channels x out_channels x kT x kH x kW)
bias | the learnable bias of the module of shape (out_channels)

View File

@ -1,233 +0,0 @@
# Module
This is the base class for all Modules defined in the nn package.
```python
# .parameters()
```
```python
>>> for param in model.parameters():
>>> print(type(param.data), param.size())
<class 'torch.FloatTensor'> (20L,)
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
```
```python
```
```python
# .state_dict()
```
```python
>>> pdict = model.state_dict()
>>> print(pdict.keys())
['bias', 'weight']
```
```python
```
Even the Container class derives from this class.
An nn.Module has the following interface:
**Constructor:**
nn.Module(**parameters)
All arguments passed in to the constructor need to be of type
nn.Parameter or a Tensor.
**forward(...)**
This is the function that one defines when subclassing to create
their own modules.
It takes in inputs and returns outputs.
**__call__(...)**
This calls the forward function, as well as the hooks
**register_buffer(name, tensor)**
This is typically used to register a buffer that is not a Parameter.
For example, in BatchNorm, the running_mean is a buffer, so one would
register it in the constructor of BatchNorm with:
`self.register_buffer('running_mean', torch.zeros(num_features))`
The registered buffers can simply be accessed as class members
when needed.
**cpu()**
Recursively moves all its parameters and buffers to the CPU
**cuda(device_id=None)**
Recursively moves all its parameters and buffers to the CUDA memory.
If device_id is given, moves it to GPU number device_id
**float()**
Typecasts the parameters and buffers to float
**double()**
Typecasts the parameters and buffers to double
**register_forward_hook(name, hook)**
This will register a user-defined closure on the module.
Whenever the module finishes its forward operation,
the user closure is called.
The signature of the closure is `def closure(input, output)`
**register_backward_hook(name, hook)**
This will register a user-defined closure on the module.
Whenever the module finishes its backward operation,
the user closure is called.
The signature of the closure is `def closure(gradOutput, gradInput)`
**remove_forward_hook(name)**
Removes a registered forward hook with the given name
**remove_backward_hook(name)**
Removes a registered backward hook with the given name
**`[generator] parameters()`**
returns a generator over all learnable parameters in the container instance.
This can typically be passed to the optimizer API
**`[dict] state_dict()`**
returns a dictionary of learnable parameters of the Module.
For example: ['weight' : Parameter(torch.FloatTensor(20x1x5x5)),
'bias' : Parameter(torch.FloatTensor(20)),
]
**`load_state_dict(dict)`**
Given a parameter dict, sets the parameters of self to be the given dict.
**`train()`**
Sets the Container to training mode (for modules such as batchnorm, dropout etc.)
**`eval()`**
Sets the Container to evaluate mode (for modules such as batchnorm, dropout etc.)
**`zero_grad()`**
Zeroes the gradients of each Parameter of the module
# Container
This is the base container class for all neural networks you would define.
```python
# Example of using Container
class Net(nn.Container):
def __init__(self):
super(Net, self).__init__(
conv1 = nn.Conv2d(1, 20, 5),
relu = nn.ReLU()
)
def forward(self, input):
output = self.relu(self.conv1(x))
return output
model = Net()
```
```python
# one can add modules to the container after construction
model.add_module('pool1', nn.MaxPool2d(2, 2))
```
```python
```
```python
# .parameters()
```
```python
>>> for param in model.parameters():
>>> print(type(param.data), param.size())
<class 'torch.FloatTensor'> (20L,)
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
```
```python
```
```python
# .state_dict()
```
```python
>>> pdict = model.state_dict()
>>> print(pdict.keys())
['conv1.bias', 'conv1.weight']
```
```python
```
You will subclass your container from this class.
In the constructor you define the modules that you would want to use,
and in the "forward" function you use the constructed modules in
your operations.
To make it easier to understand, given is a small example.
One can also add new modules to a container after construction.
You can do this with the add_module function
or by assigning them as Container attributes.
## one can also set modules as attributes of the container
model.conv1 = nn.Conv2d(12, 24, 3)
The container has some important additional methods:
**`[generator] parameters()`**
returns a generator over all learnable parameters in the container instance.
This can typically be passed to the optimizer API
**`[dict] state_dict()`**
returns a dictionary of learnable parameters of the Container.
For example: ['conv1.weight' : Parameter(torch.FloatTensor(20x1x5x5)),
'conv1.bias' : Parameter(torch.FloatTensor(20)),
]
**`load_state_dict(dict)`**
Given a parameter dict, sets the parameters of self to be the given dict.
It loads loads the parameters recursively.
Excessive or non-matching parameter names are ignored.
For example, the input dict has an entry 'conv44.weight', but
if the container does not have a module named 'conv44', then this entry is ignored.
**`children()`**
Returns a generator over all the children modules of self
**`train()`**
Sets the Container (and all its child modules) to training mode (for modules such as batchnorm, dropout etc.)
**`eval()`**
Sets the Container (and all its child modules) to evaluate mode (for modules such as batchnorm, dropout etc.)
**`apply(closure)`**
Applies the given closure to each parameter of the container.
**__Note: Apart from these, the container will define the base functions that it has derived from nn.Module __**

View File

@ -1,90 +0,0 @@
## Dropout layers
### Dropout
Randomly zeroes some of the elements of the input tensor.
```python
m = nn.Dropout(p=0.2)
input = autograd.Variable(torch.randn(20, 16))
output = m(input)
```
The elements to zero are randomized on every forward call.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
p | 0.5 | probability of an element to be zeroed.
inplace | false | If set to True, will do this operation in-place.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Input can be of any shape
output | Same | Output is of the same shape as input
### Dropout2d
Randomly zeroes whole channels of the input tensor.
```python
m = nn.Dropout2d(p=0.2)
input = autograd.Variable(torch.randn(20, 16, 32, 32))
output = m(input)
```
The input is 4D (batch x channels, height, width) and each channel
is of size (1, height, width).
The channels to zero are randomized on every forward call.
Usually the input comes from Conv2d modules.
As described in the paper "Efficient Object Localization Using Convolutional
Networks" (http://arxiv.org/abs/1411.4280), if adjacent pixels within
feature maps are strongly correlated (as is normally the case in early
convolution layers) then iid dropout will not regularize the activations
and will otherwise just result in an effective learning rate decrease.
In this case, nn.Dropout2d will help promote independence between
feature maps and should be used instead.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
p | 0.5 | probability of an element to be zeroed.
inplace | false | If set to True, will do this operation in-place.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [*, *, *, *] | Input can be of any sizes of 4D shape
output | Same | Output is of the same shape as input
### Dropout3d
Randomly zeroes whole channels of the input tensor.
```python
m = nn.Dropout3d(p=0.2)
input = autograd.Variable(torch.randn(20, 16, 4, 32, 32))
output = m(input)
```
The input is 5D (batch x channels, depth, height, width) and each channel
is of size (1, depth, height, width).
The channels to zero are randomized on every forward call.
Usually the input comes from Conv3d modules.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
p | 0.5 | probability of an element to be zeroed.
inplace | false | If set to True, will do this operation in-place.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [*, *, *, *, *] | Input can be of any sizes of 5D shape
output | Same | Output is of the same shape as input

View File

@ -1,36 +0,0 @@
## Linear layers
### Linear
Applies a linear transformation to the incoming data, y = Ax + b
```python
m = nn.Linear(20, 30)
input = autograd.Variable(torch.randn(128, 20))
output = m(input)
print(output.size())
```
The input is a 2D mini-batch of samples, each of size in_features
The output will be a 2D Tensor of size mini-batch x out_features
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_features | | size of each input sample
out_features | | size of each output sample
bias | True | If set to False, the layer will not learn an additive bias.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [*, in_features] | Input can be of shape minibatch x in_features
output | [*, out_features] | Output is of shape minibatch x out_features
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_features x in_features)
bias | the learnable bias of the module of shape (out_features)

View File

@ -1,295 +0,0 @@
## Loss functions
### L1Loss
Creates a criterion that measures the mean absolute value of the
element-wise difference between input `x` and target `y`:
loss(x, y) = 1/n \sum |x_i - y_i|
`x` and `y` arbitrary shapes with a total of `n` elements each
the sum operation still operates over all the elements, and divides by `n`.
The division by `n` can be avoided if one sets the internal
variable `sizeAverage` to `False`
### MSELoss
Creates a criterion that measures the mean squared error between
`n` elements in the input `x` and target `y`:
loss(x, y) = 1/n \sum |x_i - y_i|^2
`x` and `y` arbitrary shapes with a total of `n` elements each
the sum operation still operates over all the elements, and divides by `n`.
The division by `n` can be avoided if one sets the internal variable
`sizeAverage` to `False`
By default, the losses are averaged over observations for each minibatch.
However, if the field `sizeAverage = False`, the losses are instead summed.
### CrossEntropyLoss
This criterion combines `LogSoftMax` and `ClassNLLLoss` in one single class.
It is useful when training a classification problem with `n` classes.
If provided, the optional argument `weights` should be a 1D `Tensor`
assigning weight to each of the classes.
This is particularly useful when you have an unbalanced training set.
The `input` is expected to contain scores for each class:
`input` has to be a 2D `Tensor` of size `batch x n`.
This criterion expects a class index (0 to nClasses-1) as the
`target` for each value of a 1D tensor of size `n`
The loss can be described as:
loss(x, class) = -log(exp(x[class]) / (\sum_j exp(x[j])))
= -x[class] + log(\sum_j exp(x[j]))
or in the case of the `weights` argument being specified:
loss(x, class) = weights[class] * (-x[class] + log(\sum_j exp(x[j])))
The losses are averaged across observations for each minibatch.
### NLLLoss
The negative log likelihood loss. It is useful to train a classification problem with n classes
```python
m = nn.LogSoftmax()
loss = nn.NLLLoss()
# input is of size nBatch x nClasses = 3 x 5
input = autograd.Variable(torch.randn(3, 5))
# each element in target has to have 0 <= value < nclasses
target = autograd.Variable(torch.LongTensor([1, 0, 4]))
output = loss(m(input), target)
output.backward()
```
If provided, the optional argument `weights` should be a 1D Tensor assigning
weight to each of the classes.
This is particularly useful when you have an unbalanced training set.
The input given through a forward call is expected to contain log-probabilities
of each class: input has to be a 2D Tensor of size minibatch x n
Obtaining log-probabilities in a neural network is easily achieved by
adding a `LogSoftmax` layer in the last layer.
You may use `CrossEntropyLoss` instead, if you prefer not to
add an extra layer.
The target that this loss expects is a class index (0 to the number of classes - 1)
The loss can be described as:
loss(x, class) = -x[class]
or in the case of the weights argument it is specified as follows:
loss(x, class) = -weights[class] * x[class]
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
weight | None | a manual rescaling weight given to each class. If given, has to be a Tensor of size "nclasses".
size_average | True | By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch.
Target Shape: [ * ] : Targets of size [minibatch], each value has to be 0 <= targets[i] <= nClasses-1
#### Members
Parameter | Description
--------- | -----------
weight | the class-weights given as input to the constructor
### NLLLoss2d
This is the negative log likelihood loss, but for image inputs. It computes NLL loss per-pixel.
```python
m = nn.Conv2d(16, 32, (3, 3)).float()
loss = nn.NLLLoss2d()
# input is of size nBatch x nClasses x height x width
input = autograd.Variable(torch.randn(3, 16, 10, 10))
# each element in target has to have 0 <= value < nclasses
target = autograd.Variable(torch.LongTensor(3, 8, 8).random_(0, 4))
output = loss(m(input), target)
output.backward()
```
This loss does not support per-class weights
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
size_average | True | By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch.
Target Shape: [ * , *, *] : Targets of size minibatch x height x width, each value has to be 0 <= targets[i] <= nClasses-1
### KLDivLoss
The [Kullback-Leibler divergence](http://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence) Loss
KL divergence is a useful distance measure for continuous distributions
and is often useful when performing direct regression over the space of
(discretely sampled) continuous output distributions.
As with ClassNLLLoss, the `input` given is expected to contain
_log-probabilities_, however unlike ClassNLLLoss, `input` is not
restricted to a 2D Tensor, because the criterion is applied element-wise.
This criterion expects a `target` `Tensor` of the same size as the
`input` `Tensor`.
The loss can be described as:
loss(x, target) = 1/n \sum(target_i * (log(target_i) - x_i))
By default, the losses are averaged for each minibatch over observations
*as well as* over dimensions. However, if the field
`sizeAverage` is set to `False`, the losses are instead summed.
### BCELoss
Creates a criterion that measures the Binary Cross Entropy
between the target and the output:
loss(o, t) = - 1/n sum_i (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))
or in the case of the weights argument being specified:
loss(o, t) = - 1/n sum_i weights[i] * (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))
This is used for measuring the error of a reconstruction in for example
an auto-encoder. Note that the targets `t[i]` should be numbers between 0 and 1,
for instance, the output of an `nn.Sigmoid` layer.
By default, the losses are averaged for each minibatch over observations
*as well as* over dimensions. However, if the field `sizeAverage` is set
to `False`, the losses are instead summed.
### MarginRankingLoss
Creates a criterion that measures the loss given
inputs `x1`, `x2`, two 1D mini-batch `Tensor`s,
and a label 1D mini-batch tensor `y` with values (`1` or `-1`).
If `y == 1` then it assumed the first input should be ranked higher
(have a larger value) than the second input, and vice-versa for `y == -1`.
The loss function for each sample in the mini-batch is:
loss(x, y) = max(0, -y * (x1 - x2) + margin)
if the internal variable `sizeAverage = True`,
the loss function averages the loss over the batch samples;
if `sizeAverage = False`, then the loss function sums over the batch samples.
By default, `sizeAverage` equals to `True`.
### HingeEmbeddingLoss
Measures the loss given an input `x` which is a 2D mini-batch tensor
and labels `y`, a 1D tensor containing values (`1` or `-1`).
This is usually used for measuring whether two inputs are similar or dissimilar,
e.g. using the L1 pairwise distance, and is typically used for learning
nonlinear embeddings or semi-supervised learning.
{ x_i, if y_i == 1
loss(x, y) = 1/n {
{ max(0, margin - x_i), if y_i == -1
`x` and `y` arbitrary shapes with a total of `n` elements each
the sum operation still operates over all the elements, and divides by `n`.
(the division by `n` can be avoided if one sets the internal variable `sizeAverage=False`).
The `margin` has a default value of `1`, or can be set in the constructor.
### MultiLabelMarginLoss
Creates a criterion that optimizes a multi-class multi-classification
hinge loss (margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and
output `y` (which is a 2D `Tensor` of target class indices).
For each sample in the mini-batch:
loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x:size(1)
where `i == 0` to `x.size(0)`, `j == 0` to `y.size(0)`,
`y[j] != 0`, and `i != y[j]` for all `i` and `j`.
`y` and `x` must have the same size.
The criterion only considers the first non zero `y[j]` targets.
This allows for different samples to have variable amounts of target classes
### SmoothL1Loss
Creates a criterion that uses a squared term if the absolute
element-wise error falls below 1 and an L1 term otherwise.
It is less sensitive to outliers than the `MSELoss` and in some cases
prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick).
Also known as the Huber loss.
{ 0.5 * (x_i - y_i)^2, if |x_i - y_i| < 1
loss(x, y) = 1/n \sum {
{ |x_i - y_i| - 0.5, otherwise
`x` and `y` arbitrary shapes with a total of `n` elements each
the sum operation still operates over all the elements, and divides by `n`.
The division by `n` can be avoided if one sets the internal variable
`sizeAverage` to `False`
### SoftMarginLoss
Creates a criterion that optimizes a two-class classification
logistic loss between input `x` (a 2D mini-batch `Tensor`) and
target `y` (which is a tensor containing either `1`s or `-1`s).
loss(x, y) = sum_i (log(1 + exp(-y[i]*x[i]))) / x:nElement()
The normalization by the number of elements in the input can be disabled by
setting `self.sizeAverage` to `False`.
### MultiLabelSoftMarginLoss
Creates a criterion that optimizes a multi-label one-versus-all
loss based on max-entropy, between input `x` (a 2D mini-batch `Tensor`) and
target `y` (a binary 2D `Tensor`). For each sample in the minibatch:
loss(x, y) = - sum_i (y[i] log( exp(x[i]) / (1 + exp(x[i])))
+ (1-y[i]) log(1/(1+exp(x[i])))) / x:nElement()
where `i == 0` to `x.nElement()-1`, `y[i] in {0,1}`.
`y` and `x` must have the same size.
### CosineEmbeddingLoss
Creates a criterion that measures the loss given an input tensors x1, x2
and a `Tensor` label `y` with values 1 or -1.
This is used for measuring whether two inputs are similar or dissimilar,
using the cosine distance, and is typically used for learning nonlinear
embeddings or semi-supervised learning.
`margin` should be a number from `-1` to `1`, `0` to `0.5` is suggested.
If `margin` is missing, the default value is `0`.
The loss function for each sample is:
{ 1 - cos(x1, x2), if y == 1
loss(x, y) = {
{ max(0, cos(x1, x2) - margin), if y == -1
If the internal variable `sizeAverage` is equal to `True`,
the loss function averages the loss over the batch samples;
if `sizeAverage` is `False`, then the loss function sums over the
batch samples. By default, `sizeAverage = True`.
### MultiMarginLoss
Creates a criterion that optimizes a multi-class classification hinge loss
(margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and
output `y` (which is a 1D tensor of target class indices, `0` <= `y` <= `x.size(1)-1`):
For each mini-batch sample:
loss(x, y) = sum_i(max(0, (margin - x[y] + x[i]))^p) / x.size(0)
where `i == 0` to `x.size(0)` and `i != y`.
Optionally, you can give non-equal weighting on the classes by passing
a 1D `weights` tensor into the constructor.
The loss function then becomes:
loss(x, y) = sum_i(max(0, w[y] * (margin - x[y] - x[i]))^p) / x.size(0)
By default, the losses are averaged over observations for each minibatch.
However, if the field `sizeAverage` is set to `False`,
the losses are instead summed.

View File

@ -1,142 +0,0 @@
## Normalization layers
### BatchNorm1d
Applies Batch Normalization over a 2d input that is seen as a mini-batch of 1d inputs
```python
x - mean(x)
y = ----------------------------- * gamma + beta
standard_deviation(x) + eps
```
```python
# With Learnable Parameters
m = nn.BatchNorm1d(100)
# Without Learnable Parameters
m = nn.BatchNorm1d(100, affine=False)
input = autograd.Variable(torch.randn(20, 100))
output = m(input)
```
The mean and standard-deviation are calculated per-dimension over
the mini-batches and gamma and beta are learnable parameter vectors
of size N (where N is the input size).
During training, this layer keeps a running estimate of its computed mean
and variance. The running sum is kept with a default momentum of 0.1
During evaluation, this running mean/variance is used for normalization.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_features | | the size of each 1D input in the mini-batch
eps | 1e-5 | a value added to the denominator for numerical stability.
momentum | 0.1 | the value used for the running_mean and running_var computation.
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , num_features ] | 2D Tensor of nBatches x num_features
output | Same | Output has the same shape as input
#### Returns
a normalized tensor in the batch dimension
### BatchNorm2d
Applies Batch Normalization over a 4d input that is seen as a mini-batch of 3d inputs
```python
x - mean(x)
y = ----------------------------- * gamma + beta
standard_deviation(x) + eps
```
```python
# With Learnable Parameters
m = nn.BatchNorm2d(100)
# Without Learnable Parameters
m = nn.BatchNorm2d(100, affine=False)
input = autograd.Variable(torch.randn(20, 100, 35, 45))
output = m(input)
```
The mean and standard-deviation are calculated per-dimension over
the mini-batches and gamma and beta are learnable parameter vectors
of size N (where N is the input size).
During training, this layer keeps a running estimate of its computed mean
and variance. The running sum is kept with a default momentum of 0.1
During evaluation, this running mean/variance is used for normalization.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_features | | num_features from an expected input of size batch_size x num_features x height x width
eps | 1e-5 | a value added to the denominator for numerical stability.
momentum | 0.1 | the value used for the running_mean and running_var computation.
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , num_features , *, * ] | 4D Tensor of batch_size x num_features x height x width
output | Same | Output has the same shape as input
#### Returns
a normalized tensor in the batch dimension
### BatchNorm3d
Applies Batch Normalization over a 5d input that is seen as a mini-batch of 4d inputs
```python
x - mean(x)
y = ----------------------------- * gamma + beta
standard_deviation(x) + eps
```
```python
# With Learnable Parameters
m = nn.BatchNorm3d(100)
# Without Learnable Parameters
m = nn.BatchNorm3d(100, affine=False)
input = autograd.Variable(torch.randn(20, 100, 35, 45, 10))
output = m(input)
```
The mean and standard-deviation are calculated per-dimension over
the mini-batches and gamma and beta are learnable parameter vectors
of size N (where N is the input size).
During training, this layer keeps a running estimate of its computed mean
and variance. The running sum is kept with a default momentum of 0.1
During evaluation, this running mean/variance is used for normalization.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_features | | num_features from an expected input of size batch_size x num_features x height x width
eps | 1e-5 | a value added to the denominator for numerical stability.
momentum | 0.1 | the value used for the running_mean and running_var computation.
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , num_features , * , * , * ] | 5D Tensor of batch_size x num_features x depth x height x width
output | Same | Output has the same shape as input
#### Returns
a normalized tensor in the batch dimension

View File

@ -1,308 +0,0 @@
## Pooling Layers
### MaxPool1d
Applies a 1D max pooling over an input signal composed of several input
```python
The output value of the layer with input (b x C x W) and output (b x C x oW)
can be precisely described as:
output[b_i][c_i][w_i] = max_{k=1, K} input[b_i][c_i][stride_w * w_i + k)]
```
```python
# pool of size=3, stride=2
m = nn.MaxPool1d(3, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50))
output = m(input)
```
planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over
stride | | the stride of the window
padding | 0 | implicit padding to be added.
dilation | kernel_size | a parameter that controls the stride of elements in the window.
return_indices | False | if True, will return the indices along with the outputs. Useful when Unpooling later.
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , * ] | Input is minibatch x channels x iW
output | [ * , * , * ] | Output shape = minibatch x channels x floor((iW + 2*padW - kernel_size) / stride + 1)
### MaxPool2d
Applies a 2D max pooling over an input signal composed of several input
```python
The output value of the layer with input (b x C x H x W) and output (b x C x oH x oW)
can be precisely described as:
output[b_i][c_i][h_i][w_i] = max_{{kh=1, KH}, {kw=1, kW}} input[b_i][c_i][stride_h * h_i + kH)][stride_w * w_i + kW)]
```
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool2d(3, stride=2)
# pool of non-square window
m = nn.MaxPool2d((3, 2), stride=(2, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
dilation | 1 | a parameter that controls the stride of elements in the window. Can be a single number or a tuple.
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool2d .
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### MaxPool3d
Applies a 3D max pooling over an input signal composed of several input
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool3d(3, stride=2)
# pool of non-square window
m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
input = autograd.Variable(torch.randn(20, 16, 50,44, 31))
output = m(input)
```
planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
dilation | 1 | a parameter that controls the stride of elements in the window. Can be a single number or a tuple.
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool3d .
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x floor((iT + 2*padT - kT) / sT + 1) x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### MaxUnpool2d
Computes the inverse operation of MaxPool2d
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool2d(2, stride=2, return_indices = True)
mu = nn.MaxUnpool2d(2, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output, indices = m(input)
unpooled_output = mu.forward(output, indices)
# exact output size can be also specified as an argument
input = autograd.Variable(torch.randn(1, 16, 11, 11))
downsample = nn.MaxPool2d(3, 3, return_indices=True)
upsample = nn.MaxUnpool2d(3, 3)
h, indices = downsample(input)
output = upsample(h, indices, output_size=input.size())
```
MaxPool2d is not invertible, as the locations of the max locations are lost.
MaxUnpool2d takes in as input the output of MaxPool2d and the indices of the Max locations
and computes the inverse.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the max window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit padding that was added to the input. Can be a single number or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape is minibatch x channels x padH x (iH - 1) * sH + kH x padW x (iW - 1) * sW + kW, or as specified to the call.
### MaxUnpool3d
Computes the inverse operation of MaxPool3d
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool3d(3, stride=2, return_indices = True)
mu = nn.MaxUnpool3d(3, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50, 32, 15))
output, indices = m(input)
unpooled_output = mu.forward(output, indices)
```
MaxPool3d is not invertible, as the locations of the max locations are lost.
MaxUnpool3d takes in as input the output of MaxPool3d and the indices of the Max locations
and computes the inverse.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the max window. Can be a single number k (for a square kernel of k x k) or a tuple (kt x kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
padding | 0 | implicit padding that was added to the input. Can be a single number or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x padT x (iT - 1) * sT + kT x padH x (iH - 1) * sH + kH x padW x (iW - 1) * sW + kW
### AvgPool2d
Applies a 2D average pooling over an input signal composed of several input
```python
The output value of the layer with input (b x C x H x W) and output (b x C x oH x oW)
can be precisely described as:
output[b_i][c_i][h_i][w_i] = (1 / K) * sum_{kh=1, KH} sum_{kw=1, kW} input[b_i][c_i][stride_h * h_i + kh)][stride_w * w_i + kw)]
```
```python
# pool of square window of size=3, stride=2
m = nn.AvgPool2d(3, stride=2)
# pool of non-square window
m = nn.AvgPool2d((3, 2), stride=(2, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### AvgPool3d
Applies a 3D average pooling over an input signal composed of several input
```python
# pool of square window of size=3, stride=2
m = nn.AvgPool3d(3, stride=2)
# pool of non-square window
m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
input = autograd.Variable(torch.randn(20, 16, 50,44, 31))
output = m(input)
```
planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a average over. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x floor((iT + 2*padT - kT) / sT + 1) x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### FractionalMaxPool2d
Applies a 2D fractional max pooling over an input signal composed of several input
```python
# pool of square window of size=3, and target output size 13x12
m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
# pool of square window and target output size being half of input image size
m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
planes.
Fractional MaxPooling is described in detail in the paper ["Fractional Max-Pooling" by Ben Graham](http://arxiv.org/abs/1412.6071)
The max-pooling operation is applied in kHxkW regions by a stochastic
step size determined by the target output size.
The number of output features is equal to the number of input planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
output_size | | the target output size of the image of the form oH x oW. Can be a tuple (oH, oW) or a single number oH for a square image oH x oH
output_ratio | | If one wants to have an output size as a ratio of the input size, this option can be given. This has to be a number or tuple in the range (0, 1)
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool2d .
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### LPPool2d
Applies a 2D power-average pooling over an input signal composed of several input
```python
# power-2 pool of square window of size=3, stride=2
m = nn.LPPool2d(2, 3, stride=2)
# pool of non-square window of power 1.2
m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
planes.
On each window, the function computed is: f(X) = pow(sum(pow(X, p)), 1/p)
At p = infinity, one gets Max Pooling
At p = 1, one gets Average Pooling
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)

View File

@ -1,346 +0,0 @@
## Recurrent layers
### RNN
Applies a multi-layer Elman RNN with tanh or ReLU non-linearity to an input sequence.
```python
h_t = tanh(w_ih * x_t + b_ih + w_hh * h_(t-1) + b_hh)
```
```python
rnn = nn.RNN(10, 20, 2)
input = Variable(torch.randn(5, 3, 10))
h0 = Variable(torch.randn(2, 3, 20))
output, hn = rnn(input, h0)
```
For each element in the input sequence, each layer computes the following
function:
where `h_t` is the hidden state at time t, and `x_t` is the hidden
state of the previous layer at time t or `input_t` for the first layer.
If nonlinearity='relu', then ReLU is used instead of tanh.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
num_layers | | the number of recurrent layers to stack.
nonlinearity | 'tanh' | The non-linearity to use ['tanh'|'relu'].
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
bidirectional | False | If True, becomes a bidirectional RNN.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_k) from the last layer of the RNN, for each k
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for k=seq_len
#### Members
Parameter | Description
--------- | -----------
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer, of shape (input_size x hidden_size)
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer, of shape (hidden_size x hidden_size)
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer, of shape (hidden_size)
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer, of shape (hidden_size)
### LSTM
Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
```python
i_t = sigmoid(W_ii x_t + b_ii + W_hi h_(t-1) + b_hi)
f_t = sigmoid(W_if x_t + b_if + W_hf h_(t-1) + b_hf)
g_t = tanh(W_ig x_t + b_ig + W_hg h_(t-1) + b_hg)
o_t = sigmoid(W_io x_t + b_io + W_ho h_(t-1) + b_ho)
c_t = f_t * c_(t-1) + i_t * g_t
h_t = o_t * tanh(c_t)
```
```python
rnn = nn.LSTM(10, 20, 2)
input = Variable(torch.randn(5, 3, 10))
h0 = Variable(torch.randn(2, 3, 20))
c0 = Variable(torch.randn(2, 3, 20))
output, hn = rnn(input, (h0, c0))
```
For each element in the input sequence, each layer computes the following
function:
where `h_t` is the hidden state at time t, `c_t` is the cell state at time t,
`x_t` is the hidden state of the previous layer at time t or input_t for the first layer,
and `i_t`, `f_t`, `g_t`, `o_t` are the input, forget, cell, and out gates, respectively.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
num_layers | | the number of recurrent layers to stack.
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
bidirectional | False | If True, becomes a bidirectional RNN.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
c_0 | | A (num_layers x batch x hidden_size) tensor containing the initial cell state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_t) from the last layer of the RNN, for each t
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for t=seq_len
c_n | A (num_layers x batch x hidden_size) tensor containing the cell state for t=seq_len
#### Members
Parameter | Description
--------- | -----------
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer (W_ii|W_if|W_ig|W_io), of shape (input_size x 4*hidden_size)
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer (W_hi|W_hf|W_hg|W_ho), of shape (hidden_size x 4*hidden_size)
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer (b_ii|b_if|b_ig|b_io), of shape (4*hidden_size)
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer (b_hi|b_hf|b_hg|b_ho), of shape (4*hidden_size)
### GRU
Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
```python
r_t = sigmoid(W_ir x_t + b_ir + W_hr h_(t-1) + b_hr)
i_t = sigmoid(W_ii x_t + b_ii + W_hi h_(t-1) + b_hi)
n_t = tanh(W_in x_t + r_t * W_hn h_(t-1))
h_t = (1 - i_t) * n_t + i_t * h_(t-1)
```
```python
rnn = nn.GRU(10, 20, 2)
input = Variable(torch.randn(5, 3, 10))
h0 = Variable(torch.randn(2, 3, 20))
output, hn = rnn(input, h0)
```
For each element in the input sequence, each layer computes the following
function:
where `h_t` is the hidden state at time t, `x_t` is the hidden
state of the previous layer at time t or input_t for the first layer,
and `r_t`, `i_t`, `n_t` are the reset, input, and new gates, respectively.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
num_layers | | the number of recurrent layers to stack.
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
bidirectional | False | If True, becomes a bidirectional RNN.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_t) from the last layer of the RNN, for each t
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for t=seq_len
#### Members
Parameter | Description
--------- | -----------
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer (W_ir|W_ii|W_in), of shape (input_size x 3*hidden_size)
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer (W_hr|W_hi|W_hn), of shape (hidden_size x 3*hidden_size)
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer (b_ir|b_ii|b_in), of shape (3*hidden_size)
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer (b_hr|b_hi|b_hn), of shape (3*hidden_size)
### RNNCell
An Elman RNN cell with tanh or ReLU non-linearity.
```python
h' = tanh(w_ih * x + b_ih + w_hh * h + b_hh)
```
```python
rnn = nn.RNNCell(10, 20)
input = Variable(torch.randn(6, 3, 10))
hx = Variable(torch.randn(3, 20))
output = []
for i in range(6):
hx = rnn(input[i], hx)
output.append(hx)
```
If nonlinearity='relu', then ReLU is used in place of tanh.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
nonlinearity | 'tanh' | The non-linearity to use ['tanh'|'relu'].
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (batch x input_size) tensor containing input features
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
#### Members
Parameter | Description
--------- | -----------
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
### LSTMCell
A long short-term memory (LSTM) cell.
```python
i = sigmoid(W_ii x + b_ii + W_hi h + b_hi)
f = sigmoid(W_if x + b_if + W_hf h + b_hf)
g = tanh(W_ig x + b_ig + W_hg h + b_hg)
o = sigmoid(W_io x + b_io + W_ho h + b_ho)
c' = f * c + i * g
h' = o * tanh(c')
```
```python
rnn = nn.LSTMCell(10, 20)
input = Variable(torch.randn(6, 3, 10))
hx = Variable(torch.randn(3, 20))
cx = Variable(torch.randn(3, 20))
output = []
for i in range(6):
hx, cx = rnn(input[i], (hx, cx))
output.append(hx)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (batch x input_size) tensor containing input features
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
c' | A (batch x hidden_size) tensor containing the next cell state for each element in the batch
#### Members
Parameter | Description
--------- | -----------
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
### GRUCell
A gated recurrent unit (GRU) cell
```python
r = sigmoid(W_ir x + b_ir + W_hr h + b_hr)
i = sigmoid(W_ii x + b_ii + W_hi h + b_hi)
n = tanh(W_in x + r * W_hn h)
h' = (1 - i) * n + i * h
```
```python
rnn = nn.GRUCell(10, 20)
input = Variable(torch.randn(6, 3, 10))
hx = Variable(torch.randn(3, 20))
output = []
for i in range(6):
hx = rnn(input[i], hx)
output.append(hx)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (batch x input_size) tensor containing input features
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
#### Members
Parameter | Description
--------- | -----------
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)

View File

@ -1,37 +0,0 @@
## Sparse layers
### Embedding
A simple lookup table that stores embeddings of a fixed dictionary and size
```python
# an Embedding module containing 10 tensors of size 3
embedding = nn.Embedding(10, 3)
# a batch of 2 samples of 4 indices each
input = torch.LongTensor([[1,2,4,5],[4,3,2,9]])
print(embedding(input))
# example with padding_idx
embedding = nn.Embedding(10, 3, padding_idx=0)
input = torch.LongTensor([[0,2,0,5]])
print(embedding(input))
```
This module is often used to store word embeddings and retrieve them using indices.
The input to the module is a list of indices, and the output is the corresponding
word embeddings.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_embeddings | | size of the dictionary of embeddings
embedding_dim | | the size of each embedding vector
padding_idx | None | If given, pads the output with zeros whenever it encounters the index.
max_norm | None | If given, will renormalize the embeddings to always have a norm lesser than this
norm_type | | The p of the p-norm to compute for the max_norm option
scale_grad_by_freq | | if given, this will scale gradients by the frequency of the words in the dictionary.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ *, * ] | Input is a 2D mini_batch LongTensor of m x n indices to extract from the Embedding dictionary
output | [ * , *, * ] | Output shape = m x n x embedding_dim

View File

@ -1,114 +0,0 @@
# torch.optim
The Optim package in Torch is targeted for one to optimize their neural networks
using a wide variety of optimization methods such as SGD, Adam etc.
Currently, the following optimization methods are supported, typically with
options such as weight decay and other bells and whistles.
- SGD `(params, lr=required, momentum=0, dampening=0)`
- AdaDelta `(params, rho=0.9, eps=1e-6, weight_decay=0)`
- Adagrad `(params, lr=1e-2, lr_decay=0, weight_decay=0)`
- Adam `(params, lr=1e-2, betas=(0.9, 0.999), epsilon=1e-8, weight_decay=0)`
- AdaMax `(params, lr=1e-2, betas=(0.9, 0.999), eps=1e-38, weight_decay=0)`
- Averaged SGD `(params, lr=1e-2, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0)`
- RProp `(params, lr=1e-2, etas=(0.5, 1.2), step_sizes=(1e-6, 50))`
- RMSProp `(params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0)`
The usage of the Optim package itself is as follows.
1. Construct an optimizer
2. Use `optimizer.step(...)` to optimize.
- Call `optimizer.zero_grad()` to zero out the gradient buffers when appropriate
## 1. Constructing the optimizer
One first constructs an `Optimizer` object by giving it a list of parameters
to optimize, as well as the optimizer options,such as learning rate, weight decay, etc.
Examples:
`optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)`
`optimizer = optim.Adam([var1, var2], lr = 0.0001)`
### Per-parameter options
In a more advanced usage, one can specify per-layer options by passing each parameter group along with it's custom options.
**__Any parameter group that does not have an attribute defined will use the default attributes.__**
This is very useful when one wants to specify per-layer learning rates for example.
Example:
`optim.SGD([{'params': model1.parameters()}, {'params': model2.parameters(), 'lr': 1e-3}], lr=1e-2, momentum=0.9)`
`model1`'s parameters will use the default learning rate of `1e-2` and momentum of `0.9`
`model2`'s parameters will use a learning rate of `1e-3`, and the default momentum of `0.9`
Then, you can use the optimizer by calling `optimizer.zero_grad()` and `optimizer.step(...)`. Read the next sections.
## 2. Taking an optimization step using `Optimizer.step(...)`
The step function has the following two signatures:
### a. `Optimizer.step(closure)`
The `step` function takes a user-defined closure that computes f(x) and returns the loss.
The closure needs to do the following:
- Optimizer.zero_grad()
- Compute the loss
- Call loss.backward()
- return the loss
Example 1: training a neural network
```python
# Example 1: training a neural network with optimizer.step(closure)
net = MNISTNet()
criterion = ClassNLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
for data in data_batches:
input, target = data
def closure():
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
return loss
optimizer.step(closure)
```
Notes: Why is this required? Why can't we simply have the optimizer take the parameters and grads?
Some optimization algorithms such as Conjugate Gradient and LBFGS need to evaluate their function
multiple times. For such optimization methods, the function (i.e. the closure) has to be defined.
### b. `Optimizer.step()`
This is a simplified usage that supports most, but not all optimization algorithms. For example, it does not support LBFGS or Conjugate Gradient.
The usage for this is to simply call the function after the backward() is called on your model.
Example 2: training a neural network
```python
# Example 2: training a neural network with optimizer.step()
net = MNISTNet()
criterion = ClassNLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
for data in data_batches:
input, target = data
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()
```

View File

@ -1,417 +0,0 @@
# Tensors
A `Tensor` is a potentially multi-dimensional matrix.
The number of dimensions is unlimited.
The `Tensor` set of classes are probably the most important class in
`torch`. Almost every package depends on these classes. They are *__the__*
class for handling numeric data. As with pretty much anything in
[torch], tensors are serializable with `torch.save` and `torch.load`
There are 7 Tensor classes in torch:
- `torch.FloatTensor` : Signed 32-bit floating point tensor
- `torch.DoubleTensor` : Signed 64-bit floating point tensor
- `torch.ByteTensor` : Unsigned 8-bit integer tensor
- `torch.CharTensor` : Signed 8-bit integer tensor
- `torch.ShortTensor` : Signed 16-bit integer tensor
- `torch.IntTensor` : Signed 32-bit integer tensor
- `torch.LongTensor` : Signed 64-bit integer tensor
The data in these tensors lives on the system memory connected to your CPU.
Most numeric operations are implemented _only_ for `FloatTensor` and `DoubleTensor`.
Other Tensor types are useful if you want to save memory space or specifically
do integer operations.
The number of dimensions of a `Tensor` can be queried by
`ndimension()` or `dim()`. Size of the `i-th` dimension is
returned by `size(i)`. A tuple containing the size of all the dimensions
can be returned by `size()`.
```python
import torch
# allocate a matrix of shape 3x4
a = torch.FloatTensor(3, 4)
print(a)
# convert this into a LongTensor
b = a.long()
print(b)
# print the size of the tensor
print(a.size())
# print the number of dimensions
print(a.dim())
```
These tensors can be converted to numpy arrays very efficiently
with zero memory copies.
For this, the two provided functions are `.numpy()` and `torch.from_numpy()`
```python
import numpy as np
# convert to numpy
c = a.numpy()
print(type(c))
```
When using GPUs, each of the classes above has an equivalent
class such as: `torch.cuda.FloatTensor`, `torch.cuda.LongTensor`, etc.
When one allocates a CUDA tensor, the data in these tensors lives in the
GPU memory.
One can seamlessly transfer a tensor from the CPU to the GPU, as well as
between different GPUs on your machine.
Apart from the above 7 tensor types, there is one additional tensor type on the GPU
- `torch.cuda.HalfTensor` : Signed 16-bit floating point tensor
```python
import torch.cuda
# allocate a matrix of shape 3x4
a = torch.cuda.FloatTensor(3, 4)
print(a)
# transfer this to the CPU
b = a.cpu()
print(b)
# transfer this back to the GPU-1
a = b.cuda()
print(a)
# transfer this to GPU-2
b = a.cuda(1)
```
## Internal data representation
The actual data of a `Tensor` is contained into a
`Storage`. It can be accessed using
`storage()`. While the memory of a
`Tensor` has to be contained in this unique `Storage`, it might
not be contiguous: the first position used in the `Storage` is given
by `storage_offset()` (starting at `0`).
And the _jump_ needed to go from one element to another
element in the `i-th` dimension is given by
`stride(i-1)`. See the code example for an illustration.
```python
# given a 3d tensor
x = torch.FloatTensor(7,7,7)
# accessing the element `(3,4,5)` can be done by
x[3 - 1][4 - 1][5 - 1]
# or equivalently (but slowly!)
x.storage()[x.storage_offset()
+ (3 - 1) * x.stride(0)
+ (4 - 1) * x.stride(1)
+ (5 - 1) * x.stride(2)]
```
One could say that a `Tensor` is a particular way of _viewing_ a
`Storage`: a `Storage` only represents a chunk of memory, while the
`Tensor` interprets this chunk of memory as having dimensions:
```python
# a tensor interprets a chunk of memory as having dimensions
>>> x = torch.Tensor(4,5)
>>> s = x.storage()
>>> for i in range(s.size()): # fill up the Storage
>>> s[i] = i
# s is interpreted by x as a 2D matrix
>>> print(x)
1 2 3 4 5
6 7 8 9 10
11 12 13 14 15
16 17 18 19 20
[torch.FloatTensor of dimension 4x5]
```
Note also that in Torch7 ___elements in the same row___ [elements along the __last__ dimension]
are contiguous in memory for a matrix [tensor]:
This is exactly like in `C` and `numpy` (and not `Fortran`).
## Default Tensor type
For convenience, _an alias_ `torch.Tensor` is provided, which allows the user to write
type-independent scripts, which can then be run after choosing the desired Tensor type with
a call like
`torch.set_default_tensor_type('torch.DoubleTensor')`
By default, the alias points to `torch.FloatTensor`.
## Efficient memory management
_All_ tensor operations post-fixed with an underscore (for example `.fill_`)
do _not_ make any memory copy. All these methods transform the existing tensor.
Tensor methods such as `narrow` and `select` return a new tensor referencing _the same storage_.
This magical behavior is internally obtained by good usage of the `stride()` and
`storage_offset()`. See the code example illustrating this.
```python
>>> x = torch.Tensor(5).zero_()
>>> print(x)
0
0
0
0
0
[torch.FloatTensor of dimension 5]
>>> x.narrow(0, 1, 2).fill_(1)
>>> # narrow() returns a Tensor referencing the same Storage as x
>>> print(x)
0
1
1
0
0
[torch.FloatTensor of dimension 5]
>>> # same thing can be achieved with slice indexing
>>> x[1:3] = 2
>>> print(x)
0
2
2
0
0
[torch.FloatTensor of dimension 5]
```
If you really need to copy a `Tensor`, you can use the `copy_()` method:
```python
# making a copy of a tensor
y = x.new(x.size()).copy_(x)
y = x.clone()
```
Or the convenience method `clone()`
We now describe all the methods for `Tensor`. If you want to specify the Tensor type,
just replace `Tensor` by the name of the Tensor variant (like `CharTensor`).
## Constructors ##
Tensor constructors create new Tensor objects, optionally allocating
new memory. By default, the elements of newly allocated memory are
not initialized and therefore might contain arbitrary numbers. Here are
several ways to construct a new `Tensor`.
### torch.Tensor() ###
Returns an empty tensor.
### torch.Tensor(tensor) ###
Returns a new tensor which references the same `Storage` as the given `tensor`.
The `size`, `stride`, and `storage_offset` are the same as those of the given tensor.
The new `Tensor` is now going to "view" the same `storage`
as the given `tensor`. As a result, any modification in the elements
of the `Tensor` will have an impact on the elements of the given
`tensor`, and vice-versa. No memory copy!
```python
>>> x = torch.Tensor(2,5).fill_(3.14)
>>> x
3.1400 3.1400 3.1400 3.1400 3.1400
3.1400 3.1400 3.1400 3.1400 3.1400
[torch.FloatTensor of dimension 2x5]
>>> y = torch.Tensor(x)
>>> y
3.1400 3.1400 3.1400 3.1400 3.1400
3.1400 3.1400 3.1400 3.1400 3.1400
[torch.FloatTensor of dimension 2x5]
>>> y.zero_()
>>> x # elements of x are the same as y!
0 0 0 0 0
0 0 0 0 0
[torch.FloatTensor of dimension 2x5]
```
### torch.Tensor(sz1 [,sz2 [,sz3 [,sz4 [,sz5 ...]]]]]) ###
Create a tensor of the given sizes.
The tensor size will be `sz1 x sz2 x sz3 x sz4 x sz5 x ...`.
### torch.Tensor(sizes) ###
Create a tensor of any number of dimensions. `sizes` gives the size in each dimension of
the tensor and is of type `torch.Size`.
```python
# Example: create a 4D 4x4x3x2 tensor
x = torch.Tensor(torch.Size([4,4,3,2]))
```
### torch.Tensor(storage) ###
Returns a tensor which uses the existing `Storage` starting at a storage offset of 0.
### torch.Tensor(sequence) ###
One can create a tensor from a python sequence.
For example, you can create a `Tensor` from a `list` or a `tuple`
```python
# create a 2d tensor from a list of lists
>>> torch.Tensor([[1,2,3,4], [5,6,7,8]])
1 2 3 4
5 6 7 8
[torch.FloatTensor of dimension 2x4]
```
### torch.Tensor(ndarray) ###
Creates a `Tensor` from a NumPy `ndarray`.
If the `dtype` of the `ndarray` is the same as the type of the `Tensor` being created,
The underlying memory of both are shared, i.e. if the value of an element
in the `ndarray` is changed, the corresponding value in the `Tensor` changes,
and vice versa.
```python
# create a ndarray of dtype=int64
>>> a = np.random.randint(2, size=10)
>>> a
array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])
# create a LongTensor. Since they are the same type (int64), the memory is shared
>>> b = torch.LongTensor(a)
0
0
1
1
0
1
1
0
0
0
[torch.LongTensor of size 10]
>>> b[3] = 100
>>> print(a[3])
100
# now create an IntTensor from the same ndarray.
# The memory is not shared in this case as the dtype=int64 != IntTensor (int32)
>>> b = torch.IntTensor(a)
>>> b[3] = 30000
>>> print(a[3])
100
# a did not change to the value 30000
```
## NumPy Conversion ##
### torch.from_numpy(ndarray)
This is a convenience function similar to the constructor above.
Given a numpy `ndarray`, it constructs a torch `Tensor` of the same `dtype`
as the numpy array.
For example, passing in an ndarray of dtype=float64 will create a torch.DoubleTensor
### Tensor.numpy()
This is a member function on a tensor that converts a torch `Tensor` to a
numpy `ndarray`. The memory of the data of both objects is shared.
Hence, changing a value in the `Tensor` will change the corresponding value in
the `ndarray` and vice versa.
```python
>>> a = torch.randn(3,4)
>>> b = a.numpy() # creates a numpy array with dtype=float32 in this case
>>> print(a)
-1.0453 1.4730 -1.8990 -0.7763
1.8155 1.4004 -1.5286 1.0420
0.6551 1.0258 0.1152 -0.3239
[torch.FloatTensor of size 3x4]
>>> print(b)
[[-1.04525673 1.4730444 -1.89899576 -0.77626842]
[ 1.81549406 1.40035892 -1.5286355 1.04199517]
[ 0.6551016 1.02575183 0.11520521 -0.32391372]]
>>> a[2][2] = 1000
>>> print(b)
[[ -1.04525673e+00 1.47304440e+00 -1.89899576e+00 -7.76268423e-01]
[ 1.81549406e+00 1.40035892e+00 -1.52863550e+00 1.04199517e+00]
[ 6.55101597e-01 1.02575183e+00 1.00000000e+03 -3.23913723e-01]]
# notice that b[2][2] has changed to the value 1000 too.
```
### torch.is_tensor(obj)
Returns True if the passed-in object is a `Tensor` (of any type). Returns `False` otherwise.
### torch.is_storage(obj)
Returns True if the passed-in object is a `Storage` (of any type). Returns `False` otherwise.
### torch.expand_as
### torch.expand
### torch.view
### torch.view_as
### torch.permute
### torch.pin_memory
### copy
### split
### chunk
### tolist
### repeat
### unsqueeze
### unsqueeze_
### add, iadd, sub, isub, mul, imul, matmul, div, rdiv, idiv, mod, neg
## GPU Semantics ##
When you create a `torch.cuda.*Tensor`, it is allocated on the current GPU.
However, you could allocate it on another GPU as well, using the `with torch.cuda.device(id)` context.
All allocations within this context will be placed on the GPU `id`.
Once `Tensor`s are allocated, you can do operations on them from any GPU context, and the results
will be placed on the same device as where the source `Tensor` is located.
For example if Tensor `a` and `b` are on GPU-2, but the GPU-1 is the current device.
If one does `c = a + b`, then `c` will be on GPU-2, regardless of what the current device is.
Cross-GPU operations are not allowed. The only Cross-GPU operation allowed is `copy`.
If `a` is on GPU-1 and `b` is on GPU-2, then `c = a + b` will result in an error.
See the example for more clarity on these semantics.
```python
# Tensors are allocated on GPU 1 by default
x = torch.cuda.FloatTensor(1)
# x.get_device() == 0
y = torch.FloatTensor(1).cuda()
# y.get_device() == 0
with torch.cuda.device(1):
# allocates a tensor on GPU 2
a = torch.cuda.FloatTensor(1)
# transfers a tensor from CPU to GPU-2
b = torch.FloatTensor(1).cuda()
# a.get_device() == b.get_device() == 1
z = x + y
# z.get_device() == 1
# even within a context, you can give a GPU id to the .cuda call
c = torch.randn(2).cuda(2)
# c.get_device() == 2
```

File diff suppressed because it is too large Load Diff

View File

@ -1,83 +0,0 @@
# torch
```python
# load torch with
import torch
```
```python
# load the CUDA features of torch with
import torch.cuda
```
__torch__ is the main package where data structures for multi-dimensional
tensors and mathematical operations over these are defined.
Additionally, it provides many utilities for efficient serializing of
Tensors and arbitrary types, and other useful utilities.
It has a CUDA counterpart, that enables you to run your tensor computations
on an NVIDIA GPU with compute capability >= 2.0.
## Multi-core
### torch.get_num_threads()
Gets the number of OpenMP threads that will be used for parallelizing CPU operations
### torch.set_num_threads(n)
Sets the number of OpenMP threads to use for parallelizing CPU operations
## Serialization
### torch.save(object, file)
This function pickles a Python object to the `file`. `file` is either a filename or a file handle.
`object` can be a picklable python object, including `torch` `Tensor`s, autograd `Variable`, nn `Module`s etc.
When a group of `torch` `Tensor`s are saved together, and if any of them share the same storages, then this sharing is preserved during saving and loading back.
### torch.load(file)
This function unpickles objects that have been pickled with `torch.save`
## Random Numbers
### torch.get_rng_state()
Gets the current state of the torch Random Number Generator.
This can be passed in the future to `torch.set_rng_state` to restore the current RNG state.
### torch.set_rng_state(state)
Sets the current state of the torch Random Number Generator to the given `state`.
### torch.manual_seed(number)
Sets the initial seed of the random number generator to a given number.
### torch.initial_seed()
Returns the number that is the initial seed to the Random Number Generator
## CUDA
### torch.cuda.is_available()
Returns `True` if CUDA is available and usable. Returns `False` otherwise.
### torch.cuda.device_count()
Returns the number of CUDA devices on the system.
### torch.cuda.current_device()
Returns the device index of the current default CUDA device.
### torch.cuda.synchronize()
This function issues a `cudaDeviceSynchronize` on the current device, and hence waits for all in-flight CUDA computation to finish.
### torch.cuda.current_stream()
Returns the handle to the current stream of the CUDA context.

View File

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB