remove old docs
|
|
@ -50,7 +50,7 @@ Elaborating further:
|
|||
|
||||
If you use numpy, then you have used Tensors (a.k.a ndarray).
|
||||
|
||||

|
||||

|
||||
|
||||
PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerate
|
||||
compute by a huge amount.
|
||||
|
|
|
|||
|
|
@ -1,534 +0,0 @@
|
|||
#! /usr/bin/env python
|
||||
# encoding: utf-8
|
||||
"""
|
||||
Very lightweight docstring to Markdown converter. Modified for use in pytorch
|
||||
|
||||
|
||||
### License
|
||||
|
||||
Copyright © 2013 Thomas Gläßle <t_glaessle@gmx.de>
|
||||
|
||||
This work is free. You can redistribute it and/or modify it under the
|
||||
terms of the Do What The Fuck You Want To Public License, Version 2, as
|
||||
published by Sam Hocevar. See the COPYING file for more details.
|
||||
|
||||
This program is free software. It comes without any warranty, to the
|
||||
extent permitted by applicable law.
|
||||
|
||||
|
||||
### Description
|
||||
|
||||
Little convenience tool to extract docstrings from a module or class and
|
||||
convert them to GitHub Flavoured Markdown:
|
||||
|
||||
https://help.github.com/articles/github-flavored-markdown
|
||||
|
||||
Its purpose is to quickly generate `README.md` files for small projects.
|
||||
|
||||
|
||||
### API
|
||||
|
||||
The interface consists of the following functions:
|
||||
|
||||
- `doctrim(docstring)`
|
||||
- `doc2md(docstring, title)`
|
||||
|
||||
You can run this script from the command line like:
|
||||
|
||||
$ doc2md.py [-a] [--no-toc] [-t title] module-name [class-name] > README.md
|
||||
|
||||
|
||||
### Limitations
|
||||
|
||||
At the moment this is suited only for a very specific use case. It is
|
||||
hardly forseeable, if I will decide to improve on it in the near future.
|
||||
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
import inspect
|
||||
|
||||
__all__ = ['doctrim', 'doc2md']
|
||||
|
||||
doctrim = inspect.cleandoc
|
||||
|
||||
def unindent(lines):
    """
    Strip the common leading whitespace from every line in *lines*.

    Unlike ``doctrim`` the first line receives no special treatment.
    The input is returned unchanged when it has no non-empty lines.
    """
    margins = [len(text) - len(text.lstrip()) for text in lines if text]
    if not margins:
        return lines
    margin = min(margins)
    return [text[margin:] for text in lines]
|
||||
|
||||
def escape_markdown(line):
    """
    Backslash-escape Markdown control characters in *line*.

    The backslash itself is escaped FIRST: the previous implementation ran
    the ``'\\' -> '\\\\'`` replacement last, which doubled the backslash of
    every escape it had just inserted (``[`` became ``\\\\[`` instead of
    ``\\[``).  The remaining characters are order-independent.
    """
    line = line.replace('\\', '\\\\')
    for ch in '[](){}`*_#+-.!':
        line = line.replace(ch, '\\' + ch)
    return line
|
||||
|
||||
def code_block(lines, language=''):
    """
    Wrap *lines* in a fenced code block tagged with *language*
    so it is syntax highlighted.
    """
    fence = '```'
    return [fence + language, *lines, fence]
|
||||
|
||||
def doctest2md(lines):
    """
    Convert a doctest fragment into plain code for a highlighted block.

    When every (unindented) line is a prompt or continuation line, the
    four character ``>>> `` / ``... `` prefixes are stripped; otherwise
    the unindented lines are returned as they are.
    """
    stripped = unindent(lines)
    prompts = ('>>> ', '... ')
    pure_doctest = all(
        text.startswith(prompts) or text in ('>>>', '...')
        for text in stripped
    )
    if pure_doctest:
        # Bare ">>>"/"..." lines become empty strings, as before.
        return [text[4:] for text in stripped]
    return stripped
|
||||
|
||||
def doc_code_block(lines, language):
    """
    Render *lines* as a fenced block, stripping doctest prompts first
    when the block is Python.
    """
    body = doctest2md(lines) if language == 'python' else lines
    return code_block(body, language)
|
||||
|
||||
# Matches a docstring section header of the form "Args:".
# Raw string: the previous '^\s*...' literal relied on Python passing the
# invalid \s escape through, which is a DeprecationWarning/SyntaxWarning
# on modern interpreters.
_args_section = re.compile(r'^\s*Args:\s*')

def is_args_check(line):
    """Return a match object when *line* opens an ``Args:`` section."""
    return _args_section.match(line)
|
||||
|
||||
def args_block(lines):
    """
    Render the body of an ``Args:`` section as a Markdown table.

    Each line must look like ``name: description`` with an optional
    trailing ``Default: value``.  Returns the table as a list of lines.
    """
    out = [
        '',
        'Parameter | Default | Description',
        '--------- | ------- | -----------',
    ]
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        # findall never returns None, so the old `assert matches != None`
        # could not fire; assert non-emptiness instead so a malformed
        # line fails with a clear message rather than an IndexError.
        assert matches, line
        name, description, _, default = matches[0]
        out.append(name + ' | ' + default + ' | ' + description)
    return out
|
||||
|
||||
# Inputs
# Matches a docstring section header of the form "Inputs: ...".
_inputs_section = re.compile(r'^\s*Inputs:\s*(.*)\s*')

def is_inputs_check(line):
    """Return a match object when *line* opens an ``Inputs:`` section."""
    return _inputs_section.match(line)
|
||||
|
||||
def inputs_block(lines):
    """
    Render the body of an ``Inputs:`` section as a Markdown table.

    NOTE(review): this duplicates ``args_block`` line for line; it is kept
    as a separate function to preserve the module's public surface.
    """
    out = [
        '',
        'Parameter | Default | Description',
        '--------- | ------- | -----------',
    ]
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        # findall never returns None; assert the line actually parsed.
        assert matches, line
        name, description, _, default = matches[0]
        out.append(name + ' | ' + default + ' | ' + description)
    return out
|
||||
|
||||
# Outputs
# Matches a docstring section header of the form "Outputs: ...".
_outputs_section = re.compile(r'^\s*Outputs:\s*(.*)\s*')

def is_outputs_check(line):
    """Return a match object when *line* opens an ``Outputs:`` section."""
    return _outputs_section.match(line)
|
||||
|
||||
def outputs_block(lines):
    """
    Render the body of an ``Outputs:`` section as a two column Markdown
    table (any ``Default:`` clause in the line is parsed but not shown).
    """
    out = [
        '',
        'Parameter | Description',
        '--------- | -----------',
    ]
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        # findall never returns None; assert the line actually parsed.
        assert matches, line
        name, description = matches[0][0], matches[0][1]
        out.append(name + ' | ' + description)
    return out
|
||||
|
||||
# Members
# Matches a docstring section header of the form "Members: ...".
_members_section = re.compile(r'^\s*Members:\s*(.*)\s*')

def is_members_check(line):
    """Return a match object when *line* opens a ``Members:`` section."""
    return _members_section.match(line)
|
||||
|
||||
def members_block(lines):
    """
    Render the body of a ``Members:`` section as a two column Markdown
    table.

    NOTE(review): this duplicates ``outputs_block``; kept separate to
    preserve the module's public surface.
    """
    out = [
        '',
        'Parameter | Description',
        '--------- | -----------',
    ]
    for line in lines:
        matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
        # findall never returns None; assert the line actually parsed.
        assert matches, line
        name, description = matches[0][0], matches[0][1]
        out.append(name + ' | ' + description)
    return out
|
||||
|
||||
# Section-header predicates: each regex recognizes the line that opens
# the corresponding docstring section.  All converted to raw strings
# (the old non-raw '\s' escapes are deprecated).
_returns_section = re.compile(r'^\s*Returns:\s*')
def is_returns_check(line):
    """Return a match when *line* opens a ``Returns:`` section."""
    return _returns_section.match(line)

_image_section = re.compile(r'^\s*Image:\s*')
def is_image_check(line):
    """Return a match when *line* is an ``Image:`` directive."""
    return _image_section.match(line)

# NOTE(review): the three patterns below also match "Returns:" -- this
# looks like a copy/paste left-over.  It is harmless today only because
# is_returns_check() is consulted before these in _doc2md(); the
# alternatives are preserved verbatim to keep behavior identical.
_example_section = re.compile(r'^\s*Returns:\s*|^\s*Examples:\s*')
def is_example_check(line):
    """Return a match when *line* opens an ``Examples:`` section."""
    return _example_section.match(line)

_inputshape_section = re.compile(r'^\s*Returns:\s*|^\s*Input Shape:\s*')
def is_inputshape_check(line):
    """Return a match when *line* is an ``Input Shape:`` directive."""
    return _inputshape_section.match(line)

_outputshape_section = re.compile(r'^\s*Returns:\s*|^\s*Output Shape:\s*')
def is_outputshape_check(line):
    """Return a match when *line* is an ``Output Shape:`` directive."""
    return _outputshape_section.match(line)
|
||||
###############################################
# Markdown heading helpers.

_reg_section = re.compile(r'^#+ ')


def is_heading(line):
    """Return a match object when *line* is a Markdown ATX heading."""
    return _reg_section.match(line)


def get_heading(line):
    """Split a heading line into a ``(level, title)`` pair."""
    assert is_heading(line)
    hashes, _, title = line.partition(' ')
    return len(hashes), title


def make_heading(level, title):
    """Build a heading of at least level 1 for *title*."""
    return '{} {}'.format('#' * max(level, 1), title)
|
||||
|
||||
def find_sections(lines):
    """
    Collect the ``(level, title)`` pair of every heading in *lines*,
    in document order.
    """
    return [get_heading(text) for text in lines if is_heading(text)]
|
||||
|
||||
def make_toc(sections):
    """
    Build a Markdown table of contents for ``(level, title)`` pairs.

    Indentation is relative to the shallowest level present; anchors are
    lower-cased with spaces replaced by dashes and '?' removed.
    """
    if not sections:
        return []
    top = min(level for level, _ in sections)
    entries = []
    for level, title in sections:
        anchor = title.lower().replace(' ', '-').replace('?', '')
        entries.append('{}- [{}](#{})'.format(' ' * (level - top), title, anchor))
    return entries
|
||||
|
||||
def _doc2md(lines, shiftlevel=0):
    """
    Core docstring-to-Markdown state machine.

    Scans *lines* once, recognizing the PyTorch docstring sections
    (Args / Inputs / Outputs / Members / Returns / Input Shape /
    Output Shape / Image, doctests and fenced code blocks) and emits
    Markdown.  State lives in attributes on the function object itself;
    section bodies accumulate in local lists that the nested reset()
    closure flushes into the output.  Headings are shifted down by
    *shiftlevel* levels.  Returns the rendered lines, code output first.

    NOTE(review): not re-entrant (function-attribute state) -- presumably
    only called sequentially from doc2md()/mod2md().
    """
    _doc2md.md = []
    _doc2md.is_code = False
    _doc2md.is_code_block = False
    _doc2md.is_args = False
    _doc2md.is_inputs = False
    _doc2md.is_outputs = False
    _doc2md.is_members = False
    _doc2md.is_returns = False
    _doc2md.is_inputshape = False
    _doc2md.is_outputshape = False
    _doc2md.code = []
    def reset():
        # Flush whichever section is currently open into the output and
        # clear all state flags.  The body lists (code, args, ...) are
        # read from the enclosing loop's scope via closure.
        if _doc2md.is_code:
            _doc2md.is_code = False
            _doc2md.code += doc_code_block(code, 'python')
            _doc2md.code += ['']
        if _doc2md.is_code_block:
            _doc2md.is_code_block = False
            # NOTE(review): the local `code_block` list shadows the
            # module-level code_block() function inside this scope.
            _doc2md.code += doc_code_block(code_block, 'python')
            _doc2md.code += ['']

        if _doc2md.is_args:
            _doc2md.is_args = False
            _doc2md.md += args_block(args)

        if _doc2md.is_inputs:
            _doc2md.is_inputs = False
            _doc2md.md += inputs_block(inputs)

        if _doc2md.is_outputs:
            _doc2md.is_outputs = False
            _doc2md.md += outputs_block(outputs)

        if _doc2md.is_members:
            _doc2md.is_members = False
            _doc2md.md += members_block(members)

        if _doc2md.is_returns:
            _doc2md.is_returns = False
            _doc2md.md += returns

        _doc2md.is_inputshape = False
        _doc2md.is_outputshape = False

    for line in lines:
        trimmed = line.lstrip()
        if is_args_check(line):
            reset()
            _doc2md.is_args = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Constructor Arguments']
            args = []
        elif is_inputs_check(line):
            reset()
            _doc2md.is_inputs = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Inputs']
            inputs = []
        elif is_outputs_check(line):
            reset()
            _doc2md.is_outputs = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Outputs']
            outputs = []
        elif is_members_check(line):
            reset()
            _doc2md.is_members = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Members']
            members = []
        elif is_returns_check(line):
            reset()
            _doc2md.is_returns = True
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Returns']
            returns = []
        elif is_example_check(line):
            # Examples: header itself is dropped; the doctest that follows
            # is picked up by the '>>> ' branch below.
            reset()
        elif is_inputshape_check(line):
            reset()
            # Remember the input shape; the table is emitted only when the
            # matching "Output Shape:" line arrives.
            inputshape = re.findall(r'\s*Input\sShape:\s*(.*)\s*:\s*(.*)\s*$', line)[0]
        elif is_outputshape_check(line):
            reset()
            outputshape = re.findall(r'\s*Output\sShape:\s*(.*)\s*:\s*(.*)\s*$', line)[0]
            _doc2md.md += ['']
            _doc2md.md += ['#' * (shiftlevel+2) + ' Expected Shape']
            _doc2md.md += [' | Shape | Description ']
            _doc2md.md += ['------ | ----- | ------------']
            _doc2md.md += [' input | ' + inputshape[0] + ' | ' + inputshape[1]]
            _doc2md.md += ['output | ' + outputshape[0] + ' | ' + outputshape[1]]
        elif is_image_check(line):
            reset()
            _doc2md.md += ['']
            filename = re.findall(r'\s*Image:\s*(.*?)\s*$', line)
            _doc2md.md += ['<img src="image/' + filename[0] + '" >']
        elif _doc2md.is_code == False and trimmed.startswith('>>> '):
            # First doctest prompt opens a code section.
            reset()
            _doc2md.is_code = True
            code = [line]
        elif _doc2md.is_code_block == False and trimmed.startswith('```'):
            # Opening fence of an explicit code block.
            reset()
            _doc2md.is_code_block = True
            code_block = []
        elif _doc2md.is_code_block == True and trimmed.startswith('```'):
            # end of code block
            reset()
        elif _doc2md.is_code_block:
            if line:
                code_block.append(line)
            else:
                reset()
        elif shiftlevel != 0 and is_heading(line):
            reset()
            level, title = get_heading(line)
            _doc2md.md += [make_heading(level + shiftlevel, title)]
        elif _doc2md.is_args:
            if line:
                args.append(line)
            else:
                reset()
        elif _doc2md.is_inputs:
            if line:
                inputs.append(line)
            else:
                reset()
        elif _doc2md.is_outputs:
            if line:
                outputs.append(line)
            else:
                reset()
        elif _doc2md.is_members:
            if line:
                members.append(line)
            else:
                reset()
        elif _doc2md.is_returns:
            if line:
                returns.append(line)
            else:
                reset()
        elif _doc2md.is_code:
            if line:
                code.append(line)
            else:
                reset()
        else:
            # Plain prose line: flush any open section, pass it through.
            reset()
            _doc2md.md += [line]
    reset()
    _doc2md.code += _doc2md.md
    return _doc2md.code
|
||||
|
||||
def doc2md(docstr, title, min_level=3, more_info=False, toc=True):
    """
    Convert a docstring to a markdown text.

    :param docstr: raw docstring to convert
    :param title: heading used for the generated document
    :param min_level: minimum markdown heading level for the title
    :param more_info: when True return ``(lines, sections)`` instead of
                      the joined text
    :param toc: include a table of contents built from the headings
    """
    text = doctrim(docstr)
    lines = text.split('\n')

    sections = find_sections(lines)
    if sections:
        level = min(n for n,t in sections) - 1
    else:
        level = 1

    # Shift all headings down so the document title lands at min_level.
    shiftlevel = 0
    if level < min_level:
        shiftlevel = min_level - level
        level = min_level
        sections = [(lev+shiftlevel, tit) for lev,tit in sections]

    # Headline: title, blank line, first docstring line (short summary).
    md = [
        make_heading(level, title),
        "",
        lines.pop(0),
        ""
    ]
    if toc:
        md += make_toc(sections)
    md += _doc2md(lines, shiftlevel)
    if more_info:
        return (md, sections)
    else:
        return "\n".join(md)
|
||||
|
||||
def mod2md(module, title, title_api_section, toc=True):
    """
    Generate markdown document from module, including API section.

    :param module: imported module object whose docstring is rendered
    :param title: document title
    :param title_api_section: truthy to also render the docstring of every
                              public, documented member of the module
    :param toc: include tables of contents
    """
    docstr = module.__doc__ or " "

    text = doctrim(docstr)
    lines = text.split('\n')

    sections = find_sections(lines)
    if sections:
        level = min(n for n,t in sections) - 1
    else:
        level = 1

    api_md = []
    api_sec = []
    if title_api_section :
        # sections.append((level+1, title_api_section))
        # Render every public (no leading underscore), documented member.
        for name, entry in iter(module.__dict__.items()):
            if name[0] != '_' and entry.__doc__:
                #api_sec.append((level+1, name))
                #api_md += ['', '']
                if entry.__doc__:
                    md, sec = doc2md(entry.__doc__, name,
                            min_level=level+1, more_info=True, toc=False)
                    api_sec += sec
                    api_md += md

    sections += api_sec

    # headline
    md = [
        make_heading(level, title),
        "",
        lines.pop(0),
        ""
    ]

    # main sections
    if toc:
        md += make_toc(sections)
    md += _doc2md(lines)

    # API section TOC and bodies.
    if toc:
        md += ['']
        md += make_toc(api_sec)
    md += api_md

    return "\n".join(md)
|
||||
|
||||
def main(args=None):
    """
    Command line entry point.

    Parses the arguments, imports the requested module and prints its
    Markdown documentation (whole module with API section, or a single
    entry) on stdout.

    :param args: argument list for argparse; None means sys.argv
    """
    # parse the program arguments
    import argparse
    parser = argparse.ArgumentParser(
        description='Convert docstrings to markdown.')

    parser.add_argument(
        'module', help='The module containing the docstring.')
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        'entry', nargs='?',
        help='Convert only docstring of this entry in module.')
    group.add_argument(
        '-a', '--all', dest='all', action='store_true',
        help='Create an API section with the contents of module.__all__.')
    parser.add_argument(
        '-t', '--title', dest='title',
        help='Document title (default is module name)')
    parser.add_argument(
        '--no-toc', dest='toc', action='store_false', default=True,
        help='Do not automatically generate the TOC')
    args = parser.parse_args(args)

    import importlib
    import inspect
    import os

    def add_path(*pathes):
        # Prepend the given directories to sys.path, newest first,
        # skipping entries that are already present.
        for path in reversed(pathes):
            if path not in sys.path:
                sys.path.insert(0, path)

    # Make this script's directory and the CWD importable so the target
    # module can be found.
    file = inspect.getfile(inspect.currentframe())
    add_path(os.path.realpath(os.path.abspath(os.path.dirname(file))))
    add_path(os.getcwd())

    mod_name = args.module
    if mod_name.endswith('.py'):
        mod_name = mod_name.rsplit('.py', 1)[0]
    title = args.title or mod_name.replace('_', '-')

    module = importlib.import_module(mod_name)

    if args.all:
        print(mod2md(module, title, 'API', toc=args.toc))

    else:
        if args.entry:
            docstr = module.__dict__[args.entry].__doc__ or ''
        else:
            docstr = module.__doc__ or ''

        print(doc2md(docstr, title, toc=args.toc))
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
# Build the torch.nn Markdown reference pages by extracting docstrings
# with doc2md.py.  Output files are written to the parent directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted: the old unquoted `pushd $SCRIPT_DIR` broke on paths with spaces.
pushd "$SCRIPT_DIR"

# gen_docs <outfile> <heading> <ClassName>...
# Writes "<heading>" to <outfile>, then appends the generated docs of every
# listed torch.nn class.  Replaces the repeated
#   echo X | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc
# pipelines with a single loop.
gen_docs() {
    local outfile=$1 heading=$2 cls
    shift 2
    echo "$heading" > "$outfile"
    for cls in "$@"; do
        python doc2md.py torch.nn "$cls" --title "$cls" --no-toc >> "$outfile"
    done
}

# module
#python doc2md.py torch.nn Module --title Module --no-toc >../nn_module.md

# containers
gen_docs ../nn_container.md "## Containers" Container Sequential

# convolution
gen_docs ../nn_convolution.md "## Convolution Layers" \
    Conv1d Conv2d ConvTranspose2d Conv3d ConvTranspose3d

# pooling
gen_docs ../nn_pooling.md "## Pooling Layers" \
    MaxPool1d MaxPool2d MaxPool3d MaxUnpool2d MaxUnpool3d \
    AvgPool2d AvgPool3d FractionalMaxPool2d LPPool2d

# activations
gen_docs ../nn_activation.md "## Non-linearities" \
    ReLU ReLU6 Threshold Hardtanh Sigmoid Tanh ELU LeakyReLU LogSigmoid \
    Softplus Softshrink PReLU Softsign Tanhshrink Softmin Softmax \
    Softmax2d LogSoftmax

# normalization
gen_docs ../nn_normalization.md "## Normalization layers" \
    BatchNorm1d BatchNorm2d BatchNorm3d

# recurrentnet
gen_docs ../nn_recurrent.md "## Recurrent layers" \
    RNN LSTM GRU RNNCell LSTMCell GRUCell

# linear
gen_docs ../nn_linear.md "## Linear layers" Linear

# dropout
gen_docs ../nn_dropout.md "## Dropout layers" Dropout Dropout2d Dropout3d

# Sparse
gen_docs ../nn_sparse.md "## Sparse layers" Embedding

# loss_functions
gen_docs ../nn_loss.md "## Loss functions" \
    L1Loss MSELoss CrossEntropyLoss NLLLoss NLLLoss2d KLDivLoss BCELoss \
    MarginRankingLoss HingeEmbeddingLoss MultiLabelMarginLoss SmoothL1Loss \
    SoftMarginLoss MultiLabelSoftMarginLoss CosineEmbeddingLoss \
    MultiMarginLoss

popd
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
import sys
|
||||
from tools.cwrap import cwrap
|
||||
from tools.cwrap.plugins import CWrapPlugin
|
||||
from string import Template
|
||||
import sys
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
|
||||
def transform_defined_if(defined_if):
    """
    Translate a cwrap ``defined_if`` preprocessor expression into the
    human-readable type list shown in the docs table.

    ``None`` means the declaration is unconditional and maps to
    "All Types (CPU and CUDA)".  ``||`` separators are rendered as ``//``.
    """
    if defined_if is None:
        # The original also ran the '||' replacement on this constant;
        # it contains none, so returning directly is equivalent.
        return "All Types (CPU and CUDA)"
    # Token rewrites, applied in sequence ('||' last so the separators
    # inserted by NUMPY_TYPE_ENUM are untouched -- they are already '//').
    replacements = (
        ('defined(TH_REAL_IS_FLOAT)', 'Float'),
        ('defined(TH_REAL_IS_DOUBLE)', 'Double'),
        ('defined(TH_REAL_IS_BYTE)', 'Byte'),
        ('defined(TH_REAL_IS_CHAR)', 'Char'),
        ('defined(TH_REAL_IS_INT)', 'Int'),
        ('defined(TH_REAL_IS_LONG)', 'Long'),
        ('defined(NUMPY_TYPE_ENUM)',
         'Byte // Short // Int // Long // Float // Double'),
        ('CUDA_INT', 'Cuda_Int'),
        ('CUDA_LONG', 'Cuda_Long'),
        ('CUDA_FLOAT', 'Cuda_Float'),
        ('CUDA_DOUBLE', 'Cuda_Double'),
        ('CUDA_HALF', 'Cuda_Half'),
        ('!IS_CUDA', 'All CPU Types'),
        ('||', '//'),
    )
    for old, new in replacements:
        defined_if = defined_if.replace(old, new)
    return defined_if
|
||||
|
||||
class DocGen(CWrapPlugin):
    """
    cwrap plugin that collects the TensorMethods declarations and prints a
    Markdown reference for torch.Tensor instead of generating wrapper code.

    All get_* hooks return empty templates so no C code is emitted; the
    plugin is only used for its view of the parsed declarations.
    """

    def __init__(self):
        # name -> declaration dict, filled by process_declarations().
        self.declarations = {}

    def process_declarations(self, declarations):
        # Index declarations by name; later duplicates overwrite earlier
        # ones, which is fine for documentation purposes.
        self.declarations.update({declaration['name']: declaration for declaration in declarations})
        # self.declarations += declarations
        return declarations

    def get_wrapper_template(self, declaration):
        # Documentation only: no wrapper code is generated.
        return Template("")

    def get_type_check(self, arg, option):
        return Template("")

    def get_type_unpack(self, arg, option):
        return Template("")

    def get_return_wrapper(self, option):
        return Template("")

    def print_declarations(self):
        """Print the collected declarations as Markdown on stdout."""
        print("# torch.Tensor")
        for name, declarations in sorted(self.declarations.items()):
            # Skip the in-place variant when its out-of-place twin exists;
            # both are folded into a single "name // name_" table row.
            if name.endswith('_') and name[:-1] in self.declarations:
                continue
            if not name.endswith('_') and name + '_' in self.declarations:
                inplace = True
            else:
                inplace = False

            # Prefer the python-facing name; private names are skipped.
            pname = declarations['options'][0].get('python_name', None)
            if pname != None:
                name = pname
            if name.startswith('_'):
                continue

            # START PRINTING MARKDOWN
            print("## " + name + " \n")
            print("| %-25s | %-8s | %-25s |" % ("Name", "Autograd", "defined if"))
            print("| " + ('-' * 28) + " | " + ('-' * 11) + " | "+ ('-' * 28) + " |")
            if inplace:
                sys.stdout.write("| %-25s" % (name + ' // ' + name + "_"))
            else:
                sys.stdout.write("| %-25s" % name)
            sys.stdout.write(' | ')
            # "Autograd: yes" iff a Variable exposes a method of this name.
            if hasattr(Variable(torch.randn(10)), name):
                sys.stdout.write(' %9s ' % 'yes') # + ' ' + name)
            else:
                sys.stdout.write(' %9s ' % 'no') # + ' ' + name)
            defined_if = declarations.get('defined_if', None)
            defined_if = transform_defined_if(defined_if)
            sys.stdout.write(' | ')
            sys.stdout.write(defined_if)
            sys.stdout.write(' |')
            sys.stdout.write('\n\n')
            #if inplace:
            #    print('Inplace Exists : True')
            #sys.stdout.write('Arguments : ')

            args = declarations['options'][0]['arguments']
            if len(args) == 0:
                print( '**No Arguments**\n' )
            else:
                print( '**Arguments**\n' )
                print("| %-15s | %-12s | %-15s |" % ("Name", "Type", "Default"))
                print("| " + ('-' * 18) + " | " + ('-' * 15) + " | "+ ('-' * 18) + " |")

                for arg in args:
                    # Map internal TH types to their Python-level names;
                    # generator arguments are not user-facing, skip them.
                    type_ = arg['type']
                    if type_ == 'THGenerator*':
                        continue
                    if type_ == 'THTensor*':
                        type_ = 'Tensor'
                    if type_ == 'THIndexTensor*':
                        type_ = 'LongTensor'
                    if type_ == 'THBoolTensor*':
                        type_ = 'ByteTensor'
                    if type_ == 'THLongTensor*':
                        type_ = 'LongTensor'
                    if type_ == 'THLongStorage*':
                        type_ = 'LongStorage'
                    default = arg.get('default', None)
                    allocated = arg.get('allocate', None)
                    if default == None and allocated == None:
                        default = " [required]"
                    elif allocated != None:
                        default = " [optional]"
                    else:
                        default = str(default)
                        # Unwrap AS_REAL(x) wrappers around numeric defaults.
                        import re
                        m = re.search('\s*AS_REAL\((.+)\)\s*', default)
                        if m:
                            default = m.group(1)
                            default = default
                    print('| %15s | %12s | %10s |' % (arg['name'], type_, default))
            # print( 'Options : ' )
            # print(declarations['options'][0])
            print('')
            if declarations['return']:
                # Map the C return type to a doc-friendly name.
                return_ = declarations['return']
                if return_ == 'THTensor*':
                    return_ = 'Tensor'
                if return_ == 'void':
                    return_ = 'nothing'
                print( '**Returns : ' + return_ + '**')
                print('')
|
||||
|
||||
# Run the cwrap pass over TensorMethods with the doc plugin (no code is
# generated), then dump the collected declarations as Markdown on stdout.
docs = DocGen()
cwrap('../../torch/csrc/generic/TensorMethods.cwrap', plugins=[docs])

docs.print_declarations()
|
||||
|
Before Width: | Height: | Size: 5.8 KiB |
|
Before Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 6.0 KiB |
|
Before Width: | Height: | Size: 5.4 KiB |
|
Before Width: | Height: | Size: 5.8 KiB |
|
Before Width: | Height: | Size: 8.9 KiB |
|
Before Width: | Height: | Size: 8.5 KiB |
|
Before Width: | Height: | Size: 6.4 KiB |
|
Before Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 6.4 KiB |
|
Before Width: | Height: | Size: 6.4 KiB |
|
Before Width: | Height: | Size: 6.1 KiB |
|
Before Width: | Height: | Size: 6.3 KiB |
|
Before Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 6.7 KiB |
|
Before Width: | Height: | Size: 5.9 KiB |
|
Before Width: | Height: | Size: 6.8 KiB |
|
Before Width: | Height: | Size: 5.4 KiB |
|
Before Width: | Height: | Size: 7.2 KiB |
|
|
@ -1,3 +0,0 @@
|
|||
# torch.nn
|
||||
|
||||
Neural Networks in PyTorch
|
||||
|
|
@ -1,496 +0,0 @@
|
|||
## Non-linearities
|
||||
### ReLU
|
||||
|
||||
Applies the rectified linear unit function element-wise ReLU(x)= max(0,x)
|
||||
|
||||
```python
|
||||
m = nn.ReLU()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
inplace | | can optionally do the operation in-place
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/relu.png" >
|
||||
### ReLU6
|
||||
|
||||
Applies the element-wise function ReLU6(x) = min( max(0,x), 6)
|
||||
|
||||
```python
|
||||
m = nn.ReLU6()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
inplace | | can optionally do the operation in-place
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/relu6.png" >
|
||||
### Threshold
|
||||
|
||||
Thresholds each element of the input Tensor
|
||||
|
||||
```python
|
||||
m = nn.Threshold(0.1, 20)
|
||||
input = Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
Threshold is defined as:
|
||||
y = x if x >= threshold
|
||||
value if x < threshold
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
threshold | | The value to threshold at
|
||||
value | | The value to replace with
|
||||
inplace | | can optionally do the operation in-place
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
Tensor of same dimension and shape as the input
|
||||
### Hardtanh
|
||||
|
||||
Applies the HardTanh function element-wise
|
||||
|
||||
```python
|
||||
m = nn.HardTanh(-2, 2)
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
HardTanh is defined as:
|
||||
f(x) = +1, if x > 1
|
||||
f(x) = -1, if x < -1
|
||||
f(x) = x, otherwise
|
||||
The range of the linear region [-1, 1] can be adjusted
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
min_value | | minimum value of the linear region range
|
||||
max_value | | maximum value of the linear region range
|
||||
inplace | | can optionally do the operation in-place
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/htanh.png" >
|
||||
### Sigmoid
|
||||
|
||||
Applies the element-wise function sigmoid(x) = 1 / ( 1 + exp(-x))
|
||||
|
||||
```python
|
||||
m = nn.Sigmoid()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/sigmoid.png" >
|
||||
### Tanh
|
||||
|
||||
Applies element-wise, Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
|
||||
|
||||
```python
|
||||
m = nn.Tanh()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/tanh.png" >
|
||||
### ELU
|
||||
|
||||
Applies element-wise, ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1))
|
||||
|
||||
```python
|
||||
m = nn.ELU()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
alpha | 1.0 | the alpha value for the ELU formulation.
|
||||
inplace | | can optionally do the operation in-place
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/elu.png" >
|
||||
### LeakyReLU
|
||||
|
||||
Applies element-wise, f(x) = max(0, x) + negative_slope * min(0, x)
|
||||
|
||||
```python
|
||||
m = nn.LeakyReLU(0.1)
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
negative_slope | 1e-2 | Controls the angle of the negative slope.
|
||||
inplace | | can optionally do the operation in-place
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
### LogSigmoid
|
||||
|
||||
Applies element-wise LogSigmoid(x) = log( 1 / (1 + exp(-x_i)))
|
||||
|
||||
```python
|
||||
m = nn.LogSigmoid()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/logsigmoid.png" >
|
||||
### Softplus
|
||||
|
||||
Applies element-wise SoftPlus(x) = 1/beta * log(1 + exp(beta * x_i))
|
||||
|
||||
```python
|
||||
m = nn.Softplus()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
SoftPlus is a smooth approximation to the ReLU function and can be used
|
||||
to constrain the output of a machine to always be positive.
|
||||
For numerical stability the implementation reverts to the linear function
|
||||
for inputs above a certain value.
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
beta | 1 | the beta value for the Softplus formulation.
|
||||
threshold | 20 | values above this revert to a linear function.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/softplus.png" >
|
||||
### Softshrink
|
||||
|
||||
Applies the soft shrinkage function elementwise
|
||||
|
||||
```python
|
||||
m = nn.Softshrink()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
SoftShrinkage operator is defined as:
|
||||
f(x) = x-lambda, if x > lambda > f(x) = x+lambda, if x < -lambda
|
||||
f(x) = 0, otherwise
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
lambd | 0.5 | the lambda value for the Softshrink formulation.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/sshrink.png" >
|
||||
### PReLU
|
||||
|
||||
Applies element-wise the function PReLU(x) = max(0,x) + a * min(0,x)
|
||||
|
||||
```python
|
||||
m = nn.PReLU()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
Here "a" is a learnable parameter.
|
||||
When called without arguments, nn.PReLU() uses a single parameter "a"
|
||||
across all input channels. If called with nn.PReLU(nChannels), a separate
|
||||
"a" is used for each input channel.
|
||||
Note that weight decay should not be used when learning "a" for good
|
||||
performance.
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
num_parameters | 1 | number of "a" to learn.
|
||||
init | 0.25 | the initial value of "a".
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/prelu.png" >
|
||||
### Softsign
|
||||
|
||||
Applies element-wise, the function Softsign(x) = x / (1 + |x|)
|
||||
|
||||
```python
|
||||
m = nn.Softsign()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
|
||||
<img src="image/softsign.png" >
|
||||
### Tanhshrink
|
||||
|
||||
Applies element-wise, Tanhshrink(x) = x - Tanh(x)
|
||||
|
||||
```python
|
||||
m = nn.Tanhshrink()
|
||||
input = autograd.Variable(torch.randn(2))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Tensor of any size and dimension
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input
|
||||
### Softmin
|
||||
|
||||
Applies the Softmin function to an n-dimensional input Tensor
|
||||
|
||||
```python
|
||||
m = nn.Softmin()
|
||||
input = autograd.Variable(torch.randn(2, 3))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
rescaling them so that the elements of the n-dimensional output Tensor
|
||||
lie in the range (0,1) and sum to 1
|
||||
Softmin(x) = exp(-x_i - shift) / sum_j exp(-x_j - shift)
|
||||
where shift = max_i - x_i
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * ] | 2D Tensor of any size
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input, with
|
||||
values in the range [0, 1]
|
||||
|
||||
<img src="image/softmin.png" >
|
||||
### Softmax
|
||||
|
||||
Applies the Softmax function to an n-dimensional input Tensor
|
||||
|
||||
```python
|
||||
m = nn.Softmax()
|
||||
input = autograd.Variable(torch.randn(2, 3))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
rescaling them so that the elements of the n-dimensional output Tensor
|
||||
lie in the range (0,1) and sum to 1
|
||||
|
||||
Softmax is defined as f_i(x) = exp(x_i - shift) / sum_j exp(x_j - shift)
|
||||
where shift = max_i x_i
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * ] | 2D Tensor of any size
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input with
|
||||
values in the range [0, 1]
|
||||
|
||||
<img src="image/softmax.png" >
|
||||
Notes:
|
||||
Note that this module doesn't work directly with NLLLoss,
|
||||
which expects the Log to be computed between the Softmax and itself.
|
||||
Use Logsoftmax instead (it's faster).
|
||||
### Softmax2d
|
||||
|
||||
Applies SoftMax over features to each spatial location
|
||||
|
||||
```python
|
||||
m = nn.Softmax2d()
|
||||
# you softmax over the 2nd dimension
|
||||
input = autograd.Variable(torch.randn(2, 3, 12, 13))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
When given an image of Channels x Height x Width, it will
|
||||
apply Softmax to each location [Channels, h_i, w_j]
|
||||
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , * , * ] | 4D Tensor of any size
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input with
|
||||
values in the range [0, 1]
|
||||
### LogSoftmax
|
||||
|
||||
Applies the Log(Softmax(x)) function to an n-dimensional input Tensor.
|
||||
|
||||
```python
|
||||
m = nn.LogSoftmax()
|
||||
input = autograd.Variable(torch.randn(2, 3))
|
||||
print(input)
|
||||
print(m(input))
|
||||
```
|
||||
|
||||
The LogSoftmax formulation can be simplified as
|
||||
f_i(x) = log(1 / a * exp(x_i)) where a = sum_j exp(x_j) .
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * ] | 2D Tensor of any size
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a Tensor of the same dimension and shape as the input with
|
||||
values in the range [-inf, 0)
|
||||
|
||||
<img src="image/logsoftmax.png" >
|
||||
|
|
@ -1,136 +0,0 @@
|
|||
## Containers
|
||||
### Container
|
||||
|
||||
This is the base container class for all neural networks you would define.
|
||||
|
||||
```python
|
||||
# Example of using Container
|
||||
class Net(nn.Container):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__(
|
||||
conv1 = nn.Conv2d(1, 20, 5),
|
||||
relu = nn.ReLU()
|
||||
)
|
||||
def forward(self, input):
|
||||
output = self.relu(self.conv1(x))
|
||||
return output
|
||||
model = Net()
|
||||
```
|
||||
|
||||
```python
|
||||
# one can add modules to the container after construction
|
||||
model.add_module('pool1', nn.MaxPool2d(2, 2))
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
```python
|
||||
# .parameters()
|
||||
```
|
||||
|
||||
```python
|
||||
>>> for param in model.parameters():
|
||||
>>> print(type(param.data), param.size())
|
||||
<class 'torch.FloatTensor'> (20L,)
|
||||
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
```python
|
||||
# .state_dict()
|
||||
```
|
||||
|
||||
```python
|
||||
>>> pdict = model.state_dict()
|
||||
>>> print(sdict.keys())
|
||||
['conv1.bias', 'conv1.weight']
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
You will subclass your container from this class.
|
||||
In the constructor you define the modules that you would want to use,
|
||||
and in the "forward" function you use the constructed modules in
|
||||
your operations.
|
||||
|
||||
To make it easier to understand, given is a small example.
|
||||
|
||||
One can also add new modules to a container after construction.
|
||||
You can do this with the add_module function
|
||||
or by assigning them as Container attributes.
|
||||
|
||||
#### one can also set modules as attributes of the container
|
||||
model.conv1 = nn.Conv2d(12, 24, 3)
|
||||
The container has some important additional methods:
|
||||
|
||||
**`[generator] parameters()`**
|
||||
|
||||
returns a generator over all learnable parameters in the container instance.
|
||||
This can typically be passed to the optimizer API
|
||||
|
||||
**`[dict] state_dict()`**
|
||||
|
||||
returns a dictionary of learnable parameters of the Container.
|
||||
For example: ['conv1.weight' : Parameter(torch.FloatTensor(20x1x5x5)),
|
||||
'conv1.bias' : Parameter(torch.FloatTensor(20)),
|
||||
]
|
||||
|
||||
|
||||
**`load_state_dict(dict)`**
|
||||
|
||||
Given a parameter dict, sets the parameters of self to be the given dict.
|
||||
It loads loads the parameters recursively.
|
||||
Excessive or non-matching parameter names are ignored.
|
||||
For example, the input dict has an entry 'conv44.weight', but
|
||||
if the container does not have a module named 'conv44', then this entry is ignored.
|
||||
|
||||
**`children()`**
|
||||
|
||||
Returns a generator over all the children modules of self
|
||||
|
||||
**`train()`**
|
||||
|
||||
Sets the Container (and all it's child modules) to training mode (for modules such as batchnorm, dropout etc.)
|
||||
|
||||
**`eval()`**
|
||||
|
||||
Sets the Container (and all it's child modules) to evaluate mode (for modules such as batchnorm, dropout etc.)
|
||||
|
||||
**`apply(closure)`**
|
||||
|
||||
Applies the given closure to each parameter of the container.
|
||||
|
||||
|
||||
**__Note: Apart from these, the container will define the base functions that it has derived from nn.Module __**
|
||||
### Sequential
|
||||
|
||||
A sequential Container. It is derived from the base nn.Container class
|
||||
|
||||
```python
|
||||
# Example of using Sequential
|
||||
model = nn.Sequential(
|
||||
nn.Conv2d(1,20,5),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(20,64,5),
|
||||
nn.ReLU()
|
||||
)
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
Modules will be added to it in the order they are passed in the constructor.
|
||||
Alternatively, an ordered dict of modules can also be passed in.
|
||||
|
||||
To make it easier to understand, given is a small example.
|
||||
#### Example of using Sequential with OrderedDict
|
||||
model = nn.Sequential(OrderedDict([
|
||||
('conv1', nn.Conv2d(1,20,5)),
|
||||
('relu1', nn.ReLU()),
|
||||
('conv2', nn.Conv2d(20,64,5)),
|
||||
('relu2', nn.ReLU())
|
||||
]))
|
||||
|
|
@ -1,236 +0,0 @@
|
|||
## Convolution Layers
|
||||
### Conv1d
|
||||
|
||||
Applies a 1D convolution over an input signal composed of several input
|
||||
|
||||
```python
|
||||
The output value of the layer with input (b x iC x W) and output (b x oC x oW)
|
||||
can be precisely described as:
|
||||
output[b_i][oc_i][w_i] = bias[oc_i]
|
||||
+ sum_iC sum_{ow = 0, oW-1} sum_{kw = 0 to kW-1}
|
||||
weight[oc_i][ic_i][kw] * input[b_i][ic_i][stride_w * ow + kw)]
|
||||
```
|
||||
|
||||
```python
|
||||
m = nn.Conv1d(16, 33, 3, stride=2)
|
||||
input = autograd.Variable(torch.randn(20, 16, 50))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
Note that depending of the size of your kernel, several (of the last)
|
||||
columns of the input might be lost. It is up to the user
|
||||
to add proper padding.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
in_channels | | The number of expected input channels in the image given as input
|
||||
out_channels | | The number of output channels the convolution layer will produce
|
||||
kernel_size | | the size of the convolving kernel.
|
||||
stride | | the stride of the convolving kernel.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , in_channels , * ] | Input is minibatch x in_channels x iW
|
||||
output | [ * , out_channels , * ] | Output shape is precisely minibatch x out_channels x floor((iW + 2*padW - kW) / dW + 1)
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the learnable weights of the module of shape (out_channels x in_channels x kW)
|
||||
bias | the learnable bias of the module of shape (out_channels)
|
||||
### Conv2d
|
||||
|
||||
Applies a 2D convolution over an input image composed of several input
|
||||
|
||||
```python
|
||||
The output value of the layer with input (b x iC x H x W) and output (b x oC x oH x oW)
|
||||
can be precisely described as:
|
||||
output[b_i][oc_i][h_i][w_i] = bias[oc_i]
|
||||
+ sum_iC sum_{oh = 0, oH-1} sum_{ow = 0, oW-1} sum_{kh = 0 to kH-1} sum_{kw = 0 to kW-1}
|
||||
weight[oc_i][ic_i][kh][kw] * input[b_i][ic_i][stride_h * oh + kh)][stride_w * ow + kw)]
|
||||
```
|
||||
|
||||
```python
|
||||
# With square kernels and equal stride
|
||||
m = nn.Conv2d(16, 33, 3, stride=2)
|
||||
# non-square kernels and unequal stride and with padding
|
||||
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
|
||||
# non-square kernels and unequal stride and with padding and dilation
|
||||
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 100))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
Note that depending of the size of your kernel, several (of the last)
|
||||
columns or rows of the input image might be lost. It is up to the user
|
||||
to add proper padding in images.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
in_channels | | The number of expected input channels in the image given as input
|
||||
out_channels | | The number of output channels the convolution layer will produce
|
||||
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
stride | 1 | the stride of the convolving kernel. Can be a single number s or a tuple (sh x sw).
|
||||
padding | 0 | implicit zero padding on the input. Can be a single number s or a tuple.
|
||||
dilation | None | If given, will do dilated (or atrous) convolutions. Can be a single number s or a tuple.
|
||||
bias | True | If set to False, the layer will not learn an additive bias.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , in_channels , * , * ] | Input is minibatch x in_channels x iH x iW
|
||||
output | [ * , out_channels , * , * ] | Output shape is precisely minibatch x out_channels x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the learnable weights of the module of shape (out_channels x in_channels x kH x kW)
|
||||
bias | the learnable bias of the module of shape (out_channels)
|
||||
### ConvTranspose2d
|
||||
|
||||
Applies a 2D deconvolution operator over an input image composed of several input
|
||||
|
||||
```python
|
||||
# With square kernels and equal stride
|
||||
m = nn.ConvTranspose2d(16, 33, 3, stride=2)
|
||||
# non-square kernels and unequal stride and with padding
|
||||
m = nn.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 100))
|
||||
output = m(input)
|
||||
# exact output size can be also specified as an argument
|
||||
input = autograd.Variable(torch.randn(1, 16, 12, 12))
|
||||
downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
|
||||
upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
|
||||
h = downsample(input)
|
||||
output = upsample(h, output_size=input.size())
|
||||
```
|
||||
|
||||
planes.
|
||||
The deconvolution operator multiplies each input value element-wise by a learnable kernel,
|
||||
and sums over the outputs from all input feature planes.
|
||||
This module can be seen as the exact reverse of the Conv2d module.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
in_channels | | The number of expected input channels in the image given as input
|
||||
out_channels | | The number of output channels the convolution layer will produce
|
||||
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
stride | 1 | the stride of the convolving kernel. Can be a single number or a tuple (sh x sw).
|
||||
padding | 0 | implicit zero padding on the input. Can be a single number or a tuple.
|
||||
output_padding | 0 | A zero-padding of 0 <= padding < stride that should be added to the output. Can be a single number or a tuple.
|
||||
bias | True | If set to False, the layer will not learn an additive bias.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , in_channels , * , * ] | Input is minibatch x in_channels x iH x iW
|
||||
output | [ * , out_channels , * , * ] | Output shape is minibatch x out_channels x (iH - 1) * sH - 2*padH + kH + output_paddingH x (iW - 1) * sW - 2*padW + kW, or as specified in a second argument to the call.
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the learnable weights of the module of shape (in_channels x out_channels x kH x kW)
|
||||
bias | the learnable bias of the module of shape (out_channels)
|
||||
### Conv3d
|
||||
|
||||
Applies a 3D convolution over an input image composed of several input
|
||||
|
||||
```python
|
||||
# With square kernels and equal stride
|
||||
m = nn.Conv3d(16, 33, 3, stride=2)
|
||||
# non-square kernels and unequal stride and with padding
|
||||
m = nn.Conv3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(4, 2, 0))
|
||||
input = autograd.Variable(torch.randn(20, 16, 10, 50, 100))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
Note that depending of the size of your kernel, several (of the last)
|
||||
columns or rows of the input image might be lost. It is up to the user
|
||||
to add proper padding in images.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
in_channels | | The number of expected input channels in the image given as input
|
||||
out_channels | | The number of output channels the convolution layer will produce
|
||||
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
|
||||
stride | 1 | the stride of the convolving kernel. Can be a single number s or a tuple (kt x sh x sw).
|
||||
padding | 0 | implicit zero padding on the input. Can be a single number s or a tuple.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , in_channels , * , * , * ] | Input is minibatch x in_channels x iT x iH x iW
|
||||
output | [ * , out_channels , * , * , * ] | Output shape is precisely minibatch x out_channels x floor((iT + 2*padT - kT) / dT + 1) x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the learnable weights of the module of shape (out_channels x in_channels x kT x kH x kW)
|
||||
bias | the learnable bias of the module of shape (out_channels)
|
||||
### ConvTranspose3d
|
||||
|
||||
Applies a 3D deconvolution operator over an input image composed of several input
|
||||
|
||||
```python
|
||||
# With square kernels and equal stride
|
||||
m = nn.ConvTranspose3d(16, 33, 3, stride=2)
|
||||
# non-square kernels and unequal stride and with padding
|
||||
m = nn.Conv3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(0, 4, 2))
|
||||
input = autograd.Variable(torch.randn(20, 16, 10, 50, 100))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
The deconvolution operator multiplies each input value element-wise by a learnable kernel,
|
||||
and sums over the outputs from all input feature planes.
|
||||
This module can be seen as the exact reverse of the Conv3d module.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
in_channels | | The number of expected input channels in the image given as input
|
||||
out_channels | | The number of output channels the convolution layer will produce
|
||||
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
|
||||
stride | 1 | the stride of the convolving kernel. Can be a single number or a tuple (st x sh x sw).
|
||||
padding | 0 | implicit zero padding on the input. Can be a single number or a tuple.
|
||||
output_padding | 0 | A zero-padding of 0 <= padding < stride that should be added to the output. Can be a single number or a tuple.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , in_channels , * , * , * ] | Input is minibatch x in_channels x iH x iW
|
||||
output | [ * , out_channels , * , * , * ] | Output shape is precisely minibatch x out_channels x (iT - 1) * sT - 2*padT + kT + output_paddingT x (iH - 1) * sH - 2*padH + kH + output_paddingH x (iW - 1) * sW - 2*padW + kW
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the learnable weights of the module of shape (in_channels x out_channels x kT x kH x kW)
|
||||
bias | the learnable bias of the module of shape (out_channels)
|
||||
|
|
@ -1,233 +0,0 @@
|
|||
# Module
|
||||
|
||||
This is the base class for all Modules defined in the nn package.
|
||||
|
||||
```python
|
||||
# .parameters()
|
||||
```
|
||||
|
||||
```python
|
||||
>>> for param in model.parameters():
|
||||
>>> print(type(param.data), param.size())
|
||||
<class 'torch.FloatTensor'> (20L,)
|
||||
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
```python
|
||||
# .state_dict()
|
||||
```
|
||||
|
||||
```python
|
||||
>>> pdict = model.state_dict()
|
||||
>>> print(pdict.keys())
|
||||
['bias', 'weight']
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
Even the Container class derives from this class.
|
||||
|
||||
An nn.Module has the following interface:
|
||||
|
||||
**Constructor:**
|
||||
nn.Module(**parameters)
|
||||
|
||||
All arguments passed in to the constructor need to be of type
|
||||
nn.Parameter or a Tensor.
|
||||
|
||||
|
||||
**forward(...)**
|
||||
|
||||
This is the function that one defines when subclassing to create
|
||||
their own modules.
|
||||
It takes in inputs and returns outputs.
|
||||
|
||||
**__call__(...)**
|
||||
|
||||
This calls the forward function, as well as the hooks
|
||||
|
||||
**register_buffer(name, tensor)**
|
||||
|
||||
This is typically used to register a buffer that is not a Parameter.
|
||||
For example, in BatchNorm, the running_mean is a buffer, so one would
|
||||
register it in the constructor of BatchNorm with:
|
||||
|
||||
`self.register_buffer('running_mean', torch.zeros(num_features))`
|
||||
|
||||
The registered buffers can simply be accessed as class members
|
||||
when needed.
|
||||
|
||||
**cpu()**
|
||||
|
||||
Recursively moves all its parameters and buffers to the CPU
|
||||
|
||||
**cuda(device_id=None)**
|
||||
Recursively moves all its parameters and buffers to the CUDA memory.
|
||||
If device_id is given, moves it to GPU number device_id
|
||||
|
||||
**float()**
|
||||
Typecasts the parameters and buffers to float
|
||||
|
||||
**double()**
|
||||
Typecasts the parameters and buffers to double
|
||||
|
||||
**register_forward_hook(name, hook)**
|
||||
|
||||
This will register a user-defined closure on the module.
|
||||
Whenever the module finishes its forward operation,
|
||||
the user closure is called.
|
||||
The signature of the closure is `def closure(input, output)`
|
||||
|
||||
**register_backward_hook(name, hook)**
|
||||
|
||||
This will register a user-defined closure on the module.
|
||||
Whenever the module finishes its backward operation,
|
||||
the user closure is called.
|
||||
The signature of the closure is `def closure(gradOutput, gradInput)`
|
||||
|
||||
**remove_forward_hook(name)**
|
||||
|
||||
Removes a registered forward hook with the given name
|
||||
|
||||
**remove_backward_hook(name)**
|
||||
|
||||
Removes a registered backward hook with the given name
|
||||
|
||||
**`[generator] parameters()`**
|
||||
|
||||
returns a generator over all learnable parameters in the container instance.
|
||||
This can typically be passed to the optimizer API
|
||||
|
||||
**`[dict] state_dict()`**
|
||||
|
||||
returns a dictionary of learnable parameters of the Module.
|
||||
For example: ['weight' : Parameter(torch.FloatTensor(20x1x5x5)),
|
||||
'bias' : Parameter(torch.FloatTensor(20)),
|
||||
]
|
||||
|
||||
**`load_state_dict(dict)`**
|
||||
|
||||
Given a parameter dict, sets the parameters of self to be the given dict.
|
||||
|
||||
**`train()`**
|
||||
|
||||
Sets the Container to training mode (for modules such as batchnorm, dropout etc.)
|
||||
|
||||
**`eval()`**
|
||||
|
||||
Sets the Container to evaluate mode (for modules such as batchnorm, dropout etc.)
|
||||
|
||||
**`zero_grad()`**
|
||||
|
||||
Zeroes the gradients of each Parameter of the module
|
||||
# Container
|
||||
|
||||
This is the base container class for all neural networks you would define.
|
||||
|
||||
```python
|
||||
# Example of using Container
|
||||
class Net(nn.Container):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__(
|
||||
conv1 = nn.Conv2d(1, 20, 5),
|
||||
relu = nn.ReLU()
|
||||
)
|
||||
def forward(self, input):
|
||||
output = self.relu(self.conv1(x))
|
||||
return output
|
||||
model = Net()
|
||||
```
|
||||
|
||||
```python
|
||||
# one can add modules to the container after construction
|
||||
model.add_module('pool1', nn.MaxPool2d(2, 2))
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
```python
|
||||
# .parameters()
|
||||
```
|
||||
|
||||
```python
|
||||
>>> for param in model.parameters():
|
||||
>>> print(type(param.data), param.size())
|
||||
<class 'torch.FloatTensor'> (20L,)
|
||||
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
```python
|
||||
# .state_dict()
|
||||
```
|
||||
|
||||
```python
|
||||
>>> pdict = model.state_dict()
|
||||
>>> print(pdict.keys())
|
||||
['conv1.bias', 'conv1.weight']
|
||||
```
|
||||
|
||||
```python
|
||||
```
|
||||
|
||||
You will subclass your container from this class.
|
||||
In the constructor you define the modules that you would want to use,
|
||||
and in the "forward" function you use the constructed modules in
|
||||
your operations.
|
||||
|
||||
To make it easier to understand, given is a small example.
|
||||
|
||||
One can also add new modules to a container after construction.
|
||||
You can do this with the add_module function
|
||||
or by assigning them as Container attributes.
|
||||
|
||||
## one can also set modules as attributes of the container
|
||||
model.conv1 = nn.Conv2d(12, 24, 3)
|
||||
The container has some important additional methods:
|
||||
|
||||
**`[generator] parameters()`**
|
||||
|
||||
returns a generator over all learnable parameters in the container instance.
|
||||
This can typically be passed to the optimizer API
|
||||
|
||||
**`[dict] state_dict()`**
|
||||
|
||||
returns a dictionary of learnable parameters of the Container.
|
||||
For example: ['conv1.weight' : Parameter(torch.FloatTensor(20x1x5x5)),
|
||||
'conv1.bias' : Parameter(torch.FloatTensor(20)),
|
||||
]
|
||||
|
||||
|
||||
**`load_state_dict(dict)`**
|
||||
|
||||
Given a parameter dict, sets the parameters of self to be the given dict.
|
||||
It loads the parameters recursively.
|
||||
Excessive or non-matching parameter names are ignored.
|
||||
For example, the input dict has an entry 'conv44.weight', but
|
||||
if the container does not have a module named 'conv44', then this entry is ignored.
|
||||
|
||||
**`children()`**
|
||||
|
||||
Returns a generator over all the children modules of self
|
||||
|
||||
**`train()`**
|
||||
|
||||
Sets the Container (and all its child modules) to training mode (for modules such as batchnorm, dropout etc.)
|
||||
|
||||
**`eval()`**
|
||||
|
||||
Sets the Container (and all its child modules) to evaluate mode (for modules such as batchnorm, dropout etc.)
|
||||
|
||||
**`apply(closure)`**
|
||||
|
||||
Applies the given closure to each parameter of the container.
|
||||
|
||||
|
||||
**__Note: Apart from these, the container will define the base functions that it has derived from nn.Module __**
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
## Dropout layers
|
||||
### Dropout
|
||||
|
||||
Randomly zeroes some of the elements of the input tensor.
|
||||
|
||||
```python
|
||||
m = nn.Dropout(p=0.2)
|
||||
input = autograd.Variable(torch.randn(20, 16))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
The elements to zero are randomized on every forward call.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
p | 0.5 | probability of an element to be zeroed.
|
||||
inplace | false | If set to True, will do this operation in-place.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | Any | Input can be of any shape
|
||||
output | Same | Output is of the same shape as input
|
||||
### Dropout2d
|
||||
|
||||
Randomly zeroes whole channels of the input tensor.
|
||||
|
||||
```python
|
||||
m = nn.Dropout2d(p=0.2)
|
||||
input = autograd.Variable(torch.randn(20, 16, 32, 32))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
The input is 4D (batch x channels, height, width) and each channel
|
||||
is of size (1, height, width).
|
||||
The channels to zero are randomized on every forward call.
|
||||
Usually the input comes from Conv2d modules.
|
||||
|
||||
As described in the paper "Efficient Object Localization Using Convolutional
|
||||
Networks" (http://arxiv.org/abs/1411.4280), if adjacent pixels within
|
||||
feature maps are strongly correlated (as is normally the case in early
|
||||
convolution layers) then iid dropout will not regularize the activations
|
||||
and will otherwise just result in an effective learning rate decrease.
|
||||
In this case, nn.Dropout2d will help promote independence between
|
||||
feature maps and should be used instead.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
p | 0.5 | probability of an element to be zeroed.
|
||||
inplace | false | If set to True, will do this operation in-place.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [*, *, *, *] | Input can be of any sizes of 4D shape
|
||||
output | Same | Output is of the same shape as input
|
||||
### Dropout3d
|
||||
|
||||
Randomly zeroes whole channels of the input tensor.
|
||||
|
||||
```python
|
||||
m = nn.Dropout3d(p=0.2)
|
||||
input = autograd.Variable(torch.randn(20, 16, 4, 32, 32))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
The input is 5D (batch x channels, depth, height, width) and each channel
|
||||
is of size (1, depth, height, width).
|
||||
The channels to zero are randomized on every forward call.
|
||||
Usually the input comes from Conv3d modules.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
p | 0.5 | probability of an element to be zeroed.
|
||||
inplace | false | If set to True, will do this operation in-place.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [*, *, *, *, *] | Input can be of any sizes of 5D shape
|
||||
output | Same | Output is of the same shape as input
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
## Linear layers
|
||||
### Linear
|
||||
|
||||
Applies a linear transformation to the incoming data, y = Ax + b
|
||||
|
||||
```python
|
||||
m = nn.Linear(20, 30)
|
||||
input = autograd.Variable(torch.randn(128, 20))
|
||||
output = m(input)
|
||||
print(output.size())
|
||||
```
|
||||
|
||||
The input is a 2D mini-batch of samples, each of size in_features
|
||||
The output will be a 2D Tensor of size mini-batch x out_features
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
in_features | | size of each input sample
|
||||
out_features | | size of each output sample
|
||||
bias | True | If set to False, the layer will not learn an additive bias.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [*, in_features] | Input can be of shape minibatch x in_features
|
||||
output | [*, out_features] | Output is of shape minibatch x out_features
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the learnable weights of the module of shape (out_features x in_features)
|
||||
bias | the learnable bias of the module of shape (out_features)
|
||||
|
|
@ -1,295 +0,0 @@
|
|||
## Loss functions
|
||||
### L1Loss
|
||||
|
||||
Creates a criterion that measures the mean absolute value of the
|
||||
|
||||
element-wise difference between input `x` and target `y`:
|
||||
|
||||
loss(x, y) = 1/n \sum |x_i - y_i|
|
||||
|
||||
`x` and `y` arbitrary shapes with a total of `n` elements each
|
||||
the sum operation still operates over all the elements, and divides by `n`.
|
||||
|
||||
The division by `n` can be avoided if one sets the internal
|
||||
variable `sizeAverage` to `False`
|
||||
### MSELoss
|
||||
|
||||
Creates a criterion that measures the mean squared error between
|
||||
|
||||
`n` elements in the input `x` and target `y`:
|
||||
loss(x, y) = 1/n \sum |x_i - y_i|^2
|
||||
`x` and `y` arbitrary shapes with a total of `n` elements each
|
||||
the sum operation still operates over all the elements, and divides by `n`.
|
||||
|
||||
The division by `n` can be avoided if one sets the internal variable
|
||||
`sizeAverage` to `False`
|
||||
By default, the losses are averaged over observations for each minibatch.
|
||||
However, if the field `sizeAverage = False`, the losses are instead summed.
|
||||
### CrossEntropyLoss
|
||||
|
||||
This criterion combines `LogSoftMax` and `ClassNLLLoss` in one single class.
|
||||
|
||||
|
||||
It is useful when training a classification problem with `n` classes.
|
||||
If provided, the optional argument `weights` should be a 1D `Tensor`
|
||||
assigning weight to each of the classes.
|
||||
This is particularly useful when you have an unbalanced training set.
|
||||
|
||||
The `input` is expected to contain scores for each class:
|
||||
`input` has to be a 2D `Tensor` of size `batch x n`.
|
||||
This criterion expects a class index (0 to nClasses-1) as the
|
||||
`target` for each value of a 1D tensor of size `n`
|
||||
|
||||
The loss can be described as:
|
||||
|
||||
loss(x, class) = -log(exp(x[class]) / (\sum_j exp(x[j])))
|
||||
= -x[class] + log(\sum_j exp(x[j]))
|
||||
|
||||
or in the case of the `weights` argument being specified:
|
||||
|
||||
loss(x, class) = weights[class] * (-x[class] + log(\sum_j exp(x[j])))
|
||||
|
||||
The losses are averaged across observations for each minibatch.
|
||||
### NLLLoss
|
||||
|
||||
The negative log likelihood loss. It is useful to train a classification problem with n classes
|
||||
|
||||
```python
|
||||
m = nn.LogSoftmax()
|
||||
loss = nn.NLLLoss()
|
||||
# input is of size nBatch x nClasses = 3 x 5
|
||||
input = autograd.Variable(torch.randn(3, 5))
|
||||
# each element in target has to have 0 <= value < nclasses
|
||||
target = autograd.Variable(torch.LongTensor([1, 0, 4]))
|
||||
output = loss(m(input), target)
|
||||
output.backward()
|
||||
```
|
||||
|
||||
|
||||
If provided, the optional argument `weights` should be a 1D Tensor assigning
|
||||
weight to each of the classes.
|
||||
This is particularly useful when you have an unbalanced training set.
|
||||
|
||||
The input given through a forward call is expected to contain log-probabilities
|
||||
of each class: input has to be a 2D Tensor of size minibatch x n
|
||||
Obtaining log-probabilities in a neural network is easily achieved by
|
||||
adding a `LogSoftmax` layer in the last layer.
|
||||
You may use `CrossEntropyLoss` instead, if you prefer not to
|
||||
add an extra layer.
|
||||
|
||||
The target that this loss expects is a class index (0 to the number of classes - 1, consistent with the example above)
|
||||
|
||||
The loss can be described as:
|
||||
loss(x, class) = -x[class]
|
||||
|
||||
or in the case of the weights argument it is specified as follows:
|
||||
loss(x, class) = -weights[class] * x[class]
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
weight | None | a manual rescaling weight given to each class. If given, has to be a Tensor of size "nclasses".
|
||||
size_average | True | By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch.
|
||||
Target Shape: [ * ] : Targets of size [minibatch], each value has to be 0 <= targets[i] <= nClasses-1
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight | the class-weights given as input to the constructor
|
||||
### NLLLoss2d
|
||||
|
||||
This is negative log likelihood loss, but for image inputs. It computes NLL loss per-pixel.
|
||||
|
||||
```python
|
||||
m = nn.Conv2d(16, 32, (3, 3)).float()
|
||||
loss = nn.NLLLoss2d()
|
||||
# input is of size nBatch x nClasses x height x width
|
||||
input = autograd.Variable(torch.randn(3, 16, 10, 10))
|
||||
# each element in target has to have 0 <= value < nclasses
|
||||
target = autograd.Variable(torch.LongTensor(3, 8, 8).random_(0, 4))
|
||||
output = loss(m(input), target)
|
||||
output.backward()
|
||||
```
|
||||
|
||||
This loss does not support per-class weights
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
size_average | True | By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch.
|
||||
Target Shape: [ * , *, *] : Targets of size minibatch x height x width, each value has to be 0 <= targets[i] <= nClasses-1
|
||||
### KLDivLoss
|
||||
|
||||
The [Kullback-Leibler divergence](http://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence) Loss
|
||||
|
||||
KL divergence is a useful distance measure for continuous distributions
|
||||
and is often useful when performing direct regression over the space of
|
||||
(discretely sampled) continuous output distributions.
|
||||
As with ClassNLLLoss, the `input` given is expected to contain
|
||||
_log-probabilities_, however unlike ClassNLLLoss, `input` is not
|
||||
restricted to a 2D Tensor, because the criterion is applied element-wise.
|
||||
|
||||
This criterion expects a `target` `Tensor` of the same size as the
|
||||
`input` `Tensor`.
|
||||
|
||||
The loss can be described as:
|
||||
loss(x, target) = 1/n \sum(target_i * (log(target_i) - x_i))
|
||||
|
||||
By default, the losses are averaged for each minibatch over observations
|
||||
*as well as* over dimensions. However, if the field
|
||||
`sizeAverage` is set to `False`, the losses are instead summed.
|
||||
### BCELoss
|
||||
|
||||
Creates a criterion that measures the Binary Cross Entropy
|
||||
|
||||
between the target and the output:
|
||||
loss(o, t) = - 1/n sum_i (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))
|
||||
|
||||
or in the case of the weights argument being specified:
|
||||
loss(o, t) = - 1/n sum_i weights[i] * (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))
|
||||
|
||||
This is used for measuring the error of a reconstruction in for example
|
||||
an auto-encoder. Note that the targets `t[i]` should be numbers between 0 and 1,
|
||||
for instance, the output of an `nn.Sigmoid` layer.
|
||||
|
||||
By default, the losses are averaged for each minibatch over observations
|
||||
*as well as* over dimensions. However, if the field `sizeAverage` is set
|
||||
to `False`, the losses are instead summed.
|
||||
### MarginRankingLoss
|
||||
|
||||
Creates a criterion that measures the loss given
|
||||
|
||||
inputs `x1`, `x2`, two 1D mini-batch `Tensor`s,
|
||||
and a label 1D mini-batch tensor `y` with values (`1` or `-1`).
|
||||
|
||||
If `y == 1` then it assumed the first input should be ranked higher
|
||||
(have a larger value) than the second input, and vice-versa for `y == -1`.
|
||||
|
||||
The loss function for each sample in the mini-batch is:
|
||||
|
||||
loss(x, y) = max(0, -y * (x1 - x2) + margin)
|
||||
|
||||
if the internal variable `sizeAverage = True`,
|
||||
the loss function averages the loss over the batch samples;
|
||||
if `sizeAverage = False`, then the loss function sums over the batch samples.
|
||||
By default, `sizeAverage` equals to `True`.
|
||||
### HingeEmbeddingLoss
|
||||
|
||||
Measures the loss given an input `x` which is a 2D mini-batch tensor
|
||||
|
||||
and labels `y`, a 1D tensor containing values (`1` or `-1`).
|
||||
This is usually used for measuring whether two inputs are similar or dissimilar,
|
||||
e.g. using the L1 pairwise distance, and is typically used for learning
|
||||
nonlinear embeddings or semi-supervised learning.
|
||||
|
||||
{ x_i, if y_i == 1
|
||||
loss(x, y) = 1/n {
|
||||
{ max(0, margin - x_i), if y_i == -1
|
||||
|
||||
`x` and `y` arbitrary shapes with a total of `n` elements each
|
||||
the sum operation still operates over all the elements, and divides by `n`.
|
||||
(the division by `n` can be avoided if one sets the internal variable `sizeAverage=False`).
|
||||
The `margin` has a default value of `1`, or can be set in the constructor.
|
||||
### MultiLabelMarginLoss
|
||||
|
||||
Creates a criterion that optimizes a multi-class multi-classification
|
||||
|
||||
hinge loss (margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and
|
||||
output `y` (which is a 2D `Tensor` of target class indices).
|
||||
For each sample in the mini-batch:
|
||||
|
||||
loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x:size(1)
|
||||
|
||||
where `i == 0` to `x.size(0)`, `j == 0` to `y.size(0)`,
|
||||
`y[j] != 0`, and `i != y[j]` for all `i` and `j`.
|
||||
|
||||
`y` and `x` must have the same size.
|
||||
The criterion only considers the first non zero `y[j]` targets.
|
||||
This allows for different samples to have variable amounts of target classes
|
||||
### SmoothL1Loss
|
||||
|
||||
Creates a criterion that uses a squared term if the absolute
|
||||
|
||||
element-wise error falls below 1 and an L1 term otherwise.
|
||||
It is less sensitive to outliers than the `MSELoss` and in some cases
|
||||
prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick).
|
||||
Also known as the Huber loss.
|
||||
|
||||
{ 0.5 * (x_i - y_i)^2, if |x_i - y_i| < 1
|
||||
loss(x, y) = 1/n \sum {
|
||||
{ |x_i - y_i| - 0.5, otherwise
|
||||
|
||||
`x` and `y` arbitrary shapes with a total of `n` elements each
|
||||
the sum operation still operates over all the elements, and divides by `n`.
|
||||
|
||||
The division by `n` can be avoided if one sets the internal variable
|
||||
`sizeAverage` to `False`
|
||||
### SoftMarginLoss
|
||||
|
||||
Creates a criterion that optimizes a two-class classification
|
||||
|
||||
logistic loss between input `x` (a 2D mini-batch `Tensor`) and
|
||||
target `y` (which is a tensor containing either `1`s or `-1`s).
|
||||
|
||||
loss(x, y) = sum_i (log(1 + exp(-y[i]*x[i]))) / x:nElement()
|
||||
|
||||
The normalization by the number of elements in the input can be disabled by
|
||||
setting `self.sizeAverage` to `False`.
|
||||
### MultiLabelSoftMarginLoss
|
||||
|
||||
Creates a criterion that optimizes a multi-label one-versus-all
|
||||
|
||||
loss based on max-entropy, between input `x` (a 2D mini-batch `Tensor`) and
|
||||
target `y` (a binary 2D `Tensor`). For each sample in the minibatch:
|
||||
|
||||
loss(x, y) = - sum_i (y[i] log( exp(x[i]) / (1 + exp(x[i])))
|
||||
+ (1-y[i]) log(1/(1+exp(x[i])))) / x:nElement()
|
||||
|
||||
where `i == 0` to `x.nElement()-1`, `y[i] in {0,1}`.
|
||||
`y` and `x` must have the same size.
|
||||
### CosineEmbeddingLoss
|
||||
|
||||
Creates a criterion that measures the loss given an input tensors x1, x2
|
||||
|
||||
and a `Tensor` label `y` with values 1 or -1.
|
||||
This is used for measuring whether two inputs are similar or dissimilar,
|
||||
using the cosine distance, and is typically used for learning nonlinear
|
||||
embeddings or semi-supervised learning.
|
||||
|
||||
`margin` should be a number from `-1` to `1`, `0` to `0.5` is suggested.
|
||||
If `margin` is missing, the default value is `0`.
|
||||
|
||||
The loss function for each sample is:
|
||||
|
||||
{ 1 - cos(x1, x2), if y == 1
|
||||
loss(x, y) = {
|
||||
{ max(0, cos(x1, x2) - margin), if y == -1
|
||||
|
||||
If the internal variable `sizeAverage` is equal to `True`,
|
||||
the loss function averages the loss over the batch samples;
|
||||
if `sizeAverage` is `False`, then the loss function sums over the
|
||||
batch samples. By default, `sizeAverage = True`.
|
||||
### MultiMarginLoss
|
||||
|
||||
Creates a criterion that optimizes a multi-class classification hinge loss
|
||||
|
||||
(margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and
|
||||
output `y` (which is a 1D tensor of target class indices, `0` <= `y` <= `x.size(1)`):
|
||||
|
||||
For each mini-batch sample:
|
||||
loss(x, y) = sum_i(max(0, (margin - x[y] + x[i]))^p) / x.size(0)
|
||||
where `i == 0` to `x.size(0)` and `i != y`.
|
||||
|
||||
Optionally, you can give non-equal weighting on the classes by passing
|
||||
a 1D `weights` tensor into the constructor.
|
||||
|
||||
The loss function then becomes:
|
||||
loss(x, y) = sum_i(max(0, w[y] * (margin - x[y] - x[i]))^p) / x.size(0)
|
||||
|
||||
By default, the losses are averaged over observations for each minibatch.
|
||||
However, if the field `sizeAverage` is set to `False`,
|
||||
the losses are instead summed.
|
||||
|
|
@ -1,142 +0,0 @@
|
|||
## Normalization layers
|
||||
### BatchNorm1d
|
||||
|
||||
Applies Batch Normalization over a 2d input that is seen as a mini-batch of 1d inputs
|
||||
|
||||
```python
|
||||
x - mean(x)
|
||||
y = ----------------------------- * gamma + beta
|
||||
standard_deviation(x) + eps
|
||||
```
|
||||
|
||||
```python
|
||||
# With Learnable Parameters
|
||||
m = nn.BatchNorm1d(100)
|
||||
# Without Learnable Parameters
|
||||
m = nn.BatchNorm1d(100, affine=False)
|
||||
input = autograd.Variable(torch.randn(20, 100))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
|
||||
|
||||
The mean and standard-deviation are calculated per-dimension over
|
||||
the mini-batches and gamma and beta are learnable parameter vectors
|
||||
of size N (where N is the input size).
|
||||
|
||||
During training, this layer keeps a running estimate of its computed mean
|
||||
and variance. The running sum is kept with a default momentum of 0.1
|
||||
During evaluation, this running mean/variance is used for normalization.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
num_features | | the size of each 1D input in the mini-batch
|
||||
eps | 1e-5 | a value added to the denominator for numerical stability.
|
||||
momentum | 0.1 | the value used for the running_mean and running_var computation.
|
||||
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , num_features ] | 2D Tensor of nBatches x num_features
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a normalized tensor in the batch dimension
|
||||
### BatchNorm2d
|
||||
|
||||
Applies Batch Normalization over a 4d input that is seen as a mini-batch of 3d inputs
|
||||
|
||||
```python
|
||||
x - mean(x)
|
||||
y = ----------------------------- * gamma + beta
|
||||
standard_deviation(x) + eps
|
||||
```
|
||||
|
||||
```python
|
||||
# With Learnable Parameters
|
||||
m = nn.BatchNorm2d(100)
|
||||
# Without Learnable Parameters
|
||||
m = nn.BatchNorm2d(100, affine=False)
|
||||
input = autograd.Variable(torch.randn(20, 100, 35, 45))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
|
||||
|
||||
The mean and standard-deviation are calculated per-dimension over
|
||||
the mini-batches and gamma and beta are learnable parameter vectors
|
||||
of size N (where N is the input size).
|
||||
|
||||
During training, this layer keeps a running estimate of its computed mean
|
||||
and variance. The running sum is kept with a default momentum of 0.1
|
||||
During evaluation, this running mean/variance is used for normalization.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
num_features | | num_features from an expected input of size batch_size x num_features x height x width
|
||||
eps | 1e-5 | a value added to the denominator for numerical stability.
|
||||
momentum | 0.1 | the value used for the running_mean and running_var computation.
|
||||
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , num_features , *, * ] | 4D Tensor of batch_size x num_features x height x width
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a normalized tensor in the batch dimension
|
||||
### BatchNorm3d
|
||||
|
||||
Applies Batch Normalization over a 5d input that is seen as a mini-batch of 4d inputs
|
||||
|
||||
```python
|
||||
x - mean(x)
|
||||
y = ----------------------------- * gamma + beta
|
||||
standard_deviation(x) + eps
|
||||
```
|
||||
|
||||
```python
|
||||
# With Learnable Parameters
|
||||
m = nn.BatchNorm3d(100)
|
||||
# Without Learnable Parameters
|
||||
m = nn.BatchNorm3d(100, affine=False)
|
||||
input = autograd.Variable(torch.randn(20, 100, 35, 45, 10))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
|
||||
|
||||
The mean and standard-deviation are calculated per-dimension over
|
||||
the mini-batches and gamma and beta are learnable parameter vectors
|
||||
of size N (where N is the input size).
|
||||
|
||||
During training, this layer keeps a running estimate of its computed mean
|
||||
and variance. The running sum is kept with a default momentum of 0.1
|
||||
During evaluation, this running mean/variance is used for normalization.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
num_features | | num_features from an expected input of size batch_size x num_features x height x width
|
||||
eps | 1e-5 | a value added to the denominator for numerical stability.
|
||||
momentum | 0.1 | the value used for the running_mean and running_var computation.
|
||||
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , num_features , * , * , * ] | 5D Tensor of batch_size x num_features x depth x height x width
|
||||
output | Same | Output has the same shape as input
|
||||
|
||||
#### Returns
|
||||
a normalized tensor in the batch dimension
|
||||
|
|
@ -1,308 +0,0 @@
|
|||
## Pooling Layers
|
||||
### MaxPool1d
|
||||
|
||||
Applies a 1D max pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
The output value of the layer with input (b x C x W) and output (b x C x oW)
|
||||
can be precisely described as:
|
||||
output[b_i][c_i][w_i] = max_{k=1, K} input[b_i][c_i][stride_w * w_i + k]
|
||||
```
|
||||
|
||||
```python
|
||||
# pool of size=3, stride=2
|
||||
m = nn.MaxPool1d(3, stride=2)
|
||||
input = autograd.Variable(torch.randn(20, 16, 50))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window to take a max over
|
||||
stride | | the stride of the window
|
||||
padding | 0 | implicit padding to be added.
|
||||
dilation | 1 | a parameter that controls the stride of elements in the window.
|
||||
return_indices | False | if True, will return the indices along with the outputs. Useful when Unpooling later.
|
||||
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , * ] | Input is minibatch x channels x iW
|
||||
output | [ * , * , * ] | Output shape = minibatch x channels x floor((iW + 2*padW - kernel_size) / stride + 1)
|
||||
### MaxPool2d
|
||||
|
||||
Applies a 2D max pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
The output value of the layer with input (b x C x H x W) and output (b x C x oH x oW)
|
||||
can be precisely described as:
|
||||
output[b_i][c_i][h_i][w_i] = max_{{kh=1, KH}, {kw=1, kW}} input[b_i][c_i][stride_h * h_i + kh][stride_w * w_i + kw]
|
||||
```
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, stride=2
|
||||
m = nn.MaxPool2d(3, stride=2)
|
||||
# pool of non-square window
|
||||
m = nn.MaxPool2d((3, 2), stride=(2, 1))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 32))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
|
||||
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
|
||||
dilation | 1 | a parameter that controls the stride of elements in the window. Can be a single number or a tuple.
|
||||
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool2d .
|
||||
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
|
||||
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
|
||||
### MaxPool3d
|
||||
|
||||
Applies a 3D max pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, stride=2
|
||||
m = nn.MaxPool3d(3, stride=2)
|
||||
# pool of non-square window
|
||||
m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50,44, 31))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
|
||||
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
|
||||
dilation | 1 | a parameter that controls the stride of elements in the window. Can be a single number or a tuple.
|
||||
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool3d .
|
||||
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
|
||||
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x floor((iT + 2*padT - kT) / sT + 1) x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
|
||||
### MaxUnpool2d
|
||||
|
||||
Computes the inverse operation of MaxPool2d
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, stride=2
|
||||
m = nn.MaxPool2d(2, stride=2, return_indices = True)
|
||||
mu = nn.MaxUnpool2d(2, stride=2)
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 32))
|
||||
output, indices = m(input)
|
||||
unpooled_output = mu.forward(output, indices)
|
||||
# exact output size can be also specified as an argument
|
||||
input = autograd.Variable(torch.randn(1, 16, 11, 11))
|
||||
downsample = nn.MaxPool2d(3, 3, return_indices=True)
|
||||
upsample = nn.MaxUnpool2d(3, 3)
|
||||
h, indices = downsample(input)
|
||||
output = upsample(h, indices, output_size=input.size())
|
||||
```
|
||||
|
||||
MaxPool2d is not invertible, as the locations of the max locations are lost.
|
||||
MaxUnpool2d takes in as input the output of MaxPool2d and the indices of the Max locations
|
||||
and computes the inverse.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the max window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
|
||||
padding | 0 | implicit padding that was added to the input. Can be a single number or a tuple.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
|
||||
output | [ * , * , *, * ] | Output shape is minibatch x channels x padH x (iH - 1) * sH + kH x padW x (iW - 1) * sW + kW, or as specified to the call.
|
||||
### MaxUnpool3d
|
||||
|
||||
Computes the inverse operation of MaxPool3d
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, stride=2
|
||||
m = nn.MaxPool3d(3, stride=2, return_indices = True)
|
||||
mu = nn.MaxUnpool3d(3, stride=2)
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 32, 15))
|
||||
output, indices = m(input)
|
||||
unpooled_output = mu.forward(output, indices)
|
||||
```
|
||||
|
||||
MaxPool3d is not invertible, as the locations of the max locations are lost.
|
||||
MaxUnpool3d takes in as input the output of MaxPool3d and the indices of the Max locations
|
||||
and computes the inverse.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the max window. Can be a single number k (for a square kernel of k x k) or a tuple (kt x kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
|
||||
padding | 0 | implicit padding that was added to the input. Can be a single number or a tuple.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
|
||||
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x padT x (iT - 1) * sT + kT x padH x (iH - 1) * sH + kH x padW x (iW - 1) * sW + kW
|
||||
### AvgPool2d
|
||||
|
||||
Applies a 2D average pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
The output value of the layer with input (b x C x H x W) and output (b x C x oH x oW)
|
||||
can be precisely described as:
|
||||
output[b_i][c_i][h_i][w_i] = (1 / K) * sum_{kh=1, KH} sum_{kw=1, kW} input[b_i][c_i][stride_h * h_i + kh][stride_w * w_i + kw]
|
||||
```
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, stride=2
|
||||
m = nn.AvgPool2d(3, stride=2)
|
||||
# pool of non-square window
|
||||
m = nn.AvgPool2d((3, 2), stride=(2, 1))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 32))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
|
||||
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
|
||||
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
|
||||
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
|
||||
### AvgPool3d
|
||||
|
||||
Applies a 3D average pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, stride=2
|
||||
m = nn.AvgPool3d(3, stride=2)
|
||||
# pool of non-square window
|
||||
m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50,44, 31))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window to take a average over. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
|
||||
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x floor((iT + 2*padT - kT) / sT + 1) x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
|
||||
### FractionalMaxPool2d
|
||||
|
||||
Applies a 2D fractional max pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
# pool of square window of size=3, and target output size 13x12
|
||||
m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
|
||||
# pool of square window and target output size being half of input image size
|
||||
m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 32))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
|
||||
Fractional MaxPooling is described in detail in the paper ["Fractional Max-Pooling" by Ben Graham](http://arxiv.org/abs/1412.6071)
|
||||
The max-pooling operation is applied in kHxkW regions by a stochastic
|
||||
step size determined by the target output size.
|
||||
The number of output features is equal to the number of input planes.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
output_size | | the target output size of the image of the form oH x oW. Can be a tuple (oH, oW) or a single number oH for a square image oH x oH
|
||||
output_ratio | | If one wants to have an output size as a ratio of the input size, this option can be given. This has to be a number or tuple in the range (0, 1)
|
||||
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool2d .
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
|
||||
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
|
||||
### LPPool2d
|
||||
|
||||
Applies a 2D power-average pooling over an input signal composed of several input
|
||||
|
||||
```python
|
||||
# power-2 pool of square window of size=3, stride=2
|
||||
m = nn.LPPool2d(2, 3, stride=2)
|
||||
# pool of non-square window of power 1.2
|
||||
m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
|
||||
input = autograd.Variable(torch.randn(20, 16, 50, 32))
|
||||
output = m(input)
|
||||
```
|
||||
|
||||
planes.
|
||||
On each window, the function computed is: f(X) = pow(sum(pow(X, p)), 1/p)
|
||||
At p = infinity, one gets Max Pooling
|
||||
At p = 1, one gets Average Pooling
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
kernel_size | | the size of the window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
|
||||
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
|
||||
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
|
||||
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
|
||||
|
|
@ -1,346 +0,0 @@
|
|||
## Recurrent layers
|
||||
### RNN
|
||||
|
||||
Applies a multi-layer Elman RNN with tanh or ReLU non-linearity to an input sequence.
|
||||
|
||||
```python
|
||||
h_t = tanh(w_ih * x_t + b_ih + w_hh * h_(t-1) + b_hh)
|
||||
```
|
||||
|
||||
```python
|
||||
rnn = nn.RNN(10, 20, 2)
|
||||
input = Variable(torch.randn(5, 3, 10))
|
||||
h0 = Variable(torch.randn(2, 3, 20))
|
||||
output, hn = rnn(input, h0)
|
||||
```
|
||||
|
||||
|
||||
|
||||
For each element in the input sequence, each layer computes the following
|
||||
function:
|
||||
where `h_t` is the hidden state at time t, and `x_t` is the hidden
|
||||
state of the previous layer at time t or `input_t` for the first layer.
|
||||
If nonlinearity='relu', then ReLU is used instead of tanh.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input_size | | The number of expected features in the input x
|
||||
hidden_size | | The number of features in the hidden state h
|
||||
num_layers | | the number of recurrent layers.
|
||||
nonlinearity | 'tanh' | The non-linearity to use ['tanh'|'relu'].
|
||||
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
|
||||
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
|
||||
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
|
||||
bidirectional | False | If True, becomes a bidirectional RNN.
|
||||
|
||||
#### Inputs
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
|
||||
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
|
||||
|
||||
#### Outputs
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_k) from the last layer of the RNN, for each k
|
||||
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for k=seq_len
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer, of shape (input_size x hidden_size)
|
||||
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer, of shape (hidden_size x hidden_size)
|
||||
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer, of shape (hidden_size)
|
||||
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer, of shape (hidden_size)
|
||||
### LSTM
|
||||
|
||||
Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
|
||||
|
||||
```python
|
||||
i_t = sigmoid(W_ii x_t + b_ii + W_hi h_(t-1) + b_hi)
|
||||
f_t = sigmoid(W_if x_t + b_if + W_hf h_(t-1) + b_hf)
|
||||
g_t = tanh(W_ig x_t + b_ig + W_hc h_(t-1) + b_hg)
|
||||
o_t = sigmoid(W_io x_t + b_io + W_ho h_(t-1) + b_ho)
|
||||
c_t = f_t * c_(t-1) + i_t * g_t
|
||||
h_t = o_t * tanh(c_t)
|
||||
```
|
||||
|
||||
```python
|
||||
rnn = nn.LSTM(10, 20, 2)
|
||||
input = Variable(torch.randn(5, 3, 10))
|
||||
h0 = Variable(torch.randn(2, 3, 20))
|
||||
c0 = Variable(torch.randn(2, 3, 20))
|
||||
output, hn = rnn(input, (h0, c0))
|
||||
```
|
||||
|
||||
|
||||
|
||||
For each element in the input sequence, each layer computes the following
|
||||
function:
|
||||
where `h_t` is the hidden state at time t, `c_t` is the cell state at time t,
|
||||
`x_t` is the hidden state of the previous layer at time t or input_t for the first layer,
|
||||
and `i_t`, `f_t`, `g_t`, `o_t` are the input, forget, cell, and out gates, respectively.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input_size | | The number of expected features in the input x
|
||||
hidden_size | | The number of features in the hidden state h
|
||||
num_layers | | the number of recurrent layers.
|
||||
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
|
||||
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
|
||||
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
|
||||
bidirectional | False | If True, becomes a bidirectional RNN.
|
||||
|
||||
#### Inputs
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
|
||||
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
|
||||
c_0 | | A (num_layers x batch x hidden_size) tensor containing the initial cell state for each element in the batch.
|
||||
|
||||
#### Outputs
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_t) from the last layer of the RNN, for each t
|
||||
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for t=seq_len
|
||||
c_n | A (num_layers x batch x hidden_size) tensor containing the cell state for t=seq_len
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer (W_ii|W_if|W_ig|W_io), of shape (input_size x 4*hidden_size)
|
||||
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer (W_hi|W_hf|W_hg|W_ho), of shape (hidden_size x 4*hidden_size)
|
||||
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer (b_ii|b_if|b_ig|b_io), of shape (4*hidden_size)
|
||||
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer (b_hi|b_hf|b_hg|b_ho), of shape (4*hidden_size)
|
||||
### GRU
|
||||
|
||||
Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
|
||||
|
||||
```python
|
||||
r_t = sigmoid(W_ir x_t + b_ir + W_hr h_(t-1) + b_hr)
|
||||
i_t = sigmoid(W_ii x_t + b_ii + W_hi h_(t-1) + b_hi)
|
||||
n_t = tanh(W_in x_t + resetgate * W_hn h_(t-1))
|
||||
h_t = (1 - i_t) * n_t + i_t * h_(t-1)
|
||||
```
|
||||
|
||||
```python
|
||||
rnn = nn.GRU(10, 20, 2)
|
||||
input = Variable(torch.randn(5, 3, 10))
|
||||
h0 = Variable(torch.randn(2, 3, 20))
|
||||
output, hn = rnn(input, h0)
|
||||
```
|
||||
|
||||
|
||||
|
||||
For each element in the input sequence, each layer computes the following
|
||||
function:
|
||||
where `h_t` is the hidden state at time t, `x_t` is the hidden
|
||||
state of the previous layer at time t or input_t for the first layer,
|
||||
and `r_t`, `i_t`, `n_t` are the reset, input, and new gates, respectively.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input_size | | The number of expected features in the input x
|
||||
hidden_size | | The number of features in the hidden state h
|
||||
num_layers | | the number of recurrent layers.
|
||||
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
|
||||
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
|
||||
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
|
||||
bidirectional | False | If True, becomes a bidirectional RNN.
|
||||
|
||||
#### Inputs
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
|
||||
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
|
||||
|
||||
#### Outputs
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_t) from the last layer of the RNN, for each t
|
||||
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for t=seq_len
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer (W_ir|W_ii|W_in), of shape (input_size x 3*hidden_size)
|
||||
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer (W_hr|W_hi|W_hn), of shape (hidden_size x 3*hidden_size)
|
||||
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer (b_ir|b_ii|b_in), of shape (3*hidden_size)
|
||||
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer (b_hr|b_hi|b_hn), of shape (3*hidden_size)
|
||||
### RNNCell
|
||||
|
||||
An Elman RNN cell with tanh or ReLU non-linearity.
|
||||
|
||||
```python
|
||||
h' = tanh(w_ih * x + b_ih + w_hh * h + b_hh)
|
||||
```
|
||||
|
||||
```python
|
||||
rnn = nn.RNNCell(10, 20)
|
||||
input = Variable(torch.randn(6, 3, 10))
|
||||
hx = Variable(torch.randn(3, 20))
|
||||
output = []
|
||||
for i in range(6):
|
||||
hx = rnn(input, hx)
|
||||
output[i] = hx
|
||||
```
|
||||
|
||||
If nonlinearity='relu', then ReLU is used in place of tanh.
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input_size | | The number of expected features in the input x
|
||||
hidden_size | | The number of features in the hidden state h
|
||||
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
|
||||
nonlinearity | 'tanh' | The non-linearity to use ['tanh'|'relu'].
|
||||
|
||||
#### Inputs
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input | | A (batch x input_size) tensor containing input features
|
||||
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
|
||||
|
||||
#### Outputs
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
|
||||
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
|
||||
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
|
||||
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
|
||||
### LSTMCell
|
||||
|
||||
A long short-term memory (LSTM) cell.
|
||||
|
||||
```python
|
||||
i = sigmoid(W_ii x + b_ii + W_hi h + b_hi)
|
||||
f = sigmoid(W_if x + b_if + W_hf h + b_hf)
|
||||
g = tanh(W_ig x + b_ig + W_hc h + b_hg)
|
||||
o = sigmoid(W_io x + b_io + W_ho h + b_ho)
|
||||
c' = f * c + i * g
|
||||
h' = o * tanh(c')
|
||||
```
|
||||
|
||||
```python
|
||||
rnn = nn.LSTMCell(10, 20)
|
||||
input = Variable(torch.randn(6, 3, 10))
|
||||
hx = Variable(torch.randn(3, 20))
|
||||
cx = Variable(torch.randn(3, 20))
|
||||
output = []
|
||||
for i in range(6):
|
||||
hx, cx = rnn(input, (hx, cx))
|
||||
output[i] = hx
|
||||
```
|
||||
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input_size | | The number of expected features in the input x
|
||||
hidden_size | | The number of features in the hidden state h
|
||||
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
|
||||
|
||||
#### Inputs
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input | | A (batch x input_size) tensor containing input features
|
||||
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
|
||||
|
||||
#### Outputs
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
|
||||
c' | A (batch x hidden_size) tensor containing the next cell state for each element in the batch
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
|
||||
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
|
||||
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
|
||||
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
|
||||
### GRUCell
|
||||
|
||||
A gated recurrent unit (GRU) cell
|
||||
|
||||
```python
|
||||
r = sigmoid(W_ir x + b_ir + W_hr h + b_hr)
|
||||
i = sigmoid(W_ii x + b_ii + W_hi h + b_hi)
|
||||
n = tanh(W_in x + resetgate * W_hn h)
|
||||
h' = (1 - i) * n + i * h
|
||||
```
|
||||
|
||||
```python
|
||||
rnn = nn.GRUCell(10, 20)
|
||||
input = Variable(torch.randn(6, 3, 10))
|
||||
hx = Variable(torch.randn(3, 20))
|
||||
output = []
|
||||
for i in range(6):
|
||||
hx = rnn(input, hx)
|
||||
output[i] = hx
|
||||
```
|
||||
|
||||
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input_size | | The number of expected features in the input x
|
||||
hidden_size | | The number of features in the hidden state h
|
||||
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
|
||||
|
||||
#### Inputs
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
input | | A (batch x input_size) tensor containing input features
|
||||
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
|
||||
|
||||
#### Outputs
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
|
||||
|
||||
#### Members
|
||||
|
||||
Parameter | Description
|
||||
--------- | -----------
|
||||
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
|
||||
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
|
||||
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
|
||||
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
## Sparse layers
|
||||
### Embedding
|
||||
|
||||
A simple lookup table that stores embeddings of a fixed dictionary and size
|
||||
|
||||
```python
|
||||
# an Embedding module containing 10 tensors of size 3
|
||||
embedding = nn.Embedding(10, 3)
|
||||
# a batch of 2 samples of 4 indices each
|
||||
input = torch.LongTensor([[1,2,4,5],[4,3,2,9]])
|
||||
print(embedding(input))
|
||||
# example with padding_idx
|
||||
embedding = nn.Embedding(10, 3, padding_idx=0)
|
||||
input = torch.LongTensor([[0,2,0,5]])
|
||||
print(embedding(input))
|
||||
```
|
||||
|
||||
This module is often used to store word embeddings and retrieve them using indices.
|
||||
The input to the module is a list of indices, and the output is the corresponding
|
||||
word embeddings.
|
||||
|
||||
#### Constructor Arguments
|
||||
|
||||
Parameter | Default | Description
|
||||
--------- | ------- | -----------
|
||||
num_embeddings | | size of the dictionary of embeddings
|
||||
embedding_dim | | the size of each embedding vector
|
||||
padding_idx | None | If given, pads the output with zeros whenever it encounters the index.
|
||||
max_norm | None | If given, will renormalize the embeddings to always have a norm lesser than this
|
||||
norm_type | | The p of the p-norm to compute for the max_norm option
|
||||
scale_grad_by_freq | | if given, this will scale gradients by the frequency of the words in the dictionary.
|
||||
|
||||
#### Expected Shape
|
||||
| Shape | Description
|
||||
------ | ----- | ------------
|
||||
input | [ *, * ] | Input is a 2D mini_batch LongTensor of m x n indices to extract from the Embedding dictionary
|
||||
output | [ * , *, * ] | Output shape = m x n x embedding_dim
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
# torch.optim
|
||||
|
||||
The optim package in Torch provides tools for optimizing neural networks
|
||||
using a wide variety of optimization methods such as SGD, Adam, etc.
|
||||
|
||||
Currently, the following optimization methods are supported, typically with
|
||||
options such as weight decay and other bells and whistles.
|
||||
|
||||
- SGD `(params, lr=required, momentum=0, dampening=0)`
|
||||
- AdaDelta `(params, rho=0.9, eps=1e-6, weight_decay=0)`
|
||||
- Adagrad `(params, lr=1e-2, lr_decay=0, weight_decay=0)`
|
||||
- Adam `(params, lr=1e-2, betas=(0.9, 0.999), epsilon=1e-8, weight_decay=0)`
|
||||
- AdaMax `(params, lr=1e-2, betas=(0.9, 0.999), eps=1e-38, weight_decay=0)`
|
||||
- Averaged SGD `(params, lr=1e-2, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0)`
|
||||
- RProp `(params, lr=1e-2, etas=(0.5, 1.2), step_sizes=(1e-6, 50))`
|
||||
- RMSProp `(params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0)`
|
||||
|
||||
|
||||
The usage of the Optim package itself is as follows.
|
||||
|
||||
1. Construct an optimizer
|
||||
2. Use `optimizer.step(...)` to optimize.
|
||||
- Call `optimizer.zero_grad()` to zero out the gradient buffers when appropriate
|
||||
|
||||
## 1. Constructing the optimizer
|
||||
|
||||
One first constructs an `Optimizer` object by giving it a list of parameters
|
||||
to optimize, as well as the optimizer options,such as learning rate, weight decay, etc.
|
||||
|
||||
Examples:
|
||||
|
||||
`optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)`
|
||||
|
||||
`optimizer = optim.Adam([var1, var2], lr = 0.0001)`
|
||||
|
||||
### Per-parameter options
|
||||
|
||||
In a more advanced usage, one can specify per-layer options by passing each parameter group along with its custom options.
|
||||
|
||||
**__Any parameter group that does not have an attribute defined will use the default attributes.__**
|
||||
|
||||
This is very useful when one wants to specify per-layer learning rates for example.
|
||||
|
||||
Example:
|
||||
|
||||
`optim.SGD([{'params': model1.parameters()}, {'params': model2.parameters(), 'lr': 1e-3}], lr=1e-2, momentum=0.9)`
|
||||
|
||||
`model1`'s parameters will use the default learning rate of `1e-2` and momentum of `0.9`
|
||||
`model2`'s parameters will use a learning rate of `1e-3`, and the default momentum of `0.9`
|
||||
|
||||
Then, you can use the optimizer by calling `optimizer.zero_grad()` and `optimizer.step(...)`. Read the next sections.
|
||||
|
||||
## 2. Taking an optimization step using `Optimizer.step(...)`
|
||||
|
||||
The step function has the following two signatures:
|
||||
|
||||
### a. `Optimizer.step(closure)`
|
||||
|
||||
The `step` function takes a user-defined closure that computes f(x) and returns the loss.
|
||||
|
||||
The closure needs to do the following:
|
||||
- Optimizer.zero_grad()
|
||||
- Compute the loss
|
||||
- Call loss.backward()
|
||||
- return the loss
|
||||
|
||||
Example 1: training a neural network
|
||||
|
||||
```python
|
||||
# Example 1: training a neural network with optimizer.step(closure)
|
||||
net = MNISTNet()
|
||||
criterion = ClassNLLLoss()
|
||||
optimizer = optim.SGD(net.parameters(), lr=0.001)
|
||||
|
||||
for data in data_batches:
|
||||
input, target = data
|
||||
def closure():
|
||||
optimizer.zero_grad()
|
||||
output = net(input)
|
||||
loss = criterion(output, target)
|
||||
loss.backward()
|
||||
return loss
|
||||
optimizer.step(closure)
|
||||
```
|
||||
|
||||
Notes: Why is this required? Why can't we simply have the optimizer take the parameters and grads?
|
||||
Some optimization algorithms such as Conjugate Gradient and LBFGS need to evaluate their function
|
||||
multiple times. For such optimization methods, the function (i.e. the closure) has to be defined.
|
||||
|
||||
|
||||
### b. `Optimizer.step()`
|
||||
|
||||
This is a simplified usage that supports most, but not all optimization algorithms. For example, it does not support LBFGS or Conjugate Gradient.
|
||||
|
||||
The usage for this is to simply call the function after the backward() is called on your model.
|
||||
|
||||
Example 2: training a neural network
|
||||
|
||||
```python
|
||||
# Example 2: training a neural network with optimizer.step()
|
||||
net = MNISTNet()
|
||||
criterion = ClassNLLLoss()
|
||||
optimizer = optim.SGD(net.parameters(), lr=0.001)
|
||||
|
||||
for data in data_batches:
|
||||
input, target = data
|
||||
optimizer.zero_grad()
|
||||
output = net(input)
|
||||
loss = criterion(output, target)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
```
|
||||
|
||||
|
||||
|
|
@ -1,417 +0,0 @@
|
|||
# Tensors
|
||||
|
||||
A `Tensor` is a potentially multi-dimensional matrix.
|
||||
The number of dimensions is unlimited.
|
||||
|
||||
The `Tensor` set of classes are probably the most important class in
|
||||
`torch`. Almost every package depends on these classes. They are *__the__*
|
||||
class for handling numeric data. As with pretty much anything in
|
||||
[torch], tensors are serializable with `torch.save` and `torch.load`
|
||||
|
||||
There are 7 Tensor classes in torch:
|
||||
|
||||
- `torch.FloatTensor` : Signed 32-bit floating point tensor
|
||||
- `torch.DoubleTensor` : Signed 64-bit floating point tensor
|
||||
- `torch.ByteTensor` : Unsigned 8-bit integer tensor
|
||||
- `torch.CharTensor` : Signed 8-bit integer tensor
|
||||
- `torch.ShortTensor` : Signed 16-bit integer tensor
|
||||
- `torch.IntTensor` : Signed 32-bit integer tensor
|
||||
- `torch.LongTensor` : Signed 64-bit integer tensor
|
||||
|
||||
The data in these tensors lives on the system memory connected to your CPU.
|
||||
|
||||
Most numeric operations are implemented _only_ for `FloatTensor` and `DoubleTensor`.
|
||||
Other Tensor types are useful if you want to save memory space or specifically
|
||||
do integer operations.
|
||||
|
||||
The number of dimensions of a `Tensor` can be queried by
|
||||
`ndimension()` or `dim()`. Size of the `i-th` dimension is
|
||||
returned by `size(i)`. A tuple containing the size of all the dimensions
|
||||
can be returned by `size()`.
|
||||
|
||||
```python
|
||||
import torch
|
||||
|
||||
# allocate a matrix of shape 3x4
|
||||
a = torch.FloatTensor(3, 4)
|
||||
print(a)
|
||||
|
||||
# convert this into a LongTensor
|
||||
b = a.long()
|
||||
print(b)
|
||||
|
||||
# print the size of the tensor
|
||||
print(a.size())
|
||||
|
||||
# print the number of dimensions
|
||||
print(a.dim())
|
||||
```
|
||||
|
||||
These tensors can be converted to numpy arrays very efficiently
|
||||
with zero memory copies.
|
||||
For this, the two provided functions are `.numpy()` and `torch.from_numpy()`
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
# convert to numpy
|
||||
c = a.numpy()
|
||||
print(type(c))
|
||||
```
|
||||
|
||||
When using GPUs, each of the classes above has an equivalent
|
||||
class such as: `torch.cuda.FloatTensor`, `torch.cuda.LongTensor`, etc.
|
||||
When one allocates a CUDA tensor, the data in these tensors lives in the
|
||||
GPU memory.
|
||||
|
||||
One can seamlessly transfer a tensor from the CPU to the GPU, as well as
|
||||
between different GPUs on your machine.
|
||||
|
||||
Apart from the above 7 tensor types, there is one additional tensor type on the GPU
|
||||
|
||||
- `torch.cuda.HalfTensor` : Signed 16-bit floating point tensor
|
||||
|
||||
```python
|
||||
import torch.cuda
|
||||
|
||||
# allocate a matrix of shape 3x4
|
||||
a = torch.cuda.FloatTensor(3, 4)
|
||||
print(a)
|
||||
|
||||
# transfer this to the CPU
|
||||
b = a.cpu()
|
||||
print(b)
|
||||
|
||||
# transfer this back to the GPU-1
|
||||
a = b.cuda()
|
||||
print(a)
|
||||
|
||||
# transfer this to GPU-2
|
||||
b = a.cuda(1)
|
||||
```
|
||||
|
||||
## Internal data representation
|
||||
|
||||
The actual data of a `Tensor` is contained into a
|
||||
`Storage`. It can be accessed using
|
||||
`storage()`. While the memory of a
|
||||
`Tensor` has to be contained in this unique `Storage`, it might
|
||||
not be contiguous: the first position used in the `Storage` is given
|
||||
by `storage_offset()` (starting at `0`).
|
||||
And the _jump_ needed to go from one element to another
|
||||
element in the `i-th` dimension is given by
|
||||
`stride(i-1)`. See the code example for an illustration.
|
||||
|
||||
```python
|
||||
# given a 3d tensor
|
||||
x = torch.FloatTensor(7,7,7)
|
||||
|
||||
# accessing the element `(3,4,5)` can be done by
|
||||
x[3 - 1][4 - 1][5 - 1]
|
||||
# or equivalently (but slowly!)
|
||||
x.storage()[x.storageOffset()
|
||||
+ (3 - 1) * x.stride(0)
|
||||
+ (4 - 1) * x.stride(1)
|
||||
+ (5 - 1) * x.stride(2)]
|
||||
```
|
||||
|
||||
One could say that a `Tensor` is a particular way of _viewing_ a
|
||||
`Storage`: a `Storage` only represents a chunk of memory, while the
|
||||
`Tensor` interprets this chunk of memory as having dimensions:
|
||||
|
||||
```python
|
||||
# a tensor interprets a chunk of memory as having dimensions
|
||||
>>> x = torch.Tensor(4,5)
|
||||
>>> s = x.storage()
|
||||
>>> for i in range(s.size()): # fill up the Storage
|
||||
>>> s[i] = i
|
||||
|
||||
# s is interpreted by x as a 2D matrix
|
||||
>>> print(x)
|
||||
|
||||
1 2 3 4 5
|
||||
6 7 8 9 10
|
||||
11 12 13 14 15
|
||||
16 17 18 19 20
|
||||
[torch.FloatTensor of dimension 4x5]
|
||||
```
|
||||
|
||||
Note also that in Torch7 ___elements in the same row___ [elements along the __last__ dimension]
|
||||
are contiguous in memory for a matrix [tensor]:
|
||||
|
||||
This is exactly like in `C` and `numpy` (and not `Fortran`).
|
||||
|
||||
## Default Tensor type
|
||||
|
||||
For convenience, _an alias_ `torch.Tensor` is provided, which allows the user to write
|
||||
type-independent scripts, which can then be run after choosing the desired Tensor type with
|
||||
a call like
|
||||
|
||||
`torch.set_default_tensor_type('torch.DoubleTensor')`
|
||||
|
||||
|
||||
By default, the alias points to `torch.FloatTensor`.
|
||||
|
||||
## Efficient memory management
|
||||
|
||||
_All_ tensor operations post-fixed with an underscore (for example `.fill_`)
|
||||
do _not_ make any memory copy. All these methods transform the existing tensor.
|
||||
Tensor methods such as `narrow` and `select` return a new tensor referencing _the same storage_.
|
||||
This magical behavior is internally obtained by good usage of the `stride()` and
|
||||
`storage_offset()`. See the code example illustrating this.
|
||||
|
||||
```python
|
||||
>>> x = torch.Tensor(5).zero_()
|
||||
>>> print(x)
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
[torch.FloatTensor of dimension 5]
|
||||
>>> x.narrow(0, 1, 2).fill_(1)
|
||||
>>> # narrow() returns a Tensor referencing the same Storage as x
|
||||
>>> print(x)
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
[torch.FloatTensor of dimension 5]
|
||||
>>> # same thing can be achieved with slice indexing
|
||||
>>> x[1:3] = 2
|
||||
>>> print(x)
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
[torch.FloatTensor of dimension 5]
|
||||
```
|
||||
|
||||
If you really need to copy a `Tensor`, you can use the `copy_()` method:
|
||||
|
||||
```python
|
||||
# making a copy of a tensor
|
||||
y = x.new(x.size()).copy_(x)
|
||||
y = x.clone()
|
||||
```
|
||||
Or the convenience method `clone()`
|
||||
|
||||
We now describe all the methods for `Tensor`. If you want to specify the Tensor type,
|
||||
just replace `Tensor` by the name of the Tensor variant (like `CharTensor`).
|
||||
|
||||
## Constructors ##
|
||||
|
||||
Tensor constructors create new Tensor objects, optionally allocating
|
||||
new memory. By default the elements of a newly allocated memory are
|
||||
not initialized, therefore, might contain arbitrary numbers. Here are
|
||||
several ways to construct a new `Tensor`.
|
||||
|
||||
### torch.Tensor() ###
|
||||
|
||||
Returns an empty tensor.
|
||||
|
||||
### torch.Tensor(tensor) ###
|
||||
|
||||
Returns a new tensor which references the same `Storage` as the given `tensor`.
|
||||
The `size`, `stride`, and `storage_offset` are the same as those of the given tensor.
|
||||
|
||||
The new `Tensor` is now going to "view" the same `storage`
|
||||
as the given `tensor`. As a result, any modification in the elements
|
||||
of the `Tensor` will have an impact on the elements of the given
|
||||
`tensor`, and vice-versa. No memory copy!
|
||||
|
||||
```python
|
||||
>>> x = torch.Tensor(2,5).fill_(3.14)
|
||||
>>> x
|
||||
3.1400 3.1400 3.1400 3.1400 3.1400
|
||||
3.1400 3.1400 3.1400 3.1400 3.1400
|
||||
[torch.FloatTensor of dimension 2x5]
|
||||
|
||||
>>> y = torch.Tensor(x)
|
||||
>>> y
|
||||
3.1400 3.1400 3.1400 3.1400 3.1400
|
||||
3.1400 3.1400 3.1400 3.1400 3.1400
|
||||
[torch.FloatTensor of dimension 2x5]
|
||||
|
||||
>>> y.zero_()
|
||||
>>> x # elements of x are the same as y!
|
||||
0 0 0 0 0
|
||||
0 0 0 0 0
|
||||
[torch.FloatTensor of dimension 2x5]
|
||||
```
|
||||
|
||||
### torch.Tensor(sz1 [,sz2 [,sz3 [,sz4 [,sz5 ...]]]]]) ###
|
||||
|
||||
Create a tensor of the given sizes.
|
||||
The tensor size will be `sz1 x sz2 x sz3 x sz4 x sz5 x ...`.
|
||||
|
||||
### torch.Tensor(sizes) ###
|
||||
|
||||
Create a tensor of any number of dimensions. `sizes` gives the size in each dimension of
|
||||
the tensor and is of type `torch.Size`.
|
||||
|
||||
```python
|
||||
Example, create a 4D 4x4x3x2 tensor:
|
||||
x = torch.Tensor(torch.Size([4,4,3,2]))
|
||||
```
|
||||
|
||||
### torch.Tensor(storage) ###
|
||||
|
||||
Returns a tensor which uses the existing `Storage` starting at a storage offset of 0.
|
||||
|
||||
### torch.Tensor(sequence) ###
|
||||
|
||||
One can create a tensor from a python sequence.
|
||||
|
||||
For example, you can create a `Tensor` from a `list` or a `tuple`
|
||||
|
||||
```python
|
||||
# create a 2d tensor from a list of lists
|
||||
>>> torch.Tensor([[1,2,3,4], [5,6,7,8]])
|
||||
1 2 3 4
|
||||
5 6 7 8
|
||||
[torch.FloatTensor of dimension 2x4]
|
||||
```
|
||||
|
||||
### torch.Tensor(ndarray) ###
|
||||
|
||||
Creates a `Tensor` from a NumPy `ndarray`.
|
||||
If the `dtype` of the `ndarray` is the same as the type of the `Tensor` being created,
|
||||
The underlying memory of both are shared, i.e. if the value of an element
|
||||
in the `ndarray` is changed, the corresponding value in the `Tensor` changes,
|
||||
and vice versa.
|
||||
|
||||
```python
|
||||
# create a ndarray of dtype=int64
|
||||
>>> a = np.random.randint(2, size=10)
|
||||
>>> a
|
||||
array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])
|
||||
# create a LongTensor. Since they are the same type (int64), the memory is shared
|
||||
>>> b = torch.LongTensor(a)
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
[torch.LongTensor of size 10]
|
||||
>>> b[3] = 100
|
||||
>>> print(a[3])
|
||||
100
|
||||
|
||||
# now create an IntTensor from the same ndarray.
|
||||
# The memory is not shared in this case as the dtype=int64 != IntTensor (int32)
|
||||
>>> b = torch.IntTensor(a)
|
||||
>>> b[3] = 30000
|
||||
>>> print(a[3])
|
||||
100
|
||||
# a did not change to the value 30000
|
||||
```
|
||||
|
||||
## NumPy Conversion ##
|
||||
### torch.from_numpy(ndarray)
|
||||
|
||||
This is a convenience function similar to the constructor above.
|
||||
Given a numpy `ndarray`, it constructs a torch `Tensor` of the same `dtype`
|
||||
as the numpy array.
|
||||
|
||||
For example, passing in an ndarray of dtype=float64 will create a torch.DoubleTensor
|
||||
|
||||
### Tensor.numpy()
|
||||
|
||||
This is a member function on a tensor that converts a torch `Tensor` to a
|
||||
numpy `ndarray`. The memory of the data of both objects is shared.
|
||||
Hence, changing a value in the `Tensor` will change the corresponding value in
|
||||
the `ndarray` and vice versa.
|
||||
|
||||
```python
|
||||
>>> a = torch.randn(3,4)
|
||||
>>> b = a.numpy() # creates a numpy array with dtype=float32 in this case
|
||||
>>> print(a)
|
||||
-1.0453 1.4730 -1.8990 -0.7763
|
||||
1.8155 1.4004 -1.5286 1.0420
|
||||
0.6551 1.0258 0.1152 -0.3239
|
||||
[torch.FloatTensor of size 3x4]
|
||||
>>> print(b)
|
||||
[[-1.04525673 1.4730444 -1.89899576 -0.77626842]
|
||||
[ 1.81549406 1.40035892 -1.5286355 1.04199517]
|
||||
[ 0.6551016 1.02575183 0.11520521 -0.32391372]]
|
||||
>>> a[2][2] = 1000
|
||||
>>> print(b)
|
||||
[[ -1.04525673e+00 1.47304440e+00 -1.89899576e+00 -7.76268423e-01]
|
||||
[ 1.81549406e+00 1.40035892e+00 -1.52863550e+00 1.04199517e+00]
|
||||
[ 6.55101597e-01 1.02575183e+00 1.00000000e+03 -3.23913723e-01]]
|
||||
# notice that b[2][2] has changed to the value 1000 too.
|
||||
```
|
||||
|
||||
### torch.is_tensor(obj)
|
||||
|
||||
Returns True if the passed-in object is a `Tensor` (of any type). Returns `False` otherwise.
|
||||
|
||||
### torch.is_storage(obj)
|
||||
|
||||
Returns True if the passed-in object is a `Storage` (of any type). Returns `False` otherwise.
|
||||
|
||||
### torch.expand_as
|
||||
### torch.expand
|
||||
### torch.view
|
||||
### torch.view_as
|
||||
### torch.permute
|
||||
### torch.pin_memory
|
||||
### copy
|
||||
### split
|
||||
### chunk
|
||||
### tolist
|
||||
### repeat
|
||||
### unsqueeze
|
||||
### unsqueeze_
|
||||
### add, iadd, sub, isub, mul, imul, matmul, div, rdiv, idiv, mod, neg
|
||||
|
||||
## GPU Semantics ##
|
||||
|
||||
When you create a `torch.cuda.*Tensor`, it is allocated on the current GPU.
|
||||
However, you could allocate it on another GPU as well, using the `with torch.cuda.device(id)` context.
|
||||
All allocations within this context will be placed on the GPU `id`.
|
||||
|
||||
Once `Tensor`s are allocated, you can do operations on them from any GPU context, and the results
|
||||
will be placed on the same device as where the source `Tensor` is located.
|
||||
|
||||
For example if Tensor `a` and `b` are on GPU-2, but the GPU-1 is the current device.
|
||||
If one does `c = a + b`, then `c` will be on GPU-2, regardless of what the current device is.
|
||||
|
||||
Cross-GPU operations are not allowed. The only Cross-GPU operation allowed is `copy`.
|
||||
|
||||
If `a` is on GPU-1 and `b` is on GPU-2, then `c = a + b` will result in an error.
|
||||
|
||||
See the example for more clarity on these semantics.
|
||||
|
||||
```python
|
||||
# Tensors are allocated on GPU 1 by default
|
||||
x = torch.cuda.FloatTensor(1)
|
||||
# x.get_device() == 0
|
||||
y = torch.FloatTensor(1).cuda()
|
||||
# y.get_device() == 0
|
||||
|
||||
with torch.cuda.device(1):
|
||||
# allocates a tensor on GPU 2
|
||||
a = torch.cuda.FloatTensor(1)
|
||||
|
||||
# transfers a tensor from CPU to GPU-2
|
||||
b = torch.FloatTensor(1).cuda()
|
||||
# a.get_device() == b.get_device() == 1
|
||||
|
||||
z = x + y
|
||||
# z.get_device() == 0 - x and y live on GPU 1 (device index 0), so the result does too
|
||||
|
||||
# even within a context, you can give a GPU id to the .cuda call
|
||||
c = torch.randn(2).cuda(2)
|
||||
# c.get_device() == 2
|
||||
|
||||
```
|
||||
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
# torch
|
||||
|
||||
```python
|
||||
# load torch with
|
||||
import torch
|
||||
```
|
||||
|
||||
```python
|
||||
# load the CUDA features of torch with
|
||||
import torch.cuda
|
||||
```
|
||||
|
||||
__torch__ is the main package where data structures for multi-dimensional
|
||||
tensors and mathematical operations over these are defined.
|
||||
Additionally, it provides many utilities for efficient serializing of
|
||||
Tensors and arbitrary types, and other useful utilities.
|
||||
|
||||
It has a CUDA counterpart, that enables you to run your tensor computations
|
||||
on an NVIDIA GPU with compute capability >= 2.0.
|
||||
|
||||
## Multi-core
|
||||
### torch.get_num_threads()
|
||||
|
||||
Gets the number of OpenMP threads that will be used for parallelizing CPU operations
|
||||
|
||||
### torch.set_num_threads(n)
|
||||
|
||||
Sets the number of OpenMP threads to use for parallelizing CPU operations
|
||||
|
||||
## Serialization
|
||||
### torch.save(object, file)
|
||||
This function pickles a Python object to the `file`. `file` is either a filename or a file handle.
|
||||
|
||||
`object` can be a picklable python object, including `torch` `Tensor`s, autograd `Variable`, nn `Module`s etc.
|
||||
|
||||
When a group of `torch` `Tensor`s are saved together, and if any of them share the same storages, then this sharing is preserved during saving and loading back.
|
||||
|
||||
|
||||
### torch.load(file)
|
||||
|
||||
This function unpickles objects that have been pickled with `torch.save`
|
||||
|
||||
## Random Numbers
|
||||
|
||||
### torch.get_rng_state()
|
||||
|
||||
Gets the current state of the torch Random Number Generator.
|
||||
|
||||
This can be passed in the future to `torch.set_rng_state` to restore the current RNG state.
|
||||
|
||||
### torch.set_rng_state(state)
|
||||
|
||||
Sets the current state of the torch Random Number Generator to the given `state`.
|
||||
|
||||
### torch.manual_seed(number)
|
||||
|
||||
Sets the initial seed of the random number generator to a given number.
|
||||
|
||||
### torch.initial_seed()
|
||||
|
||||
Returns the number that is the initial seed to the Random Number Generator
|
||||
|
||||
## CUDA
|
||||
### torch.cuda.is_available()
|
||||
|
||||
Returns `True` if CUDA is available and usable. Returns `False` otherwise.
|
||||
|
||||
### torch.cuda.device_count()
|
||||
|
||||
Returns the number of CUDA devices on the system.
|
||||
|
||||
### torch.cuda.current_device()
|
||||
|
||||
Returns the device index of the current default CUDA device.
|
||||
|
||||
### torch.cuda.synchronize()
|
||||
|
||||
This function issues a `cudaDeviceSynchronize` on the current device, and hence waits for all in-flight CUDA computation to finish.
|
||||
|
||||
### torch.cuda.current_stream()
|
||||
|
||||
Returns the handle to the current stream of the CUDA context.
|
||||
|
||||
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |