torch
===================================
.. automodule:: torch

Tensors
----------------------------------
.. autofunction:: is_tensor
.. autofunction:: is_storage
.. autofunction:: is_floating_point
.. autofunction:: set_default_dtype
.. autofunction:: get_default_dtype
.. autofunction:: set_default_tensor_type
.. autofunction:: numel
.. autofunction:: set_printoptions
.. autofunction:: set_flush_denormal

.. _tensor-creation-ops:

Creation Ops
~~~~~~~~~~~~~~~~~~~~~~

.. note::
    Random sampling creation ops are listed under :ref:`random-sampling` and
    include:
    :func:`torch.rand`
    :func:`torch.rand_like`
    :func:`torch.randn`
    :func:`torch.randn_like`
    :func:`torch.randint`
    :func:`torch.randint_like`
    :func:`torch.randperm`
    You may also use :func:`torch.empty` with the :ref:`inplace-random-sampling`
    methods to create :class:`torch.Tensor` s with values sampled from a broader
    range of distributions.
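
    For example, pairing :func:`torch.empty` with an in-place sampling method
    fills freshly allocated memory from a chosen distribution (an illustrative
    sketch; outputs are omitted because the drawn values are random)::

        >>> x = torch.empty(3)
        >>> x.cauchy_()       # fill in place from the Cauchy distribution
        >>> x.uniform_(0, 1)  # or overwrite with uniform samples from [0, 1)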

.. autofunction:: tensor
.. autofunction:: sparse_coo_tensor
.. autofunction:: as_tensor
.. autofunction:: as_strided
.. autofunction:: from_numpy
.. autofunction:: zeros
.. autofunction:: zeros_like
.. autofunction:: ones
.. autofunction:: ones_like
.. autofunction:: arange
.. autofunction:: range
.. autofunction:: linspace
.. autofunction:: logspace
.. autofunction:: eye
.. autofunction:: empty
.. autofunction:: empty_like
.. autofunction:: empty_strided
.. autofunction:: full
.. autofunction:: full_like
.. autofunction:: quantize_per_tensor
.. autofunction:: quantize_per_channel

Indexing, Slicing, Joining, Mutating Ops
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: cat
.. autofunction:: chunk
.. autofunction:: gather
.. autofunction:: index_select
.. autofunction:: masked_select
.. autofunction:: narrow
.. autofunction:: nonzero
.. autofunction:: reshape
.. autofunction:: split
.. autofunction:: squeeze
.. autofunction:: stack
.. autofunction:: t
.. autofunction:: take
.. autofunction:: transpose
.. autofunction:: unbind
.. autofunction:: unsqueeze
.. autofunction:: where

.. _generators:

Generators
----------------------------------
.. autoclass:: torch._C.Generator
    :members:

.. _random-sampling:

Random sampling
----------------------------------
.. autofunction:: seed
.. autofunction:: manual_seed
.. autofunction:: initial_seed
.. autofunction:: get_rng_state
.. autofunction:: set_rng_state
.. autoattribute:: torch.default_generator
    :annotation: Returns the default CPU torch.Generator

.. The following doesn't actually seem to exist.
   https://github.com/pytorch/pytorch/issues/27780
   .. autoattribute:: torch.cuda.default_generators
      :annotation: If cuda is available, returns a tuple of default CUDA torch.Generator-s.
                   The number of CUDA torch.Generator-s returned is equal to the number of
                   GPUs available in the system.
.. autofunction:: bernoulli
.. autofunction:: multinomial
.. autofunction:: normal
.. autofunction:: poisson
.. autofunction:: rand
.. autofunction:: rand_like
.. autofunction:: randint
.. autofunction:: randint_like
.. autofunction:: randn
.. autofunction:: randn_like
.. autofunction:: randperm

.. _inplace-random-sampling:

In-place random sampling
~~~~~~~~~~~~~~~~~~~~~~~~

There are a few more in-place random sampling functions defined on Tensors as well. Click through to refer to their documentation; a short example follows the list:

- :func:`torch.Tensor.bernoulli_` - in-place version of :func:`torch.bernoulli`
- :func:`torch.Tensor.cauchy_` - numbers drawn from the Cauchy distribution
- :func:`torch.Tensor.exponential_` - numbers drawn from the exponential distribution
- :func:`torch.Tensor.geometric_` - elements drawn from the geometric distribution
- :func:`torch.Tensor.log_normal_` - samples from the log-normal distribution
- :func:`torch.Tensor.normal_` - in-place version of :func:`torch.normal`
- :func:`torch.Tensor.random_` - numbers sampled from the discrete uniform distribution
- :func:`torch.Tensor.uniform_` - numbers sampled from the continuous uniform distribution
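
Each of these mutates the tensor it is called on, drawing new values into the
existing storage. A brief illustrative sketch (outputs are omitted because the
drawn values are random)::

    >>> x = torch.empty(2, 3)
    >>> x.normal_(mean=0.0, std=1.0)  # fill with samples from N(0, 1)
    >>> x.exponential_(lambd=1.0)     # overwrite with exponential samples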

Quasi-random sampling
~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: torch.quasirandom.SobolEngine
    :members:
    :exclude-members: MAXBIT, MAXDIM
    :undoc-members:

Serialization
----------------------------------
.. autofunction:: save
.. autofunction:: load


Parallelism
----------------------------------
.. autofunction:: get_num_threads
.. autofunction:: set_num_threads
.. autofunction:: get_num_interop_threads
.. autofunction:: set_num_interop_threads

Locally disabling gradient computation
--------------------------------------
The context managers :func:`torch.no_grad`, :func:`torch.enable_grad`, and
:func:`torch.set_grad_enabled` are helpful for locally disabling and enabling
gradient computation. See :ref:`locally-disable-grad` for more details on
their usage. These context managers are thread local, so they won't
work if you send work to another thread using the ``threading`` module, etc.

Examples::

    >>> x = torch.zeros(1, requires_grad=True)
    >>> with torch.no_grad():
    ...     y = x * 2
    >>> y.requires_grad
    False

    >>> is_train = False
    >>> with torch.set_grad_enabled(is_train):
    ...     y = x * 2
    >>> y.requires_grad
    False

    >>> torch.set_grad_enabled(True)  # this can also be used as a function
    >>> y = x * 2
    >>> y.requires_grad
    True

    >>> torch.set_grad_enabled(False)
    >>> y = x * 2
    >>> y.requires_grad
    False
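
The thread-local behavior can be seen by dispatching work to another thread: a
``no_grad`` block in the launching thread does not apply to the worker (an
illustrative sketch)::

    >>> import threading
    >>> x = torch.zeros(1, requires_grad=True)
    >>> def worker():
    ...     y = x * 2
    ...     print(y.requires_grad)  # grad mode is still enabled in this thread
    >>> with torch.no_grad():
    ...     t = threading.Thread(target=worker)
    ...     t.start()
    ...     t.join()
    True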


Math operations
----------------------------------

Pointwise Ops
~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: abs
.. autofunction:: acos
.. autofunction:: add
.. autofunction:: addcdiv
.. autofunction:: addcmul
.. autofunction:: angle
.. autofunction:: asin
.. autofunction:: atan
.. autofunction:: atan2
.. autofunction:: bitwise_not
.. autofunction:: bitwise_and
.. autofunction:: bitwise_xor
.. autofunction:: ceil
.. autofunction:: clamp
.. autofunction:: conj
.. autofunction:: cos
.. autofunction:: cosh
.. autofunction:: div
.. autofunction:: digamma
.. autofunction:: erf
.. autofunction:: erfc
.. autofunction:: erfinv
.. autofunction:: exp
.. autofunction:: expm1
.. autofunction:: floor
.. autofunction:: floor_divide
.. autofunction:: fmod
.. autofunction:: frac
.. autofunction:: imag
.. autofunction:: lerp
.. autofunction:: lgamma
.. autofunction:: log
.. autofunction:: log10
.. autofunction:: log1p
.. autofunction:: log2
.. autofunction:: logical_and
.. autofunction:: logical_not
.. autofunction:: logical_or
.. autofunction:: logical_xor
.. autofunction:: mul
.. autofunction:: mvlgamma
.. autofunction:: neg
.. autofunction:: polygamma
.. autofunction:: pow
.. autofunction:: real
.. autofunction:: reciprocal
.. autofunction:: remainder
.. autofunction:: round
.. autofunction:: rsqrt
.. autofunction:: sigmoid
.. autofunction:: sign
.. autofunction:: sin
.. autofunction:: sinh
.. autofunction:: sqrt
.. autofunction:: square
.. autofunction:: tan
.. autofunction:: tanh
.. autofunction:: trunc


Reduction Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: argmax
.. autofunction:: argmin
.. autofunction:: dist
.. autofunction:: logsumexp
.. autofunction:: mean
.. autofunction:: median
.. autofunction:: mode
.. autofunction:: norm
.. autofunction:: prod
.. autofunction:: std
.. autofunction:: std_mean
.. autofunction:: sum
.. autofunction:: unique
.. autofunction:: unique_consecutive
.. autofunction:: var
.. autofunction:: var_mean


Comparison Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: allclose
.. autofunction:: argsort
.. autofunction:: eq
.. autofunction:: equal
.. autofunction:: ge
.. autofunction:: gt
.. autofunction:: isfinite
.. autofunction:: isinf
.. autofunction:: isnan
.. autofunction:: kthvalue
.. autofunction:: le
.. autofunction:: lt
.. autofunction:: max
.. autofunction:: min
.. autofunction:: ne
.. autofunction:: sort
.. autofunction:: topk


Spectral Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: fft
.. autofunction:: ifft
.. autofunction:: rfft
.. autofunction:: irfft
.. autofunction:: stft
.. autofunction:: bartlett_window
.. autofunction:: blackman_window
.. autofunction:: hamming_window
.. autofunction:: hann_window


Other Operations
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: bincount
.. autofunction:: broadcast_tensors
.. autofunction:: cartesian_prod
.. autofunction:: cdist
.. autofunction:: combinations
.. autofunction:: cross
.. autofunction:: cumprod
.. autofunction:: cumsum
.. autofunction:: diag
.. autofunction:: diag_embed
.. autofunction:: diagflat
.. autofunction:: diagonal
.. autofunction:: einsum
.. autofunction:: flatten
.. autofunction:: flip
.. autofunction:: rot90
.. autofunction:: histc
.. autofunction:: meshgrid
.. autofunction:: renorm
.. autofunction:: repeat_interleave
.. autofunction:: roll
.. autofunction:: tensordot
.. autofunction:: trace
.. autofunction:: tril
.. autofunction:: tril_indices
.. autofunction:: triu
.. autofunction:: triu_indices


BLAS and LAPACK Operations
~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: addbmm
.. autofunction:: addmm
.. autofunction:: addmv
.. autofunction:: addr
.. autofunction:: baddbmm
.. autofunction:: bmm
.. autofunction:: chain_matmul
.. autofunction:: cholesky
.. autofunction:: cholesky_inverse
.. autofunction:: cholesky_solve
.. autofunction:: dot
.. autofunction:: eig
.. autofunction:: geqrf
.. autofunction:: ger
.. autofunction:: inverse
.. autofunction:: det
.. autofunction:: logdet
.. autofunction:: slogdet
.. autofunction:: lstsq
.. autofunction:: lu
.. autofunction:: lu_solve
.. autofunction:: lu_unpack
.. autofunction:: matmul
.. autofunction:: matrix_power
.. autofunction:: matrix_rank
.. autofunction:: mm
.. autofunction:: mv
.. autofunction:: orgqr
.. autofunction:: ormqr
.. autofunction:: pinverse
.. autofunction:: qr
.. autofunction:: solve
.. autofunction:: svd
.. autofunction:: symeig
.. autofunction:: trapz
.. autofunction:: triangular_solve

Utilities
----------------------------------
.. autofunction:: compiled_with_cxx11_abi
.. autofunction:: result_type
.. autofunction:: can_cast
.. autofunction:: promote_types