Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
Fix typos (#30606)
Summary: Should be non-semantic. Uses https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines to find likely typos.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/30606
Differential Revision: D18763028
Pulled By: mrshenli
fbshipit-source-id: 896515a2156d062653408852e6c04b429fc5955c
This commit is contained in:
parent 0bebfe2143
commit e7fe64f6a6
@@ -186,7 +186,7 @@ if [ "$(uname)" == "Linux" ]; then
 build_args+=("USE_REDIS=ON")
 fi
 
-# Use a speciallized onnx namespace in CI to catch hardcoded onnx namespace
+# Use a specialized onnx namespace in CI to catch hardcoded onnx namespace
 build_args+=("ONNX_NAMESPACE=ONNX_NAMESPACE_FOR_C2_CI")
 
 ###############################################################################
@@ -37,7 +37,7 @@ constexpr uint32_t LMASK = 0x7fffffff;
 * Time to get 100000000 std::mt19937 randoms with std::uniform_real_distribution = 0.352087s
 * Time to get 100000000 std::mt19937 randoms with at::uniform_real_distribution = 0.419454s
 *
-* std::mt19937 is faster when used in conjuction with std::uniform_real_distribution,
+* std::mt19937 is faster when used in conjunction with std::uniform_real_distribution,
 * however we can't use std::uniform_real_distribution because of this bug:
 * http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524. Plus, even if we used
 * std::uniform_real_distribution and filtered out the 1's, it is a different algorithm
@@ -122,10 +122,10 @@ public:
 uint32_t nlo = static_cast<uint32_t>(n);
 uint32_t nhi = static_cast<uint32_t>(n >> 32);
 counter[0] += nlo;
-// if overflow in x has occured, carry over to nhi
+// if overflow in x has occurred, carry over to nhi
 if (counter[0] < nlo) {
 nhi++;
-// if overflow in nhi has occured during carry over,
+// if overflow in nhi has occurred during carry over,
 // propagate that overflow to y and exit to increment z
 // otherwise return
 counter[1] += nhi;

@@ -135,7 +135,7 @@ public:
 }
 }
 } else {
-// if overflow in y has occured during addition,
+// if overflow in y has occurred during addition,
 // exit to increment z
 // otherwise return
 counter[1] += nhi;
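The two hunks above annotate the carry-propagation step of the counter increment: unsigned addition wraps around, so a sum that ends up smaller than one of its operands signals an overflow that must be carried into the next word. A minimal self-contained sketch of that idea follows; the four-word counter layout and the helper name are assumptions made for illustration only, not the actual Philox engine code.

#include <array>
#include <cstdint>

// Sketch: add a 64-bit increment n to a 128-bit counter stored as four 32-bit
// words. Unsigned arithmetic wraps around, so "sum < operand" detects overflow.
void add_with_carry(std::array<uint32_t, 4>& counter, uint64_t n) {
  uint64_t low = (static_cast<uint64_t>(counter[1]) << 32) | counter[0];
  uint64_t sum = low + n;
  counter[0] = static_cast<uint32_t>(sum);
  counter[1] = static_cast<uint32_t>(sum >> 32);
  if (sum < low) {             // the low 64-bit half overflowed: carry into word 2
    if (++counter[2] == 0) {   // and, if that wraps as well, into word 3
      ++counter[3];
    }
  }
}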
@@ -344,7 +344,7 @@ public:
 // its cache each time the kernel is looked up from the dispatch table.
 // A lambda with a capture would be global and share its capture between all kernel lookups.
 // So, instead of making users having to think about it (including the thread-safety
-// issues this causes), let's just forbid stateful lambdas alltogether.
+// issues this causes), let's just forbid stateful lambdas altogether.
 static_assert(guts::is_stateless_lambda<guts::decay_t<Lambda>>::value, "The kernel(x) API for registering a kernel only works for stateless lambdas (i.e. lambdas without captures). If you need a cache, please use the functor based API kernel<Functor>() instead.");
 
 return std::move(*this).kernel(
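The hunk above enforces that only stateless (capture-free) lambdas may be registered through the kernel(x) API. A minimal sketch of how statelessness can be observed at compile time follows, assuming nothing beyond standard C++: a capture-free lambda converts to a plain function pointer while a capturing one does not. This only illustrates the idea; the is_stateless_lambda trait named in the hunk is implemented separately in c10.

#include <type_traits>

int main() {
  auto stateless = [](int x) { return x + 1; };
  int bias = 42;
  auto stateful = [bias](int x) { return x + bias; };

  // A capture-free lambda implicitly converts to a plain function pointer;
  // a capturing lambda does not, which is one simple compile-time distinction.
  static_assert(std::is_convertible<decltype(stateless), int (*)(int)>::value,
                "capture-free lambdas decay to function pointers");
  static_assert(!std::is_convertible<decltype(stateful), int (*)(int)>::value,
                "capturing lambdas do not");
  return 0;
}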
@@ -117,7 +117,7 @@ typedef struct {
 
 /*!
 * \brief C Tensor object, manage memory of DLTensor. This data structure is
-* intended to faciliate the borrowing of DLTensor by another framework. It is
+* intended to facilitate the borrowing of DLTensor by another framework. It is
 * not meant to transfer the tensor. When the borrowing framework doesn't need
 * the tensor, it should call the deleter to notify the host that the resource
 * is no longer needed.

@@ -151,7 +151,7 @@ Tensor einsum(std::string eqn, TensorList tensors) {
 // The internal representation of the left hand side fo the equation (with ellipsis expanded) is stored in input_op_idxes.
 // For each operand, we have a vector mapping each dimension to an internal index.
 // We also keep track of the number of occurrences for each letter (to infer a right hand side if not given) and
-// of the last occurence of each index.
+// of the last occurrence of each index.
 std::vector<std::vector<int64_t>> input_op_idxes; // the parsed operand indices
 std::array<std::int64_t, number_of_letters> num_letter_occurrences; // number of occurrence in the equation of this letter
 num_letter_occurrences.fill(0);

@@ -12,7 +12,7 @@
 #include <ATen/core/EnableNamedTensor.h>
 
 // TensorIterator is a helper class for element-wise operations, such as
-// arithmetic, comparisions, and trigonometric functions. It handles
+// arithmetic, comparisons, and trigonometric functions. It handles
 // broadcasting and type conversions of operands.
 //
 // This is inspired by NumPy's Array Iterator API (NpyIter).

@@ -290,7 +290,7 @@ Tensor diagonal(const Tensor& self, int64_t offset, int64_t dim1_, int64_t dim2_
 storage_offset -= offset * self.stride(dim1);
 }
 
-// construct new size and stride: we drop dim1 and dim2 (maximum first for not changing the index of the minumum)
+// construct new size and stride: we drop dim1 and dim2 (maximum first for not changing the index of the minimum)
 // the new ("joint") dimension is appended to the end of the shape / stride to match numpy semantics
 auto sizes = self.sizes().vec();
 auto strides = self.strides().vec();

@@ -34,7 +34,7 @@ namespace native {
 // YOU ARE NOT OBLIGED TO USE THESE HELPERS---if you're writing something more specialized, please don't try to make
 // them work for your case, but just write something new instead. Here we use helper functions instead of a flat fat
 // macro that implements everything, because the former allows some simple preprocessing that are unique to some
-// operators (more is forseeable) and is more flexible and elegant than the latter.
+// operators (more is foreseeable) and is more flexible and elegant than the latter.
 template <typename Stub>
 static inline Tensor& unary_op_impl_out(Tensor& result, const Tensor& self, Stub& stub) {
 auto iter = TensorIterator::unary_op(result, self,
@@ -260,7 +260,7 @@ inline v8sf exp256_ps(v8sf x) {
 
 tmp = _mm256_floor_ps(fx);
 
-/* if greater, substract 1 */
+/* if greater, subtract 1 */
 //v8sf mask = _mm256_cmpgt_ps(tmp, fx);
 v8sf mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS);
 mask = _mm256_and_ps(mask, one);
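The hunk above sits inside a vectorized exp implementation that uses a branchless correction: a per-lane comparison mask is ANDed with 1.0f and then subtracted, so only lanes where tmp is greater than fx get decremented. A standalone sketch of that idiom, written here purely for illustration, follows.

#include <immintrin.h>

// Sketch of the branchless "if greater, subtract 1" idiom: the comparison
// yields an all-ones mask per lane, ANDing it with 1.0f turns that into
// 1.0f or 0.0f, and the subtraction then only affects lanes where tmp > fx.
static inline __m256 subtract_one_where_greater(__m256 tmp, __m256 fx) {
  const __m256 one = _mm256_set1_ps(1.0f);
  __m256 mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS);  // all-ones where tmp > fx
  mask = _mm256_and_ps(mask, one);                   // 1.0f where true, 0.0f elsewhere
  return _mm256_sub_ps(tmp, mask);
}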
@@ -8,7 +8,7 @@
 // The gpu_kernel_with_scalars generates specializations that support a
 // single scalar CPU argument, such as from `cuda_tensor + 5`. The CPU scalar
 // is lifted to a kernel paramter instead of copying to device memory.
-// This should be used in conjuction with TensorIterator::allow_cpu_scalars_,
+// This should be used in conjunction with TensorIterator::allow_cpu_scalars_,
 // which is the default for TensorIterator::binary_op. Otherwise, all inputs
 // and the output must be on the GPU.
 //

@@ -5,7 +5,7 @@
 // 1. Graves et al: http://www.cs.toronto.edu/~graves/icml_2006.pdf
 // We use the equations from above link, but note that [1] has 1-based indexing and we (of course) use 0-based.
 // Graves et al call the probabilities y, we use log_probs (also calling them inputs)
-// A few optimizations (simmilar to those here, but also some I didn't take) are described in
+// A few optimizations (similar to those here, but also some I didn't take) are described in
 // 2. Minmin Sun: http://on-demand.gputechconf.com/gtc/2016/presentation/s6383-minmin-sun-speech-recognition.pdf
 
 #include <ATen/TensorUtils.h>

@@ -354,7 +354,7 @@ Tensor _fft_cufft(const Tensor& self, int64_t signal_ndim,
 }
 
 // Now that we have done error check and data_ptr checks, we delegate all
-// futher cuFFT parameter computation and plan creation to the helper class
+// further cuFFT parameter computation and plan creation to the helper class
 // CuFFTConfig in CuFFTUtils.h.
 
 // If plan caching is enabled, we check the cache. Note that this accesses
@@ -141,7 +141,7 @@ namespace {
 // iterations. This would give the accurate result, but is relatively slow and
 // is an overkill for most cases where double's precision suffice.
 //
-// If we directly use sqrt to calculate the root, the convertion from int64_t
+// If we directly use sqrt to calculate the root, the conversion from int64_t
 // to double would lose 11 bits precision.
 //
 // The following solution uses sqrt directly for most cases, and would only
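The comment block above argues that going through double costs precision, since a double carries only a 53-bit significand while an int64_t has up to 63 significant bits. A small sketch of that effect follows; the specific value is hypothetical and chosen only to make the rounding visible.

#include <cstdint>
#include <cstdio>

int main() {
  int64_t x = (int64_t{1} << 62) + 1;       // needs 63 significant bits
  double d = static_cast<double>(x);        // rounded to the nearest double
  int64_t back = static_cast<int64_t>(d);   // the trailing +1 has been lost
  std::printf("%lld -> %lld (difference %lld)\n",
              static_cast<long long>(x),
              static_cast<long long>(back),
              static_cast<long long>(x - back));
  return 0;
}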
@@ -27,7 +27,7 @@ static inline void barf(const char *fmt, ...) {
 }
 
 //note: msg must be a string literal
-//node: In, ##__VA_ARGS '##' supresses the comma if __VA_ARGS__ is empty
+//node: In, ##__VA_ARGS '##' suppresses the comma if __VA_ARGS__ is empty
 #define ASSERTM(cond, msg, ...) \
 if (AT_EXPECT(!(cond), 0)) { \
 barf("%s:%u: %s: Assertion `%s` failed: " msg , __FILE__, __LINE__, __func__, #cond,##__VA_ARGS__); \
@@ -254,7 +254,7 @@
 * It can hinted by a classical problem: Getting each individual digit from a whole integer(Decimal base).
 * A N-digit decimal base number could be view as a N-dimension tensor and the sizes of the tensor are 10.
 * So the value the whole integer is the line index. And the digits could be viewed as the indexes in
-* different dimentions.
+* different dimensions.
 *
 * 2. convert the indexs(coordinates) in the tensor to the memory offset.
 *
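The comment block above uses the digits-of-an-integer analogy: splitting a linear index into per-dimension coordinates is repeated division, and the memory offset is then the stride-weighted sum of those coordinates. A short sketch of both steps follows; the helper names are hypothetical and not taken from the surrounding file.

#include <cstdint>
#include <vector>

// Decompose a linear index into per-dimension coordinates, like reading off
// the digits of a decimal number from least to most significant.
std::vector<int64_t> index_to_coords(int64_t linear_index,
                                     const std::vector<int64_t>& sizes) {
  std::vector<int64_t> coords(sizes.size());
  for (int64_t d = static_cast<int64_t>(sizes.size()) - 1; d >= 0; --d) {
    coords[d] = linear_index % sizes[d];   // like taking the last digit
    linear_index /= sizes[d];              // like dropping the last digit
  }
  return coords;
}

// Convert coordinates to a memory offset as the stride-weighted sum.
int64_t coords_to_offset(const std::vector<int64_t>& coords,
                         const std::vector<int64_t>& strides) {
  int64_t offset = 0;
  for (size_t d = 0; d < coords.size(); ++d) {
    offset += coords[d] * strides[d];
  }
  return offset;
}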
@@ -22,7 +22,7 @@ or run the test independently:
 
 `python -m fastrnns.bench`
 
-should give a good comparision, or you can specify the type of model to run
+should give a good comparison, or you can specify the type of model to run
 
 `python -m fastrnns.bench --rnns cudnn aten jit --group rnns`
 

@@ -225,7 +225,7 @@ void fillInputBlob(
 if (blob == nullptr) {
 blob = workspace->CreateBlob(tensor_kv.first);
 }
-// todo: support gpu and make this function a tempalte
+// todo: support gpu and make this function a template
 int protos_size = tensor_kv.second.protos_size();
 if (protos_size == 1 && iteration > 0) {
 // Do not override the input data if there is only one input data,

@@ -501,7 +501,7 @@ void fillInputBlob(
 if (blob == nullptr) {
 blob = workspace->CreateBlob(tensor_kv.first);
 }
-// todo: support gpu and make this function a tempalte
+// todo: support gpu and make this function a template
 int protos_size = tensor_kv.second.protos_size();
 if (protos_size == 1 && iteration > 0) {
 // Do not override the input data if there is only one input data,
@@ -50,13 +50,13 @@ enum class TensorTypeId : uint8_t {
 VariableTensorId,
 
 // TESTING: This is intended to be a generic testing tensor type id.
-// Don't use it for anything real; its only acceptible use is within a single
+// Don't use it for anything real; its only acceptable use is within a single
 // process test. Use it by creating a TensorImpl with this TensorTypeId, and
 // then registering operators to operate on this type id.
 TESTING_ONLY_GenericWrapperTensorId,
 
 // TESTING: This is intended to be a generic testing tensor type id.
-// Don't use it for anything real; its only acceptible use is within a ingle
+// Don't use it for anything real; its only acceptable use is within a ingle
 // process test. Use it by toggling the mode on and off via
 // TESTING_ONLY_tls_generic_mode_set_enabled and then registering operators
 // to operate on this type id.

@@ -108,7 +108,7 @@ C10_API std::ostream& operator<<(std::ostream&, TensorTypeSet);
 
 // Historically, every tensor only had a single TensorTypeId, and it was
 // always something like CPUTensorId and not something weird like VariableId.
-// For the forseeable future, it will still be possible to extract /that/
+// For the foreseeable future, it will still be possible to extract /that/
 // TensorTypeId, and that's what this function does. It should be used
 // for legacy code that is still using TensorTypeId for things like instanceof
 // checks; if at all possible, refactor the code to stop using TensorTypeId
@@ -110,7 +110,7 @@ namespace {
 
 TEST(BFloat16Math, Substraction) {
 // This test verifies that if only first 7 bits of float's mantisa are
-// changed after substraction, we should have no loss in precision.
+// changed after subtraction, we should have no loss in precision.
 
 // input bits
 // S | Exponent | Mantissa

@@ -86,7 +86,7 @@
 // nvcc has a bug where it doesn't understand __attribute__((deprecated))
 // declarations even when the host compiler supports it. We'll only use this gcc
 // attribute when not cuda, and when using a GCC compiler that doesn't support
-// the c++14 syntax we checked for above (availble in __GNUC__ >= 5)
+// the c++14 syntax we checked for above (available in __GNUC__ >= 5)
 #if !defined(__CUDACC__)
 # define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) using TypeName __attribute__((deprecated)) = TypeThingy;
 #else

@@ -259,7 +259,7 @@ namespace detail {
 * A normalized single-precision floating-point number is represented as:
 * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
 * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
-* number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount.
+* number causes a change of the constructud single-precision number by 2**(-24), i.e. the same amount.
 *
 * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
 * is zero, the constructed single-precision number has the value of
@@ -69,7 +69,7 @@ struct TempFile {
 };
 
 /// Attempts to return a temporary file or returns `nullopt` if an error
-/// ocurred.
+/// occurred.
 ///
 /// The file returned follows the pattern
 /// `<tmp-dir>/<name-prefix><random-pattern>`, where `<tmp-dir>` is the value of

@@ -178,7 +178,7 @@ bool TensorRTOp::RunOnDevice() {
 }
 
 // We need to do the binding at RunOnDevice time because we only know the
-// exact shapes of the tensors now. In addtion, since TensorRT engine has
+// exact shapes of the tensors now. In addition, since TensorRT engine has
 // max_batch_size, we need to call that multiple times if input batch size
 // exceeeds this limit.
 CAFFE_ENFORCE_EQ(is_input_.size(), nv_dims_.size());

@@ -133,7 +133,7 @@ class CAFFE2_API TensorDeserializer : public BlobDeserializerBase {
 * we have to call Deserialize multiple times to get the complete deserialized
 * Tensor, each call will fill part of the Tensor given the segment begin and
 * end information in proto, therefore we have to pass in the Tensor pointer
-* rather than create a new Tensor everytime.
+* rather than create a new Tensor every time.
 *
 * Precondition: Tensor must be initialized
 */

@@ -294,7 +294,7 @@ static void Caffe2SetCUDAMemoryPool() {
 * GPU present during runtime, at global initialization time we will set
 * the CPU memory allocator to allocate pinned memory.
 *
-* NB: This behavior is probably too agressive. We should consider asking users
+* NB: This behavior is probably too aggressive. We should consider asking users
 * to do on-demand memory pinning (like exposed in PyTorch APIs) instead.
 */
 struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
@@ -105,7 +105,7 @@ class CAFFE2_CUDA_API ThreadLocalCUDAObjects {
 cublasHandle_t GetHandle(c10::cuda::CUDAStream cuda_stream) {
 CUDAGuard guard(cuda_stream.device_index());
 // Default construct in the map if it doesn't exist, and return a mutable
-// refernce to it.
+// reference to it.
 auto& r = cublas_handles_[cuda_stream];
 if (r == nullptr) {
 CUBLAS_ENFORCE(cublasCreate(&r));

@@ -304,7 +304,7 @@ ExecutionChains computeGroups(std::vector<OperatorNode>& orig_nodes) {
 }
 }
 
-// We check sync ops on the froniter first and then async ops. This gives us a
+// We check sync ops on the frontier first and then async ops. This gives us a
 // head start to execute sync ops locally while waiting for async ops to
 // finish.
 std::queue<int> q;

@@ -121,7 +121,7 @@ class MatchGraph : public Graph<MatchPredicate<GraphType>> {
 // is rooted at the given rootNode.
 // The flag invertGraphTraversal specify if we should follow out edges or
 // in edges. The default is true which is useful for a functional
-// intepretation of a dataflow graph.
+// interpretation of a dataflow graph.
 SubgraphMatchResultType isSubgraphMatch(
 typename GraphType::NodeRef root,
 const typename MatchGraph::NodeRef& rootCriteriaRef,
@@ -992,7 +992,7 @@ class Operator : public OperatorBase {
 
 // Clear floating point exception flags before RunOnDevice. We will test
 // exception flags afterwards, and raise an error if an exception has
-// happend.
+// happened.
 if (FLAGS_caffe2_operator_throw_if_fp_exceptions ||
 FLAGS_caffe2_operator_throw_if_fp_overflow_exceptions) {
 std::feclearexcept(FE_ALL_EXCEPT);

@@ -164,7 +164,7 @@ struct CompiledExecutionStep;
 * ExecuteStepRecursive will call call compiled() once before the given
 * execution step is run and keep it alive for the length of its execution.
 * This means that, for steps with create_workspace=true, a child workspace
-* will be created everytime the step is executed, and destroyed right
+* will be created every time the step is executed, and destroyed right
 * afterwards.
 */
 struct ExecutionStepWrapper {

@@ -15,7 +15,7 @@ class IOS_CAFFE_EXPORT Caffe2IOSPredictor final {
 public:
 /**
 @allowMetalOperators Allow converting eligible operators to Metal GPU framework accelerated
-operators. Setting this flag to true doesn't gaurantee predictor will be using Metal operators;
+operators. Setting this flag to true doesn't guarantee predictor will be using Metal operators;
 Client code must check usingMetalOperators flag to determine predictor is using them.
 */
 static Caffe2IOSPredictor* NewCaffe2IOSPredictor(const caffe2::NetDef& init_net,
@@ -150,7 +150,7 @@ class MPSImageWrapper {
 * it is still in use. If the parent wrapper contains a static image, we
 * should create a new command buffer because we use static image so it can
 * survive synchronization(commit of the command buffer), which means if we
-* pass on the command buffer the command buffer will be commited in
+* pass on the command buffer the command buffer will be committed in
 * multiple places in the graph. Also since we don't pass on parent's
 * command buffer,we need to synchronize(commit) it since it won't be used
 * in the future.

@@ -235,7 +235,7 @@ bool ConcatOp<Context>::RunOnDevice() {
 auto* output = Output(0);
 
 // We can override default options(Context::GetDeviceType())
-// by explictly passing in device type we want
+// by explicitly passing in device type we want
 Tensor* split = Output(
 1, std::vector<int64_t>(1, InputSize()), at::dtype<int>().device(CPU));
 int* axis_data = split->template mutable_data<int>();

@@ -64,7 +64,7 @@ are sorted by the corresponding KEY.
 "RANGES",
 "Tensor of int32/int64 ranges, of dims (N, M, 2). "
 "Where N is number of examples and M is a size of each example. "
-"Last dimention represents a range in the format (start, lengths)")
+"Last dimension represents a range in the format (start, lengths)")
 .Input(2, "KEY", "Tensor of rank 1 and type int64.")
 .Output(0, "OUTPUT", "1-D tensor of size sum of range lengths")
 .Arg("lengths", "Expected lengths for ranges")
@@ -42,7 +42,7 @@ void ProcessBlob(
 blob_states[key]
 .seen_chunks_ids.insert(proto.content_chunk_id())
 .second,
-"Chunk with the same id has occured twice for: ",
+"Chunk with the same id has occurred twice for: ",
 key);
 CAFFE_ENFORCE(
 proto.content_chunk_id() >= 0 &&

@@ -176,7 +176,7 @@ void ROIAlignForward(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (float)1.);
 roi_height = std::max(roi_height, (float)1.);
@@ -314,9 +314,9 @@ struct MaxRangeReducerDef {
 "elements in the input slices. Operation doesn't change the shape of "
 "individual blocks. This implementation imitates torch nn.Max operator. "
 "If the maximum value occurs more than once, the operator will return "
-"the first occurence of value. When computing the gradient using the "
+"the first occurrence of value. When computing the gradient using the "
 "backward propagation, the gradient input corresponding to the first "
-"occurence of the maximum value will be used.";
+"occurrence of the maximum value will be used.";
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -119,7 +119,7 @@ void ROIAlignBackwardFeature(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (T)1.);
 roi_height = std::max(roi_height, (T)1.);

@@ -114,7 +114,7 @@ __global__ void RoIAlignBackwardFeature(
 
 T roi_width = roi_end_w - roi_start_w;
 T roi_height = roi_end_h - roi_start_h;
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);

@@ -161,7 +161,7 @@ void ROIAlignForward(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (T)1.);
 roi_height = std::max(roi_height, (T)1.);

@@ -103,7 +103,7 @@ __global__ void RoIAlignForward(
 
 T roi_width = roi_end_w - roi_start_w;
 T roi_height = roi_end_h - roi_start_h;
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);

@@ -111,7 +111,7 @@ __global__ void RoIAlignRotatedBackward(
 T roi_height = offset_bottom_rois[4] * spatial_scale;
 T theta = offset_bottom_rois[5] * M_PI / 180.0;
 
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);

@@ -171,7 +171,7 @@ void ROIAlignRotatedForward(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (T)1.);
 roi_height = std::max(roi_height, (T)1.);

@@ -101,7 +101,7 @@ __global__ void RoIAlignRotatedForward(
 T roi_height = offset_bottom_rois[4] * spatial_scale;
 T theta = offset_bottom_rois[5] * M_PI / 180.0;
 
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);
@@ -1981,7 +1981,7 @@ This op is basically Gather and Lengths{op} fused together.
 INDICES should contain integers in range 0..N-1 where N is the first dimension
 of DATA. INDICES represent which slices of DATA need to be pulled in.
 
-LENGTHS is a vector that defines slice sizes by first dimention of DATA. Values
+LENGTHS is a vector that defines slice sizes by first dimension of DATA. Values
 belonging to the same segment are aggregated together. sum(LENGTHS) has
 to match INDICES size.
 
@@ -30,7 +30,7 @@ OPERATOR_SCHEMA(AveragePut)
 "(*boolean*): whether or not to clamp inputs to the max inputs allowed")
 .Arg(
 "default_value",
-"(*float*): Optionally provide a default value for recieving empty tensors")
+"(*float*): Optionally provide a default value for receiving empty tensors")
 .SetDoc(R"DOC(
 Consume a value and pushes it to the global stat registry as an average.
 

@@ -62,7 +62,7 @@ OPERATOR_SCHEMA(IncrementPut)
 "(*boolean*): whether or not to clamp inputs to the max inputs allowed")
 .Arg(
 "default_value",
-"(*float*): Optionally provide a default value for recieving empty tensors")
+"(*float*): Optionally provide a default value for receiving empty tensors")
 .SetDoc(R"DOC(
 Consume a value and pushes it to the global stat registry as an sum.
 

@@ -94,7 +94,7 @@ OPERATOR_SCHEMA(StdDevPut)
 "(*boolean*): whether or not to clamp inputs to the max inputs allowed")
 .Arg(
 "default_value",
-"(*float*): Optionally provide a default value for recieving empty tensors")
+"(*float*): Optionally provide a default value for receiving empty tensors")
 .SetDoc(R"DOC(
 Consume a value and pushes it to the global stat registry as an standard deviation.
 
@@ -41,7 +41,7 @@ struct TemplatePutOp : public Operator<CPUContext> {
 input = *Input(0).template data<V>();
 } else if (!has_default_) {
 CAFFE_THROW(
-"Default value must be provided when recieving empty tensors for ",
+"Default value must be provided when receiving empty tensors for ",
 given_name_);
 }
 

@@ -70,7 +70,7 @@ inline void convert(
 static_cast<std::string*>(dst)->assign(src_start, src_end);
 } break;
 case TensorProto_DataType_FLOAT: {
-// TODO(azzolini): avoid copy, use faster convertion
+// TODO(azzolini): avoid copy, use faster conversion
 std::string str_copy(src_start, src_end);
 const char* src_copy = str_copy.c_str();
 char* src_copy_end;
@@ -1180,7 +1180,7 @@ class GatherRangesOp : public Operator<Context> {
 CAFFE_ENFORCE(ranges.dim() == 3, "Ranges must be 3-D");
 CAFFE_ENFORCE(ranges.size(1) > 0, "There has to be at least one range");
 CAFFE_ENFORCE_EQ(
-ranges.size(2), 2, "Ranges last dimention should be of size 2");
+ranges.size(2), 2, "Ranges last dimension should be of size 2");
 
 auto* rawData = static_cast<const char*>(data.raw_data());
 auto* rangesData = ranges.template data<Index>();

@@ -341,7 +341,7 @@ repr::NNModule convertToNNModule(
 }
 
 /// \brief For the construction of the control flow graph we keep track
-/// of a current basic block, which we split up as we come accross control
+/// of a current basic block, which we split up as we come across control
 /// flow operations such as if and while.
 auto bbNode = cfg.createNamedFunction("main");
 
@@ -784,7 +784,7 @@ void OnnxifiTransformer::getBackendId() {
 return;
 }
 // Try to find a backend that support Caffe2 proto. Note that this is quite
-// opportunistic as we don't offcially support Caffe2 proto.
+// opportunistic as we don't officially support Caffe2 proto.
 char buf[kBufferSize];
 for (int i = 0; i < backend_ids_.size(); ++i) {
 size_t len = kBufferSize;

@@ -86,7 +86,7 @@ void fill_with_type(
 
 /*
 * @run_net: the predict net with parameter and input names
-* @input_dims: the input dimentions of all operator inputs of run_net
+* @input_dims: the input dimensions of all operator inputs of run_net
 * @input_types: the input types of all operator inputs of run_net
 */
 class DataRandomFiller : public Filler {

@@ -8,7 +8,7 @@ package caffe2;
 // (2) We do not use extension because that used to create quite some conflicts
 // in Caffe's protobuf design.
 // (3) We have not used any proto3 specific features, such as Any or Map. This
-// is mainly for backward compability purposes but we may consider using
+// is mainly for backward compatibility purposes but we may consider using
 // those in the future.
 
 // ExternalDataProto stores the pointer to the content of TensorProto
@@ -141,7 +141,7 @@ class CRFWithLoss(object):
 to
 
 input_blob: the input sequence in a format T x N x D
-where T is sequence size, N - batch size and D - input dimention
+where T is sequence size, N - batch size and D - input dimension
 ##Only supports batch-size 1##
 
 seq_lengths: blob containing sequence lengths (unused)

@@ -134,7 +134,7 @@ typedef struct {
 
 /*!
 * \brief C Tensor object, manage memory of DLTensor. This data structure is
-* intended to faciliate the borrowing of DLTensor by another framework. It is
+* intended to facilitate the borrowing of DLTensor by another framework. It is
 * not meant to transfer the tensor. When the borrowing framework doesn't need
 * the tensor, it should call the deleter to notify the host that the resource
 * is no longer needed.
@@ -1838,7 +1838,7 @@ class TestOperators(hu.HypothesisTestCase):
 # error increases dramtically when input is close to 0 or 1
 # and it will fail the test.
 # So we only run gradient test in the range of (0.01, 0.99)
-# very occationally, test may fail due to random accumulated error
+# very occasionally, test may fail due to random accumulated error
 # reduce test range to (0.02, 0.98) will improve test stability
 op = core.CreateOperator('Logit', ["X"], ["Y"], eps=eps)
 self.assertDeviceChecks(dc, op, [a], [0])

@@ -76,7 +76,7 @@ class LayerModelHelper(model_helper.ModelHelper):
 self._breakdown_map = None
 
 # Connect Schema to self.net. That particular instance of schmea will be
-# use for generation of the Layers accross the network and would be used
+# use for generation of the Layers across the network and would be used
 # for connection with Readers.
 self._input_feature_schema = schema.NewRecord(
 self.net,

@@ -212,7 +212,7 @@ class BatchLRLoss(ModelLayer):
 )
 
 prediction = self.input_record.prediction()
-# focal loss = (y(1-p) + p(1-y))^gamma * orginal LR loss
+# focal loss = (y(1-p) + p(1-y))^gamma * original LR loss
 # y(1-p) + p(1-y) = y + p - 2 * yp
 y_plus_p = net.Add(
 [prediction, label],
@@ -150,7 +150,7 @@ class FeatureSparseToDense(ModelLayer):
 self.output_schema = schema.Struct(*outputs)
 
 # TODO(amalevich): Consider moving this data to schema, instead
-# Structs doens't support attaching metadata to them and clonning
+# Structs doesn't support attaching metadata to them and clonning
 # will break things badly, but this is the most elegant way to pass
 # this info around. Should we change it or it'll be too much work and
 # not worse it?

@@ -225,7 +225,7 @@ def Caffe2LSTM(args):
 "Max usage differs from current total usage: {} > {}".
 format(stats['max_total'], stats['total'])
 )
-log.warning("This means that costly deallocations occured.")
+log.warning("This means that costly deallocations occurred.")
 
 return time.time() - start_time
 

@@ -604,7 +604,7 @@ def compute_assignments_dp(ranges_sorted, init_assignment, counter=None):
 Return -1 if not found.
 '''
 def is_compatible_all(candidate_range, assignments):
-''' return true if compatiable for all assignments in assignments '''
+''' return true if compatible for all assignments in assignments '''
 return all([is_compatible(candidate_range[1], x, []) for x in assignments])
 
 ii = cur_idx - 1
@@ -170,7 +170,7 @@ class ModelHelper(object):
 be created in the CurrentNameScope with the respect of all parameter
 sharing logic, i.e. 'resolved_name_scope/param_name'.
 
-Parameter sharing logic is going to override CurrentNameScope accoring
+Parameter sharing logic is going to override CurrentNameScope according
 to the rules that are specified through ParameterSharing contexts,
 all ParameterSharing contexts are applied recursively until there are no
 extra overrides present, where on each step the best match will be

@@ -90,7 +90,7 @@ class GradientClipping(NetModifier):
 
 for param, grad in final_param_map.items():
 # currently sparse gradients won't be clipped
-# futher implementation is needed to enable it
+# further implementation is needed to enable it
 if isinstance(grad, core.GradientSlice):
 continue
 

@@ -84,7 +84,7 @@ class ResNetBuilder():
 return self.prev_blob
 
 '''
-Add a "bottleneck" component as decribed in He et. al. Figure 3 (right)
+Add a "bottleneck" component as described in He et. al. Figure 3 (right)
 '''
 
 def add_bottleneck(
@@ -43,7 +43,7 @@ class ModifierContext(object):
 class UseModifierBase(object):
 '''
 context class to allow setting the current context.
-Example useage with layer:
+Example usage with layer:
 modifiers = {'modifier1': modifier1, 'modifier2': modifier2}
 with Modifiers(modifiers):
 modifier = ModifierContext.current().get_modifier('modifier1')

@@ -28,7 +28,7 @@ class NormalizerContext(ModifierContext):
 class UseNormalizer(UseModifierBase):
 '''
 context class to allow setting the current context.
-Example useage with layer:
+Example usage with layer:
 normalizers = {'norm1': norm1, 'norm2': norm2}
 with UseNormalizer(normalizers):
 norm = NormalizerContext.current().get_normalizer('norm1')

@@ -915,7 +915,7 @@ class Caffe2Backend(Backend):
 
 return init_net, pred_net
 
-# wrapper for backwards compatability
+# wrapper for backwards compatibility
 @classmethod
 def onnx_graph_to_caffe2_net(cls, model, device="CPU", opset_version=_known_opset_version):
 return cls._onnx_model_to_caffe2_net(model, device=device, opset_version=opset_version, include_initializers=True)
@@ -239,7 +239,7 @@ class TestDatasetOps(TestCase):
 """
 This is what the flattened fields for this schema look like, along
 with its type. Each one of these fields will be stored, read and
-writen as a tensor.
+written as a tensor.
 """
 expected_fields = [
 ('dense', (np.float32, 3)),

@@ -12,7 +12,7 @@ import hypothesis.strategies as st
 import hypothesis.extra.numpy as hnp
 
 # Basic implementation of gather for axis == 0, shich is lookup of indices
-# in the outer dimention. Keeping it for reference here, although is similar
+# in the outer dimension. Keeping it for reference here, although is similar
 # to more general funciton below.
 def ref_gather_axis0():
 def inner(data, ind):

@@ -165,7 +165,7 @@ class TestPooling(hu.HypothesisTestCase):
 # Currently MIOpen Pooling only supports 2d pooling
 if hiputl.run_in_hip(gc, dc):
 assume(engine != "CUDNN")
-# pad and stride ignored because they will be infered in global_pooling
+# pad and stride ignored because they will be inferred in global_pooling
 op = core.CreateOperator(
 op_type,
 ["X"],
@@ -44,7 +44,7 @@ class UseOptimizer(UseModifierBase):
 with UseOptimizer(optim2):
 brew.func
 
-Example useage with layer:
+Example usage with layer:
 optimizers = {'optim1': optim1, 'optim2': optim2}
 with Optimizers(optimizers):
 optim = OptimizerContext.current().get_optimizer('optim1')

@@ -231,7 +231,7 @@ class LRModificationTestBase(object):
 optimizer.set_lr_injection(0)
 self.assertEqual(optimizer.get_lr_injection(), 0)
 
-# Test that setting the lr_injector properly propogates to the
+# Test that setting the lr_injector properly propagates to the
 # lr_multiplier. Here, we have both lr_injector and norm_ratio that
 # affect the lr_multiplier
 workspace.RunNet(model.net.Proto().name)

@@ -111,9 +111,9 @@ def pipe(
 until a stop is signaled either by the reader or the
 writer.
 output: either a Writer, a Queue or a DataStream that will be
-writen to as long as neither reader nor writer signal
+written to as long as neither reader nor writer signal
 a stop condition. If output is not provided or is None,
-a Queue is created with given `capacity` and writen to.
+a Queue is created with given `capacity` and written to.
 num_threads: number of concurrent threads used for processing and
 piping. If set to 0, no Task is created, and a
 reader is returned instead -- the reader returned will
@@ -348,7 +348,7 @@ class LogBarrier(Regularizer):
 **self.discount_options
 )
 # TODO(xlwang): param might still be negative at the initialization time or
-# slighly negative due to the distributed training. Enforce it's non-negativity
+# slightly negative due to the distributed training. Enforce it's non-negativity
 # for now (at least above machine epsilon)
 param_non_neg = net.NextScopedBlob(param + "_non_neg")
 net.Clip([param], [param_non_neg], min=self.kEpsilon)

@@ -28,7 +28,7 @@ class RegularizerContext(ModifierContext):
 class UseRegularizer(UseModifierBase):
 '''
 context class to allow setting the current context.
-Example useage with layer:
+Example usage with layer:
 regularizers = {'reg1': reg1, 'reg2': reg2}
 with UseRegularizer(regularizers):
 reg = RegularizerContext.current().get_regularizer('reg1')

@@ -126,7 +126,7 @@ class Field(object):
 def field_blobs(self):
 """Return the list of blobs with contents for this Field.
 Values can either be all numpy.ndarray or BlobReference.
-If any of the fields doens't have a blob, throws.
+If any of the fields doesn't have a blob, throws.
 """
 raise NotImplementedError('Field is an abstract class.')
 
@@ -57,7 +57,7 @@ class Session(object):
 
 
 Global Workspace:
-At the beggining of the session, a global workspace is created and kept
+At the beginning of the session, a global workspace is created and kept
 alive for the duration of the session.
 
 

@@ -174,7 +174,7 @@ class TaskGroup(object):
 will finish execution when the last task of the group finishes.
 
 Example:
-# supose that s1 ... s5 are execution steps or nets.
+# suppose that s1 ... s5 are execution steps or nets.
 with TaskGroup() as tg:
 # these tasks go to default node 'local'
 Task(step=s1)

@@ -384,7 +384,7 @@ def run_conv_or_fc(
 # do caching so exercises different code paths from the subsequent
 # runs
 
-# self.ws.run re-creates operator everytime so this test covers
+# self.ws.run re-creates operator every time so this test covers
 # cases when we have multiple nets sharing the same workspace
 test_case.ws.create_blob("X").feed(X, device_option=gc)
 test_case.ws.create_blob("W").feed(W, device_option=gc)
@@ -50,7 +50,7 @@ typedef struct mz_zip_archive mz_zip_archive;
 // the reader can still read files that were compressed.
 // 2. It provides a getRecordOffset function which returns the offset into the
 // raw file where file data lives. If the file was written with PyTorchStreamWriter
-// it is guarenteed to be 64 byte aligned.
+// it is guaranteed to be 64 byte aligned.
 
 // PyTorchReader/Writer handle checking the version number on the archive format
 // and ensure that all files are written to a archive_name directory so they

@@ -24,7 +24,7 @@ OPERATOR_SCHEMA(ClipTensorByScaling)
 .Input(
 2,
 "additional_threshold",
-"An optional additonal threshold to scale the orignal threshold")
+"An optional additional threshold to scale the orignal threshold")
 .Arg("threshold", "Threshold to determine whether to scale down the tensor")
 .Output(
 0,

@@ -437,7 +437,7 @@ void VideoDecoder::decodeLoop(
 // the decoder is still giving us frames.
 int ipacket = 0;
 while ((!eof || gotPicture) &&
-/* either you must decode all frames or decode upto maxFrames
+/* either you must decode all frames or decode up to maxFrames
 * based on status of the mustDecodeAll flag */
 (mustDecodeAll ||
 ((!mustDecodeAll) && (selectiveDecodedFrames < maxFrames))) &&
@@ -283,7 +283,7 @@ Frequently asked questions
 - **How can I contribute as a reviewer?** There is lots of value if
 community developer reproduce issues, try out new functionality, or
 otherwise help us identify or troubleshoot issues. Commenting on
-tasks or pull requests with your enviroment details is helpful and
+tasks or pull requests with your environment details is helpful and
 appreciated.
 - **CI tests failed, what does it mean?** Maybe you need to merge with
 master or rebase with latest changes. Pushing your changes should

@@ -223,7 +223,7 @@ distributed package and ``group_name`` is deprecated as well.
 In other words, if the file is not removed/cleaned up and you call
 :func:`init_process_group` again on that file, failures are expected.
 The rule of thumb here is that, make sure that the file is non-existent or
-empty everytime :func:`init_process_group` is called.
+empty every time :func:`init_process_group` is called.
 
 ::
 

@@ -103,7 +103,7 @@ A suggested workflow is
 - ``dir(model)`` to see all avaialble methods of the model.
 - ``help(model.foo)`` to check what arguments ``model.foo`` takes to run
 
-To help users explore without refering to documentation back and forth, we strongly
+To help users explore without referring to documentation back and forth, we strongly
 recommend repo owners make function help messages clear and succinct. It's also helpful
 to include a minimal working example.
 
@@ -389,7 +389,7 @@ For ``torch.mm(tensor, other)``:
 ('N', 'out')
 
 Inherently, a matrix multiplication performs a dot product over two dimensions,
-collapsing them. When two tensors are matrix-multipled, the contracted dimensions
+collapsing them. When two tensors are matrix-multiplied, the contracted dimensions
 disappear and do not show up in the output tensor.
 
 :func:`torch.mv`, :func:`torch.dot` work in a similar way: name inference does not

@@ -33,7 +33,7 @@ transition between L2 loss and L1 loss is adjustable by a hyper-parameter beta:
 SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta
 |x| - 0.5 * beta otherwise.
 
-SmoothL1 is used in Fast R-CNN and decendants as the loss function for bounding
+SmoothL1 is used in Fast R-CNN and descendants as the loss function for bounding
 box regression.
 
 The loss computed by this op has a flexible form:

@@ -3,7 +3,7 @@
 # This script helps developers set up the ONNX Caffe2 and PyTorch develop environment on devgpu.
 # It creates an virtualenv instance, and installs all the dependencies in this environment.
 # The script will creates a folder called onnx-dev folder under the $HOME directory.
-# onnx, pytorch and caffe2 are installed seperately.
+# onnx, pytorch and caffe2 are installed separately.
 # Please source $HOME/onnx-dev/.onnx_env_init to initialize the development before starting developing.
 
 
@@ -754,7 +754,7 @@ class TestCase(expecttest.TestCase):
 raise TypeError("Was expecting both tensors to be bool type.")
 else:
 if a.dtype == torch.bool and b.dtype == torch.bool:
-# we want to respect precision but as bool doesn't support substraction,
+# we want to respect precision but as bool doesn't support subtraction,
 # boolean tensor has to be converted to int
 a = a.to(torch.int)
 b = b.to(torch.int)

@@ -1983,7 +1983,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
 // output, hence verify the logic. In this test, the cache size is
 // configured to be the same as chunk size and batch size. So the
 // chunk data is written to the cache one by one. Only the current
-// batch is retrieved, the next chunk is writen. Now in iteration 0,
+// batch is retrieved, the next chunk is written. Now in iteration 0,
 // after the first batch is retrieved, when we save the dataset
 // statues, there are three possible scenarios for the writer thread:
 // 1. it hasn't started loading the next chunk data yet, so the

@@ -67,7 +67,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
 auto scaled = compare_scaling(grads);
 ASSERT_NEAR(0, scaled.std().item().toFloat(), 1e-7);
 }
-// Small gradients should be lefted unchanged
+// Small gradients should be left unchanged
 grads = {
 torch::rand({10, 10}).div(10000),
 torch::ones(10).div(500),
@@ -35,7 +35,7 @@ at::Tensor t_def(at::Tensor x);
 
 // given the difference of output vs expected tensor, check whether the
 // difference is within a relative tolerance range. This is a standard way of
-// matching tensor values upto certain precision
+// matching tensor values up to certain precision
 bool checkRtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs);
 bool almostEqual(const at::Tensor& a, const at::Tensor& b);
 

@@ -299,7 +299,7 @@ class DistAutogradTest(RpcAgentTestFixture):
 # nested rpc call to next dst. In return route, receive result tensor t3
 # from next dst and forwarding t3 back to previous calls.
 # For this context in this rank, it expects graph like this:
-# send and recv functions for receving and forwarding t1 and t2:
+# send and recv functions for receiving and forwarding t1 and t2:
 # rpcSendBackward
 # / \
 # t1.recvRpcBackward t2.recvRpcBackward

@@ -355,7 +355,7 @@ def find_test_index(test, selected_tests, find_last_index=False):
 occurrence (first is default)
 
 Returns:
-index of the first or last occurance of the given test
+index of the first or last occurrence of the given test
 """
 idx = 0
 found_idx = -1
@@ -273,7 +273,7 @@ class TestCppApiParity(common.TestCase):
 example_inputs = self._prepare_tensors_for_module_input_or_target(test_params, example_inputs)
 
 # We set all inputs to torch.nn module to requires grad, so that the backward test can always be run.
-# However, we skip embedding layers for now, becuase they only accept LongTensor as inputs,
+# However, we skip embedding layers for now, because they only accept LongTensor as inputs,
 # And LongTensor cannot require grad.
 if test_params.module_name not in ["Embedding", "Embedding_sparse", "EmbeddingBag", "EmbeddingBag_sparse"]:
 example_inputs = [x.requires_grad_() for x in example_inputs]

@@ -1937,7 +1937,7 @@ class TestNN(NNTestCase):
 amount requested by the user the moment the pruning method
 is initialized. This test checks that the expected errors are
 raised whenever the amount is invalid.
-The orginal function runs basic type checking + value range checks.
+The original function runs basic type checking + value range checks.
 It doesn't check the validity of the pruning amount with
 respect to the size of the tensor to prune. That's left to
 `_validate_pruning_amount`, tested below.

@@ -788,7 +788,7 @@ class GraphModePostTrainingQuantTest(QuantizationTestCase):
 self.assertEqual(result_eager, result_script)
 
 def test_observer_with_ignored_function(self):
-r"""Test observers with ignored fucntion and make sure it works in
+r"""Test observers with ignored function and make sure it works in
 graph mode
 """
 # eager mode
Some files were not shown because too many files have changed in this diff.