Update operator documentation with markdown descriptions and interfaces (#8085)

* Update operator documentation with markdown descriptions and interfaces

* Added rest of updated operator documentation to source files

* Committing local changes for rebase

* Fixed bracket typo in sqrt_op.cc file

* Added updated markdown documentation to remaining completed ops
Matthew Inkawhich 2018-06-15 19:02:24 -04:00 committed by Edward Z. Yang
parent d968614502
commit b10c94b507
38 changed files with 3661 additions and 453 deletions


@@ -39,12 +39,50 @@ OPERATOR_SCHEMA(Abs)
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Calculates the absolute value of the given input tensor, element-wise.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/abs_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Abs",
["X"],
["Y"]
)
workspace.FeedBlob("X", np.random.randn(5).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [ 0.3005476 1.551666 -1.3591481 0.39191285 -0.21866608]
Y: [0.3005476 1.551666 1.3591481 0.39191285 0.21866608]
```
</details>
)DOC")
.Input(0, "input", "Input tensor")
.Input(0, "X", "*(type: Tensor<float\>)* Input tensor.")
.Output(
0,
"output",
"The absolute value of the input tensor computed element-wise")
"Y",
"*(type: Tensor`<float>`)* Absolute value of input element-wise.")
.InheritOnnxSchema("Abs");
OPERATOR_SCHEMA(AbsGradient).NumInputs(2).NumOutputs(1).IdenticalTypeAndShape();


@@ -97,34 +97,144 @@ OPERATOR_SCHEMA(ArgMax)
.NumOutputs(1)
.TensorInferenceFunction(InferTensor)
.SetDoc(R"DOC(
Retrive the argmax of the axis dimension. Given an input tensor of shape
[a_0, a_1, ..., a_{n-1}] and two arguments axis as int and keepdims as bool,
returns one output:
- Index tensor which contains the indices of the largest element. It has the
same dims as X.dims() with the dimension along axis equals 1 when
keepdims == true otherwise removed.
Retrieve the argmax of an axis dimension specified by the `axis`
argument. Given an input tensor and two arguments (`axis` and
`keepdims`), returns a tensor containing the indices of the largest
element along the given axis. If the `keepdims` arg is *True* (default),
the shape of the output tensor matches the input tensor except the
`axis` dimension equals 1. Else, the `axis` dimension of the output
tensor is removed.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/arg_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ArgMax",
["X"],
["Indices"],
axis=2,
keepdims=False
)
workspace.FeedBlob("X", (np.random.randint(10, size=(3,3,3))).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Indices:", workspace.FetchBlob("Indices"))
```
**Result**
```
X: [[[4. 9. 6.]
[6. 6. 1.]
[9. 5. 4.]]
[[6. 7. 4.]
[7. 9. 1.]
[3. 2. 8.]]
[[3. 4. 6.]
[5. 2. 7.]
[1. 5. 7.]]]
Indices: [[1 0 0]
[1 1 2]
[2 2 2]]
```
</details>
)DOC")
.Input(0, "X", "Tenor of shape [a_0, a_1, ..., a_{n-1}].")
.Output(0, "Indices", "Tensor of indices for the largest values.")
.Arg("axis", "The axis to get argmax.")
.Arg("keepdims", "Whether to keep the axis dim in the output.");
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(0,
"Indices",
"*(type: Tensor`<float>`)* Tensor of indices for the largest values.")
.Arg("axis", "*(type: int; default: -1)* The axis to get argmax.")
.Arg("keepdims",
"*(type: bool; default: True)* If True (default), the output tensor "
"shape will match the input tensor shape except the `axis` dimension "
"equals 1. Else, the `axis` dimension of the output tensor is removed.");
OPERATOR_SCHEMA(ArgMin)
.NumInputs(1)
.NumOutputs(1)
.TensorInferenceFunction(InferTensor)
.SetDoc(R"DOC(
Retrive the argmin of the axis dimension. Given an input tensor of shape
[a_0, a_1, ..., a_{n-1}] and two arguments axis as int and keepdims as bool,
returns one output:
- Index tensor which contains the indices of the largest element. It has the
same dims as X.dims() with the dimension along axis equals 1 when
keepdims == true otherwise removed.
Retrieve the argmin of an axis dimension specified by the `axis`
argument. Given an input tensor and two arguments (`axis` and
`keepdims`), returns a tensor containing the indices of the smallest
element along the given axis. If the `keepdims` arg is *True* (default),
the shape of the output tensor matches the input tensor except the
`axis` dimension equals 1. Else, the `axis` dimension of the output
tensor is removed.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/arg_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ArgMin",
["X"],
["Indices"],
axis=1
)
workspace.FeedBlob("X", (np.random.randint(10, size=(5,5))).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Indices:", workspace.FetchBlob("Indices"))
```
**Result**
```
X: [[9. 4. 6. 4. 1.]
[5. 9. 8. 3. 4.]
[6. 1. 0. 2. 9.]
[7. 8. 2. 4. 9.]
[3. 9. 4. 9. 4.]]
Indices: [[4]
[3]
[2]
[2]
[0]]
```
</details>
)DOC")
.Input(0, "X", "Tenor of shape [a_0, a_1, ..., a_{n-1}].")
.Output(0, "Indices", "Tensor of indices for the largest values.")
.Arg("axis", "The axis to get argmin.")
.Arg("keepdims", "Whether to keep the axis dim in the output.");
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(0,
"Indices",
"*(type: Tensor`<float>`)* Tensor of indices for the smallest values.")
.Arg("axis", "*(type: int; default: -1)* The axis to get argmin.")
.Arg("keepdims",
"*(type: bool; default: True)* If True (default), the output tensor "
"shape will match the input tensor shape except the `axis` dimension "
"equals 1. Else, the `axis` dimension of the output tensor is removed.");
NO_GRADIENT(ArgMax);
NO_GRADIENT(ArgMin);


@@ -97,25 +97,87 @@ OPERATOR_SCHEMA(Cast)
return out;
})
.SetDoc(R"DOC(
The operator casts the elements of a given input tensor to a data type
specified by the 'to' argument and returns an output tensor of the same size in
the converted type. The 'to' argument must be one of the data types specified
in the 'DataType' enum field in the TensorProto message. If the 'to' argument
is not provided or is not one of the enumerated types in DataType, Caffe2
throws an Enforce error.
Casts the elements of a given input tensor to a data type specified by the `to`
argument and returns an output tensor of the same size in the converted type.
The `to` argument must be one of the data types specified in the *DataType*
enum field in the TensorProto message (see below). If the `to` argument is not
provided or is not one of the enumerated types in *DataType*, Caffe2 throws an
Enforce error.
NOTE: Casting to and from strings is not supported yet.
TensorProto *DataType* field:
```
message TensorProto {
...
enum DataType {
UNDEFINED = 0;
FLOAT = 1; // float
INT32 = 2; // int
BYTE = 3; // BYTE, when deserialized, is going to be restored as uint8.
STRING = 4; // string
BOOL = 5; // bool
UINT8 = 6; // uint8_t
INT8 = 7; // int8_t
UINT16 = 8; // uint16_t
INT16 = 9; // int16_t
INT64 = 10; // int64_t
FLOAT16 = 12; // caffe2::__f16, caffe2::float16
DOUBLE = 13; // double
}
```
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/cast_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Cast",
["X"],
["Y"],
to=2
)
workspace.FeedBlob("X", (np.random.rand(3,3)).astype(np.float32)*10)
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [[9.436466 5.8529844 0.54932857]
[1.1583444 2.9936118 0.22950427]
[3.9143739 3.4040766 8.905341 ]]
Y: [[9 5 0]
[1 2 0]
[3 3 8]]
```
</details>
)DOC")
.Arg(
"to",
"The data type to which the elements of the input tensor are cast."
"Strictly must be one of the types from DataType enum in TensorProto")
.Input(0, "input", "Input tensor to be cast.")
"*(type: int)* Data type to which the elements of the input tensor are "
"cast. Strictly must be one of the types from *DataType* enum in "
"TensorProto.")
.Input(0, "X", "*(type: Tensor)* Input tensor to be cast.")
.Output(
0,
"output",
"Output tensor with the same shape as input with type "
"specified by the 'to' argument")
"Y",
"*(type: Tensor`<'to' type>`)* Output tensor with the same shape as "
"input with type specified by the `to` argument.")
.InheritOnnxSchema("Cast");
// Some Casts are compatible with gradients, but for now we don't support it
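The `to=2` in the example above is the raw integer key for *INT32* in the *DataType* enum. A minimal sketch (assuming the protobuf bindings in `caffe2.proto.caffe2_pb2`) that passes the enum value instead of a hard-coded integer:
```
import numpy as np
from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2

# Sketch: use the TensorProto.DataType enum instead of a magic number.
workspace.ResetWorkspace()
op = core.CreateOperator(
    "Cast",
    ["X"],
    ["Y"],
    to=caffe2_pb2.TensorProto.INT32,  # equivalent to to=2
)
workspace.FeedBlob("X", (np.random.rand(3, 3) * 10).astype(np.float32))
workspace.RunOperatorOnce(op)
print("Y dtype:", workspace.FetchBlob("Y").dtype)  # expected: int32
```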


@@ -11,12 +11,59 @@ OPERATOR_SCHEMA(Ceil)
.NumOutputs(1)
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Ceil takes one input data (Tensor<T>) and produces one output data
(Tensor<T>) where the ceil function, y = ceil(x), is applied to
the tensor elementwise. Currently supports only float32.
Element-wise application of the ceil function ($y=ceil(x)$) to the input tensor
`X`. Output tensor shape is the same as the input tensor.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/ceil_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Ceil",
["X"],
["X"],
)
workspace.FeedBlob("X", (np.random.uniform(-10, 10, (5,5))).astype(np.float32))
print("X before running op:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("X after running op:", workspace.FetchBlob("X"))
```
**Result**
```
X before running op:
[[ 8.44598 -6.5098248 -2.2993476 -7.6859694 0.58566964]
[-7.846551 -0.03689406 6.9362907 -4.0521703 4.4969673 ]
[ 0.33355865 -7.895527 -8.393201 9.374202 -2.3930092 ]
[-6.3061996 3.1403487 3.782099 -8.516556 -2.8387244 ]
[-2.0164998 4.7663913 -3.422966 0.3636999 8.75713 ]]
X after running op:
[[ 9. -6. -2. -7. 1.]
[-7. -0. 7. -4. 5.]
[ 1. -7. -8. 10. -2.]
[-6. 4. 4. -8. -2.]
[-2. 5. -3. 1. 9.]]
```
</details>
)DOC")
.Input(0, "X", "ND input tensor")
.Output(0, "Y", "ND input tensor");
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.");
// TODO: Write gradient for this when needed
GRADIENT_NOT_IMPLEMENTED_YET(Ceil);


@@ -40,24 +40,72 @@ OPERATOR_SCHEMA(Clip)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Clip operator limits the given input within an interval. The interval is
specified with arguments 'min' and 'max'. They default to
numeric_limits::lowest() and numeric_limits::max() respectively. The clipping
operation can be done in in-place fashion too, where the input and output blobs
are the same.
This operator limits the given input within an interval. The interval is
specified by the `min` and `max` arguments. They default to
*numeric_limits::lowest()* and *numeric_limits::max()* respectively. The
clipping operation can be done in an in-place fashion by using the same output
blob as the input blob.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/clip_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Clip",
["X"],
["Y"],
min=20.0,
max=60.0
)
workspace.FeedBlob("X", (np.random.randint(100, size=(5,5))).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [[45. 16. 59. 99. 48.]
[12. 44. 46. 82. 28.]
[ 1. 91. 18. 9. 71.]
[24. 37. 61. 12. 81.]
[36. 38. 30. 84. 40.]]
Y: [[45. 20. 59. 60. 48.]
[20. 44. 46. 60. 28.]
[20. 60. 20. 20. 60.]
[24. 37. 60. 20. 60.]
[36. 38. 30. 60. 40.]]
```
</details>
)DOC")
.Arg("min", "Minimum value, under which element is replaced by min")
.Arg("max", "Maximum value, above which element is replaced by max")
.Arg("min", "*(type: float)* Minimum value, under which element is "
"replaced by min (default=*numeric_limits::lowest()*).")
.Arg("max", "*(type: float)* Maximum value, under which element is "
"replaced by max (default=*numeric_limits::max()*).")
.Input(
0,
"input",
"Input tensor (Tensor<float>) containing elements to be"
"clipped")
.Input(
1,
"output",
"Output tensor (Tensor<float>) containing clipped"
"input elements")
"X",
"*(Tensor`<float>`)* Input tensor within range "
"[*numeric_limits::lowest()*, *numeric_limits::max()*].")
.Output(
0,
"Y",
"*(Tensor`<float>`)* Output tensor clipped within range [`min`, `max`].")
.InheritOnnxSchema("Clip");
OPERATOR_SCHEMA(ClipGradient).NumInputs(2).NumOutputs(1).AllowInplace({{1, 0}});


@@ -156,14 +156,13 @@ REGISTER_CPU_OPERATOR(Concat, ConcatOp<CPUContext>);
OPERATOR_SCHEMA(Concat)
.NumInputs(1, INT_MAX)
.NumOutputs(2)
.Arg("axis", "Which axis to concat on")
.Arg("axis", "*(type: int; default: -1)* Axis to concatenate on.")
.Arg(
"order",
"Either NHWC or NCHW, will concat on C axis, defaults to NCHW")
"*(type: string; default='NCHW')* Order of blob dimensions. Concats on the C dimension.")
.Arg(
"add_axis",
"Pass 1 to add the axis specified in arg 'axis' to all "
"input tensors")
"*(type: int)* Pass non-zero integer to add the axis specified in `axis` to all input tensors.")
.TensorInferenceFunction(OpSchema::NeedsAllInputShapes(
[](const OperatorDef& def,
const vector<TensorShape>& in) {
@@ -238,9 +237,128 @@ OPERATOR_SCHEMA(Concat)
}))
.CostInferenceFunction(CostInferenceForConcat)
.DeviceInferenceFunction(concatOpDevInfer)
.SetDoc("Concatenate a list of tensors into a single tensor")
.Output(0, "concat_result", "Concatenated tensor")
.Output(1, "split_info", "The dimensions of the inputs.")
.SetDoc(R"DOC(
Concatenate a list of tensors into a single tensor. Similar functionality to
Numpy's [concatenate](https://docs.scipy.org/doc/numpy/reference/generated/numpy.concatenate.html)
function. The `axis` argument specifies what axis along which the arrays will be concatenated.
When set to non-zero (default=0), the `add_axis` argument adds the axis specified in `axis` to
all input tensors.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/concat_split_op.cc
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/concat_split_op.h
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Concat",
["X1", "X2"],
["Y", "split_info"],
axis=0
)
workspace.FeedBlob("X1", np.array([[1,2],[3,4]]))
workspace.FeedBlob("X2", np.array([[5,6]]))
print("X1:", workspace.FetchBlob("X1"))
print("X2:", workspace.FetchBlob("X2"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("split_info:", workspace.FetchBlob("split_info"))
```
**Result**
```
X1: [[1 2]
[3 4]]
X2: [[5 6]]
Y: [[1 2]
[3 4]
[5 6]]
split_info: [2 1]
```
</details>
<details>
<summary> <b>Example 2</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Concat",
["X1", "X2"],
["Y", "split_info"],
add_axis=1,
axis=3
)
workspace.FeedBlob("X1", np.random.randint(10, size=(1, 1, 5, 5))) # NCHW
workspace.FeedBlob("X2", np.random.randint(10, size=(1, 1, 5, 5))) # NCHW
print("X1:", workspace.FetchBlob("X1"))
print("X2:", workspace.FetchBlob("X2"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("split_info:", workspace.FetchBlob("split_info"))
```
**Result**
```
X1: [[[[1 8 3 9 0]
[6 4 6 5 6]
[3 9 1 9 9]
[5 1 0 7 7]
[9 4 0 0 9]]]]
X2: [[[[7 0 2 6 1]
[3 9 4 0 3]
[5 3 8 9 4]
[3 4 2 1 0]
[0 8 8 8 1]]]]
Y: [[[[[1 8 3 9 0]
[7 0 2 6 1]]
[[6 4 6 5 6]
[3 9 4 0 3]]
[[3 9 1 9 9]
[5 3 8 9 4]]
[[5 1 0 7 7]
[3 4 2 1 0]]
[[9 4 0 0 9]
[0 8 8 8 1]]]]]
split_info: [1 1]
```
</details>
)DOC")
.Input(0, "X1, X2, ...", "*(type: Tensor`<float>`)* List of input tensors.")
.Output(0, "concat_result", "*(type: Tensor`<float>`)* Concatenated tensor.")
.Output(1, "split_info", "*(type: Tensor`<int>`)* The dimensions of the inputs.")
.InheritOnnxSchema("Concat");
// Backward compatibility names.


@@ -38,12 +38,53 @@ OPERATOR_SCHEMA(Cos)
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Calculates the cosine of the given input tensor, element-wise.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/cos_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Cos",
["X"],
["Y"]
)
workspace.FeedBlob("X", np.random.rand(5).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [0.6816719 0.76771533 0.933932 0.01404487 0.11862425]
Y: [0.7765203 0.71949923 0.5946774 0.99990135 0.9929724 ]
```
</details>
)DOC")
.Input(0, "input", "Input tensor")
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(
0,
"output",
"The cosine of the input tensor computed element-wise");
"Y",
"*(type: Tensor`<float>`)* Output tensor calculated as the cosine of the input tensor, element-wise.");
OPERATOR_SCHEMA(CosGradient).NumInputs(2).NumOutputs(1).IdenticalTypeAndShape();


@@ -1,8 +1,129 @@
#include "counter_ops.h"
#include "caffe2/core/blob_serialization.h"
namespace caffe2 {
const char* githubLinks = R"DOC(
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/counter_ops.cc
)DOC";
const char* kCountExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
createcounter_op = core.CreateOperator(
"CreateCounter",
[],
["counter"],
init_count=5
)
retrievecount_op = core.CreateOperator(
"RetrieveCount",
["counter"],
["count"]
)
checkcounterdone_op = core.CreateOperator(
"CheckCounterDone",
["counter"],
["done"]
)
countup_op = core.CreateOperator(
"CountUp",
["counter"],
["previous_count"],
)
countdown_op = core.CreateOperator(
"CountDown",
["counter"],
["done"],
)
resetcounter_op = core.CreateOperator(
"ResetCounter",
["counter"],
["previous_count"],
init_count=3
)
# Create counter
workspace.RunOperatorOnce(createcounter_op)
print("'counter' pointer:", workspace.FetchBlob("counter"))
# Retrieve initial counter value
workspace.RunOperatorOnce(retrievecount_op)
print("Initial 'count':", workspace.FetchBlob("count"))
# Check if counter is done
workspace.RunOperatorOnce(checkcounterdone_op)
print("Initial 'done' value:", workspace.FetchBlob("done"))
# Test CountUp operator
print("\nTesting CountUp operator...")
for i in range(5):
workspace.RunOperatorOnce(countup_op)
print("'previous_count' after CountUp:", workspace.FetchBlob("previous_count"))
workspace.RunOperatorOnce(retrievecount_op)
print("'count' value after CountUp test:", workspace.FetchBlob("count"))
# Test CountDown operator
print("\nTesting CountDown operator...")
for i in range(11):
workspace.RunOperatorOnce(countdown_op)
workspace.RunOperatorOnce(retrievecount_op)
print("'count' value after CountDown: {}\t'done' value: {}".format(workspace.FetchBlob("count"), workspace.FetchBlob("done")))
```
**Result**
```
'counter' pointer: counter, a C++ native class of type std::__1::unique_ptr<caffe2::Counter<long long>, std::__1::default_delete<caffe2::Counter<long long> > >.
Initial 'count': 5
Initial 'done' value: False
Testing CountUp operator...
'previous_count' after CountUp: 5
'previous_count' after CountUp: 6
'previous_count' after CountUp: 7
'previous_count' after CountUp: 8
'previous_count' after CountUp: 9
'count' value after CountUp test: 10
Testing CountDown operator...
'count' value after CountDown: 9 'done' value: False
'count' value after CountDown: 8 'done' value: False
'count' value after CountDown: 7 'done' value: False
'count' value after CountDown: 6 'done' value: False
'count' value after CountDown: 5 'done' value: False
'count' value after CountDown: 4 'done' value: False
'count' value after CountDown: 3 'done' value: False
'count' value after CountDown: 2 'done' value: False
'count' value after CountDown: 1 'done' value: False
'count' value after CountDown: 0 'done' value: False
'count' value after CountDown: -1 'done' value: True
```
</details>
)DOC";
namespace {
/**
* @brief CounterSerializer is the serializer for Counter type.
@@ -74,60 +195,98 @@ OPERATOR_SCHEMA(CreateCounter)
.NumInputs(0)
.NumOutputs(1)
.SetDoc(R"DOC(
Creates a count-down counter with initial value specified by the 'init_count'
Creates a count-down counter with initial value specified by the `init_count`
argument.
)DOC")
.Output(0, "counter", "A blob pointing to an instance of a new counter.")
.Arg("init_count", "Initial count for the counter, must be >= 0.");
)DOC" + (string) githubLinks + (string) kCountExample)
.Output(
0,
"counter",
"*(type: Tensor`<ptr>`)* A blob pointing to an instance of a new counter.")
.Arg(
"init_count",
"*(type: int; default: 0)* Initial count for the counter, must be >= 0.");
OPERATOR_SCHEMA(ResetCounter)
.NumInputs(1)
.NumOutputs(0, 1)
.SetDoc(R"DOC(
Resets a count-down counter with initial value specified by the 'init_count'
Resets a count-down counter with initial value specified by the `init_count`
argument.
)DOC")
.Input(0, "counter", "A blob pointing to an instance of a new counter.")
.Output(0, "previous_value", "(optional) Previous value of the counter.")
.Arg("init_count", "Resets counter to this value, must be >= 0.");
)DOC" + (string) githubLinks + (string) kCountExample)
.Input(
0,
"counter",
"*(type: Tensor`<ptr>`)* A blob pointing to an instance of a counter.")
.Output(
0,
"previous_value",
"*(type: int)* [OPTIONAL] count value BEFORE this operation.")
.Arg(
"init_count",
"*(type: int; default: 0)* Resets counter to this value, must be >= 0.");
OPERATOR_SCHEMA(CountDown)
.NumInputs(1)
.NumOutputs(1)
.SetDoc(R"DOC(
If the internal count value > 0, decreases count value by 1 and outputs false,
otherwise outputs true.
)DOC")
.Input(0, "counter", "A blob pointing to an instance of a counter.")
.Output(0, "done", "false unless the internal count is zero.");
If the internal count value > 0, decreases count value by 1 and outputs False,
otherwise outputs True.
)DOC" + (string) githubLinks + (string) kCountExample)
.Input(
0,
"counter",
"*(type: Tensor`<ptr>`)* A blob pointing to an instance of a counter.")
.Output(
0,
"done",
"*(type: bool)* False unless the internal count is zero.");
OPERATOR_SCHEMA(CheckCounterDone)
.NumInputs(1)
.NumOutputs(1)
.SetDoc(R"DOC(
If the internal count value <= 0, outputs true, otherwise outputs false,
)DOC")
.Input(0, "counter", "A blob pointing to an instance of a counter.")
.Output(0, "done", "true if the internal count is zero or negative.");
If the internal count value <= 0, outputs true, otherwise outputs false.
)DOC" + (string) githubLinks + (string) kCountExample)
.Input(
0,
"counter",
"*(type: Tensor`<ptr>`)* A blob pointing to an instance of a counter.")
.Output(
0,
"done",
"*(type: bool)* True if the internal count is zero or negative, otherwise False.");
OPERATOR_SCHEMA(CountUp)
.NumInputs(1)
.NumOutputs(1)
.SetDoc(R"DOC(
Increases count value by 1 and outputs the previous value atomically
)DOC")
.Input(0, "counter", "A blob pointing to an instance of a counter.")
.Output(0, "previous_count", "count value BEFORE this operation");
Increases count value by 1 and outputs the previous value atomically.
)DOC" + (string) githubLinks + (string) kCountExample)
.Input(
0,
"counter",
"*(type: Tensor`<ptr>`)* A blob pointing to an instance of a counter.")
.Output(
0,
"previous_count",
"*(type: int)* Count value BEFORE this operation.");
OPERATOR_SCHEMA(RetrieveCount)
.NumInputs(1)
.NumOutputs(1)
.ScalarType(TensorProto::INT64)
.SetDoc(R"DOC(
Retrieve the current value from the counter.
)DOC")
.Input(0, "counter", "A blob pointing to an instance of a counter.")
.Output(0, "count", "current count value.");
Retrieve the current value from the counter as an integer.
)DOC" + (string) githubLinks + (string) kCountExample)
.Input(
0,
"counter",
"*(type: Tensor`<ptr>`)* A blob pointing to an instance of a counter.")
.Output(
0,
"count",
"*(type: int)* Current count value.");
SHOULD_NOT_DO_GRADIENT(CreateCounter);
SHOULD_NOT_DO_GRADIENT(ResetCounter);


@@ -474,12 +474,95 @@ OPERATOR_SCHEMA(DotProduct)
.NumOutputs(1)
.IdenticalTypeAndShapeOfInputDim(0, 0)
.SetDoc(R"DOC(
Given two input float tensors X, Y, and produces one output float tensor
of the dot product between X and Y.
Computes and outputs the dot product of the two input float tensors `X` and `Y`.
Note that `X` and `Y` must be either 1D or 2D, and they must be the same shape.
The output tensor is 1D, which represents either the product of each element in
a respective dimension if the inputs are 1D, or the sum of the products in a
given dimension if the inputs are 2D matrices. Note that the actual dot product
is a scalar value, which is effectively the sum of the elements in the 1D
output tensor.
For 1D inputs:
Given two vectors $X = [x_0, x_1, x_2]$ and $Y = [y_0, y_1, y_2]$; $Z = [x_0 * y_0, x_1 * y_1, x_2 * y_2]$
For 2D inputs:
Given two matrices:
$$X = [[x_0^0, x_1^0, x_2^0], \\ [x_0^1, x_1^1, x_2^1], \\ [x_0^2, x_1^2, x_2^2], \\ ..., \\ [x_0^n, x_1^n, x_2^n]]$$
and
$$Y = [[y_0^0, y_1^0, y_2^0], \\ [y_0^1, y_1^1, y_2^1], \\ [y_0^2, y_1^2, y_2^2], \\ ..., \\ [y_0^n, y_1^n, y_2^n]]$$
then
$$Z = \biggl[\Big((x_0^0 * y_0^0) + (x_1^0 * y_1^0) + (x_2^0 * y_2^0)\Big), \\ \Big((x_0^1 * y_0^1) + (x_1^1 * y_1^1) + (x_2^1 * y_2^1)\Big), \\ \Big((x_0^2 * y_0^2) + (x_1^2 * y_1^2) + (x_2^2 * y_2^2)\Big), \\ ..., \\ \Big((x_0^n * y_0^n) + (x_1^n * y_1^n) + (x_2^n * y_2^n)\Big)\biggr]$$
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/distance_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"DotProduct",
["X", "Y"],
["Z"]
)
workspace.FeedBlob("X", np.random.randint(20, size=(5)).astype(np.float32))
workspace.FeedBlob("Y", np.random.randint(20, size=(5)).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"))
print("Y:\n", workspace.FetchBlob("Y"))
workspace.RunOperatorOnce(op)
print("Z:\n", workspace.FetchBlob("X"))
workspace.ResetWorkspace()
workspace.FeedBlob("X", np.random.randint(10, size=(3,3)).astype(np.float32))
workspace.FeedBlob("Y", np.random.randint(10, size=(3,3)).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"))
print("Y:\n", workspace.FetchBlob("Y"))
workspace.RunOperatorOnce(op)
print("Z:\n", workspace.FetchBlob("Z"))
```
**Result**
```
X:
[ 2. 15. 2. 7. 12.]
Y:
[ 3. 12. 9. 3. 18.]
Z:
[  6. 180.  18.  21. 216.]
X:
[[2. 0. 4.]
[7. 7. 4.]
[7. 9. 9.]]
Y:
[[2. 0. 8.]
[9. 6. 1.]
[7. 8. 0.]]
Z:
[ 36. 109. 121.]
```
</details>
)DOC")
.Input(0, "X", "1D or 2D input tensor")
.Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
.Output(0, "Z", "1D output tensor")
.Input(0, "X", "*(type: Tensor`<float>`)* 1D or 2D input tensor.")
.Input(1, "Y", "*(type: Tensor`<float>`)* 1D or 2D input tensor (must have the same shape as X).")
.Output(0, "Z", "*(type: Tensor`<float>`)* 1D output tensor.")
.CostInferenceFunction(
OpSchema::CostInferenceFunctionType(CostInferenceForDotProduct));
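For the 2D case, each output element is the dot product of the corresponding rows of `X` and `Y`, which is easy to cross-check with NumPy. A minimal sketch (imports assumed; semantics as stated in the doc above):
```
import numpy as np
from caffe2.python import core, workspace

# Sketch: for 2D inputs, DotProduct should equal the row-wise sum of products.
workspace.ResetWorkspace()
op = core.CreateOperator("DotProduct", ["X", "Y"], ["Z"])
X = np.random.randint(10, size=(3, 3)).astype(np.float32)
Y = np.random.randint(10, size=(3, 3)).astype(np.float32)
workspace.FeedBlob("X", X)
workspace.FeedBlob("Y", Y)
workspace.RunOperatorOnce(op)
Z = workspace.FetchBlob("Z")
print("Matches (X * Y).sum(axis=1):", np.allclose(Z, (X * Y).sum(axis=1)))
```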


@@ -77,22 +77,85 @@ OPERATOR_SCHEMA(Dropout)
return out;
})
.SetDoc(R"DOC(
Dropout takes one input data (Tensor<float>) and produces two Tensor outputs,
output (Tensor<float>) and mask (Tensor<bool>). Depending on whether it is in
test mode or not, the output Y will either be a random dropout, or a simple
copy of the input. Note that our implementation of Dropout does scaling in
the training phase, so during testing nothing needs to be done.
`Dropout` takes one input data tensor (`X`) and produces two tensor outputs, `Y` and
`mask`. If the `is_test` argument is zero (default=0), the output `Y` will be the input
with random elements zeroed. The probability that a given element is zeroed is
determined by the `ratio` argument.
If the `is_test` argument is set to non-zero, the output `Y` is exactly the same as the
input `X`. Note that outputs are scaled by a factor of $\frac{1}{1-ratio}$ during
training, so that during test time, we can simply compute an identity function. This
scaling is important because we want the output at test time to equal the expected value
at training time. Dropout has been proven to be an effective regularization technique to
prevent overfitting during training.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/dropout_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/dropout_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Dropout",
["X"],
["Y"] + ["mask"],
ratio=0.5,
is_test=0
)
workspace.FeedBlob("X", np.random.randint(10, size=(5, 5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("mask:", workspace.FetchBlob("mask"))
```
**Result**
```
X: [[5. 4. 3. 6. 9.]
[2. 1. 8. 0. 9.]
[7. 3. 0. 6. 3.]
[1. 8. 2. 6. 4.]
[6. 2. 6. 4. 0.]]
Y: [[ 0. 0. 0. 12. 18.]
[ 0. 0. 16. 0. 0.]
[ 0. 0. 0. 12. 6.]
[ 0. 0. 4. 0. 0.]
[12. 0. 0. 0. 0.]]
mask: [[False False False True True]
[False False True True False]
[False False True True True]
[False False True False False]
[ True False False False False]]
```
</details>
)DOC")
.Arg("ratio", "(float, default 0.5) the ratio of random dropout")
.Arg("ratio", "*(type: float; default: 0.5)* Probability of an element to be zeroed.")
.ArgIsTest(
"(int) if nonzero, run dropout in test mode where "
"the output is simply Y = X.")
.Input(0, "data", "The input data as Tensor.")
.Output(0, "output", "The output.")
"*(type: int; default: 0)* If zero (train mode), perform dropout. If non-zero"
"(test mode), Y = X.")
.Input(0, "X", "*(type: Tensor`<float>`)* Input data tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.")
.Output(
1,
"mask",
"The output mask. If is_test is nonzero, this output is not filled.")
"*(type: Tensor`<bool>`)* The output mask containing boolean values for"
"each element, signifying which elements are dropped out. If `is_test` is"
"nonzero, this output is not filled.")
.InheritOnnxSchema("Dropout");
OPERATOR_SCHEMA(DropoutGrad)


@@ -16,38 +16,230 @@ equal shape is specified by the argument "axis", and if it is not set, suffix
matching is assumed. 1-dim expansion doesn't work yet.
For example, the following tensor shapes are supported (with broadcast=1):
```
shape(A) = (2, 3, 4, 5), shape(B) = (,), i.e. B is a scalar
shape(A) = (2, 3, 4, 5), shape(B) = (5,)
shape(A) = (2, 3, 4, 5), shape(B) = (4, 5)
shape(A) = (2, 3, 4, 5), shape(B) = (3, 4), with axis=1
shape(A) = (2, 3, 4, 5), shape(B) = (2), with axis=0
```
Argument `broadcast=1` needs to be passed to enable broadcasting.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elementwise_op_schema.cc
)DOC";
std::function<void(OpSchema&)> MathDocGenerator(const char* name) {
const char* kAddExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Add",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([[1,2],[3,4]]))
workspace.FeedBlob("B", np.array([[5,6],[7,8]]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[1 2]
[3 4]]
B:
[[5 6]
[7 8]]
C:
[[ 6 8]
[10 12]]
```
</details>
)DOC";
const char* kSubExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sub",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([[10,12],[4,14]]))
workspace.FeedBlob("B", np.array([[5,16],[1,19]]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[10 12]
[ 4 14]]
B:
[[ 5 16]
[ 1 19]]
C:
[[ 5 -4]
[ 3 -5]]
```
</details>
)DOC";
const char* kMulExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Mul",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([[1,2],[3,4]]))
workspace.FeedBlob("B", np.array([[5,6],[7,8]]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[1 2]
[3 4]]
B:
[[5 6]
[7 8]]
C:
[[ 5 12]
[21 32]]
```
</details>
)DOC";
const char* kDivExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Div",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([[18,8],[2,9]]))
workspace.FeedBlob("B", np.array([[9,2],[3,2]]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[18 8]
[ 2 9]]
B:
[[9 2]
[3 2]]
C:
[[2 4]
[0 4]]
```
</details>
)DOC";
std::function<void(OpSchema&)> MathDocGenerator(const char* name, const char* extra) {
return [=](OpSchema& schema) {
string doc = R"DOC(
Performs element-wise binary {name} (with limited broadcast support).
{broadcast_doc})DOC";
{broadcast_doc}
{extra}
)DOC";
ReplaceAll(doc, "{name}", name);
ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
ReplaceAll(doc, "{extra}", extra);
schema.SetDoc(doc);
schema.Arg("broadcast", "Pass 1 to enable broadcasting");
schema.Arg("broadcast", "*(type: int; default: 0)* Pass 1 to enable broadcasting");
schema.Arg(
"axis",
"If set, defines the broadcast dimensions. See doc for details.");
"*(type: int; default: -1)* Axis to concatenate on.");
schema.Input(
0,
"A",
"First operand, should share the type with the second operand.");
"*(type: Tensor`<float>`)* First operand, should share the type with the second operand.");
schema.Input(
1,
"B",
"Second operand. With broadcasting can be of smaller size than A. "
"If broadcasting is disabled it should be of the same size.");
schema.Output(0, "C", "Result, has same dimensions and type as A");
"*(type: Tensor`<float>`)* Second operand. With broadcasting can be of smaller size than A. "
"If broadcasting is disabled it should be of the same size as A.");
schema.Output(0, "C", "*(type: Tensor`<float>`)* Output tensor with same dimensions and type as A.");
};
}
@@ -81,7 +273,7 @@ OPERATOR_SCHEMA(Add)
.AllowInplace({{0, 0}, {1, 0}})
.CostInferenceFunction(PointwiseCostInference<1>)
.TensorInferenceFunction(ElementwiseOpShapeInference)
.FillUsing(MathDocGenerator("addition"))
.FillUsing(MathDocGenerator("addition", kAddExample))
.InheritOnnxSchema("Add");
OPERATOR_SCHEMA(AddGradient)
.NumInputs(3)
@@ -94,7 +286,7 @@ OPERATOR_SCHEMA(Sub)
.AllowInplace({{0, 0}, {1, 0}})
.CostInferenceFunction(PointwiseCostInference<1>)
.TensorInferenceFunction(ElementwiseOpShapeInference)
.FillUsing(MathDocGenerator("subtraction"))
.FillUsing(MathDocGenerator("subtraction", kSubExample))
.InheritOnnxSchema("Sub");
OPERATOR_SCHEMA(SubGradient)
.NumInputs(3)
@@ -107,7 +299,7 @@ OPERATOR_SCHEMA(Mul)
.AllowInplace({{0, 0}, {1, 0}})
.CostInferenceFunction(PointwiseCostInference<1>)
.TensorInferenceFunction(ElementwiseOpShapeInference)
.FillUsing(MathDocGenerator("multiplication"))
.FillUsing(MathDocGenerator("multiplication", kMulExample))
.InheritOnnxSchema("Mul");
OPERATOR_SCHEMA(MulGradient)
.NumInputs(3)
@@ -120,7 +312,7 @@ OPERATOR_SCHEMA(Div)
.AllowInplace({{0, 0}})
.CostInferenceFunction(PointwiseCostInference<1>)
.TensorInferenceFunction(ElementwiseOpShapeInference)
.FillUsing(MathDocGenerator("division"))
.FillUsing(MathDocGenerator("division", kDivExample))
.InheritOnnxSchema("Div");
OPERATOR_SCHEMA(DivGradient).NumInputs(4).NumOutputs(2).AllowInplace({{0, 0}});
@@ -162,35 +354,270 @@ For example, the following tensor shapes are supported:
"If broadcasting is disabled it should be of the same size.")
.Output(0, "C", "Result, has same dimensions and type as B");
const char* kLTExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"LT",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([1, 5, 2, 9, 12, 3]))
workspace.FeedBlob("B", np.array([1, 3, 4, 9, 12, 8]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A: [ 1 5 2 9 12 3]
B: [ 1 3 4 9 12 8]
C: [False False True False False True]
```
</details>
)DOC";
const char* kLEExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"LE",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([1, 5, 2, 9, 12, 3]))
workspace.FeedBlob("B", np.array([1, 3, 4, 9, 12, 8]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A: [ 1 5 2 9 12 3]
B: [ 1 3 4 9 12 8]
C: [ True False True True True True]
```
</details>
)DOC";
const char* kGTExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"GT",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([1, 5, 2, 9, 12, 3]))
workspace.FeedBlob("B", np.array([1, 3, 4, 9, 12, 8]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A: [ 1 5 2 9 12 3]
B: [ 1 3 4 9 12 8]
C: [False True False False False False]
```
</details>
)DOC";
const char* kGEExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"GE",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([1, 5, 2, 9, 12, 3]))
workspace.FeedBlob("B", np.array([1, 3, 4, 9, 12, 8]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A: [ 1 5 2 9 12 3]
B: [ 1 3 4 9 12 8]
C: [ True True False True True False]
```
</details>
)DOC";
const char* kEQExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"EQ",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([1, 5, 2, 9, 12, 3]))
workspace.FeedBlob("B", np.array([1, 3, 4, 9, 12, 8]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A: [ 1 5 2 9 12 3]
B: [ 1 3 4 9 12 8]
C: [ True False False True True False]
```
</details>
)DOC";
const char* kNEExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"NE",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([1, 5, 2, 9, 12, 3]))
workspace.FeedBlob("B", np.array([1, 3, 4, 9, 12, 8]))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A: [ 1 5 2 9 12 3]
B: [ 1 3 4 9 12 8]
C: [False True True False False True]
```
</details>
)DOC";
std::function<void(OpSchema&)> ComparisonDocGenerator(
const char* name,
const char* desc) {
const char* desc,
const char* extra) {
return [=](OpSchema& schema) {
string doc = R"DOC(
Performs element-wise {desc} comparison `{name}` (with limited broadcast support).
{broadcast_doc})DOC";
Performs element-wise {desc} comparison **{name}** (with limited broadcast support).
{broadcast_doc}
{extra}
)DOC";
ReplaceAll(doc, "{name}", name);
ReplaceAll(doc, "{desc}", desc);
ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
ReplaceAll(doc, "{extra}", extra);
schema.SetDoc(doc);
schema.Arg("broadcast", "Pass 1 to enable broadcasting");
schema.Arg("broadcast", "*(type: int; default: 0)* Pass 1 to enable broadcasting.");
schema.Arg(
"axis",
"If set, defines the broadcast dimensions. See doc for details.");
"*(type: int; default: -1)* Axis to concatenate on. If set, defines the broadcast dimensions.");
schema.Input(
0,
"A",
"First operand, should share the type with the second operand.");
"*(type: Tensor`<bool>`)* First operand, should share the type with the second operand.");
schema.Input(
1,
"B",
"Second operand. With broadcasting can be of smaller size than A. "
"*(type: Tensor`<bool>`)* Second operand. With broadcasting can be of smaller size than `A`. "
"If broadcasting is disabled it should be of the same size.");
schema.Output(0, "C", "Result, has same dimensions and A and type `bool`");
schema.Output(0, "C", "*(type: Tensor`<bool>`)* Output tensor with same dimensions as `A`.");
};
}
#define CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(name, symbol, desc) \
#define CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(name, symbol, desc, extra) \
OPERATOR_SCHEMA(name) \
.NumInputs(2) \
.NumOutputs(1) \
@@ -210,51 +637,200 @@ Performs element-wise {desc} comparison `{name}` (with limited broadcast support
return vector<TensorShape>{ \
CreateTensorShape(output_dims, TensorProto::BOOL)}; \
}) \
.FillUsing(ComparisonDocGenerator(symbol, desc)); \
.FillUsing(ComparisonDocGenerator(symbol, desc, extra)); \
SHOULD_NOT_DO_GRADIENT(name)
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(EQ, "==", "equal to");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(NE, "!=", "not equal to");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(LT, "<", "less than");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(LE, "<=", "less or equal than");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(GT, ">", "greater than");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(GE, ">=", "greater or equal than");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(EQ, "==", "equal to", kEQExample);
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(NE, "!=", "not equal to", kNEExample);
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(LT, "<", "less than", kLTExample);
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(LE, "<=", "less or equal than", kLEExample);
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(GT, ">", "greater than", kGTExample);
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(GE, ">=", "greater or equal than", kGEExample);
std::function<void(OpSchema&)> LogicalDocGenerator(const char* name) {
const char* kAndExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"And",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", (np.random.rand(3, 3) > 0.5))
workspace.FeedBlob("B", (np.random.rand(3, 3) > 0.5))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[ True False False]
[False True False]
[False False True]]
B:
[[ True False True]
[False False False]
[False False False]]
C:
[[ True False False]
[False False False]
[False False False]]
```
</details>
)DOC";
const char* kOrExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Or",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", (np.random.rand(3, 3) > 0.5))
workspace.FeedBlob("B", (np.random.rand(3, 3) > 0.5))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[False True True]
[False True True]
[ True True True]]
B:
[[False True False]
[ True True True]
[False True False]]
C:
[[False True True]
[ True True True]
[ True True True]]
```
</details>
)DOC";
const char* kXorExample = R"DOC(
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Xor",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", (np.random.rand(3, 3) > 0.5))
workspace.FeedBlob("B", (np.random.rand(3, 3) > 0.5))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("C"))
```
**Result**
```
A:
[[ True True True]
[False False True]
[False True False]]
B:
[[False False False]
[ True True True]
[False False False]]
C:
[[ True True True]
[ True True False]
[False True False]]
```
</details>
)DOC";
std::function<void(OpSchema&)> LogicalDocGenerator(const char* name, const char* extra) {
return [=](OpSchema& schema) {
string doc = R"DOC(
Performs element-wise logical operation `{name}` (with limited broadcast support).
Performs element-wise logical operation **{name}** (with limited broadcast support).
Both input operands should be of type `bool`.
{broadcast_doc})DOC";
{broadcast_doc}
{extra}
)DOC";
ReplaceAll(doc, "{name}", name);
ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
ReplaceAll(doc, "{extra}", extra);
schema.SetDoc(doc);
schema.Arg("broadcast", "Pass 1 to enable broadcasting");
schema.Arg("broadcast", "*(type: int; default: 0)* Pass 1 to enable broadcasting.");
schema.Arg(
"axis",
"If set, defines the broadcast dimensions. See doc for details.");
schema.Input(0, "A", "First operand.");
"*(type: int; default: -1)* Axis to concatenate on. If set, defines the broadcast dimensions.");
schema.Input(0, "A", "*(type: Tensor`<bool>`)* First operand.");
schema.Input(
1,
"B",
"Second operand. With broadcasting can be of smaller size than A. "
"*(type: Tensor`<bool>`)* Second operand. With broadcasting can be of smaller size than `A`. "
"If broadcasting is disabled it should be of the same size.");
schema.Output(0, "C", "Result, has same dimensions and A and type `bool`");
schema.Output(0, "C", "*(type: Tensor`<bool>`)* Output tensor of booleans. Has same dimensions as input `A`.");
};
}
#define CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(name, symbol, onnx_schema) \
#define CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(name, symbol, onnx_schema, extra) \
OPERATOR_SCHEMA(name) \
.NumInputs(2) \
.NumOutputs(1) \
.AllowInplace({{0, 0}}) \
.FillUsing(LogicalDocGenerator(symbol)) \
.FillUsing(LogicalDocGenerator(symbol, extra)) \
.InheritOnnxSchema(onnx_schema); \
SHOULD_NOT_DO_GRADIENT(name)
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(Or, "or", "Or");
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(And, "and", "And");
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(Xor, "xor", "Xor");
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(Or, "or", "Or", kOrExample);
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(And, "and", "And", kAndExample);
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(Xor, "xor", "Xor", kXorExample);
#undef CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP
@@ -267,17 +843,17 @@ Both input operands should be of type `bool`.
ReplaceAll(doc, "{name}", name);
ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
schema.SetDoc(doc);
schema.Arg("broadcast", "Pass 1 to enable broadcasting");
schema.Arg("broadcast", "*(type: int; default: 0)* Pass 1 to enable broadcasting.");
schema.Arg(
"axis",
"If set, defines the broadcast dimensions. See doc for details.");
schema.Input(0, "A", "First operand.");
"*(type: int; default: -1)* Axis to concatenate on. If set, defines the broadcast dimensions.");
schema.Input(0, "A", "*(type: Tensor)* First operand.");
schema.Input(
1,
"B",
"Second operand. With broadcasting can be of smaller size than A. "
"*(type: Tensor)* Second operand. With broadcasting can be of smaller size than `A`. "
"If broadcasting is disabled it should be of the same size.");
schema.Output(0, "C", "Result, has same dimensions and type with A.");
schema.Output(0, "C", "*(type: Tensor)* Output tensor. Has same dimensions as input `A`.");
};
}
@@ -286,7 +862,7 @@ Both input operands should be of type `bool`.
.NumInputs(2) \
.NumOutputs(1) \
.AllowInplace({{0, 0}}) \
.FillUsing(LogicalDocGenerator(symbol)); \
.FillUsing(BitwiseDocGenerator(symbol)); \
SHOULD_NOT_DO_GRADIENT(name)
CAFFE2_SCHEMA_FOR_BINARY_BITWISE_OP(BitwiseOr, "bitwise_or");
@@ -298,18 +874,111 @@ CAFFE2_SCHEMA_FOR_BINARY_BITWISE_OP(BitwiseXor, "bitwise_xor");
OPERATOR_SCHEMA(Not)
.NumInputs(1)
.NumOutputs(1)
.SetDoc(R"DOC(Performs element-wise negation.)DOC")
.Input(0, "X", "Input tensor of type `bool`.")
.Output(0, "Y", "Output tensor of type `bool`.")
.SetDoc(R"DOC(
Performs element-wise negation on input tensor `X`.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elementwise_op_schema.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Not",
["X"],
["Y"],
)
workspace.FeedBlob("X", (np.random.rand(3, 3) > 0.5))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[ True False False]
[False False False]
[ True True True]]
Y:
[[False True True]
[ True True True]
[False False False]]
```
</details>
)DOC")
.Input(0, "X", "*(Tensor`<bool>`)* Input tensor.")
.Output(0, "Y", "*(Tensor`<bool>`)* Negated output tensor.")
.InheritOnnxSchema("Not");
SHOULD_NOT_DO_GRADIENT(Not);
OPERATOR_SCHEMA(Sign)
.NumInputs(1)
.NumOutputs(1)
.SetDoc(R"DOC(Performs element-wise sign.)DOC")
.Input(0, "X", "Input tensor.")
.Output(0, "Y", "Output tensor.");
.SetDoc(R"DOC(
Computes sign for each element of the input: -1, 0 or 1.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elementwise_op_schema.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sign",
["X"],
["Y"],
)
workspace.FeedBlob("X", (np.random.rand(3, 3).astype(np.float32) - np.random.rand(3, 3).astype(np.float32)))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[ 0.02816287 0.22408086 -0.30342305]
[-0.18481976 0.03948995 0.39698976]
[-0.63304734 -0.6919183 -0.31524038]]
Y:
[[ 1. 1. -1.]
[-1. 1. 1.]
[-1. -1. -1.]]
```
</details>
)DOC")
.Input(0, "X", "*(type: Tensor`<float>`)* Input data tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.");
SHOULD_NOT_DO_GRADIENT(Sign);
} // namespace caffe2


@@ -23,12 +23,99 @@ OPERATOR_SCHEMA(Sum)
.InputsCanCrossDevices()
.IdenticalTypeAndShapeOfInput(0)
.SetDoc(R"DOC(
Element-wise sum of each of the input tensors. The first input tensor can be
used in-place as the output tensor, in which case the sum will be done in
place and results will be accumulated in input0. All inputs and outputs must
Element-wise sum of each of the input tensors. The first input tensor can be used
in-place as the output tensor, in which case the sum will be done in place and
results will be accumulated in the first input tensor. All inputs and outputs must
have the same shape and data type.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elementwise_sum_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sum",
["A", "B"],
["C"],
)
workspace.FeedBlob("A", np.array([[1,2],[3,4]]).astype(np.float32))
workspace.FeedBlob("B", np.array([[5,6],[7,8]]).astype(np.float32))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("C:", workspace.FetchBlob("A"))
```
**Result**
```
A: [[1. 2.]
[3. 4.]]
B: [[5. 6.]
[7. 8.]]
C: [[ 6.  8.]
[10. 12.]]
```
</details>
<details>
<summary> <b>Example 2</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sum",
["A", "B"],
["A"], # inplace
)
workspace.FeedBlob("A", np.array([[1,2,5],[8,3,4]]).astype(np.float32))
workspace.FeedBlob("B", np.array([[9,5,6],[6,7,8]]).astype(np.float32))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("A after Sum:", workspace.FetchBlob("A"))
```
**Result**
```
A: [[1. 2. 5.]
[8. 3. 4.]]
B: [[9. 5. 6.]
[6. 7. 8.]]
A after Sum: [[10. 7. 11.]
[14. 10. 12.]]
```
</details>
)DOC")
.Input(0, "data_0", "First of the input tensors. Can be inplace.")
.Output(0, "sum", "Output tensor. Same dimension as inputs.")
.Input(0, "A", "*(type: Tensor`<float>`)* First tensor to be added element-wise.")
.Input(1, "B", "*(type: Tensor`<float>`)* Second tensor to be added element-wise.")
.Output(0, "C", "*(type: Tensor`<float>`)* Sum of A and B.")
.InheritOnnxSchema("Sum");
}
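Sum accepts any number of inputs, not just two; a minimal sketch with three same-shaped float tensors (the input names beyond `A` and `B` are illustrative, not part of the schema):
```
import numpy as np
from caffe2.python import core, workspace

# Sketch: Sum over three same-shaped float tensors.
workspace.ResetWorkspace()
op = core.CreateOperator("Sum", ["A", "B", "C"], ["D"])
workspace.FeedBlob("A", np.array([[1, 2], [3, 4]], dtype=np.float32))
workspace.FeedBlob("B", np.array([[5, 6], [7, 8]], dtype=np.float32))
workspace.FeedBlob("C", np.array([[9, 10], [11, 12]], dtype=np.float32))
workspace.RunOperatorOnce(op)
print("D:", workspace.FetchBlob("D"))  # expected: [[15. 18.] [21. 24.]]
```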


@@ -15,16 +15,60 @@ OPERATOR_SCHEMA(Exp)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Calculates the exponential of the given input tensor, element-wise. This
Calculates the exponential of the given input tensor ($exp(x)$), element-wise. This
operation can be done in an in-place fashion too, by providing the same input
and output blobs.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/exp_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Exp",
["X"],
["X"],
)
workspace.FeedBlob("X", (np.random.rand(3,3)).astype(np.float32))
print("X before running op:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("X after running op:", workspace.FetchBlob("X"))
```
**Result**
```
X before running op:
[[0.5821691 0.07719802 0.50159824]
[0.40952456 0.36788362 0.84887683]
[0.02472685 0.65730894 0.9066397 ]]
X after running op:
[[1.7899168 1.080256 1.6513585]
[1.5061016 1.4446739 2.3370204]
[1.0250351 1.9295927 2.4759884]]
```
</details>
)DOC")
.Input(0, "input", "Input tensor")
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(
0,
"output",
"The exponential of the input tensor computed "
"element-wise")
"Y",
"*(type: Tensor`<float>`)* The exponential of the input tensor computed "
"element-wise.")
.InheritOnnxSchema("Exp");
namespace {


@@ -46,48 +46,165 @@ OPERATOR_SCHEMA(ConstantFill)
.AllowInplace({{0, 0}})
.TensorInferenceFunction(FillerTensorInference<>)
.SetDoc(R"DOC(
The operator fills the elements of the output tensor with a constant value
specified by the 'value' argument.
This operator fills the elements of the output tensor with a constant value
specified by the `value` argument.
The data type is specified by the 'dtype' argument. The 'dtype' argument must
be one of the data types specified in the 'DataType' enum field in the
TensorProto message. If the 'dtype' argument is not provided, the data type of
'value' is used.
- The data type is specified by the `dtype` argument
The output tensor shape is specified by the 'shape' argument. If the number of
input is 1, the shape will be identical to that of the input at run time with
optional additional dimensions appended at the end as specified by 'extra_shape'
argument. In that case the 'shape' argument should not be set.
- Currently, the data types supported are *float*, *int32*, *int64*, and *bool*
If input_as_shape is set to true, then the input should be a 1D tensor
containing the desired output shape (the dimensions specified in extra_shape
- If the `dtype` argument is not provided, the data type of `value` is used
- The output tensor shape is either specified by the `shape` argument or will
match the shape of the input tensor if one is provided (if an input tensor is
provided, a shape argument should not be set)
- Optional additional dimensions can be appended at the end as specified by
`extra_shape` argument
- If `input_as_shape` is set to True, the input should be a 1D tensor
containing the desired output shape (the dimensions specified in `extra_shape`
will also be appended)
NOTE: Currently, it supports data type of float, int32, int64, and bool.
When specifying `dtype` argument, use the integer keys from the *DataType* enum
in TensorProto:
```
message TensorProto {
...
enum DataType {
UNDEFINED = 0;
FLOAT = 1; // float
INT32 = 2; // int
BYTE = 3; // BYTE, when deserialized, is going to be restored as uint8.
STRING = 4; // string
BOOL = 5; // bool
UINT8 = 6; // uint8_t
INT8 = 7; // int8_t
UINT16 = 8; // uint16_t
INT16 = 9; // int16_t
INT64 = 10; // int64_t
FLOAT16 = 12; // caffe2::__f16, caffe2::float16
DOUBLE = 13; // double
}
```
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ConstantFill",
[],
["Y"],
shape=(1,5,5)
)
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
Y: [[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]]
```
</details>
<details>
<summary> <b>Example 2</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ConstantFill",
["X"],
["Y"],
value=4.0,
dtype=1,
extra_shape=(1,2)
)
workspace.FeedBlob("X", (np.random.randint(100, size=(3,3))).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [[86. 30. 84.]
[34. 51. 9.]
[29. 86. 59.]]
Y: [[[[4. 4.]]
[[4. 4.]]
[[4. 4.]]]
[[[4. 4.]]
[[4. 4.]]
[[4. 4.]]]
[[[4. 4.]]
[[4. 4.]]
[[4. 4.]]]]
```
</details>
)DOC")
.Arg("value", "The value for the elements of the output tensor. Default is 0.0f.")
.Arg(
"value",
"*(type: primitive; default: 0.0f) value to populate output tensor with.")
.Arg(
"dtype",
"The data type for the elements of the output tensor."
"Strictly must be one of the types from DataType enum in TensorProto.")
"*(type: int)* The data type for the elements of the output tensor. "
"Strictly must be one of the types from *DataType* enum in TensorProto.")
.Arg(
"shape",
"The shape of the output tensor."
"Cannot set the shape argument and pass in an input at the same time.")
"*(type: int | Tuple(int))* Shape of the output tensor. Cannot pass an "
"input blob and this arg at the same time.")
.Arg(
"extra_shape",
"The additional dimensions appended at the end of the shape indicated"
"by the input blob."
"Cannot set the extra_shape argument when there is no input blob.")
"*(type: int | Tuple(int))* Additional dimensions appended at the end "
"of the shape indicated by the input blob. Cannot set this"
"argument when there is no input blob.")
.Arg(
"input_as_shape",
"1D tensor containing the desired output shape. First input must be in CPU context.")
.Input(0, "input", "Input tensor (optional) to provide shape information.")
"*(type: int | Tuple(int))* 1D tensor containing the desired output "
"shape. First input must be in CPU context.")
.Input(
0,
"X",
"*(type: Tensor)* [OPTIONAL] Input tensor to provide shape information.")
.Output(
0,
"output",
"Output tensor of constant values specified by 'value'"
"argument and its type is specified by the 'dtype' argument");
"Y",
"*(type: Tensor)* Output tensor of constant values.");
OPERATOR_SCHEMA(DiagonalFill)
.NumInputs(0, 1)

View File

@ -30,21 +30,68 @@ OPERATOR_SCHEMA(Flatten)
})
.SetDoc(R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape
(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn)
$(d_0, d_1, ..., d_n)$ then the output will have shape
$\bigl((d_0 * d_1 * ... * d_{(axis-1)}), (d_{axis} * d_{(axis+1)} * ... * d_n)\bigr)$.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/flatten_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Flatten",
["X"],
["Y"],
axis=1
)
workspace.FeedBlob("X", np.random.rand(1,3,2,2))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [[[[0.53432311 0.23734561]
[0.56481598 0.52152617]]
[[0.33662627 0.32472711]
[0.17939016 0.97175851]]
[[0.87226421 0.49045439]
[0.92470531 0.30935077]]]]
Y: [[0.53432311 0.23734561 0.56481598 0.52152617 0.33662627 0.32472711
0.17939016 0.97175851 0.87226421 0.49045439 0.92470531 0.30935077]]
```
</details>
)DOC")
.Input(0, "input", "A tensor of rank >= axis.")
.Input(
0,
"X",
"*(type: Tensor)* Input Tensor of rank >= axis.")
.Output(
0,
"output",
"A 2D tensor with the contents of the input tensor, "
"with input dimensions up to axis flattened to the outer dimension "
"of the output and remaining input dimensions flattened into the inner "
"dimension of the output.")
"Y",
"*(type: Tensor)* A 2D tensor with the contents of the input tensor, "
"with input dimensions up to `axis` flattened to the outer dimension "
"of the output and the remaining input dimensions flattened into the "
"inner dimension of the output.")
.Arg(
"axis",
"(Default to 1) Indicate up to which input dimensions "
"(exclusive) should be flattened to the outer dimension of the output")
"*(type: int; default: 1)* Indicates up to which input dimensions "
"(exclusive) should be flattened to the outer dimension of the output.")
.InheritOnnxSchema("Flatten");
class GetFlattenGradient : public GradientMakerBase {

View File

@ -11,12 +11,61 @@ OPERATOR_SCHEMA(Floor)
.NumOutputs(1)
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Floor takes one input data (Tensor<T>) and produces one output data
(Tensor<T>) where the floor function, y = floor(x), is applied to
the tensor elementwise. Currently supports only float32.
Element-wise application of the floor function ($y=floor(x)$) to the input
tensor `X`. Output tensor shape is the same as the input tensor. This
operator can be used in an in-place fashion by using the same input blob as the
output blob.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/floor_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Floor",
["X"],
["X"],
)
workspace.FeedBlob("X", (np.random.uniform(-10, 10, (5,5))).astype(np.float32))
print("X before running op:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("X after running op:", workspace.FetchBlob("X"))
```
**Result**
```
X before running op:
[[ 3.813361 -1.319647 5.2089314 -4.931328 0.6218652 ]
[ 7.2757645 5.5552588 5.785643 -2.4790506 -0.41400087]
[ 1.1541046 -6.933266 3.3754056 1.6569928 -1.7670316 ]
[-3.4932013 4.891472 1.5530115 -3.2443287 -4.605099 ]
[-4.574543 -7.360948 5.91305 -8.196495 -5.357458 ]]
X after running op:
[[ 3. -2. 5. -5. 0.]
[ 7. 5. 5. -3. -1.]
[ 1. -7. 3. 1. -2.]
[-4. 4. 1. -4. -5.]
[-5. -8. 5. -9. -6.]]
```
</details>
)DOC")
.Input(0, "X", "ND input tensor")
.Output(0, "Y", "ND input tensor");
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.");
// TODO: Write gradient for this when needed
GRADIENT_NOT_IMPLEMENTED_YET(Floor);

View File

@ -20,94 +20,209 @@ OPERATOR_SCHEMA(DBExists)
.NumInputs(0)
.NumOutputs(1)
.SetDoc(R"DOC(
Checks if the DB exists.
Checks if the db described by the arguments exists.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/load_save_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"DBExists",
[],
["exists"],
db_name="test_db",
db_type="leveldb",
)
workspace.RunOperatorOnce(op)
print("exists:", workspace.FetchBlob("exists"))
```
</details>
)DOC")
.Output(0, "exists", "A scalar bool Tensor.")
.Output(0, "exists", "*(type: Tensor`<bool>`)* Scalar boolean output "
"tensor. True if the db exists, else false.")
.Arg(
"absolute_path",
"(int, default 0) if set, use the db path directly and do not prepend "
"the current root folder of the workspace.")
.Arg("db_name", "(string) the path to the db to load.")
.Arg("db_type", "(string) the type of the db.");
"*(type: int; default: 0)* If set to non-zero, save the db directly to "
"the path specified by the `db` arg. If not set (default), prepend the "
"path of the current root folder of the workspace to the path specified "
"by the `db` arg.")
.Arg("db_name", "*(type: string)* Path to the db in question; see the "
"`absolute_path` arg details for options regarding the current root folder "
"of the workspace.")
.Arg("db_type", "*(type: string)* Type of db to save (options: \"lmdb\", "
"\"leveldb\", \"minidb\").");
OPERATOR_SCHEMA(Load)
.NumInputs(0, INT_MAX)
.NumOutputs(0, INT_MAX)
.SetDoc(R"DOC(
The Load operator loads a set of serialized blobs from a db or multiple dbs. It
takes [0, infinity) number of inputs and [0, infinity) number of outputs, using
takes $[0, \infty)$ number of inputs and $[0, \infty)$ number of outputs, using
the db keys to match the db entries with the outputs.
If at least one input is passed, then it is assumed that that input blobs are a
set of DBReaders to load from. Otherwise the db or dbs argument is used to load
blobs from one single db or multiple dbs respectively. db_type argument is used
set of DBReaders to load from. Otherwise the `db` or `dbs` argument is used to load
blobs from one single db or multiple dbs respectively. `db_type` argument is used
to specify the type of the input db/dbs.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/load_save_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Load",
[],
["X", "Y"],
db="test_db",
db_type="lmdb"
)
workspace.RunOperatorOnce(op)
print("X:", workspace.FetchBlob("X"))
print("Y:", workspace.FetchBlob("Y"))
```
</details>
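
A minimal sketch of loading from multiple databases via the `dbs` argument,
assuming `test_db_1` and `test_db_2` are existing lmdb databases in the workspace
root (both names are hypothetical):

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Load",
    [],
    ["X", "Y"],
    dbs=["test_db_1", "test_db_2"],
    db_type="lmdb"
)

# The output blobs "X" and "Y" are matched against db keys across both databases
workspace.RunOperatorOnce(op)
```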
)DOC")
.Input(
0,
"X, Y, ...",
"*(type: List(DBReader))* [OPTIONAL] List of DBReaders to load from. Can "
"use this instead of the `db`/`dbs` args.")
.Arg(
"absolute_path",
"(int, default 0) if set, use the db path directly and do not prepend "
"the current root folder of the workspace.")
"*(type: int; default: 0)* If set to non-zero, save the db directly to "
"the path specified by the `db` arg. If not set (default), prepend the "
"path of the current root folder of the workspace to the path specified "
"by the `db` arg.")
.Arg(
"add_prefix",
"(string, default=\"\") blobs will be prefixed with this when loading."
"Useful for avoiding collisions with blobs existing in the workspace."
"The output blob names specified to this op should include this prefix.")
"*(type: string, default: \"\")* Blobs will be prefixed with this when "
"loading. Useful for avoiding collisions with blobs existing in the "
"workspace. The output blob names specified to this op should include "
"this prefix.")
.Arg(
"strip_prefix",
"(string, default=\"\") characters in the provided blob "
" names that match strip_prefix will be removed prior to loading."
" Also, characters that precede strip_prefix will be removed. Useful "
" for removing device scope from blob names.")
.Arg("db", "(string) the path to the db to load.")
"*(type: string, default: \"\")* Characters in the provided blob names "
"that match `strip_prefix` will be removed prior to saving. Also, "
"characters that precede `strip_prefix` will be removed. Useful for "
"removing device scope from blob names.")
.Arg("db", "*(type: string)* The output path of the db. See the "
"`absolute_path` arg details for options regarding the current root folder "
"of the workspace.")
.Arg(
"dbs",
"(list of strings) the paths to the dbs to load. This is used for loading"
" blobs from multiple databases. If it is set, argument in \"db\" will be"
" ignored.")
.Arg("db_type", "(string) the type of the db.")
"*(type: List(string))* List of paths to dbs to load blobs from. See "
"the `absolute_path` arg details for options regarding the current "
"root folder of the workspace.")
.Arg("db_type", "(type: string)* Type of db to save (options: \"lmdb\", "
"\"leveldb\", \"minidb\").")
.Arg(
"keep_device",
"(int, default 0) if nonzero, the blobs are loaded into the device that "
"is specified in the serialized BlobProto. Otherwise, the device will be "
"set as the one that the Load operator is being run under.")
"*(type: int; default: 0)* If nonzero, the blobs are loaded into the "
"device that is specified in the serialized `BlobProto`. Otherwise, "
"the device will be set as the one that the `Load` operator is being "
"run under.")
.Arg(
"load_all",
"(int, default 0) if nonzero, will load all blobs pointed to by the db "
"to the workspace overwriting/creating blobs as needed.")
"*(type: int; default: 0)* If nonzero, will load all blobs pointed to "
"by the db to the workspace overwriting/creating blobs as needed.")
.Arg(
"allow_incomplete",
"(bool, default false) if true, will allow not loading all the output "
"blobs specified in the outputs")
"*(type: bool; default: False)* If True, will allow not loading all "
"the output blobs specified in the outputs.")
.Arg(
"source_blob_names",
"(list of strings) if set, used instead of output "
"blob names, to specify which blobs in the db shall be loaded. Must be "
"the same length as number of output blobs.");
"*(type: List(string))* If set, used instead of output blob names to "
"specify which blobs in the db shall be loaded. Must be the same "
"length as number of output blobs.");
OPERATOR_SCHEMA(Save)
.NumInputs(1, INT_MAX)
.NumOutputs(0)
.SetDoc(R"DOC(
The Save operator saves a set of blobs to a db. It takes [1, infinity) number
of inputs and has no output. The contents of the inputs are written into the
db specified by the arguments.
Saves a set of blobs to a db. It takes $[1, \infty)$ number of inputs and has
no output. The contents of the inputs are written into the db using the
settings specified by the arguments.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/load_save_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Save",
["X", "Y", "Z"],
[],
db="test_db2",
db_type="leveldb",
blob_name_overrides=["x_scores", "y_scores", "z_scores"]
)
workspace.FeedBlob("X", np.random.randint(20, size=(5,5)))
workspace.FeedBlob("Y", np.random.randint(20, size=(5,5)))
workspace.FeedBlob("Z", np.random.randint(20, size=(5,5)))
workspace.RunOperatorOnce(op)
```
</details>
)DOC")
.Arg(
"absolute_path",
"(int, default 0) if set, use the db path directly and do not prepend "
"the current root folder of the workspace.")
"*(type: int; default: 0)* If set to non-zero, save the db directly to "
"the path specified by the `db` arg. If not set (default), prepend the "
"path of the current root folder of the workspace to the path specified "
"by the `db` arg.")
.Arg(
"strip_prefix",
"(string, default=\"\") characters in the provided blob "
" names that match strip_prefix will be removed prior to saving."
" Also, characters that precede strip_prefix will be removed. Useful "
" for removing device scope from blob names.")
"*(type: string, default: \"\")* Characters in the provided blob names "
"that match `strip_prefix` will be removed prior to saving. Also, "
"characters that precede `strip_prefix` will be removed. Useful for "
"removing device scope from blob names.")
.Arg(
"blob_name_overrides",
"(list of strings) if set, used instead of original "
"blob names. Must be the same length as number of blobs.")
.Arg("db", "(string) the path to the db to load.")
.Arg("db_type", "(string) the type of the db.");
"*(List(string))* If set, used as blob names instead of original blob "
"names. Must be same length as number of blobs.")
.Arg("db", "*(type: string)* The output path of the db. See the "
"`absolute_path` arg details for options regarding the current root folder "
"of the workspace.")
.Arg("db_type", "*(type: string)* Type of db to save (options: \"lmdb\", "
"\"leveldb\", \"minidb\").")
.Input(0, "X", "*(type: Tensor)* Input tensor(s).");
OPERATOR_SCHEMA(Checkpoint)
.NumInputs(1, INT_MAX)

View File

@ -303,7 +303,199 @@ bool LRNGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
REGISTER_CPU_OPERATOR(LRN, LRNOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(LRNGradient, LRNGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(LRN).NumInputs(1).NumOutputs(1, 2).InheritOnnxSchema("LRN");
OPERATOR_SCHEMA(LRN)
.NumInputs(1)
.NumOutputs(1, 2)
.SetDoc(R"DOC(
`LRN` applies Local Response Normalization to an input blob. This operation performs
a kind of "lateral inhibition" by normalizing over local input regions, where
normalization is applied across channels. This operator is typically used to
normalize an unbounded activation (such as ReLU). The output shape is the same as
the input shape. The `brew` module has a wrapper for this operator for use in a
`ModelHelper` object.
The formula for LRN is as follows:
$$b_{c} = a_{c}(bias + \frac{\alpha}{n}\sum_{c'=max(0,c-n/2)}^{min(N-1,c+n/2)} a_{c'}^2 )^{-\beta}$$
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/local_response_normalization_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/local_response_normalization_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator("LRN",
["X"],
["Y", "Y_scale"],
size=11,
alpha=0.001,
beta=0.5,
bias=2.0,
order="NHWC"
)
workspace.FeedBlob("X", np.random.randn(1, 6, 6, 1).astype(np.float32)) # NCHW
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
print("Y_scale:\n", workspace.FetchBlob("Y_scale"))
```
**Result**
```
X:
[[[[ 0.72985137]
[-0.3753357 ]
[ 2.7344604 ]
[-0.5937792 ]
[ 0.38440478]
[-2.1659644 ]]
[[-0.92846817]
[-0.9996144 ]
[ 0.212943 ]
[-1.968045 ]
[-0.77839696]
[ 0.45492038]]
[[-0.11263168]
[ 1.9901097 ]
[ 0.19275683]
[ 0.15630436]
[ 0.7536298 ]
[-0.77339894]]
[[ 0.8353551 ]
[-0.7784452 ]
[ 1.779317 ]
[ 0.22421335]
[ 1.3846219 ]
[-3.0546608 ]]
[[ 0.09977621]
[ 2.2071757 ]
[ 0.79971045]
[ 3.563886 ]
[-0.7169287 ]
[ 0.77170426]]
[[-1.4296649 ]
[ 0.19181213]
[ 0.45961624]
[-1.0201577 ]
[ 0.62854475]
[-0.6395456 ]]]]
Y:
[[[[ 0.5160766 ]
[-0.26540157]
[ 1.9332271 ]
[-0.41986194]
[ 0.27181432]
[-1.5314047 ]]
[[-0.6565133 ]
[-0.7068181 ]
[ 0.15057328]
[-1.3914955 ]
[-0.5504022 ]
[ 0.32167578]]
[[-0.0796426 ]
[ 1.4070934 ]
[ 0.13629955]
[ 0.11052381]
[ 0.53288984]
[-0.5468682 ]]
[[ 0.5906759 ]
[-0.5504363 ]
[ 1.2580767 ]
[ 0.1585426 ]
[ 0.9790328 ]
[-2.1595135 ]]
[[ 0.07055242]
[ 1.5605361 ]
[ 0.5654725 ]
[ 2.5193207 ]
[-0.50693923]
[ 0.54567 ]]
[[-1.0108787 ]
[ 0.13563155]
[ 0.3249962 ]
[-0.72134334]
[ 0.44444424]
[-0.45222285]]]]
Y_scale:
[[[[2.0000484]
[2.0000129]
[2.0006797]
[2.000032 ]
[2.0000134]
[2.0004265]]
[[2.0000784]
[2.0000908]
[2.000004 ]
[2.0003521]
[2.000055 ]
[2.0000188]]
[[2.0000012]
[2.00036 ]
[2.0000033]
[2.0000021]
[2.0000517]
[2.0000544]]
[[2.0000634]
[2.000055 ]
[2.0002878]
[2.0000045]
[2.0001743]
[2.0008483]]
[[2.000001 ]
[2.000443 ]
[2.0000582]
[2.0011547]
[2.0000467]
[2.0000541]]
[[2.0001857]
[2.0000033]
[2.0000193]
[2.0000947]
[2.000036 ]
[2.0000372]]]]
```
</details>
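
The `brew` wrapper mentioned above can be used along these lines; this is a hedged
sketch that assumes `brew.lrn` forwards its keyword arguments to the `LRN` operator
unchanged:

```
from caffe2.python import brew, model_helper

model = model_helper.ModelHelper(name="lrn_example")
# Adds an LRN op to model.net, mapping blob "X" to blob "Y"
brew.lrn(model, "X", "Y", size=11, alpha=0.001, beta=0.5, bias=2.0, order="NCHW")
```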
)DOC")
.Arg("size", "*(type: int; default: 0)* Amount of neighboring channels to sum over for normalization")
.Arg("alpha", "*(type: float; default: 0)* Multiplicative (scaling) factor.")
.Arg("beta", "*(type: float; default: 0)* Exponent.")
.Arg("bias", "*(type: float; default: 1.0)* Additive factor.")
.Arg("order", "*(type: float; default: 'NCHW')* Order of blob dimensions.")
.Input(0, "X", "*(type: Tensor`<float>`)* Input data tensor (ReLU output).")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.")
.Output(1, "Y_scale", "*(type: Tensor`<float>`)* Output scale.")
.InheritOnnxSchema("LRN");
OPERATOR_SCHEMA(LRNGradient).NumInputs(3).NumOutputs(1);
class GetLRNGradient : public GradientMakerBase {

View File

@ -15,16 +15,59 @@ OPERATOR_SCHEMA(Log)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Calculates the natural log of the given input tensor, element-wise. This
Calculates the natural log of the given input tensor ($ln(x)$), element-wise. This
operation can be done in an in-place fashion too, by providing the same input
and output blobs.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/log_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Log",
["X"],
["X"],
)
workspace.FeedBlob("X", (np.random.rand(3,3)).astype(np.float32))
print("X before running op:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("X after running op:", workspace.FetchBlob("X"))
```
**Result**
```
X before running op:
[[0.07341351 0.15404125 0.386613 ]
[0.34090295 0.99727786 0.24141751]
[0.32016268 0.8724168 0.93515724]]
X after running op:
[[-2.6116474 -1.8705349 -0.9503311 ]
[-1.0761575 -0.00272586 -1.4212275 ]
[-1.138926 -0.13648799 -0.06704059]]
```
</details>
)DOC")
.Input(0, "input", "Input tensor")
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(
0,
"output",
"The natural log of the input tensor computed "
"element-wise")
"Y",
"*(type: Tensor`<float>`)* Output tensor computed as the natural log of the input tensor computed, element-wise.")
.InheritOnnxSchema("Log");
namespace {

View File

@ -34,28 +34,84 @@ OPERATOR_SCHEMA(MatMul)
return out;
})
.SetDoc(R"DOC(
Matrix multiplication Y = A * B, where A has size (M x K), B has size (K x N),
and Y will have a size (M x N).
Matrix multiplication $Y = A * B$, where `A` has size (M x K), `B` has size
(K x N), and `Y` will have a size (M x N). To transpose `A` or `B` before
multiplication, pass 1 to the `trans_a` and/or `trans_b` arguments. The `axis_a`
and `axis_b` arguments define the exclusive axis that separates the first and
second dimension of `A` and `B`, respectively.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/matmul_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"MatMul",
["A", "B"],
["Y"],
)
workspace.FeedBlob("A", np.random.randint(10, size=(3,3)).astype(np.float32))
workspace.FeedBlob("B", np.random.randint(10, size=(3,3)).astype(np.float32))
print("A:", workspace.FetchBlob("A"))
print("B:", workspace.FetchBlob("B"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
A: [[1. 8. 3.]
[6. 4. 4.]
[5. 4. 7.]]
B: [[4. 0. 3.]
[3. 1. 1.]
[8. 5. 8.]]
Y: [[52. 23. 35.]
[68. 24. 54.]
[88. 39. 75.]]
```
</details>
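
A minimal sketch of the `trans_a` argument described above; the shapes are chosen
so that the transposed `A` (3 x 4) can be multiplied with `B` (4 x 2):

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "MatMul",
    ["A", "B"],
    ["Y"],
    trans_a=1
)

workspace.FeedBlob("A", np.random.rand(4, 3).astype(np.float32))  # transposed to (3 x 4)
workspace.FeedBlob("B", np.random.rand(4, 2).astype(np.float32))
workspace.RunOperatorOnce(op)
print("Y shape:", workspace.FetchBlob("Y").shape)  # expected: (3, 2)
```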
)DOC")
.Input(0, "A", "2D matrix of size (M x K)")
.Input(1, "B", "2D matrix of size (K x N)")
.Output(0, "Y", "2D matrix of size (M x N)")
.Input(
0,
"A",
"*(type: Tensor`<float>`)* 2D matrix of size (M x K).")
.Input(
1,
"B",
"*(type: Tensor`<float>`)* 2D matrix of size (K x N).")
.Output(
0,
"Y",
"*(type: Tensor`<float>`)* 2D matrix of size (M x N).")
.Arg(
"axis_a",
"Exclusive axis that divides the first and second dimension \
of matrix A, default to 1")
"*(type: int; default: 1)* Exclusive axis that divides the first and "
"second dimension of matrix `A`.")
.Arg(
"axis_b",
"Exclusive axis that divides the first and second dimension \
of matrix B, default to 1")
"*(type: int; default: 1)* Exclusive axis that divides the first and "
"second dimension of matrix `B`.")
.Arg(
"trans_a",
"Pass 1 to transpose A before multiplication and after the \
dimension adjustment using axis_a")
"*(type: int; default: 0)* Pass 1 to transpose `A` before multiplication and "
"after the dimension adjustment using `axis_a`.")
.Arg(
"trans_b",
"Pass 1 to transpose B before multiplication and after the \
dimension adjustment using axis_b");
"*(type: int; default: 0)* Pass 1 to transpose `B` before multiplication and "
"after the dimension adjustment using `axis_b`.");
class GetMatMulGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;

View File

@ -11,13 +11,70 @@ OPERATOR_SCHEMA(Mean)
.IdenticalTypeAndShapeOfInput(0)
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Element-wise mean of each of the input tensors. The first input tensor can be
used in-place as the output tensor, in which case the mean will be done in
place and results will be accumulated in input0. All inputs and outputs must
have the same shape and data type.
Element-wise mean of an arbitrary number of input tensors. This operation can be
performed in-place, by using the first input blob as the output blob. All inputs
must have the same shape and data type, and the output will have the same shape
as the inputs.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/mean_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Mean",
["X", "Y", "Z"],
["X"],
)
workspace.FeedBlob("X", (np.random.rand(3,3)).astype(np.float32))
workspace.FeedBlob("Y", (np.random.rand(3,3)).astype(np.float32))
workspace.FeedBlob("Z", (np.random.rand(3,3)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
print("Y:", workspace.FetchBlob("Y"))
print("Z:", workspace.FetchBlob("Z"))
workspace.RunOperatorOnce(op)
print("Mean:", workspace.FetchBlob("X"))
```
**Result**
```
X:
[[0.6035237 0.5305746 0.6298913 ]
[0.9169737 0.01280353 0.16286302]
[0.6017664 0.9946255 0.05128575]]
Y:
[[0.07544111 0.45371833 0.08460239]
[0.9708728 0.7422064 0.7933344 ]
[0.97671497 0.3411384 0.73818344]]
Z:
[[0.08837954 0.90187573 0.46734726]
[0.6308827 0.8719029 0.39888734]
[0.90059936 0.92883426 0.5695987 ]]
Mean:
[[0.25578147 0.6287229 0.39394698]
[0.8395764 0.5423043 0.45169494]
[0.8263602 0.75486606 0.45302266]]
```
</details>
)DOC")
.Input(0, "data_0", "First of the input tensors. Can be inplace.")
.Output(0, "mean", "Output tensor. Same dimension as inputs.");
.Input(0, "X, Y, ...", "*(type: Tensor`<Ord>`)* List of input tensors with the same shape.")
.Output(0, "M", "*(type: Tensor`<Ord>`)* Output tensor with the same dimensions as inputs. Contains "
"the mean values of the input tensors calculated element-wise.");
class GetMeanGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;

View File

@ -11,13 +11,70 @@ OPERATOR_SCHEMA(Max)
.IdenticalTypeAndShapeOfInput(0)
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Element-wise max of each of the input tensors. The first input tensor can be
used in-place as the output tensor, in which case the max will be done in
place and results will be accumulated in input0. All inputs and outputs must
have the same shape and data type.
Element-wise max of an arbitrary number of input tensors. This operation can be
performed in-place, by using the first input blob as the output blob. All inputs
must have the same shape and data type, and the output will have the same shape
as the inputs.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/minmax_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Max",
["X", "Y", "Z"],
["X"],
)
workspace.FeedBlob("X", (np.random.rand(3,3)).astype(np.float32))
workspace.FeedBlob("Y", (np.random.rand(3,3)).astype(np.float32))
workspace.FeedBlob("Z", (np.random.rand(3,3)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
print("Y:", workspace.FetchBlob("Y"))
print("Z:", workspace.FetchBlob("Z"))
workspace.RunOperatorOnce(op)
print("Max:", workspace.FetchBlob("X"))
```
**Result**
```
X:
[[0.4496477 0.07061381 0.7139333 ]
[0.83203 0.05970785 0.72786295]
[0.75988126 0.04601283 0.32820013]]
Y:
[[0.05683139 0.16872478 0.671098 ]
[0.70739156 0.09878621 0.03416285]
[0.34087983 0.94986707 0.67263436]]
Z:
[[0.48051122 0.07141234 0.85264146]
[0.77086854 0.22082241 0.13154659]
[0.42401117 0.995431 0.4263775 ]]
Max:
[[0.48051122 0.16872478 0.85264146]
[0.83203 0.22082241 0.72786295]
[0.75988126 0.995431 0.67263436]]
```
</details>
)DOC")
.Input(0, "data_0", "First of the input tensors. Can be inplace.")
.Output(0, "max", "Output tensor. Same dimension as inputs.")
.Input(0, "X, Y, ...", "*(type: Tensor`<Ord>`)* List of input tensors with the same shape.")
.Output(0, "M", "*(type: Tensor`<Ord>`)* Output tensor with same dimensions as input(s)."
"Contains the maximum valued element at each location.")
.InheritOnnxSchema("Max");
OPERATOR_SCHEMA(Min)
@ -26,13 +83,63 @@ OPERATOR_SCHEMA(Min)
.IdenticalTypeAndShapeOfInput(0)
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Element-wise min of each of the input tensors. The first input tensor can be
used in-place as the output tensor, in which case the min will be done in
place and results will be accumulated in input0. All inputs and outputs must
have the same shape and data type.
Element-wise min of an arbitrary number of input tensors. This operation can be
performed in-place, by using the first input blob as the output blob. All inputs
must have the same shape and data type, and the output will have the same shape
as the inputs.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/minmax_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Min",
["X", "Y", "Z"],
["X"],
)
workspace.FeedBlob("X", (np.random.rand(2,2)).astype(np.float32))
workspace.FeedBlob("Y", (np.random.rand(2,2)).astype(np.float32))
workspace.FeedBlob("Z", (np.random.rand(2,2)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
print("Y:", workspace.FetchBlob("Y"))
print("Z:", workspace.FetchBlob("Z"))
workspace.RunOperatorOnce(op)
print("Min:", workspace.FetchBlob("X"))
```
**Result**
```
X:
[[0.32731926 0.4939747 ]
[0.29242373 0.43460014]]
Y:
[[0.40928316 0.916115 ]
[0.77526504 0.29339448]]
Z:
[[0.7899794 0.90335774]
[0.82599413 0.2843068 ]]
Min:
[[0.32731926 0.4939747 ]
[0.29242373 0.2843068 ]]
```
</details>
)DOC")
.Input(0, "data_0", "First of the input tensors. Can be inplace.")
.Output(0, "min", "Output tensor. Same dimension as inputs.")
.Input(0, "X, Y, ...", "*(type: Tensor`<Ord>`)* List of input tensors with the same shape.")
.Output(0, "M", "*(type: Tensor`<Ord>`)* Output tensor with same dimensions as input(s)."
"Contains the minimum valued element at each location.")
.InheritOnnxSchema("Min");
template <typename T, class Context>

View File

@ -32,20 +32,68 @@ REGISTER_CPU_OPERATOR(Mod, ModOp<CPUContext>);
OPERATOR_SCHEMA(Mod)
.NumInputs(1)
.NumOutputs(1)
.Arg("divisor", "The divisor of the modulo operation. Must >= 1")
.Arg("divisor", "*(type: int; default: 0)* Divisor of the modulo operation (must be >= 1).")
.Arg(
"sign_follow_divisor",
"The sign of output follows Dividend if set to `false`. \
Otherwise follows Divisor")
"*(type: bool; default: False)* If true, sign of output matches divisor, else if false, sign follows dividend.")
.IdenticalTypeAndShape()
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Elementwise modulo operation. Each element in the output is the modulo result
of the corresponding elment in the input data. The divisor of the modulo is
provided by the operator argument `divisor`.
Element-wise modulo operation. Each element in the output is the modulo result
of the corresponding element in the input data. The divisor of the modulo is
provided by the `divisor` argument.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/mod_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Mod",
["X"],
["Y"],
divisor=10
)
workspace.FeedBlob("X", (np.random.randint(100, size=(5,5))))
print("X before running op:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("X after running op:", workspace.FetchBlob("Y"))
```
**Result**
```
X before running op:
[[56 22 43 13 60]
[ 4 55 58 10 45]
[64 66 4 3 66]
[10 36 47 52 78]
[91 4 36 47 95]]
X after running op:
[[6 2 3 3 0]
[4 5 8 0 5]
[4 6 4 3 6]
[0 6 7 2 8]
[1 4 6 7 5]]
```
</details>
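
A minimal sketch of the `sign_follow_divisor` argument for negative dividends; the
expected values assume the output sign follows the (positive) divisor:

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Mod",
    ["X"],
    ["Y"],
    divisor=10,
    sign_follow_divisor=True
)

workspace.FeedBlob("X", np.array([-7, -3, 4, 13]).astype(np.int32))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))  # expected: [3 7 4 3]
```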
)DOC")
.Input(0, "data", "input int32 or int64 data")
.Output(0, "output", "output of data with modulo operation applied");
.Input(0, "X", "*(type: Tensor`<int>`)* Input tensor with int32 or int64 data.")
.Output(0, "Y", "*(type: Tensor`<int>`)* Output tensor of data with modulo operation applied.");
SHOULD_NOT_DO_GRADIENT(ModOp);
} // namespace

View File

@ -17,9 +17,48 @@ OPERATOR_SCHEMA(Negative)
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Computes the element-wise negative of the input.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/negative_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Negative",
["X"],
["Y"]
)
workspace.FeedBlob("X", (np.random.rand(3,3).astype(np.float32)))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [[0.83296907 0.61407167 0.32562155]
[0.59304523 0.03111175 0.29365504]
[0.09478621 0.5424558 0.73940724]]
Y: [[-0.83296907 -0.61407167 -0.32562155]
[-0.59304523 -0.03111175 -0.29365504]
[-0.09478621 -0.5424558 -0.73940724]]
```
</details>
)DOC")
.Input(0, "X", "1D input tensor")
.Output(0, "Y", "1D input tensor")
.Input(0, "X", "*(type: Tensor`<float>`)* 1D input tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* 1D output tensor.")
.InheritOnnxSchema("Neg");
namespace {

View File

@ -728,19 +728,141 @@ bool PoolOp<T, Context, PoolType>::RunOnDeviceWithOrderNHWC() {
return true;
}
const char* kAveragePoolDoc = R"DOC(
consumes an input blob X and applies average pooling across the
the blob according to kernel sizes, stride sizes, and pad lengths defined by the
ConvPoolOpBase operator. Average pooling consisting of averaging all values of a
subset of the input tensor according to the kernel size and downsampling the
data into the output blob Y for further processing.
consumes an input blob and applies average pooling across the blob according
to kernel sizes, stride sizes, pad lengths and dilation. Average pooling consists
of taking the average value of a subset of the input tensor according to the kernel
size and downsampling the data into the output blob for further processing. The
`brew` module has a wrapper for this operator for use in a `ModelHelper` object.
Pooling layers reduce the spatial dimensionality of the input blob. Each of the
output blob's dimensions will reduce according to:
$$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.cc
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"AveragePool",
["X"],
["Y"],
kernel=2,
stride=2,
)
workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) # NCHW
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[[[-0.2883434 0.43498734 0.05417408 1.912558 0.09390241
-0.33173105]
[ 1.633709 1.2047161 0.36964908 0.99961185 0.4184147
0.9989975 ]
[ 1.7644193 0.1789665 1.5812988 -0.6038542 -0.36090398
0.33195344]
[ 0.9457722 -0.95174325 -0.78124577 1.2062047 1.1903144
0.2586746 ]
[ 1.252104 0.32645547 1.8073524 -0.78397465 0.9978303
-0.97614396]
[ 0.5440196 1.5778259 -0.76750124 0.5051756 0.8838398
-0.37085298]]]]
Y:
[[[[0.7462672 0.83399826 0.2948959 ]
[0.4843537 0.3506009 0.35500962]
[0.9251013 0.19026303 0.13366827]]]]
```
</details>
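
As a worked instance of the dimension formula above, the example uses a 6x6 input
with kernel=2, stride=2, and pad=0 (the default), so each spatial dimension of the
output is

$$dim_{out}=\frac{6-2+2*0}{2}+1=3$$

which matches the 3x3 `Y` blob in the result.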
)DOC";
const char* kMaxPoolDoc = R"DOC(
consumes an input blob X and applies max pooling across the
the blob according to kernel sizes, stride sizes, and pad lengths defined by the
ConvPoolOpBase operator. Max pooling consisting of taking the maximum value of a
subset of the input tensor according to the kernel size and downsampling the
data into the output blob Y for further processing.
consumes an input blob and applies max pooling across the blob according to
kernel sizes, stride sizes, pad lengths and dilation. Max pooling consists of
taking the maximum value of a subset of the input tensor according to the kernel
size and downsampling the data into the output blob for further processing. The
`brew` module has a wrapper for this operator for use in a `ModelHelper` object.
Pooling layers reduce the spatial dimensionality of the input blob. Each of the
output blob's dimensions will reduce according to:
$$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.cc
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"MaxPool",
["X"],
["Y"],
kernel=2,
stride=2,
)
workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) # NCHW
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[[[-2.8534958e-01 -1.7719941e+00 -8.2277227e-04 1.1088650e+00
-2.1476576e+00 -3.5070452e-01]
[-9.0058845e-01 -3.0070004e-01 -1.7907504e+00 -7.1746534e-01
1.2798511e+00 -3.2214901e-01]
[ 1.5806322e+00 1.6845188e+00 -2.6633200e-01 -3.8576153e-01
-9.6424848e-02 -3.9696163e-01]
[ 1.2572408e-01 6.3612902e-01 -3.9554062e-01 -6.9735396e-01
-9.1898698e-01 -1.9609968e-01]
[-1.1587460e+00 2.4605224e+00 -1.5497679e+00 1.3020347e-01
-8.1293899e-01 -7.8803545e-01]
[ 1.4323474e+00 1.3618395e+00 9.8975077e-02 -1.1307785e-01
7.2035044e-01 2.7642491e-01]]]]
Y:
[[[[-0.28534958 1.108865 1.2798511 ]
[ 1.6845188 -0.266332 -0.09642485]
[ 2.4605224 0.13020347 0.72035044]]]]
```
</details>
)DOC";
std::function<void(OpSchema&)> AveragePoolDocGenerator(const char* dim) {
@ -752,18 +874,18 @@ std::function<void(OpSchema&)> AveragePoolDocGenerator(const char* dim) {
schema.Input(
0,
"X",
"Input data tensor from the previous operator; dimensions depend on "
"whether the NCHW or NHWC operators are being used. For example, in "
"the former, the input has size (N x C x H x W), where N is the batch "
"size, C is the number of channels, and H and W are the height and the "
"width of the data. The corresponding permutation of dimensions is "
"used in the latter case.");
"*(type: Tensor`<float>`)* Input data tensor of shape NCHW or NHWC.");
schema.Output(
0,
"Y",
"Output data tensor from average pooling across the input "
"tensor. Dimensions will vary based on various kernel, stride, and pad "
"sizes.");
"*(type: Tensor`<float>`)* Output data tensor.");
/*
schema.Arg("kernel", "*(type: int)* Size of the window to take an average over.");
schema.Arg("stride", "*(type: int)* Stride of the window.");
schema.Arg("pad", "*(type: int)* Implicit zero padding to be added on both sides.");
schema.Arg("dilation", "*(type: int)* Parameter that controls the stride of elements in the window.");
schema.Arg("order", "*(type: string; default: 'NCHW')* Order of the blob dimensions.");
*/
};
}
@ -776,18 +898,18 @@ std::function<void(OpSchema&)> MaxPoolDocGenerator(const char* dim) {
schema.Input(
0,
"X",
"Input data tensor from the previous operator; dimensions depend on "
"whether the NCHW or NHWC operators are being used. For example, in "
"the former, the input has size (N x C x H x W), where N is the batch "
"size, C is the number of channels, and H and W are the height and the "
"width of the data. The corresponding permutation of dimensions is "
"used in the latter case.");
"*(type: Tensor`<float>`)* Input data tensor of shape NCHW or NHWC.");
schema.Output(
0,
"Y",
"Output data tensor from max pooling across the input "
"tensor. Dimensions will vary based on various kernel, stride, and pad "
"sizes.");
"*(type: Tensor`<float>`)* Output data tensor.");
/*
schema.Arg("kernel", "*(type: int)* Size of the window to take an average over.");
schema.Arg("stride", "*(type: int)* Stride of the window.");
schema.Arg("pad", "*(type: int)* Implicit zero padding to be added on both sides.");
schema.Arg("dilation", "*(type: int)* Parameter that controls the stride of elements in the window.");
schema.Arg("order", "*(type: string; default: 'NCHW')* Order of the blob dimensions.");
*/
};
}
REGISTER_CPU_OPERATOR(

View File

@ -28,9 +28,7 @@ class PrependDimOp : public Operator<Context> {
CAFFE_ENFORCE(
input.dim(0) % dim_size_ == 0,
"First dimension must be multiple of prepend_dim. Current first dimension: ",
input.dim(0),
", prepend dim: ",
dim_size_);
input.dim(0));
vector<int64_t> actual_new_shape(input.ndim() + 1);
actual_new_shape[0] = dim_size_;

View File

@ -105,22 +105,76 @@ OPERATOR_SCHEMA(Reshape)
})
.AllowInplace({{0, 0}})
.SetDoc(R"DOC(
Reshape the input tensor similar to numpy.reshape.
Reshape the input tensor similar to numpy's
[reshape](https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html).
It takes a tensor as input and an optional tensor specifying the new shape.
When the second input is absent, an extra argument `shape` must be specified.
It outputs the reshaped tensor as well as the original shape.
Takes a tensor as input and an optional tensor specifying the new shape. When
the second input is absent, an extra argument shape must be specified. Outputs
the reshaped tensor as well as the original shape.
At most one dimension of the new shape can be -1. In this case, the value is
inferred from the size of the tensor and the remaining dimensions. A dimension
could also be 0, in which case the actual dimension value is going to be copied
from the input tensor.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reshape_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Reshape",
["data"],
["reshaped", "old_shape"],
shape=(3,2)
)
workspace.FeedBlob("data", (np.random.randint(100, size=(6))))
print("data:", workspace.FetchBlob("data"))
workspace.RunOperatorOnce(op)
print("reshaped:", workspace.FetchBlob("reshaped"))
print("old_shape:", workspace.FetchBlob("old_shape"))
```
**Result**
```
data: [86 60 85 96 7 37]
reshaped: [[86 60]
[85 96]
[ 7 37]]
old_shape: [6]
```
</details>
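
A minimal sketch of the -1 convention described above, where one dimension is
inferred from the total size of the input:

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Reshape",
    ["data"],
    ["reshaped", "old_shape"],
    shape=(2, -1)
)

workspace.FeedBlob("data", np.arange(8).astype(np.float32))
workspace.RunOperatorOnce(op)
print("reshaped:", workspace.FetchBlob("reshaped"))  # expected shape: (2, 4)
```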
)DOC")
.Arg("shape", "New shape")
.Input(0, "data", "An input tensor.")
.Input(1, "new_shape", "New shape.")
.Output(0, "reshaped", "Reshaped data.")
.Output(1, "old_shape", "Original shape.")
.Arg("shape", "*(type: Tuple(int))* New shape. Do not set if using "
"`new_shape` input.")
.Input(
0,
"data",
"*(type: Tensor)* Input tensor.")
.Input(
1,
"new_shape",
"*(type: Tensor`<int>`)* [OPTIONAL] Tensor containing new shape.")
.Output(
0,
"reshaped",
"*(type: Tensor)* Reshaped output tensor.")
.Output(
1,
"old_shape",
"*(type: Tensor`<int>`)* Tensor containing old shape of `data`.")
.InheritOnnxSchema("Reshape");
class GetReshapeGradient : public GradientMakerBase {

View File

@ -48,15 +48,13 @@ class ReshapeOp : public Operator<Context> {
auto& shape = Input(1);
CAFFE_ENFORCE(shape.ndim() == 1, "Shape should be 1-D");
if (shape.size()) {
const T* shape_data = shape.template data<T>();
const T* shape_data = shape.template data<T>();
// Bit awkward, but needed so works on both CPU and CUDA contexts
std::vector<T> tmpv(shape.size());
context_.template CopyBytes<Context, CPUContext>(
shape.size() * sizeof(T), shape_data, &tmpv[0]);
actual_new_shape.assign(tmpv.begin(), tmpv.begin() + shape.size());
}
// Bit awkward, but needed so works on both CPU and CUDA contexts
std::vector<T> tmpv(shape.size());
context_.template CopyBytes<Context, CPUContext>(
shape.size() * sizeof(T), shape_data, &tmpv[0]);
actual_new_shape.assign(tmpv.begin(), tmpv.begin() + shape.size());
}
// Copy over the dimensions for those that are specified zero.

View File

@ -285,60 +285,209 @@ OPERATOR_SCHEMA(AddPadding)
.NumInputs(1, 4)
.NumOutputs(1, 2)
.SetDoc(R"DOC(
Given a partitioned tensor T<N, D1..., Dn>, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension N,
with given range lengths, return a tensor T<N + 2*padding_width, D1 ..., Dn>
with paddings added to the start and end of each range.
Optionally, different paddings can be provided for beginning and end. Paddings
provided must be a tensor T<D1..., Dn>.
Given a partitioned tensor $T<N, D_1, ..., D_n>$, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension $N$,
return a tensor $T<(N + 2 * padding\_width), D_1, ..., D_n>$ with paddings
added to the start and end of each range.
Optionally, different paddings can be provided for beginning and end.
Paddings provided must be a tensor $T<D_1, ..., D_n>$. If no padding is
provided, add zero padding. If no lengths vector is provided, add padding
only once, at the start and end of data.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"AddPadding",
["X", "lengths"],
["Y", "lengths_out"],
padding_width=1
)
workspace.FeedBlob("X", (np.random.rand(3,2,2).astype(np.float32)))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out:", workspace.FetchBlob("lengths_out"))
```
**Result**
```
X: [[[0.2531572 0.4588472 ]
[0.45140603 0.61161053]]
[[0.92500854 0.8045306 ]
[0.03356671 0.30233648]]
[[0.4660227 0.6287745 ]
[0.79372746 0.08609265]]]
Y: [[[0. 0. ]
[0. 0. ]]
[[0.2531572 0.4588472 ]
[0.45140603 0.61161053]]
[[0.92500854 0.8045306 ]
[0.03356671 0.30233648]]
[[0.4660227 0.6287745 ]
[0.79372746 0.08609265]]
[[0. 0. ]
[0. 0. ]]]
lengths_out: [5]
```
</details>
If no padding is provided, add zero padding.
If no lengths vector is provided, add padding only once,
at the start and end of data.
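
A minimal sketch of passing explicit `start_padding` and `end_padding` tensors
instead of the default zero padding; the padding blobs must have shape
$T<D_1, ..., D_n>$, here (2,):

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths", "start_padding", "end_padding"],
    ["Y", "lengths_out"],
    padding_width=1
)

workspace.FeedBlob("X", np.random.rand(3, 2).astype(np.float32))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))
workspace.FeedBlob("start_padding", np.ones(2).astype(np.float32))
workspace.FeedBlob("end_padding", np.full(2, 2.0).astype(np.float32))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))  # first padded row is all 1s, last is all 2s
```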
)DOC")
.Arg(
"padding_width",
"Number of copies of padding to add around each range.")
"*(type: int)* Number of copies of padding to add around each range.")
.Arg(
"end_padding_width",
"(Optional) Specifies a different end-padding width.")
.Input(0, "data_in", "(T<N, D1..., Dn>) Input data")
"*(type: int)* [OPTIONAL] Specifies a different end-padding width. If "
"this is not set, will use same as `padding_width`.")
.Input(
0,
"data_in",
"*(type: Tensor)* Input data ($T<N, D_1, ..., D_n>$).")
.Input(
1,
"lengths",
"(i64) Num of elements in each range. sum(lengths) = N.")
.Input(2, "start_padding", "T<D1..., Dn> Padding data for range start.")
"*(type: Tensor`<int>`)* Number of elements in each range. "
"sum(lengths) = N.")
.Input(
2,
"start_padding",
"*(type: Tensor`<int>`)* [OPTIONAL] Padding data for range start "
"($T<D_1, ..., D_n>$).")
.Input(
3,
"end_padding",
"T<D1..., Dn> (optional) Padding for range end. "
"If not provided, start_padding is used as end_padding as well.")
.Output(0, "data_out", "(T<N + 2*padding_width, D1..., Dn>) Padded data.")
.Output(1, "lengths_out", "(i64, optional) Lengths for each padded range.");
"*(type: Tensor`<int>`)* [OPTIONAL] Padding for range end. If not "
"provided, `start_padding` is used ($T<D_1, ..., D_n>$).")
.Output(
0,
"data_out",
"*(type: Tensor)* Padded data tensor ($T<N + 2*padding\_width, "
"D_1, ..., D_n>$).")
.Output(
1,
"lengths_out",
"*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each padded range.");
OPERATOR_SCHEMA(RemovePadding)
.NumInputs(1, 2)
.NumOutputs(1, 2)
.SetDoc(R"DOC(
Remove padding around the edges of each segment of the input data. This is
the reverse opration of AddPadding, and uses the same arguments and conventions
Remove padding around the edges of each segment of the input data. This is the
reverse operation of **AddPadding**, and uses the same arguments and conventions
for input and output data format.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
addpad_op = core.CreateOperator(
"AddPadding",
["X", "lengths_add"],
["Y", "lengths_out_add"],
padding_width=1
)
rmpad_op = core.CreateOperator(
"RemovePadding",
["Y", "lengths_rm"],
["Z", "lengths_out_rm"],
padding_width=1
)
workspace.FeedBlob("X", (np.random.randint(20, size=(3,5))))
workspace.FeedBlob("lengths_add", np.array([3]).astype(np.int32))
workspace.FeedBlob("lengths_rm", np.array([5]).astype(np.int32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(addpad_op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out_add:", workspace.FetchBlob("lengths_out_add"))
workspace.RunOperatorOnce(rmpad_op)
print("Z:", workspace.FetchBlob("Z"))
print("lengths_out_rm:", workspace.FetchBlob("lengths_out_rm"))
```
**Result**
```
X: [[17 19 1 9 1]
[19 3 5 19 1]
[16 0 0 0 4]]
Y: [[ 0 0 0 0 0]
[17 19 1 9 1]
[19 3 5 19 1]
[16 0 0 0 4]
[ 0 0 0 0 0]]
lengths_out_add: [5]
Z: [[17 19 1 9 1]
[19 3 5 19 1]
[16 0 0 0 4]]
lengths_out_rm: [3]
```
</details>
)DOC")
.Arg("padding_width", "Outer-size of padding to remove around each range.")
.Arg(
"padding_width",
"*(type: int)* Outer-size of padding to remove around each range.")
.Arg(
"end_padding_width",
"(Optional) Specifies a different end-padding width.")
.Input(0, "data_in", "T<N, D1..., Dn> Input data")
"*(type: int)* [OPTIONAL] Specifies a different end-padding width. "
"If this is not set, will use same as `padding_width`.")
.Input(
0,
"data_in",
"Input tensor ($T<N, D_1, ..., D_n>$).")
.Input(
1,
"lengths",
"(i64) Num of elements in each range. sum(lengths) = N. "
"If not provided, considers all data as a single segment.")
.Output(0, "data_out", "(T<N - 2*padding_width, D1..., Dn>) Unpadded data.")
"*(type: Tensor`<int>`)* Number of elements in each range. "
"sum(lengths) = N. If not provided, considers all data as a single "
"segment.")
.Output(
0,
"data_out",
"*(type: Tensor)* Padded data tensor "
"($T<N + 2*padding\_width, D_1, ..., D_n>$).")
.Output(
1,
"lengths_out",
"(i64, optional) Lengths for each unpadded range.");
"*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each padded range.");
OPERATOR_SCHEMA(GatherPadding)
.NumInputs(2)

View File

@ -9,9 +9,9 @@ OPERATOR_SCHEMA(Shape)
.NumOutputs(1)
.Arg(
"axes",
"(int[]) array of interested axes."
"If given, this operators only returns the dimension of given axes."
"Otherwise, the operator returns full dimension.")
"*(type: int[])* Array of interested axes."
"If given, this operator only returns the dimensions of the given axes."
"Otherwise, the operator returns the dimensions of all axes.")
.TensorInferenceFunction([](const OperatorDef& def,
const vector<TensorShape>& in) {
ArgumentHelper args(def);
@ -26,9 +26,52 @@ OPERATOR_SCHEMA(Shape)
return out;
})
.SetDoc(R"DOC(
Produce a 1D int64 tensor with the shape of the input tensor.
If called with an optional argument \"axes\", the result will only
contain the dimension of specified axes in particular order.)DOC");
Produce a 1D int64 tensor with the shape of the input tensor.
If called with an optional argument `axes`, the result will only
contain the dimensions of specified axes.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/shape_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Shape",
["X"],
["shape"],
)
workspace.FeedBlob("X", (np.random.randint(10, size=(2,3))))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("shape:", workspace.FetchBlob("shape"))
```
**Result**
```
X:
[[3 2 5]
[5 7 3]]
shape: [2 3]
```
</details>
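
A minimal sketch of the `axes` argument, returning only the dimension of axis 1 of
an input like the one in the example above:

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Shape",
    ["X"],
    ["shape"],
    axes=[1]
)

workspace.FeedBlob("X", np.random.randint(10, size=(2, 3)))
workspace.RunOperatorOnce(op)
print("shape:", workspace.FetchBlob("shape"))  # expected: [3]
```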
)DOC")
.Input(0,"X", "*(type: Tensor)* Input tensor.")
.Output(0,"shape", "*(type: Tensor)* Output tensor containing shape of input tensor.");
SHOULD_NOT_DO_GRADIENT(Shape);

View File

@ -25,12 +25,55 @@ OPERATOR_SCHEMA(Sigmoid)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Sigmoid takes one input data (Tensor<T>) and produces one output data
(Tensor<T>) where the sigmoid function, y = 1 / (1 + exp(-x)), is applied to the
tensor elementwise.
Apply the Sigmoid function element-wise to the input tensor. This is often used
as a non-linear activation function in a neural network. The sigmoid function is
defined as:
$$Sigmoid(x) = \frac{1}{1+\exp(-x)}$$
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sigmoid_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sigmoid",
["X"],
["Y"]
)
workspace.FeedBlob("X", np.random.randn(5).astype(np.float32))
print("input:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("sigmoid:", workspace.FetchBlob("Y"))
```
**Result**
```
input: [ 1.5744036 0.31632107 1.7842269 1.4450722 -2.1726978 ]
sigmoid: [0.8284105 0.57842743 0.85621804 0.80923885 0.10222916]
```
</details>
)DOC")
.Input(0, "X", "1D input tensor")
.Output(0, "Y", "1D output tensor")
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.")
.InheritOnnxSchema("Sigmoid");
// Input: Y, dY, output: dX
OPERATOR_SCHEMA(SigmoidGradient)

View File

@ -38,9 +38,52 @@ OPERATOR_SCHEMA(Sin)
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Calculates the sine of the given input tensor, element-wise.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sin_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sin",
["X"],
["Y"]
)
workspace.FeedBlob("X", np.random.rand(5).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X: [0.8466114 0.1803606 0.5601509 0.04959291 0.64770824]
Y: [0.74903965 0.17938434 0.5313141 0.04957259 0.60336035]
```
</details>
)DOC")
.Input(0, "input", "Input tensor")
.Output(0, "output", "The sine of the input tensor computed element-wise");
.Input(0, "X", "*(type: Tensor`<float>`)* Input tensor.")
.Output(
0,
"Y",
"*(type: Tensor`<float>`)* Output tensor calculated as the sine of the input tensor, element-wise.");
OPERATOR_SCHEMA(SinGradient).NumInputs(2).NumOutputs(1).IdenticalTypeAndShape();

View File

@ -83,31 +83,75 @@ OPERATOR_SCHEMA(Softmax)
.NumOutputs(1)
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
The operator computes the softmax normalized values for each layer in the batch
of the given input. The input is a 2-D tensor (Tensor<float>) of size
(batch_size x input_feature_dimensions). The output tensor has the same shape
and contains the softmax normalized values of the corresponding input.
X does not need to explicitly be a 2D vector; rather, it will be
coerced into one. For an arbitrary n-dimensional tensor
X \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
the axis provided, then X will be coerced into a 2-dimensional tensor with
dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default
case where axis=1, this means the X tensor will be coerced into a 2D tensor
of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
Each of these dimensions must be matched correctly, or else the operator
will throw errors.
Applies the Softmax function to an n-dimensional input Tensor, rescaling it so
that the elements of the n-dimensional output Tensor lie in the range (0,1) and
sum to 1. The softmax operator is typically the last layer in a classifier network,
as its output can be interpreted as confidence probabilities of an input belonging
to each class. The input is a 2-D tensor (Tensor) of size (batch_size x
input_feature_dimensions). The output tensor has the same shape and contains the
softmax normalized values of the corresponding input. The softmax function is
defined as follows:
$$softmax(x_i) = \frac{\exp(x_i)}{\sum_{j} \exp(x_j)}$$
The input does not need to explicitly be a 2D vector; rather, it will be coerced
into one. For an arbitrary n-dimensional tensor `X` in
$[a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}]$, where k is the `axis` provided,
then `X` will be coerced into a 2-dimensional tensor with dimensions
$[(a_0 * ... * a_{k-1}), (a_k * ... * a_{n-1})]$. For the default case where
`axis`=1, the `X` tensor will be coerced into a 2D tensor of dimensions
$[a_0, (a_1 * ... * a_{n-1})]$, where $a_0$ is often the batch size. In this
situation, we must have $a_0 = N$ and $a_1 * ... * a_{n-1} = D$. Each of these
dimensions must be matched correctly, or else the operator will throw errors.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/softmax_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/softmax_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Softmax",
["X"],
["Y"]
)
workspace.FeedBlob("X", np.random.randn(1, 5).astype(np.float32))
print("input:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("softmax:", workspace.FetchBlob("Y"))
```
**Result**
```
input: [[ 0.0417839 0.61960053 -0.23150268 -0.64389366 -3.0000346 ]]
softmax: [[0.24422921 0.43525138 0.18582782 0.12303016 0.01166145]]
```
</details>
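
A minimal sketch of the `axis` coercion described above, using a 3D input that is
coerced into a (6 x 4) matrix with `axis`=2:

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Softmax",
    ["X"],
    ["Y"],
    axis=2
)

workspace.FeedBlob("X", np.random.randn(2, 3, 4).astype(np.float32))
workspace.RunOperatorOnce(op)
Y = workspace.FetchBlob("Y")
print("Y shape:", Y.shape)  # (2, 3, 4), same as the input
print("row sums:", Y.reshape(6, 4).sum(axis=1))  # each coerced row sums to ~1
```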
)DOC")
.Arg("axis",
"(int) default to 1; describes the axis of the inputs when coerced "
"to 2D; defaults to one because the 0th axis most likely describes "
"the batch_size")
.Input(0, "input",
"The input tensor that's coerced into a 2D matrix of size (NxD) "
"as described above.")
.Output(0, "output", "The softmax normalized output values with the same "
"shape as input tensor.")
"*(type: int; default: 1)* Axis of the inputs when coerced to 2D matrix.")
.Input(0, "X",
"*(type: Tensor`<float>`)* Input tensor that's coerced into a 2D matrix of size (NxD) as described above.")
.Output(0, "Y",
"*(type: Tensor`<float>`)* The softmax normalized output tensor with the same shape as input tensor.")
.InheritOnnxSchema("Softmax");
// Input: Y, dY. Output: dX

View File

@ -35,26 +35,115 @@ OPERATOR_SCHEMA(SoftmaxWithLoss)
return out;
})
.SetDoc(R"DOC(
Combined Softmax and Cross-Entropy loss operator.
The operator computes the softmax normalized values for each layer in the batch
of the given input, after which cross-entropy loss is computed. This operator is
numerically more stable than separate Softmax and CrossEntropy ops.
The inputs are a 2-D tensor (Tensor<float>) of size
(batch_size x input_feature_dimensions) and tensor of labels (ground truth).
Output is tensor with the probability for each label for each example (N x D)
and averaged loss (scalar).
Use parameter label_prob=1 to enable inputting labels as a probability
distribution.
Optional third input blob can be used to weight the samples for the loss.
Combined Softmax and Cross-Entropy loss operator. The operator first computes the softmax normalized values for each example in the batch of the given input, then computes cross-entropy loss. This operator is numerically more stable than separate `Softmax` and `CrossEntropy` ops. The inputs are a 2-D tensor `logits` of size (batch_size x input_feature_dimensions), which represents the unscaled log probabilities, and a 1-dimensional integer `labels` tensor for ground truth. An optional third input blob (`weight_tensor`) can be used to weight the samples for the loss, which is useful if the training set is unbalanced. This operator outputs a `softmax` tensor which contains the probability for each label for each example (same shape as the `logits` input), and a scalar `loss` value, which is the averaged cross-entropy loss between the softmax probabilities and the ground truth values. Use parameter `label_prob`=1 to enable inputting labels as a probability distribution.
Softmax cross-entropy loss function:
$$loss(x, class) = -\log{\biggl(\frac{\exp(x[class])}{\sum_{j} \exp(x[j])}\biggr)} = -x[class] + \log{\biggl(\sum_{j} \exp(x[j])\biggr)}$$
or if the `weight_tensor` has been passed:
$$loss(x, class) = weight[class]\biggl(-x[class] + \log{\biggl(\sum_{j} \exp(x[j])\biggr)}\biggr)$$
The `logits` input does not need to be a 2D tensor explicitly; rather, it will be coerced into one. For an arbitrary n-dimensional tensor `X` with shape $[a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}]$, where $k$ is the `axis` provided, `X` will be coerced into a 2-dimensional tensor with dimensions $[(a_0 * ... * a_{k-1}), (a_k * ... * a_{n-1})]$. For the default case where `axis`=1, the `X` tensor will be coerced into a 2D tensor of dimensions $[a_0, (a_1 * ... * a_{n-1})]$, where $a_0$ is often the batch size. In this situation, we must have $a_0 = N$ and $a_1 * ... * a_{n-1} = D$. Each of these dimensions must be matched correctly, or else the operator will throw errors.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/softmax_with_loss_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"SoftmaxWithLoss",
["logits", "labels"],
["softmax", "avgloss"]
)
workspace.FeedBlob("logits", np.random.randn(1, 5).astype(np.float32))
workspace.FeedBlob("labels", np.asarray([4]).astype(np.int32))
print("logits:", workspace.FetchBlob("logits"))
print("labels:", workspace.FetchBlob("labels"))
workspace.RunOperatorOnce(op)
print("softmax:", workspace.FetchBlob("softmax"))
print("avgloss:", workspace.FetchBlob("avgloss"))
```
**Result**
```
logits: [[-0.3429451 -0.80375195 0.23104447 1.4569176 -0.5268362 ]]
labels: [4]
softmax: [[0.09721052 0.0613179 0.17258129 0.58800864 0.0808817 ]]
avgloss: 2.5147676
```
</details>
<details>
<summary> <b>Example 2</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"SoftmaxWithLoss",
["logits", "labels"],
["softmax", "avgloss"],
scale=5.0
)
workspace.FeedBlob("logits", np.asarray([[.1, .4, .7, 1.5, .2]]).astype(np.float32))
workspace.FeedBlob("labels", np.asarray([4]).astype(np.int32))
print("logits:", workspace.FetchBlob("logits"))
print("labels:", workspace.FetchBlob("labels"))
workspace.RunOperatorOnce(op)
print("softmax:", workspace.FetchBlob("softmax"))
print("avgloss:", workspace.FetchBlob("avgloss"))
```
**Result**
```
logits: [[0.1 0.4 0.7 1.5 0.2]]
labels: [4]
softmax: [[0.10715417 0.144643 0.19524762 0.4345316 0.11842369]]
avgloss: 10.667433
```
</details>
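As a sanity check of the loss formula above, the sketch below (an addition, not one of the original examples) recomputes the averaged loss with NumPy, assuming the standard `caffe2.python` workspace API and the default `scale=1`.
<details>
<summary> <b>NumPy check (sketch)</b> </summary>
**Code**
```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()
op = core.CreateOperator(
    "SoftmaxWithLoss",
    ["logits", "labels"],
    ["softmax", "avgloss"]
)

logits = np.random.randn(4, 5).astype(np.float32)
labels = np.random.randint(5, size=4).astype(np.int32)
workspace.FeedBlob("logits", logits)
workspace.FeedBlob("labels", labels)
workspace.RunOperatorOnce(op)

# loss(x, class) = -x[class] + log(sum_j exp(x[j])), averaged over the batch.
per_example = -logits[np.arange(4), labels] + np.log(np.sum(np.exp(logits), axis=1))
print("numpy avg loss:", per_example.mean())
print("op avg loss:   ", workspace.FetchBlob("avgloss"))
```
</details>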
)DOC")
.Input(0, "logits", "Unscaled log probabilities")
.Input(1, "labels", "Ground truth")
.Arg("label_prob","*(type: int; default: 0)* Setting to 1 enables inputting labels as probability distribution.")
.Arg("axis","*(type: int; default: 1)* Axis of the inputs when coerced to 2D.")
.Arg("scale","*(type: float)* Average loss output scaling factor (must be >= 0).")
.Arg("order","*(type: string; default: 'NCHW')* Order of blob dimensions (only 'NCHW' is supported currently).")
.Input(0, "logits", "*(type: Tensor`<float>`)* Input tensor.")
.Input(1, "labels", "*(type: Tensor`<float>`)* Ground truth label tensor.")
.Input(
2,
"weight_tensor",
"Optional blob to be used to weight the samples for the loss.")
.Output(0, "softmax", "Tensor with softmax cross entropy loss")
.Output(1, "loss", "Average loss");
"*(type: Tensor`<float>`)* [OPTIONAL] Blob used to weight the samples for the loss.")
.Output(0, "softmax", "*(type: Tensor`<float>`)* Softmax output tensor.")
.Output(1, "loss", "*(type: float)* Averaged cross-entropy loss output.");
// Input: X, T, P, dY; Output: dX
OPERATOR_SCHEMA(SoftmaxWithLossGradient).NumOutputs(1);

View File

@ -14,9 +14,55 @@ OPERATOR_SCHEMA(Sqr)
.NumOutputs(1)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc("Square (x^2) the elements of the input")
.Input(0, "input", "Input tensor")
.Output(0, "output", "Squared elements of the input");
.SetDoc(R"DOC(
Performs element-wise squaring ($x^2$) of input tensor.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sqr_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sqr",
["X"],
["Y"],
)
workspace.FeedBlob("X", (np.random.randint(10, size=(3,3))).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[4. 6. 2.]
[0. 1. 6.]
[9. 2. 7.]]
Y:
[[16. 36. 4.]
[ 0. 1. 36.]
[81. 4. 49.]]
```
</details>
)DOC")
.Input(0, "X", "*(type: Tensor`<float>`)* Input data tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.");
namespace {

View File

@ -19,10 +19,53 @@ OPERATOR_SCHEMA(Sqrt)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Computes the element-wise sqrt of the input.
Performs element-wise square-root ($\sqrt{x}$) of input tensor $X$.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sqrt_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Sqrt",
["X"],
["Y"],
)
workspace.FeedBlob("X", (np.random.randint(10, size=(3,3))).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[8. 3. 3.]
[4. 0. 0.]
[1. 2. 5.]]
Y:
[[2.8284268 1.7320508 1.7320508 ]
[1.9999999 0. 0. ]
[0.99999994 1.4142134 2.236068 ]]
```
</details>
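Note the small float32 rounding error visible in the result above (e.g. 1.9999999 rather than 2). The sketch below is an addition to the original documentation and compares the operator output against `np.sqrt` within a tolerance, assuming the standard `caffe2.python` workspace API.
<details>
<summary> <b>NumPy check (sketch)</b> </summary>
**Code**
```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()
op = core.CreateOperator(
    "Sqrt",
    ["X"],
    ["Y"],
)

X = np.random.randint(10, size=(3, 3)).astype(np.float32)
workspace.FeedBlob("X", X)
workspace.RunOperatorOnce(op)

# Agreement with NumPy up to float32 rounding.
print(np.allclose(workspace.FetchBlob("Y"), np.sqrt(X), rtol=1e-5, atol=1e-6))
```
</details>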
)DOC")
.Input(0, "X", "ND input tensor")
.Output(0, "Y", "ND input tensor");
.Input(0, "X", "*(type: Tensor`<float>`)* Input data tensor.")
.Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.");
namespace {

View File

@ -49,16 +49,57 @@ OPERATOR_SCHEMA(Transpose)
return out;
})
.SetDoc(R"DOC(
Transpose the input tensor similar to numpy.transpose. For example, when
axes=(1, 0, 2), given an input tensor of shape (1, 2, 3), the output shape
will be (2, 1, 3).
Transpose the input tensor by permuting the axes of the input according
to the `axes` argument, similar to NumPy's
[transpose](https://docs.scipy.org/doc/numpy/reference/generated/numpy.transpose.html)
function.
For example, when axes=(1, 0, 2), given an input tensor of shape
(1, 2, 3), the output shape will be (2, 1, 3).
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/transpose_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Transpose",
["X"],
["Y"],
axes=(0,3,1,2)
)
x = np.random.rand(1,32,32,3)
workspace.FeedBlob("X", x)
print("X.shape (NHWC order):", workspace.FetchBlob("X").shape)
workspace.RunOperatorOnce(op)
print("Y.shape (NCHW order):", workspace.FetchBlob("Y").shape)
```
**Result**
```
X.shape (NHWC order): (1, 32, 32, 3)
Y.shape (NCHW order): (1, 3, 32, 32)
```
</details>
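Since `axes` follows the same semantics as NumPy's `transpose`, the result can be verified directly. The sketch below is an addition to the original documentation, assuming the standard `caffe2.python` workspace API.
<details>
<summary> <b>NumPy check (sketch)</b> </summary>
**Code**
```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()
op = core.CreateOperator(
    "Transpose",
    ["X"],
    ["Y"],
    axes=(0, 3, 1, 2)
)

x = np.random.rand(1, 32, 32, 3).astype(np.float32)
workspace.FeedBlob("X", x)
workspace.RunOperatorOnce(op)

# Element-for-element match with numpy.transpose using the same axes.
print(np.array_equal(workspace.FetchBlob("Y"), np.transpose(x, (0, 3, 1, 2))))
```
</details>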
)DOC")
.Arg(
"axes",
"A list of integers. By default, reverse the dimensions, "
"otherwise permute the axes according to the values given.")
.Input(0, "data", "An input tensor.")
.Output(0, "transposed", "Transposed output.")
"*(type: Tuple(int))* Order to permute axes of input tensor. Reverses "
"the dimensions by default.")
.Input(0, "X", "*(type: Tensor)* Input tensor.")
.Output(0, "Y", "*(type: Tensor)* Transposed output.")
.InheritOnnxSchema("Transpose");
class GetTransposeGradient : public GradientMakerBase {

View File

@ -1202,15 +1202,71 @@ OPERATOR_SCHEMA(NanCheck)
OPERATOR_SCHEMA(Size)
.NumInputs(1)
.NumOutputs(1)
.SetDoc(
"Return a 1D tensor of type int64 that contains the number "
"of elements of the input tensor")
.Input(0, "tensor", "Tensor to calculate number of elements")
.SetDoc(R"DOC(
Return a 1D tensor of type *int64* that contains the number of elements of the input tensor.
Github Link:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/utility_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Size",
["X"],
["size"],
)
workspace.FeedBlob("X", (np.random.randint(10, size=(3,3))))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("size:", workspace.FetchBlob("size"))
workspace.ResetWorkspace()
workspace.FeedBlob("X", (np.random.rand(6,4)))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("size:", workspace.FetchBlob("size"))
```
**Result**
```
X:
[[3 7 0]
[0 1 6]
[5 0 8]]
size: 9
X:
[[0.92017884 0.32115368 0.68692035 0.64135016]
[0.8723328 0.77830265 0.80688656 0.25524236]
[0.37970216 0.76407047 0.85689564 0.30692883]
[0.69352573 0.42531502 0.16415212 0.59209324]
[0.52684188 0.37094846 0.60670079 0.6489272 ]
[0.94715906 0.34800557 0.61898769 0.28947359]]
size: 24
```
</details>
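For reference, the returned value equals NumPy's element count for the same array. The sketch below is an addition to the original documentation, assuming the standard `caffe2.python` workspace API.
<details>
<summary> <b>NumPy check (sketch)</b> </summary>
**Code**
```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()
op = core.CreateOperator(
    "Size",
    ["X"],
    ["size"],
)

X = np.random.rand(6, 4)
workspace.FeedBlob("X", X)
workspace.RunOperatorOnce(op)

# The operator's output equals X.size (i.e. np.prod(X.shape)).
print(workspace.FetchBlob("size"), X.size)
```
</details>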
)DOC")
.Input(0, "X", "*(type: Tensor)* Input tensor to calculate number of elements.")
.Output(
0,
"output",
"1D tensor of type int64 that contains the number of "
"elements in the input tensor.");
"size",
"*(type: Tensor)* 1D tensor of type int64 that contains the number of "
"elements in the input tensor *X*.");
REGISTER_CPU_OPERATOR(Size, SizeOp<CPUContext>);
NO_GRADIENT(Size);