diff --git a/.github/scripts/github_utils.py b/.github/scripts/github_utils.py index 3a42298cdf3..0fc60cb31e2 100644 --- a/.github/scripts/github_utils.py +++ b/.github/scripts/github_utils.py @@ -128,7 +128,7 @@ def gh_fetch_json_dict( def gh_graphql(query: str, **kwargs: Any) -> dict[str, Any]: rc = gh_fetch_url( - "https://api.github.com/graphql", + "https://api.github.com/graphql", # @lint-ignore data={"query": query, "variables": kwargs}, reader=json.load, ) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9e23c0fb72..bfff2e0ce1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ endif() # This define is needed to preserve behavior given anticipated changes to # cccl/thrust -# https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html +# https://nvidia.github.io/cccl/libcudacxx/standard_api/numerics_library/complex.html string(APPEND CMAKE_CUDA_FLAGS " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS") diff --git a/README.md b/README.md index 58327c176f3..47841da7368 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ If you want to compile with CUDA support, [select a supported version of CUDA fr - [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v8.5 or above - [Compiler](https://gist.github.com/ax3l/9489132) compatible with CUDA -Note: You could refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/reference/support-matrix.html) for cuDNN versions with the various supported CUDA, CUDA driver and NVIDIA hardware +Note: Refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/backend/latest/reference/support-matrix.html) for the cuDNN versions supported by the various CUDA versions, CUDA drivers, and NVIDIA hardware. If you want to disable CUDA support, export the environment variable `USE_CUDA=0`. Other potentially useful environment variables may be found in `setup.py`.
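For context on the `gh_graphql` helper whose URL is annotated with `# @lint-ignore` in the first hunk above: it POSTs a query plus keyword arguments (as GraphQL variables) to the GitHub GraphQL endpoint and decodes the JSON reply. Below is a minimal, hedged sketch of that pattern using only the standard library; the standalone `graphql_query` name, the use of `urllib`, and the `GITHUB_TOKEN` environment variable are illustrative assumptions, not the repository's actual implementation (which goes through `gh_fetch_url`).

```python
import json
import os
import urllib.request


def graphql_query(query: str, **variables) -> dict:
    # Hypothetical standalone equivalent of gh_graphql: send the query and
    # its variables as JSON and return the decoded response.
    payload = json.dumps({"query": query, "variables": variables}).encode()
    request = urllib.request.Request(
        "https://api.github.com/graphql",
        data=payload,
        headers={
            "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
            "Content-Type": "application/json",
        },
    )
    with urllib.request.urlopen(request) as response:
        return json.load(response)


# Example usage (requires a valid token in GITHUB_TOKEN):
# print(graphql_query("query { viewer { login } }"))
```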
diff --git a/RELEASE.md b/RELEASE.md index 005a154d54b..caeab37b57d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -221,7 +221,7 @@ Release candidates are currently stored in the following places: * Wheels: https://download.pytorch.org/whl/test/ * Conda: https://anaconda.org/pytorch-test -* Libtorch: https://download.pytorch.org/libtorch/test +* Libtorch: https://download.pytorch.org/libtorch/test Backups are stored in a non-public S3 bucket at [`s3://pytorch-backup`](https://s3.console.aws.amazon.com/s3/buckets/pytorch-backup?region=us-east-1&tab=objects) @@ -322,7 +322,7 @@ Promotion should occur in two steps: * Promote S3 artifacts (wheels, libtorch) and Conda packages * Promote S3 wheels to PyPI -**NOTE**: The promotion of wheels to PyPI can only be done once so take caution when attempting to promote wheels to PyPI, (see https://github.com/pypa/warehouse/issues/726 for a discussion on potential draft releases within PyPI) +**NOTE**: The promotion of wheels to PyPI can only be done once so take caution when attempting to promote wheels to PyPI, (see https://github.com/pypi/warehouse/issues/726 for a discussion on potential draft releases within PyPI) ## Additional Steps to prepare for release day diff --git a/aten/src/ATen/core/boxing/KernelFunction.cpp b/aten/src/ATen/core/boxing/KernelFunction.cpp index b13f827b8f1..c099c456814 100644 --- a/aten/src/ATen/core/boxing/KernelFunction.cpp +++ b/aten/src/ATen/core/boxing/KernelFunction.cpp @@ -28,7 +28,7 @@ void ambiguous_autogradother_kernel(OperatorKernel*, const OperatorHandle& op, D "Autograd dispatch key for the backend.\n", "If you only want to run inference instead of training, in C++, add `c10::InferenceMode mode;` " "before model.forward(); in Python, use `torch.inference_mode()` as a context manager (see " - "https://pytorch.org/docs/stable/generated/torch.inference_mode.html).", + "https://pytorch.org/docs/stable/generated/torch.autograd.grad_mode.inference_mode.html).", "\nCanonical state\n~~~~~~~~~~~\n", op.dumpState(), "\n\n"); } diff --git a/aten/src/ATen/cuda/Atomic.cuh b/aten/src/ATen/cuda/Atomic.cuh index 4106ab6f730..f16be30f8b7 100644 --- a/aten/src/ATen/cuda/Atomic.cuh +++ b/aten/src/ATen/cuda/Atomic.cuh @@ -410,7 +410,7 @@ template __host__ __device__ T safe_max(T a, T b) { #if defined(__HIPCC__) // TODO: remove this special case for HIP when issue is fixed: - // https://github.com/ROCm-Developer-Tools/HIP/issues/2209 + // https://github.com/ROCm/hip/issues/2209 T max = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::max(a, b)); #else T max = at::_isnan(b) ? b : std::max(a, b); @@ -470,7 +470,7 @@ template __host__ __device__ T safe_min(T a, T b) { #if defined(__HIPCC__) // TODO: remove this special case for HIP when issue is fixed: - // https://github.com/ROCm-Developer-Tools/HIP/issues/2209 + // https://github.com/ROCm/hip/issues/2209 T min = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::min(a, b)); #else T min = at::_isnan(b) ? 
b : std::min(a, b); diff --git a/aten/src/ATen/native/Math.h b/aten/src/ATen/native/Math.h index 47c0a2be030..b6d44fca590 100644 --- a/aten/src/ATen/native/Math.h +++ b/aten/src/ATen/native/Math.h @@ -1680,7 +1680,7 @@ inline C10_HOST_DEVICE T calc_ndtri(T y0) { return x; } -/* The next function is taken from http://ab-initio.mit.edu/Faddeev */ +/* The next function is taken from http://ab-initio.mit.edu/faddeeva */ /* Copyright (c) 2012 Massachusetts Institute of Technology * diff --git a/aten/src/ATen/native/SharedReduceOps.h b/aten/src/ATen/native/SharedReduceOps.h index edaa106fc83..1de72abd588 100644 --- a/aten/src/ATen/native/SharedReduceOps.h +++ b/aten/src/ATen/native/SharedReduceOps.h @@ -26,7 +26,7 @@ template inline C10_DEVICE scalar_t max_propagate_nan(scalar_t a, scalar_t b) { #if defined(__HIPCC__) // TODO: remove this special case for HIP when issue is fixed: - // https://github.com/ROCm-Developer-Tools/HIP/issues/2209 + // https://github.com/ROCm/hip/issues/2209 scalar_t max = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::max(a, b)); #else scalar_t max = at::_isnan(b) ? b : std::max(a, b); @@ -37,7 +37,7 @@ template inline C10_DEVICE scalar_t min_propagate_nan(scalar_t a, scalar_t b) { #if defined(__HIPCC__) // TODO: remove this special case for HIP when issue is fixed: - // https://github.com/ROCm-Developer-Tools/HIP/issues/2209 + // https://github.com/ROCm/hip/issues/2209 scalar_t min = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::min(a, b)); #else scalar_t min = at::_isnan(b) ? b : std::min(a, b); diff --git a/aten/src/ATen/native/cuda/AmpKernels.cu b/aten/src/ATen/native/cuda/AmpKernels.cu index 8c161ca6272..2be6e47df3c 100644 --- a/aten/src/ATen/native/cuda/AmpKernels.cu +++ b/aten/src/ATen/native/cuda/AmpKernels.cu @@ -13,7 +13,7 @@ namespace { -// Thin wrapper around https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__SINGLE.html#group__CUDA__MATH__SINGLE_1g57a3c8313f570282a1a7bcc78743b08e, +// Thin wrapper around https://docs.nvidia.com/cuda/cuda-math-api/cuda_math_api/group__CUDA__MATH__SINGLE.html, // to ensure the Cuda math library's isfinite is actually what gets called in // _amp_non_finite_check_and_unscale_cuda_'s gpu_kernel lambda. // diff --git a/aten/src/ATen/native/cuda/Math.cuh b/aten/src/ATen/native/cuda/Math.cuh index 2fe8f5dd2e3..89308177bfe 100644 --- a/aten/src/ATen/native/cuda/Math.cuh +++ b/aten/src/ATen/native/cuda/Math.cuh @@ -766,7 +766,7 @@ const auto sinc_string = jiterator_stringify( ); // sinc_string const auto erfcx_string = jiterator_stringify( - /* The next function is taken from http://ab-initio.mit.edu/Faddeev */ + /* The next function is taken from http://ab-initio.mit.edu/faddeeva */ /* Copyright (c) 2012 Massachusetts Institute of Technology * diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp index 0853c02d6df..3cf47804e91 100644 --- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp +++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp @@ -1865,8 +1865,6 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) { // We require to perform ?geqrf_gpu again due to this bug in MAGMA: // - ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give R properly. 
// - ?geqrf2_gpu gives correct R, but doesn't allow computation of Q via ?orgqr_gpu - // Refer to the below link for more details: - // http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=1015&p=2800&hilit=geqrf_gpu#p2800 case at::LinalgBackend::Magma: return geqrf_magma(input, tau); case at::LinalgBackend::Cusolver: diff --git a/aten/src/ATen/native/cudnn/Conv_v8.cpp b/aten/src/ATen/native/cudnn/Conv_v8.cpp index b617cf44473..740b54d6772 100644 --- a/aten/src/ATen/native/cudnn/Conv_v8.cpp +++ b/aten/src/ATen/native/cudnn/Conv_v8.cpp @@ -347,7 +347,7 @@ struct BenchmarkCache { // @eqy: use thread local caches as cuDNN Execution Plans are not guaranteed to // be thread safe across all engines see Limitations in -// https://docs.nvidia.com/deeplearning/cudnn/release-notes/index.html +// https://docs.nvidia.com/deeplearning/cudnn/backend/latest/release-notes.html thread_local BenchmarkCache benchmark_cache; thread_local BenchmarkCache diff --git a/aten/src/ATen/native/cudnn/MHA.cpp b/aten/src/ATen/native/cudnn/MHA.cpp index c38d4a095c0..5d146edb90b 100644 --- a/aten/src/ATen/native/cudnn/MHA.cpp +++ b/aten/src/ATen/native/cudnn/MHA.cpp @@ -296,7 +296,7 @@ struct MHAGraphCache { // @eqy: use thread local caches as cuDNN Execution Plans are not guaranteed to // be thread safe across all engines see Limitations in -// https://docs.nvidia.com/deeplearning/cudnn/release-notes/index.html +// https://docs.nvidia.com/deeplearning/cudnn/backend/latest/release-notes.html thread_local MHAGraphCache mhagraphcache; thread_local MHAGraphCache mhagraphbackwardcache; diff --git a/aten/src/ATen/native/cudnn/RNN.cpp b/aten/src/ATen/native/cudnn/RNN.cpp index 17039f03e64..a65a5e43ac0 100644 --- a/aten/src/ATen/native/cudnn/RNN.cpp +++ b/aten/src/ATen/native/cudnn/RNN.cpp @@ -1204,7 +1204,7 @@ cudnnRNNAlgo_t get_algo( // Persistent algos typically don't work for packed inputs with sequence // lengths that vary across batch elements, and will return // CUDNN_STATUS_NOT_SUPPORTED if attempted. See - // https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#features-of-rnn-functions + // https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-890/developer-guide/index.html#features-of-rnn-functions if (!tensors.is_input_packed()) { auto cudnnDataType = getCudnnDataType(input); if (cudnnDataType != CUDNN_DATA_DOUBLE) { @@ -1274,7 +1274,7 @@ int64_t _cudnn_rnn_flatten_weight_prologue( rnn_desc = rnn.descriptor(handle); // Why do we pad to 5 dims here (and elsewhere)? - // https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNForwardTraining + // https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-892/api/index.html#cudnnRNNForwardTraining // expects descriptors padded to 3 dimensions. x_desc.set(flat_buf_datatype, x_geom.sizes(), x_geom.strides(), 5); diff --git a/aten/src/ATen/native/mps/kernels/Quantized.metal b/aten/src/ATen/native/mps/kernels/Quantized.metal index 1a277602aa2..4d57027a576 100644 --- a/aten/src/ATen/native/mps/kernels/Quantized.metal +++ b/aten/src/ATen/native/mps/kernels/Quantized.metal @@ -213,7 +213,7 @@ INSTANTIATE_INT4MV(bfloat, 256); * 1. Load A and B blocks (32x32 and 64x32 respectively) into shared memory. * 2. In 4 simdgroups, calculate the outer product of the loaded blocks. Each simdgroup produces a 2x4 8x8 result. 
* 2.1 For how to use outer product to perform matrix multiplication, refer to - * http://mlwiki.org/index.php/Matrix-Matrix_Multiplication#Sum_of_Outer_Products + * https://web.archive.org/web/20230521063455/http://mlwiki.org/index.php/Matrix-Matrix_Multiplication#Sum_of_Outer_Products * 3. Repeat 1 & 2 along K axis, with K block size 32, accumulate the result in the 2x4 8x8 block. * 4. Dequantize the final result and store it in the output matrix. * diff --git a/aten/src/ATen/native/quantized/cpu/qconv.cpp b/aten/src/ATen/native/quantized/cpu/qconv.cpp index 41209c3c047..9acdd0937c8 100644 --- a/aten/src/ATen/native/quantized/cpu/qconv.cpp +++ b/aten/src/ATen/native/quantized/cpu/qconv.cpp @@ -1923,7 +1923,7 @@ namespace { * FBGEMM uses vpmaddubsw instruction to multiply activations (uint8_t) and * weights (int8_t). * - * https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16&expand=3284,3530 + * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16&expand=3284,3530&ig_expand=4236 * * vpmaddubsw operates on a vector of activations and a vector of * weights. If these vectors are diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/README.md b/aten/src/ATen/native/quantized/cpu/qnnpack/README.md index ed6639c4ace..86974f1e212 100644 --- a/aten/src/ATen/native/quantized/cpu/qnnpack/README.md +++ b/aten/src/ATen/native/quantized/cpu/qnnpack/README.md @@ -78,10 +78,10 @@ MAX_JOBS=1 scripts/build_local.sh -DBUILD_BINARY=ON -DBUILD_PYTHON=OFF \ -DUSE_OBSERVERS=OFF -DUSE_DISTRIBUTED=OFF # Download model weights -wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb +wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb # @lint-ignore # Download model graph -wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb +wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb # @lint-ignore # Run speed benchmark with 50 warm-up iterations and 10 measurement iterations build/bin/speed_benchmark --net predict_net.pb --init_net init_net.pb \ @@ -104,11 +104,11 @@ scripts/build_android.sh -DANDROID_TOOLCHAIN=clang -DBUILD_BINARY=ON adb push build_android/bin/speed_benchmark /data/local/tmp/speed_benchmark # Download model weights and copy them to Android device -wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb +wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb # @lint-ignore adb push init_net.pb /data/local/tmp/init_net.pb # Download model graph and copy it to Android device -wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb +wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb # @lint-ignore adb push predict_net.pb /data/local/tmp/predict_net.pb # Run speed benchmark with 50 warm-up iterations and 10 measurement iterations @@ -134,11 +134,11 @@ scripts/build_android.sh -DANDROID_ABI=arm64-v8a -DANDROID_TOOLCHAIN=clang -DBUI adb push build_android/bin/speed_benchmark /data/local/tmp/speed_benchmark # Download model weights and copy them to Android device -wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb +wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb # @lint-ignore adb push init_net.pb 
/data/local/tmp/init_net.pb # Download model graph and copy it to Android device -wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb +wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb # @lint-ignore adb push predict_net.pb /data/local/tmp/predict_net.pb # Run speed benchmark with 50 warm-up iterations and 10 measurement iterations diff --git a/aten/src/ATen/native/quantized/cudnn/Conv.cpp b/aten/src/ATen/native/quantized/cudnn/Conv.cpp index edd4d0f5e76..6424000594e 100644 --- a/aten/src/ATen/native/quantized/cudnn/Conv.cpp +++ b/aten/src/ATen/native/quantized/cudnn/Conv.cpp @@ -53,7 +53,7 @@ std::unordered_map #endif diff --git a/cmake/External/aotriton.cmake b/cmake/External/aotriton.cmake index 2678cfde3c4..9c1862f6b44 100644 --- a/cmake/External/aotriton.cmake +++ b/cmake/External/aotriton.cmake @@ -98,7 +98,7 @@ if(NOT __AOTRITON_INCLUDED) "${__AOTRITON_VER}-${__AOTRITON_MANYLINUX}" "_${__AOTRITON_ARCH}-rocm${__AOTRITON_ROCM}" "-shared.tar.${__AOTRITON_Z}") - string(CONCAT __AOTRITON_URL "https://github.com/ROCm/aotriton/releases/download/" + string(CONCAT __AOTRITON_URL "https://github.com/ROCm/aotriton/releases/download/" # @lint-ignore "${__AOTRITON_VER}/${__AOTRITON_FILE}") ExternalProject_Add(aotriton_external URL "${__AOTRITON_URL}" diff --git a/docs/cpp/source/index.rst b/docs/cpp/source/index.rst index 02fa2a089e9..ddfa7a58d9f 100644 --- a/docs/cpp/source/index.rst +++ b/docs/cpp/source/index.rst @@ -137,7 +137,7 @@ about this by following `this API concerns itself with scenarios in which you would like to extend TorchScript with custom operators, which can similarly be serialized and invoked from C++ during inference. Lastly, the `torch::jit::compile -`_ +`_ function may be used to access the TorchScript compiler directly from C++. C++ Extensions diff --git a/docs/source/community/contribution_guide.rst b/docs/source/community/contribution_guide.rst index ec477ea50ea..4df422f541e 100644 --- a/docs/source/community/contribution_guide.rst +++ b/docs/source/community/contribution_guide.rst @@ -325,13 +325,13 @@ Python Docs PyTorch documentation is generated from python source using `Sphinx `__. Generated HTML is copied to the docs folder in the main branch of -`pytorch.github.io `__, +`pytorch.org/docs `__, and is served via GitHub pages. - Site: https://pytorch.org/docs - GitHub: https://github.com/pytorch/pytorch/tree/main/docs - Served from: - `https://github.com/pytorch/pytorch.github.io/tree/master/docs `__ + `https://pytorch.org/docs/main `__ C++ Docs ~~~~~~~~ diff --git a/docs/source/conf.py b/docs/source/conf.py index 152faa45b0f..1485c80277e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3708,7 +3708,6 @@ def process_docstring(app, what_, name, obj, options, lines): lines (List[str]): the lines of the docstring, see above References: - https://www.sphinx-doc.org/en/1.5.1/_modules/sphinx/ext/autodoc.html https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html """ import re diff --git a/docs/source/cudnn_rnn_determinism.rst b/docs/source/cudnn_rnn_determinism.rst index c002925a8c3..ffe8f1d8ccd 100644 --- a/docs/source/cudnn_rnn_determinism.rst +++ b/docs/source/cudnn_rnn_determinism.rst @@ -13,4 +13,4 @@ See the `cuDNN 8 Release Notes`_ for more information. -.. _cuDNN 8 Release Notes: https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_8.html +.. 
_cuDNN 8 Release Notes: https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-880/release-notes/rel_8.html diff --git a/docs/source/distributed.rst b/docs/source/distributed.rst index 4dc70268cb2..f36f6218dac 100644 --- a/docs/source/distributed.rst +++ b/docs/source/distributed.rst @@ -141,7 +141,7 @@ network bandwidth. These two environment variables have been pre-tuned by NCCL for some cloud providers, such as AWS or GCP. For a full list of NCCL environment variables, please refer to -`NVIDIA NCCL's official documentation `_ +`NVIDIA NCCL's official documentation `_ You can tune NCCL communicators even further using `torch.distributed.ProcessGroupNCCL.NCCLConfig` and `torch.distributed.ProcessGroupNCCL.Options`. Learn more about them using `help` diff --git a/docs/source/notes/hip.rst b/docs/source/notes/hip.rst index 57f08b93053..a34535d67fc 100644 --- a/docs/source/notes/hip.rst +++ b/docs/source/notes/hip.rst @@ -141,7 +141,7 @@ Currently, only the "nccl" and "gloo" backends for torch.distributed are support CUDA API to HIP API mappings in C++ ----------------------------------- -Please refer: https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP_API_Guide.html +Please refer: https://rocm.docs.amd.com/projects/HIP/en/latest/reference/api_syntax.html NOTE: The CUDA_VERSION macro, cudaRuntimeGetVersion and cudaDriverGetVersion APIs do not semantically map to the same values as HIP_VERSION macro, hipRuntimeGetVersion and diff --git a/docs/source/notes/windows.rst b/docs/source/notes/windows.rst index 8fb2f913630..3cf736046a8 100644 --- a/docs/source/notes/windows.rst +++ b/docs/source/notes/windows.rst @@ -24,9 +24,10 @@ MKL and MAGMA. Here are the steps to build with them. REM 2.5.3 (CUDA 10.1 10.2 11.0) x (Debug Release) REM 2.5.2 (CUDA 9.2 10.0 10.1 10.2) x (Debug Release) REM 2.5.1 (CUDA 9.2 10.0 10.1 10.2) x (Debug Release) - set CUDA_PREFIX=cuda102 - set CONFIG=release - curl -k https://s3.amazonaws.com/ossci-windows/magma_2.5.4_%CUDA_PREFIX%_%CONFIG%.7z -o magma.7z + set "CUDA_PREFIX=cuda102" + set "CONFIG=release" + set "HOST=https://s3.amazonaws.com/ossci-windows" + curl -k "%HOST%/magma_2.5.4_%CUDA_PREFIX%_%CONFIG%.7z" -o magma.7z 7z x -aoa magma.7z -omagma REM Setting essential environment variables diff --git a/docs/source/onnx_dynamo.rst b/docs/source/onnx_dynamo.rst index 08395253a05..fb17d2706a9 100644 --- a/docs/source/onnx_dynamo.rst +++ b/docs/source/onnx_dynamo.rst @@ -27,7 +27,7 @@ Dependencies The ONNX exporter depends on extra Python packages: - `ONNX `_ - - `ONNX Script `_ + - `ONNX Script `_ They can be installed through `pip `_: diff --git a/docs/source/onnx_torchscript.rst b/docs/source/onnx_torchscript.rst index 400cc4da23d..2fa02cf78f0 100644 --- a/docs/source/onnx_torchscript.rst +++ b/docs/source/onnx_torchscript.rst @@ -452,7 +452,7 @@ ONNX operators that represent the function's behavior in ONNX. For example:: .. . ``torch::jit::Value::setType``). This is not required, but it can help the exporter's .. shape and type inference for down-stream nodes. For a non-trivial example of ``setType``, see .. ``test_aten_embedding_2`` in -.. `test_operators.py `_. +.. `test_operators.py `_. .. 
The example below shows how you can access ``requires_grad`` via the ``Node`` object: diff --git a/docs/source/rpc/rref.rst b/docs/source/rpc/rref.rst index 3f858e58686..a5177e08ef6 100644 --- a/docs/source/rpc/rref.rst +++ b/docs/source/rpc/rref.rst @@ -204,7 +204,7 @@ will create the ``OwnerRRef``, and returns an ACK to acknowledge ``{100, 1}`` **G2**, the ``OwnerRRef`` is a child of the ``UserRRef``, and the ``UserRRef`` is not deleted until it receives the ACK from the owner. -.. image:: https://user-images\.githubusercontent\.com/16999635/69164772-98181300-0abe-11ea-93a7-9ad9f757cd94.png +.. image:: https://user-images.githubusercontent.com/16999635/69164772-98181300-0abe-11ea-93a7-9ad9f757cd94.png :alt: user_to_owner_ret.png :width: 500 px diff --git a/docs/source/torch.compiler_troubleshooting_old.rst b/docs/source/torch.compiler_troubleshooting_old.rst index 7a4a35dffa3..5f693741e94 100644 --- a/docs/source/torch.compiler_troubleshooting_old.rst +++ b/docs/source/torch.compiler_troubleshooting_old.rst @@ -209,7 +209,7 @@ Diagnosing TorchInductor Errors If the error does not occur with the ``"eager"`` backend, then the backend compiler is the source of the error (`example -error `__). +error `__). There are `different choices <./torch.compiler.rst>`__ for backend compilers for TorchDynamo, with TorchInductor fitting the needs of most users. This section focuses on TorchInductor diff --git a/docs/source/type_info.rst b/docs/source/type_info.rst index a807398c5fb..29a5ca28269 100644 --- a/docs/source/type_info.rst +++ b/docs/source/type_info.rst @@ -15,7 +15,7 @@ torch.finfo .. class:: torch.finfo A :class:`torch.finfo` is an object that represents the numerical properties of a floating point -:class:`torch.dtype`, (i.e. ``torch.float32``, ``torch.float64``, ``torch.float16``, and ``torch.bfloat16``). This is similar to `numpy.finfo `_. +:class:`torch.dtype`, (i.e. ``torch.float32``, ``torch.float64``, ``torch.float16``, and ``torch.bfloat16``). This is similar to `numpy.finfo `_. A :class:`torch.finfo` provides the following attributes: @@ -49,7 +49,7 @@ torch.iinfo A :class:`torch.iinfo` is an object that represents the numerical properties of a integer -:class:`torch.dtype` (i.e. ``torch.uint8``, ``torch.int8``, ``torch.int16``, ``torch.int32``, and ``torch.int64``). This is similar to `numpy.iinfo `_. +:class:`torch.dtype` (i.e. ``torch.uint8``, ``torch.int8``, ``torch.int16``, ``torch.int32``, and ``torch.int64``). This is similar to `numpy.iinfo `_. A :class:`torch.iinfo` provides the following attributes: diff --git a/scripts/build_tizen.sh b/scripts/build_tizen.sh index ce64b6c4298..2262a2503c1 100755 --- a/scripts/build_tizen.sh +++ b/scripts/build_tizen.sh @@ -9,7 +9,7 @@ setup_environment(){ # The rootfs image for a Tizen target (RPi3)is located at the below webpage: -# http://download.tizen.org/releases/milestone/tizen/4.0.m1/tizen-unified_20170529.1/images/ +# https://cdn.download.tizen.org/archive/releases/milestone/tizen/4.0.m1/tizen-unified_20170529.1/images/ # If you do not have a Tizen device, Please, run qemu-arm-static and chroot command. # $ sudo chroot ~/tizen-rootfs qemu-arm-static /usr/bin/bash @@ -25,7 +25,7 @@ caffe2_lite_dep_packages(){ # You can set-up a rpm repository with zypper, yum, and dnf because Tizen # software platform officially support rpm format such as Fedora, OpenSUSE. 
# The official Tizen repository is as following: -# http://download.tizen.org/releases/milestone/tizen/4.0.m1/ +# https://cdn.download.tizen.org/archive/releases/milestone/tizen/4.0.m1/ echo "Installing dependencies." sudo zypper install \ make \ @@ -69,7 +69,7 @@ caffe2_full_dep_packages(){ # You can set-up a rpm repository with zypper, yum, and dnf because Tizen # software platform officially support rpm format such as Fedora, OpenSUSE. # The official Tizen repository is as following: -# http://download.tizen.org/releases/milestone/tizen/4.0.m1/ +# https://cdn.download.tizen.org/archive/releases/milestone/tizen/4.0.m1/ echo "Installing dependencies." sudo zypper install \ cmake \ diff --git a/scripts/release_notes/common.py b/scripts/release_notes/common.py index 9143fd672fb..8e6eda25520 100644 --- a/scripts/release_notes/common.py +++ b/scripts/release_notes/common.py @@ -212,7 +212,9 @@ headers = {"Authorization": f"token {token}"} def run_query(query): request = requests.post( - "https://api.github.com/graphql", json={"query": query}, headers=headers + "https://api.github.com/graphql", # @lint-ignore + json={"query": query}, + headers=headers, ) if request.status_code == 200: return request.json() diff --git a/test/cpp/api/rnn.cpp b/test/cpp/api/rnn.cpp index fef879b7983..c01b83898b4 100644 --- a/test/cpp/api/rnn.cpp +++ b/test/cpp/api/rnn.cpp @@ -441,7 +441,7 @@ lstm_output_to_device( } // This test is a port of python code introduced here: -// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66 +// https://medium.com/data-science/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66 // Reverse forward of bidirectional GRU should act // as regular forward of unidirectional GRU void BidirectionalGRUReverseForward(bool cuda) { diff --git a/test/distributed/tensor/test_dtensor_ops.py b/test/distributed/tensor/test_dtensor_ops.py index 4fa6bdeed8e..665f87f797e 100644 --- a/test/distributed/tensor/test_dtensor_ops.py +++ b/test/distributed/tensor/test_dtensor_ops.py @@ -594,7 +594,7 @@ class TestDTensorOps(DTensorOpTestBase): dtensor_rs = func(*dtensor_args, **dtensor_kwargs) # we need to skip tests containing tensors of zero elements for now. - # see issue: https://github.com/pytorch/tau/issues/470 + # see issue: https://github.com/pytorch/PiPPy/issues/470 # TODO remove this once issue above fixed. 
flat_args = pytree.tree_leaves(dtensor_rs) if any( diff --git a/test/distributed/tensor/test_pointwise_ops.py b/test/distributed/tensor/test_pointwise_ops.py index f30b700b366..3e3cd378165 100644 --- a/test/distributed/tensor/test_pointwise_ops.py +++ b/test/distributed/tensor/test_pointwise_ops.py @@ -192,7 +192,9 @@ class DistElementwiseOpsTest(DTensorOpTestBase): op=torch.sigmoid, ) - @skip("testing RNG based ops is broken: https://github.com/pytorch/tau/issues/494") + @skip( + "testing RNG based ops is broken: https://github.com/pytorch/PiPPy/issues/494" + ) def test_dropout(self): device_mesh = self.build_device_mesh() diff --git a/test/functorch/test_vmap.py b/test/functorch/test_vmap.py index 894aa6f544d..2f1d1416b63 100644 --- a/test/functorch/test_vmap.py +++ b/test/functorch/test_vmap.py @@ -5169,7 +5169,6 @@ class TestVmapOperatorsOpInfo(TestCase): xfail("linalg.vecdot"), # throws in vmap on CUDA # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got -2) - # https://github.com/pytorch/pytorch/runs/8110653462?check_suite_focus=true # but it passes locally xfail("linalg.diagonal"), skip("linalg.matrix_norm", ""), diff --git a/test/inductor/test_cuda_repro.py b/test/inductor/test_cuda_repro.py index 39b5f589712..2b9ff6ec2c8 100644 --- a/test/inductor/test_cuda_repro.py +++ b/test/inductor/test_cuda_repro.py @@ -581,7 +581,7 @@ class CudaReproTests(TestCase): """ This UT tests autotune on an inplace kernel. The autotune should not contaminate the input buffers when tuning with multiple configs. For more details, refer to - https://github.com/openai/triton/issues/781 + https://github.com/triton-lang/triton/issues/781 https://github.com/pytorch/torchdynamo/issues/1670 """ from torch._C import _cuda_getCurrentRawStream as get_cuda_stream @@ -1587,7 +1587,7 @@ class CudaReproTests(TestCase): @config.patch("triton.use_block_ptr", True) def test_selecsls42b_misaligned_address(self): - # https://github.com/openai/triton/issues/2836 + # https://github.com/triton-lang/triton/issues/2836 @torch.compile(fullgraph=True) def fn(arg207_1, arg208_1, convert_element_type_40, expand, full, mul_3): diff --git a/test/inductor/test_foreach.py b/test/inductor/test_foreach.py index e68ed88a4f2..da243adfdcf 100644 --- a/test/inductor/test_foreach.py +++ b/test/inductor/test_foreach.py @@ -431,7 +431,7 @@ class ForeachTests(TestCase): @requires_cuda @scalar_bin_ops @unittest.skip( - "Triton recursion depth exceeded: https://github.com/openai/triton/issues/1763" + "Triton recursion depth exceeded: https://github.com/triton-lang/triton/issues/1763" ) def test_kernel_split_arg_limit_scalar(self, op): def fn(a): diff --git a/test/inductor/test_indexing.py b/test/inductor/test_indexing.py index 954452882ca..7369d6c9fad 100644 --- a/test/inductor/test_indexing.py +++ b/test/inductor/test_indexing.py @@ -95,7 +95,7 @@ class TestIndexingSimplification(InductorTestCase): ModularIndexing(i0 + i1 * i2 * r3, i2, r3), ModularIndexing(i0, i2, r3) ) - # if there are negative terms, we cannot optimize away zero terms due to https://github.com/openai/triton/issues/619 + # if there are negative terms, we cannot optimize away zero terms due to https://github.com/triton-lang/triton/issues/619 self.assertEqual( ModularIndexing(-i0 + i1 * 20, 2, 10), ModularIndexing(-i0 + i1 * 20, 2, 10) ) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index 6476989f675..96b7ade81d0 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -166,7 
+166,7 @@ class TestMaxAutotune(TestCase): @parametrize("autotune_multi_device", (True, False)) def test_max_autotune_mm_plus_mm(self, autotune_in_subproc, autotune_multi_device): """ - This crash previously due to a triton issue: https://github.com/openai/triton/issues/1298 . + This crash previously due to a triton issue: https://github.com/triton-lang/triton/issues/1298 . With autotuning in subprocess, we don't crash anymore. """ m, n, k = 2048, 1536, 64 diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index 9745cd94134..b90244600dd 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -2969,7 +2969,7 @@ class CommonTemplate: return torch.round(a), torch.round(b + 1), torch.round(a, decimals=2) # without manual_seed, there is some chance this test fails due to: - # https://github.com/openai/triton/issues/530 + # https://github.com/triton-lang/triton/issues/530 torch.manual_seed(0) # with *100 we are always getting a number exactly at .5 which we don't do right in half @@ -7957,7 +7957,7 @@ def forward(self, arg0_1: "Sym(s77)", arg1_1: "Sym(s27)", arg2_1: "Sym(s53)", ar torch.randint(0, 100, size=[600], dtype=torch.int64), torch.randn([600, 256, 7, 7]), ], - # workaround for https://github.com/openai/triton/issues/558 + # workaround for https://github.com/triton-lang/triton/issues/558 check_lowp=False, ) diff --git a/test/quantization/core/test_quantized_functional.py b/test/quantization/core/test_quantized_functional.py index b14aaf465b0..e593b113b27 100644 --- a/test/quantization/core/test_quantized_functional.py +++ b/test/quantization/core/test_quantized_functional.py @@ -52,7 +52,7 @@ class TestQuantizedFunctionalOps(QuantizationTestCase): # Make sure the results match # assert_array_almost_equal compares using the following formula: # abs(desired-actual) < 1.5 * 10**(-decimal) - # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html) + # (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html) # We use decimal = 0 to ignore off-by-1 differences between reference # and test. Off-by-1 differences arise due to the order of round and # zero_point addition operation, i.e., if addition followed by round is diff --git a/test/quantization/core/test_quantized_module.py b/test/quantization/core/test_quantized_module.py index c31fe44fa29..8918696078a 100644 --- a/test/quantization/core/test_quantized_module.py +++ b/test/quantization/core/test_quantized_module.py @@ -320,7 +320,7 @@ class TestStaticQuantizedModule(QuantizationTestCase): # Make sure the results match # assert_array_almost_equal compares using the following formula: # abs(desired-actual) < 1.5 * 10**(-decimal) - # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html) + # (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html) # We use decimal = 0 to ignore off-by-1 differences between reference # and test. 
Off-by-1 differences arise due to the order of round and # zero_point addition operation, i.e., if addition followed by round is diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py index d9809647d27..1be420ee2ed 100644 --- a/test/quantization/core/test_quantized_op.py +++ b/test/quantization/core/test_quantized_op.py @@ -5200,7 +5200,7 @@ class TestQuantizedConv(TestCase): # Make sure the results match # assert_array_almost_equal compares using the following formula: # abs(desired-actual) < 1.5 * 10**(-decimal) - # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html) + # (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html) # We use decimal = 0 to ignore off-by-1 differences between # reference and test. Off-by-1 differences arise due to the order of # round and zero_point addition operation, i.e., if addition @@ -6935,7 +6935,7 @@ class TestQuantizedConv(TestCase): # Make sure the results match # assert_array_almost_equal compares using the following formula: # abs(desired-actual) < 1.5 * 10**(-decimal) - # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html) + # (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html) # We use decimal = 0 to ignore off-by-1 differences between # reference and test. Off-by-1 differences arise due to the order of # round and zero_point addition operation, i.e., if addition diff --git a/test/test_nn.py b/test/test_nn.py index f3aad843521..f16ace91d69 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -11752,7 +11752,7 @@ class TestNNDeviceType(NNTestCase): with self.assertRaisesRegex(RuntimeError, msg): F.nll_loss(x, t, weight=weight) - # Ref: https://github.com/pytorch/pytorch/issue/85005 + # Ref: https://github.com/pytorch/pytorch/issues/85005 @onlyCUDA @largeTensorTest("120GB", "cpu") @largeTensorTest("45GB", "cuda") @@ -11785,7 +11785,7 @@ class TestNNDeviceType(NNTestCase): with torch.no_grad(): self.assertTrue(torch.allclose(input.grad.cpu(), input_cpu.grad, rtol=rtol, atol=atol)) - # Ref: https://github.com/pytorch/pytorch/issue/108345 + # Ref: https://github.com/pytorch/pytorch/issues/108345 @onlyCUDA @largeTensorTest("20GB", "cpu") @largeTensorTest("20GB", "cuda") diff --git a/test/test_numba_integration.py b/test/test_numba_integration.py index dc63d4910f5..f42dd4176da 100644 --- a/test/test_numba_integration.py +++ b/test/test_numba_integration.py @@ -36,7 +36,7 @@ class TestNumbaIntegration(common.TestCase): version: (int) Version 0 See: - https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html + https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html """ types = [ @@ -250,7 +250,7 @@ class TestNumbaIntegration(common.TestCase): will use the exposed device memory. See: - https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html + https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html """ dtypes = [ diff --git a/test/test_reductions.py b/test/test_reductions.py index dc84432777d..a931717d475 100644 --- a/test/test_reductions.py +++ b/test/test_reductions.py @@ -1759,7 +1759,6 @@ class TestReductions(TestCase): # On Windows CI, the current version of `numpy` promotes all lower integers # dtypes to int32 while `torch` promotes them to int64. Hence we skip on checking # the exact dtype. 
- # Reference : https://dr.pytorch.org/api/view-log-full?build_id=122051580 # PR : https://github.com/pytorch/pytorch/pull/38628#issuecomment-655905370 if IS_WINDOWS and is_integral(dtype): exact_dtype = False diff --git a/test/test_unary_ufuncs.py b/test/test_unary_ufuncs.py index ca5ab1e7df3..8d29c504d87 100644 --- a/test/test_unary_ufuncs.py +++ b/test/test_unary_ufuncs.py @@ -547,7 +547,7 @@ class TestUnaryUfuncs(TestCase): # sqrt Test Reference: https://github.com/pytorch/pytorch/pull/47424 x = torch.tensor(0.0 - 1.0e20j, dtype=dtype, device=device) self.compare_with_numpy(torch.sqrt, np.sqrt, x) - # acos test reference: https://github.com/pytorch/pytorch/issue/42952 + # acos test reference: https://github.com/pytorch/pytorch/issues/42952 if not (dtype == torch.cdouble and "cuda" in device): self.compare_with_numpy(torch.acos, np.arccos, x) diff --git a/test/torch_np/numpy_tests/core/test_multiarray.py b/test/torch_np/numpy_tests/core/test_multiarray.py index 1ccf5ca4ffe..44503bf0fa3 100644 --- a/test/torch_np/numpy_tests/core/test_multiarray.py +++ b/test/torch_np/numpy_tests/core/test_multiarray.py @@ -4328,7 +4328,7 @@ class TestFromBuffer(TestCase): @skipif( IS_PYPY, reason="PyPy's memoryview currently does not track exports. See: " - "https://foss.heptapod.net/pypy/pypy/-/issues/3724", + "https://github.com/pypy/pypy/issues/3723", ) def test_mmap_close(self): # The old buffer protocol was not safe for some things that the new diff --git a/tools/download_mnist.py b/tools/download_mnist.py index 4fe6068fed9..c8eeb4ec1a9 100644 --- a/tools/download_mnist.py +++ b/tools/download_mnist.py @@ -8,7 +8,7 @@ from urllib.request import urlretrieve MIRRORS = [ "http://yann.lecun.com/exdb/mnist/", - "https://ossci-datasets.s3.amazonaws.com/mnist/", + "https://ossci-datasets.s3.amazonaws.com/mnist/", # @lint-ignore ] RESOURCES = [ diff --git a/tools/stats/upload_external_contrib_stats.py b/tools/stats/upload_external_contrib_stats.py index 93634c4ad5e..6de0e495214 100644 --- a/tools/stats/upload_external_contrib_stats.py +++ b/tools/stats/upload_external_contrib_stats.py @@ -81,7 +81,7 @@ def get_external_pr_data( response = cast( dict[str, Any], fetch_json( - "https://api.github.com/search/issues", + "https://api.github.com/search/issues", # @lint-ignore params={ "q": f'repo:pytorch/pytorch is:pr is:closed \ label:"open source" label:Merged -label:Reverted closed:{period_begin_date}..{period_end_date}', diff --git a/tools/stats/utilization_stats_lib.py b/tools/stats/utilization_stats_lib.py index 50bb9312c05..424808f7be7 100644 --- a/tools/stats/utilization_stats_lib.py +++ b/tools/stats/utilization_stats_lib.py @@ -87,7 +87,7 @@ class OssCiUtilizationMetadataV1: # this data model is for the time series data: -# https://github.com/pytorch/test-infra/blob/main/clickhouse_db_schema/oss_ci_utilization/oss_ci_utilization_time_series_schema.sql +# https://github.com/pytorch/test-infra/blob/main/clickhouse_db_schema/oss_ci_utilization/oss_ci_time_series_schema.sql @dataclass class OssCiUtilizationTimeSeriesV1: created_at: int diff --git a/tools/test/test_create_alerts.py b/tools/test/test_create_alerts.py index 11afebf8557..56a81603638 100644 --- a/tools/test/test_create_alerts.py +++ b/tools/test/test_create_alerts.py @@ -12,7 +12,7 @@ MOCK_TEST_DATA = [ "sha": "f02f3046571d21b48af3067e308a1e0f29b43af9", "id": 7819529276, "conclusion": "failure", - "htmlUrl": "https://github.com/pytorch/pytorch/runs/7819529276?check_suite_focus=true", + "htmlUrl": 
"https://github.com/pytorch/pytorch/runs/7819529276?check_suite_focus=true", # @lint-ignore "logUrl": "https://ossci-raw-job-status.s3.amazonaws.com/log/7819529276", "durationS": 14876, "failureLine": "##[error]The action has timed out.", @@ -25,7 +25,7 @@ MOCK_TEST_DATA = [ "sha": "d0d6b1f2222bf90f478796d84a525869898f55b6", "id": 7818399623, "conclusion": "failure", - "htmlUrl": "https://github.com/pytorch/pytorch/runs/7818399623?check_suite_focus=true", + "htmlUrl": "https://github.com/pytorch/pytorch/runs/7818399623?check_suite_focus=true", # @lint-ignore "logUrl": "https://ossci-raw-job-status.s3.amazonaws.com/log/7818399623", "durationS": 14882, "failureLine": "##[error]The action has timed out.", diff --git a/tools/testing/upload_artifacts.py b/tools/testing/upload_artifacts.py index 4ebfd03a146..a8b6d15fb39 100644 --- a/tools/testing/upload_artifacts.py +++ b/tools/testing/upload_artifacts.py @@ -94,7 +94,7 @@ def trigger_upload_test_stats_intermediate_workflow() -> None: # The GITHUB_TOKEN cannot trigger workflow so this isn't used for now print("Triggering upload_test_stats_intermediate workflow") x = requests.post( - "https://api.github.com/repos/pytorch/pytorch/actions/workflows/upload_test_stats_intermediate.yml/dispatches", + "https://api.github.com/repos/pytorch/pytorch/actions/workflows/upload_test_stats_intermediate.yml/dispatches", # noqa: B950 @lint-ignore headers={ "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {os.environ.get('GITHUB_TOKEN')}", diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index 089bd38f057..3bbd7f628ba 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -1267,7 +1267,7 @@ def _should_allow_numbers_as_tensors(func_name: str) -> _bool: ... def _group_tensors_by_device_and_dtype(nested_tensorlists: List[List[Optional[Tensor]]], with_indices: _bool = False) -> Dict[Tuple[torch.device, torch.dtype], Tuple[List[List[Optional[Tensor]]], List[_int]]]: ... # NB: There is no Capsule type in typing, see -# https://code.activestate.com/lists/python-dev/139675/ +# https://github.com/python/cpython/issues/109562 def _to_dlpack(data: Tensor) -> Any: ... # THPModule_toDLPack def _from_dlpack(data: Any) -> Tensor: ... # THPModule_fromDLPack def _get_cpp_backtrace( diff --git a/torch/_appdirs.py b/torch/_appdirs.py index 64d81139d7a..291963f6f6f 100644 --- a/torch/_appdirs.py +++ b/torch/_appdirs.py @@ -41,9 +41,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. See for details and usage. 
""" # Dev Notes: -# - MSDN on where to store app data files: -# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 -# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html +# - Windows "Known Folders": https://learn.microsoft.com/en-us/windows/win32/shell/csidl +# - macOS File System Programming Guide: https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/Introduction/Introduction.html # - XDG spec for Un*x: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html __version__ = "1.4.4" diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index b7c3fb0c9db..c57d240c4a1 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -4389,8 +4389,7 @@ def should_fold(tensor1: torch.Tensor, tensor2: torch.Tensor, is_out: bool) -> b t1_stride = t1.stride() # Check the contiguous, we can skip the dim with size of 1 - # as aten: https://github.com/pytorch/pytorch/blob/ - # e201460f8aa1510b4c4686627d57b69756c4b916/aten/src/ATen/TensorGeometry.cpp#L17 + # as aten: https://github.com/pytorch/pytorch/blob/e201460f8aa1510b4c4686627d57b69756c4b916/aten/src/ATen/TensorGeometry.cpp#L17 expected_stride = [1] for size in reversed(t1_shape[1:]): expected_stride.append(size * expected_stride[-1]) diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py index 28a546eb1c5..d406880df3f 100644 --- a/torch/_inductor/codegen/cpp.py +++ b/torch/_inductor/codegen/cpp.py @@ -4881,9 +4881,8 @@ class CppScheduling(BaseScheduling): len(get_call_ranges(_node)) == node.outer_loop_fusion_depth + 1 for _node in node.get_outer_nodes() ): - # Ref to the typical case of local buffer - # in https://github.com/pytorch/pytorch/blob/ - # 1115a25c36340554442f28f9570abd42f0aface2/aten/src/ATen/native/cpu/SoftMaxKernel.cpp#L159 + # Ref to the typical case of local buffer in + # https://github.com/pytorch/pytorch/blob/1115a25c36340554442f28f9570abd42f0aface2/aten/src/ATen/native/cpu/SoftMaxKernel.cpp#L159 # noqa: B950 # where the buffer is with size of last dim and contiguous. # Only support this typical case at first. visited_scheduler_nodes = OrderedSet[str]() diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index d9a3fae9220..8f3ddb77129 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -504,7 +504,7 @@ class BlockPtrOptions: def triton_reshape( value: str, old_shape: Sequence[sympy.Expr], new_shape: Sequence[sympy.Expr] ) -> str: - """Workaround https://github.com/openai/triton/issues/2836""" + """Workaround https://github.com/triton-lang/triton/issues/2836""" assert isinstance(old_shape, list) and isinstance(new_shape, list) old_shape_str = [V.kernel.index_to_str(shape) for shape in old_shape] @@ -841,7 +841,7 @@ class TritonOverrides(OpOverrides): # fp8 data type conversions has min_elem_per_thread requirements. # Refer to Triton implementations here: - # https://github.com/openai/triton/blob/10f59d8ce04052521c1bc0cb3a3f8b98918fc7e3/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp#L10. + # https://github.com/triton-lang/triton/blob/10f59d8ce04052521c1bc0cb3a3f8b98918fc7e3/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp#L10. 
fp8_dtypes = ( torch.float8_e4m3fn, torch.float8_e5m2, @@ -1828,7 +1828,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]): and len(mask_vars - dense_mask_vars) == 0 and not self.is_indirect_indexing(index) and have_loop_vars - # workaround https://github.com/openai/triton/issues/2821 + # workaround https://github.com/triton-lang/triton/issues/2821 and self.index_dtype == "tl.int32" ): @@ -2053,7 +2053,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]): ) -> tuple[str, str]: check = indexing.boundary_check() if not check: - # workaround https://github.com/openai/triton/issues/2813 + # workaround https://github.com/triton-lang/triton/issues/2813 other = "" elif other: assert other == ", other=0.0" @@ -2114,7 +2114,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]): value, indexing.final_shape, indexing.block_shape, False ) - # workaround https://github.com/openai/triton/issues/2814 + # workaround https://github.com/triton-lang/triton/issues/2814 value = f"{value}.to({triton_store_type(V.graph.get_dtype(name))})" return f"tl.store({block_ptr}, {value}{other})" @@ -2260,7 +2260,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]): line += ".to(tl.float32)" dtype = torch.float32 if dtype == torch.bool and torch.version.hip is None: - # Workaround for https://github.com/openai/triton/issues/2151 + # Workaround for https://github.com/triton-lang/triton/issues/2151 # tl.load returns int8 when loading from pointer to int1 # NOTE: Currently causes hangs on bool UTs for ROCm line += ".to(tl.int1)" @@ -2302,7 +2302,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]): indexing = self.indexing(index, dense_indexing=True, block_ptr=mode is None) # Guard against write-after-read corruption in triton. - # See # https://github.com/openai/triton/issues/1615 + # See # https://github.com/triton-lang/triton/issues/1615 # This triton bug means that a load which is broadcasted over multiple # warps may see the result of a store that happens later in the triton # program. The workaround is to add a barrier before storing, which @@ -3655,7 +3655,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]): # when they are not constexpr. otherwise there may be a segfault # during launching the Inductor-compiled Triton kernel. # https://github.com/pytorch/pytorch/issues/120478#issuecomment-1962822307 - # https://github.com/openai/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384 + # https://github.com/triton-lang/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384 for arg_num in equal_1_arg_indices(signature): # type: ignore[index] triton_meta["constants"][signature[arg_num].name] = 1 # type: ignore[index,union-attr] diff --git a/torch/_inductor/codegen/triton_utils.py b/torch/_inductor/codegen/triton_utils.py index ddd4ec51551..7f4d72ee71b 100644 --- a/torch/_inductor/codegen/triton_utils.py +++ b/torch/_inductor/codegen/triton_utils.py @@ -34,7 +34,7 @@ def should_unwrap_unspec_arg(name: str): def signature_of(arg: KernelArgType, *, size_dtype: Optional[str]) -> str: if isinstance(arg, TensorArg): # TODO: Remove fp8 special handling when Triton supports PyTorch fp8 dtypes. 
- # Related PR: https://github.com/openai/triton/pull/2279/ + # Related PR: https://github.com/triton-lang/triton/pull/2279/ if arg.dtype == torch.float8_e4m3fn: tye = "*fp8e4nv" elif arg.dtype == torch.float8_e5m2: @@ -184,7 +184,7 @@ def config_of( def is_aligned(x: KernelArgType, alignment: int, include_tensor: bool) -> bool: """ Roughly follow triton code here: - https://github.com/openai/triton/blob/5282ed890d453e10b9ee30076ef89115dd197761/python/triton/runtime/jit.py#L208-L222 + https://github.com/triton-lang/triton/blob/5282ed890d453e10b9ee30076ef89115dd197761/python/triton/runtime/jit.py#L208-L222 """ if isinstance(x, TensorArg): if include_tensor: diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py index 94aab698e23..906ddbf3b6c 100644 --- a/torch/_inductor/codegen/wrapper.py +++ b/torch/_inductor/codegen/wrapper.py @@ -1985,7 +1985,7 @@ class PythonWrapperCodegen(CodeGen): # TODO(aakhundov): add None args to constants, too. currently, this # causes CUDA errors in test_aot_inductor.test_triton_kernel_with_none_input. # https://github.com/pytorch/pytorch/issues/120478#issuecomment-1962822307 - # https://github.com/openai/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384 + # https://github.com/triton-lang/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384 "constants": { **constants, **dict.fromkeys(equal_to_1_args, 1), diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index bef6783ad33..aa6729ebfcc 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -1158,7 +1158,7 @@ class triton: # of registers being benchmarked. # # NOTE: triton will always report >0 register spills for kernels using sin/cos. - # (check this issue https://github.com/openai/triton/issues/1756 ) + # (check this issue https://github.com/triton-lang/triton/issues/1756 ) # So far we see a fixed 8 spilled registers for kernels using sin/cos. # Raise the threshold to 16 to be safe. # We should revisit this once we understand more of the source of register spills. diff --git a/torch/_inductor/fx_passes/pad_mm.py b/torch/_inductor/fx_passes/pad_mm.py index 40c0670a965..655a0e44d24 100644 --- a/torch/_inductor/fx_passes/pad_mm.py +++ b/torch/_inductor/fx_passes/pad_mm.py @@ -326,7 +326,7 @@ def should_exclude_padding_time(match: Match, arg_name: str) -> bool: if not fetch_fake_tensors(match, (arg_name,))[0].is_contiguous(): return False - # TODO - see issue https://githpub.com/pytorch/pytorch/issues/128889 + # TODO - see issue https://github.com/pytorch/pytorch/issues/128889 # We would only able to completely plan these out if we were only doing # first dimension padding. non-first we would still need a copy # because these outputs are fixed dense. 
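The `is_aligned` docstring above notes that Inductor roughly follows Triton's runtime check: Triton specializes kernels on pointer arguments whose addresses are divisible by 16 bytes, which enables wider loads. A minimal sketch of that kind of check, assuming a plain `torch.Tensor` input (`is_16_byte_aligned` is an illustrative name, not Inductor's API, and real checks also consider strides and sizes):

```python
import torch

ALIGNMENT = 16  # bytes; Triton specializes on 16-byte divisibility of pointers


def is_16_byte_aligned(t: torch.Tensor) -> bool:
    # Treat a tensor as aligned when its base address is a multiple of the
    # alignment; views with small byte offsets generally are not.
    return t.data_ptr() % ALIGNMENT == 0


x = torch.empty(128, 128, dtype=torch.float16)
print(is_16_byte_aligned(x))         # typically True for freshly allocated storage
print(is_16_byte_aligned(x[:, 1:]))  # a 2-byte-offset view is generally not aligned
```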
diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py index 8df1c1e1f2a..88c5f8497ac 100644 --- a/torch/_inductor/fx_passes/quantization.py +++ b/torch/_inductor/fx_passes/quantization.py @@ -2185,8 +2185,7 @@ def _register_qlinear_weight_prepack(): # Step 2: register patterns from bmm # Linear might be decomposed into bmm when input dim exceeds 2 and not contiguous # refer to: - # https://github.com/pytorch/pytorch/blob/ - # 80c07df659362a95da7cd4f3ec367abfdace38c4/torch/_decomp/decompositions.py#L3965-L3968 + # https://github.com/pytorch/pytorch/blob/80c07df659362a95da7cd4f3ec367abfdace38c4/torch/_decomp/decompositions.py#L3965-L3968 # in this case, we can convert it back to qlinear for dtype, with_bias, is_tensor_overload in itertools.product( [torch.float32, torch.bfloat16], [True, False], [True, False] diff --git a/torch/_inductor/kernel/conv.py b/torch/_inductor/kernel/conv.py index 9e6c5e8d42b..4b14989c372 100644 --- a/torch/_inductor/kernel/conv.py +++ b/torch/_inductor/kernel/conv.py @@ -620,7 +620,7 @@ def convolution( PADDING_W=padding[1], GROUPS=groups, # TODO(jansel): try unroll for bigger kernels once fixed: - # https://github.com/openai/triton/issues/1254 + # https://github.com/triton-lang/triton/issues/1254 UNROLL=is_ones(kernel_shape), ALLOW_TF32=torch.backends.cudnn.allow_tf32, num_stages=cfg.num_stages, @@ -643,7 +643,7 @@ def convolution( PADDING_W=padding[2], GROUPS=groups, # TODO(jansel): try unroll for bigger kernels once fixed: - # https://github.com/openai/triton/issues/1254 + # https://github.com/triton-lang/triton/issues/1254 UNROLL=is_ones(kernel_shape), ALLOW_TF32=torch.backends.cudnn.allow_tf32, num_stages=cfg.num_stages, diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index 2e190595c0d..5447c27f4f0 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -134,7 +134,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): ) ): # TODO(jansel): support different K values when this is fixed: - # https://github.com/openai/triton/issues/967 + # https://github.com/triton-lang/triton/issues/967 return lowerings[aten.add]( lowerings[aten.mm](mat1, mat2), lowerings[aten.mm](mat3, mat4) ) @@ -151,7 +151,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): if use_triton_template(layout1): for config in mm_configs(): - # see https://github.com/openai/triton/issues/1298 + # see https://github.com/triton-lang/triton/issues/1298 # BLOCK_K = K causes llvm error if V.graph.sizevars.statically_known_lt(config.kwargs["BLOCK_K"], k1): mm_plus_mm_template.maybe_append_choice( diff --git a/torch/_inductor/lowering.py b/torch/_inductor/lowering.py index 7b6d2681b70..225600561ed 100644 --- a/torch/_inductor/lowering.py +++ b/torch/_inductor/lowering.py @@ -6092,7 +6092,7 @@ def div_mode(a, b, rounding_mode=None): both_boolean = is_boolean_type(a) and is_boolean_type(b) # floordiv and truncdiv need special handling for integer tensors on Triton, - # see the discussion at https://github.com/openai/triton/issues/605 + # see the discussion at https://github.com/triton-lang/triton/issues/605 if rounding_mode == "floor": assert not both_boolean, "floordiv operands can not be boolean at the same time" return floordiv(a, b) if both_integer else floor(div(a, b)) diff --git a/torch/_inductor/mkldnn_lowerings.py b/torch/_inductor/mkldnn_lowerings.py index 06c54c18820..7e364e139ad 100644 --- a/torch/_inductor/mkldnn_lowerings.py +++ 
b/torch/_inductor/mkldnn_lowerings.py @@ -707,8 +707,8 @@ def register_onednn_fusion_ops(): assert x_zp.get_numel() == 1, "x_zp is incompatible with oneDNN qlinear" # When channels less than 8, w_scale/w_zp is Pointwise instead of ConstantBuffer - # Refer to https://github.com/pytorch/pytorch/blob - # /f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577 + # Refer to + # https://github.com/pytorch/pytorch/blob/f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577 # noqa: B950 if w_zp is None: # If w_zp is None, then it's a dummy tensor created to denote the # absence of a zero point, and thus w is int8 symmetrically quantized. @@ -1018,8 +1018,8 @@ def register_onednn_fusion_ops(): x_zp.realize() # When channels less than 8, w_scale/w_zp is Pointwise instead of ConstantBuffer - # Refer to https://github.com/pytorch/pytorch/blob - # /f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577 + # Refer to + # https://github.com/pytorch/pytorch/blob/f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577 # noqa: B950 w_scale.realize() w_zp.realize() if w_zp.get_dtype() != torch.int32 and isinstance( diff --git a/torch/_inductor/runtime/triton_compat.py b/torch/_inductor/runtime/triton_compat.py index 831898d8d8f..d6e45b72ce4 100644 --- a/torch/_inductor/runtime/triton_compat.py +++ b/torch/_inductor/runtime/triton_compat.py @@ -44,7 +44,7 @@ if triton is not None: return (backend, arch) # In the latest triton, math functions were shuffled around into different modules: - # https://github.com/openai/triton/pull/3172 + # https://github.com/triton-lang/triton/pull/3172 try: from triton.language.extra import libdevice diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py index 0e4557268d8..93fb36e12bb 100644 --- a/torch/_inductor/runtime/triton_heuristics.py +++ b/torch/_inductor/runtime/triton_heuristics.py @@ -1472,7 +1472,7 @@ class TritonCompileResult(CompileResult[CompiledKernel]): "metadata", *call_args, ] - else: # args after CompiledKernel.launch_metadata: https://github.com/openai/triton/pull/3492 + else: # args after CompiledKernel.launch_metadata: https://github.com/triton-lang/triton/pull/3492 # Getting the kernel launch args is extremely perf-sensitive. Evaluating # `bin.launch_metadata` is relatively expensive, and returns None unless a # `launch_enter_hook` is installed. 
So if we don't have that hook installed, diff --git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py index cffcd22ab46..f97bd0582e7 100644 --- a/torch/_inductor/scheduler.py +++ b/torch/_inductor/scheduler.py @@ -4560,7 +4560,7 @@ class Scheduler: ) return False except CompilationError as e: - # workaround triton issue: https://github.com/openai/triton/issues/2151 + # workaround triton issue: https://github.com/triton-lang/triton/issues/2151 if "Loop-carried variable" in str(e): fusion_log.debug( "ComboKernel benchmark: return True because of loop-carried variable" @@ -4574,7 +4574,7 @@ class Scheduler: try: ms2, ms2_clone, _path2_list = self.benchmark_combo_kernel(subkernel_nodes) except CompilationError as e: - # workaround triton issue: https://github.com/openai/triton/issues/2151 + # workaround triton issue: https://github.com/triton-lang/triton/issues/2151 if "Loop-carried variable" in str(e): fusion_log.debug( "ComboKernel benchmark: return True because of loop-carried variable" diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index 855b81c9d20..d48206f091f 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -390,7 +390,7 @@ def ceildiv( def _type_of(key: Optional[torch.dtype]) -> str: # Use the function here to get rid of dependencies on the Triton during the codegen. # Refer to Triton implementation here: - # https://github.com/openai/triton/blob/98b5945d2aef679e00ebca8e07c35c3658ec76de/python/triton/runtime/jit.py#L238 + # https://github.com/triton-lang/triton/blob/98b5945d2aef679e00ebca8e07c35c3658ec76de/python/triton/runtime/jit.py#L238 # `None` is nullptr. Implicitly convert to *i8. if key is None: return "*i8" @@ -1981,7 +1981,7 @@ def get_device_tflops(dtype: torch.dtype) -> int: assert dtype in (torch.float16, torch.bfloat16, torch.float32) if inspect.signature(get_max_simd_tflops).parameters.get("clock_rate"): - # Triton API change in https://github.com/openai/triton/pull/2293 + # Triton API change in https://github.com/triton-lang/triton/pull/2293 from torch._utils_internal import max_clock_rate sm_clock = max_clock_rate() diff --git a/torch/_numpy/_ndarray.py b/torch/_numpy/_ndarray.py index 20ebd9db818..fe2410a9f4e 100644 --- a/torch/_numpy/_ndarray.py +++ b/torch/_numpy/_ndarray.py @@ -435,7 +435,7 @@ class ndarray: def item(self, *args): # Mimic NumPy's implementation with three special cases (no arguments, # a flat index and a multi-index): - # https://github.com/numpy/numpy/blob/main/numpy/core/src/multiarray/methods.c#L702 + # https://github.com/numpy/numpy/blob/main/numpy/_core/src/multiarray/methods.c#L702 if args == (): return self.tensor.item() elif len(args) == 1: diff --git a/torch/_tensor.py b/torch/_tensor.py index 5bf70c2eca8..271a76111b6 100644 --- a/torch/_tensor.py +++ b/torch/_tensor.py @@ -1262,7 +1262,7 @@ class Tensor(torch._C.TensorBase): """Array view description for cuda tensors. See: - https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html + https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html """ if has_torch_function_unary(self): # TODO mypy doesn't support @property, see: https://github.com/python/mypy/issues/6185 diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py index 188c7198666..2a4d684ba85 100644 --- a/torch/_tensor_docs.py +++ b/torch/_tensor_docs.py @@ -4163,9 +4163,9 @@ Unlike :meth:`~Tensor.expand`, this function copies the tensor's data. .. 
warning:: :meth:`~Tensor.repeat` behaves differently from - `numpy.repeat <https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html>`_, + `numpy.repeat <https://numpy.org/doc/stable/reference/generated/numpy.repeat.html>`_, but is more similar to - `numpy.tile <https://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html>`_. + `numpy.tile <https://numpy.org/doc/stable/reference/generated/numpy.tile.html>`_. For the operator similar to `numpy.repeat`, see :func:`torch.repeat_interleave`. Args: diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py index 2b046844e9e..4570dd81b94 100644 --- a/torch/_torch_docs.py +++ b/torch/_torch_docs.py @@ -767,7 +767,7 @@ This function checks if :attr:`input` and :attr:`other` satisfy the condition: """ + r""" elementwise, for all elements of :attr:`input` and :attr:`other`. The behaviour of this function is analogous to -`numpy.allclose <https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html>`_ +`numpy.allclose <https://numpy.org/doc/stable/reference/generated/numpy.allclose.html>`_ Args: input (Tensor): first tensor to compare @@ -13826,7 +13826,7 @@ Returns the indices of the buckets to which each value in the :attr:`input` belo boundaries of the buckets are set by :attr:`boundaries`. Return a new tensor with the same size as :attr:`input`. If :attr:`right` is False (default), then the left boundary is open. Note that this behavior is opposite the behavior of -`numpy.digitize <https://docs.scipy.org/doc/numpy/reference/generated/numpy.digitize.html>`_. +`numpy.digitize <https://numpy.org/doc/stable/reference/generated/numpy.digitize.html>`_. More formally, the returned index satisfies the following rules: .. list-table:: diff --git a/torch/_vmap_internals.py b/torch/_vmap_internals.py index 1ea8f520123..6baee77ade5 100644 --- a/torch/_vmap_internals.py +++ b/torch/_vmap_internals.py @@ -219,7 +219,7 @@ def _vmap( # The `allow_none_pass_through` argument is a temporary workaround may be removed. # Currently it enables us to wrap the call in `autograd.grad` to the autograd engine, # which may return None if any of the inputs are unused. See the issue discussing this: - # https://github.com/facebookresearch/functorch/issues/159. + # https://github.com/pytorch/functorch/issues/159. @functools.wraps(func) def wrapped(*args): _check_out_dims_is_int_or_int_tuple(out_dims, func) diff --git a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py index 3f91c2ddd13..df4d94b3fbf 100644 --- a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py +++ b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py @@ -703,8 +703,8 @@ class X86InductorQuantizer(Quantizer): # Once we've annotated the model with quantization configurations, we also need to annotate # the output of quantizable operations. For example, if we annotated `maxpool2d` to quantize its inputs, # we will quantize its output accordingly. This enables us to fuse the dq-operator-q into a quantized op. - # Refer to https://github.com/intel/intel-extension-for-pytorch/blob/ - # 90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L487 + # Refer to + # https://github.com/intel/intel-extension-for-pytorch/blob/90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L487 # noqa: B950 self._annotate_output_for_int8_in_int8_out_pattern_entry(model) @@ -732,8 +732,8 @@ class X86InductorQuantizer(Quantizer): # Step2: Recipe to propagate annotation for patterns beside conv/linear. # Go through all the nodes from start to end.
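A quick illustration of the `Tensor.repeat` warning touched in the `_tensor_docs.py` hunk above, not part of the patch itself: `Tensor.repeat` tiles the whole tensor the way `numpy.tile` does, while `torch.repeat_interleave` matches `numpy.repeat`.

```python
import numpy as np
import torch

t = torch.tensor([1, 2, 3])

# Tensor.repeat tiles the full tensor, like numpy.tile
print(t.repeat(2))                    # tensor([1, 2, 3, 1, 2, 3])
print(np.tile(t.numpy(), 2))          # [1 2 3 1 2 3]

# torch.repeat_interleave repeats each element, like numpy.repeat
print(torch.repeat_interleave(t, 2))  # tensor([1, 1, 2, 2, 3, 3])
print(np.repeat(t.numpy(), 2))        # [1 1 2 2 3 3]
```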
- # Recipe refer to https://github.com/intel/intel-extension-for-pytorch/blob/ - # 90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L538 + # Recipe refer to + # https://github.com/intel/intel-extension-for-pytorch/blob/90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L538 # noqa: B950 self._annotate_propagation_quantizable_pattern_entry( model, quantization_config, filter_fn @@ -1381,9 +1381,9 @@ class X86InductorQuantizer(Quantizer): ) -> None: r""" Check and insert observer at output of node in int8_in_int8_out_ops if needed. - Recipe refers to https://github.com/intel/intel-extension-for-pytorch/blob/ - 90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_utils.py#L495 - """ + Recipe refers to + https://github.com/intel/intel-extension-for-pytorch/blob/90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_utils.py#L495 + """ # noqa: B950 edge_or_node: tuple[Node, Node] if (node.target in int8_in_int8_out_ops) and (_is_any_annotated([node])): if node.target == torch.ops.aten.max_pool2d.default: diff --git a/torch/csrc/PyInterpreter.cpp b/torch/csrc/PyInterpreter.cpp index ce7414d31b7..f944bb5c546 100644 --- a/torch/csrc/PyInterpreter.cpp +++ b/torch/csrc/PyInterpreter.cpp @@ -635,7 +635,7 @@ static c10::ArrayRef get_set_cached_attr( // is also to <=5 elements, we don't need to reallocate. // Note: I tried removing this optimization and tripped ASAN // in a batchnorm kernel here: - // https://pipelinesghubeus21.actions.githubusercontent.com/mBh68xKhi8LyM7tp3vECvYXNFvuV4gyVGgmYCteuEZP9JH92QN/_apis/pipelines/1/runs/3373307/signedlogcontent/790?urlExpires=2023-09-15T21%3A13%3A51.4327798Z&urlSigningMethod=HMACV1&urlSignature=tDeX7ZqaARVU5NNwyr5yYqqkWq3A2j4z8FFdqYwGr0Q%3D + // https://pipelinesghubeus21.actions.githubusercontent.com/mBh68xKhi8LyM7tp3vECvYXNFvuV4gyVGgmYCteuEZP9JH92QN/_apis/pipelines/1/runs/3373307/signedlogcontent/790?urlExpires=2023-09-15T21%3A13%3A51.4327798Z&urlSigningMethod=HMACV1&urlSignature=tDeX7ZqaARVU5NNwyr5yYqqkWq3A2j4z8FFdqYwGr0Q%3D@lint-ignore // We should fix this instead. 
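For context on the `X86InductorQuantizer` recipe comments updated above, here is a rough sketch of the PT2E flow in which those annotations are consumed. The model and inputs are placeholders, and the capture/export step has changed across PyTorch releases, so treat this as an outline rather than the canonical recipe.

```python
import torch
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# Placeholder model/inputs; any fp32 eval-mode model with conv/linear works.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 3, 32, 32),)

quantizer = xiq.X86InductorQuantizer()
quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())

# Capture step is version-dependent (capture_pre_autograd_graph in older
# releases, torch.export.export_for_training in newer ones).
exported = torch.export.export_for_training(model, example_inputs).module()

prepared = prepare_pt2e(exported, quantizer)  # the annotation recipes run here
prepared(*example_inputs)                     # calibration
quantized = convert_pt2e(prepared)
```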
bool needs_resize = false; // We need to resize if: diff --git a/torch/csrc/api/src/nn/modules/rnn.cpp b/torch/csrc/api/src/nn/modules/rnn.cpp index eff69a32a85..da1ab02aa6a 100644 --- a/torch/csrc/api/src/nn/modules/rnn.cpp +++ b/torch/csrc/api/src/nn/modules/rnn.cpp @@ -21,7 +21,7 @@ using namespace torch::nn::utils::rnn; namespace torch::nn { /// These must line up with the CUDNN mode codes: -/// https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnRNNMode_t +/// https://docs.nvidia.com/deeplearning/cudnn/backend/latest/api/cudnn-adv-library.html#cudnnrnnmode-t enum class CuDNNMode { RNN_RELU = 0, RNN_TANH = 1, LSTM = 2, GRU = 3 }; static CuDNNMode get_cudnn_mode_for_rnn( diff --git a/torch/csrc/autograd/FunctionsManual.cpp b/torch/csrc/autograd/FunctionsManual.cpp index 498259c8fa1..aaaadc49672 100644 --- a/torch/csrc/autograd/FunctionsManual.cpp +++ b/torch/csrc/autograd/FunctionsManual.cpp @@ -891,8 +891,8 @@ Tensor logcumsumexp_backward( return grad; } - // Reference: https://github.com/tensorflow/tensorflow/blob/ - // 2a5910906a0e0f3dbc186ff9db6386d81a63448c/tensorflow/python/ops/math_grad.py#L1832-L1863 + // Reference: + // https://github.com/tensorflow/tensorflow/blob/2a5910906a0e0f3dbc186ff9db6386d81a63448c/tensorflow/python/ops/math_grad.py#L1832-L1863 auto scalar_min = AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( at::ScalarType::BFloat16, diff --git a/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h b/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h index 9b38cd525e7..24be190ec53 100644 --- a/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h +++ b/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h @@ -15,7 +15,8 @@ namespace torch::jit::fuser::onednn { // being created for each device. The device handle passed from PyTorch allows // oneDNN Graph implementation to work on the device specified by PyTorch, which // is currently CPU, so we only have one engine. -// Ref: https://spec.oneapi.io/onednn-graph/latest/programming_model.html#engine +// Ref: +// https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#engine struct Engine { // CPU engine singleton static dnnl::engine& getEngine(); diff --git a/torch/csrc/jit/codegen/onednn/README.md b/torch/csrc/jit/codegen/onednn/README.md index e3f3ec66734..fb309abc3bc 100644 --- a/torch/csrc/jit/codegen/onednn/README.md +++ b/torch/csrc/jit/codegen/onednn/README.md @@ -1,5 +1,5 @@ # Pytorch - oneDNN Graph API Bridge -This is a PyTorch JIT graph fuser based on [oneDNN Graph API](https://spec.oneapi.io/onednn-graph/latest/programming_model.html), which provides a flexible API for aggressive fusion. Float & BFloat16 inference is supported. However, BFloat16 only performs well on Intel Xeon Cooper Lake platform & beyond, as they have native BFloat16 support. Also, currently, PyTorch has divergent AMP support in JIT & eager modes, so one should disable JIT AMP support & leverage eager mode AMP support to use BFloat16. Please refer to the BFloat16 example below. +This is a PyTorch JIT graph fuser based on [oneDNN Graph API](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model), which provides a flexible API for aggressive fusion. Float & BFloat16 inference is supported. However, BFloat16 only performs well on Intel Xeon Cooper Lake platform & beyond, as they have native BFloat16 support. 
Also, currently, PyTorch has divergent AMP support in JIT & eager modes, so one should disable JIT AMP support & leverage eager mode AMP support to use BFloat16. Please refer to the BFloat16 example below. Currently, speedup is achieved only for static shapes, although we'd soon add dynamic-shape support. When oneDNN Graph is enabled, weights are cached, as they're constant during inference. @@ -29,7 +29,7 @@ We have registered optimization passes in the custom pre-passes set of PyTorch: ## Graph Executor During runtime execution of a (re-written) PyTorch JIT graph, oneDNN graph partitions will be dispatched to the oneDNN graph JIT variadic Operator. -Inside the oneDNN graph JIT Op, input PyTorch tensors of each partition will be mapped to oneDNN graph tensors. The partition will then be [compiled](https://spec.oneapi.io/onednn-graph/latest/programming_model.html#partition) and [executed](https://spec.oneapi.io/onednn-graph/latest/programming_model.html#compiled-partition). The output oneDNN graph tensor will be mapped back to PyTorch tensors to be fed to the next operator on the PyTorch JIT graph. +Inside the oneDNN graph JIT Op, input PyTorch tensors of each partition will be mapped to oneDNN graph tensors. The partition will then be [compiled](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#partition) and [executed](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#compiled-partition). The output oneDNN graph tensor will be mapped back to PyTorch tensors to be fed to the next operator on the PyTorch JIT graph. ## Tests diff --git a/torch/csrc/jit/tensorexpr/expr.cpp b/torch/csrc/jit/tensorexpr/expr.cpp index ece08a2f08b..52701665d06 100644 --- a/torch/csrc/jit/tensorexpr/expr.cpp +++ b/torch/csrc/jit/tensorexpr/expr.cpp @@ -143,7 +143,7 @@ ExprHandle abs(const ExprHandle& v) { } // The default tanh is quite slow, use the Eigen version from here: -// https://bitbucket.org/eigen/eigen/src/94875feeeeb9abe5509b314197da1991ba2070f5/Eigen/src/Core/MathFunctionsImpl.h#lines-26 +// https://github.com/TUW-VieVS/VieSchedpp/blob/master/Eigen/src/Core/MathFunctionsImpl.h#L26 ExprHandle fast_tanh(const ExprHandle& v) { // TODO: use a dedicated bind-var to make sure v is not evaluated multiple // times. 
Clamp the input expression to [-9, 9] @@ -205,7 +205,7 @@ ExprHandle fast_sigmoid(const ExprHandle& x) { ExprHandle fast_log(const ExprHandle& v) { // this implementation is taken from sleef: - // https://github.com/shibatch/sleef/blob/master/src/libm/sleefsp.c#L1131 + // https://github.com/shibatch/sleef/blob/master/src/libm/sleefsimdsp.c#L1277 // to generate coefficients, this tool is provided // https://github.com/shibatch/sleef/blob/master/src/gencoef/gencoef.txt auto ilogb2kf = [](const ExprHandle& x) { diff --git a/torch/csrc/lazy/core/cache.h b/torch/csrc/lazy/core/cache.h index 5b2160c6778..6aad77b85e5 100644 --- a/torch/csrc/lazy/core/cache.h +++ b/torch/csrc/lazy/core/cache.h @@ -1,6 +1,6 @@ /** * Cache utils in this file is adapted from PyTorch/XLA - * https://github.com/pytorch/xla/blob/master/third_party/xla_client/cache.h + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/cache.h */ #pragma once diff --git a/torch/csrc/lazy/core/metrics.h b/torch/csrc/lazy/core/metrics.h index 05b525778d9..83b388d7740 100644 --- a/torch/csrc/lazy/core/metrics.h +++ b/torch/csrc/lazy/core/metrics.h @@ -1,6 +1,6 @@ /** * This file is adapted from PyTorch/XLA - * https://github.com/pytorch/xla/blob/master/third_party/xla_client/metrics.h + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h */ #pragma once diff --git a/torch/csrc/lazy/core/multi_wait.h b/torch/csrc/lazy/core/multi_wait.h index a3a33ee3975..df3faf8d806 100644 --- a/torch/csrc/lazy/core/multi_wait.h +++ b/torch/csrc/lazy/core/multi_wait.h @@ -1,6 +1,6 @@ /** * This file is adapted from PyTorch/XLA - * https://github.com/pytorch/xla/blob/master/third_party/xla_client/multi_wait.h + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/multi_wait.h */ #pragma once diff --git a/torch/csrc/lazy/core/thread_pool.h b/torch/csrc/lazy/core/thread_pool.h index 2e0ae8f89d8..5bffe6ca3a0 100644 --- a/torch/csrc/lazy/core/thread_pool.h +++ b/torch/csrc/lazy/core/thread_pool.h @@ -1,6 +1,6 @@ /** * This file is adapted from PyTorch/XLA - * https://github.com/pytorch/xla/blob/master/third_party/xla_client/metrics.h + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h */ #pragma once diff --git a/torch/csrc/lazy/core/unique.h b/torch/csrc/lazy/core/unique.h index 7f38c258658..1375f45aa19 100644 --- a/torch/csrc/lazy/core/unique.h +++ b/torch/csrc/lazy/core/unique.h @@ -1,6 +1,6 @@ /** * Unique in this file is adapted from PyTorch/XLA - * https://github.com/pytorch/xla/blob/master/third_party/xla_client/unique.h + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/unique.h */ #pragma once diff --git a/torch/csrc/lazy/core/util.h b/torch/csrc/lazy/core/util.h index 694cda379a2..865b28d8f3c 100644 --- a/torch/csrc/lazy/core/util.h +++ b/torch/csrc/lazy/core/util.h @@ -1,6 +1,6 @@ /** * Most of the utils in this file is adapted from PyTorch/XLA - * https://github.com/pytorch/xla/blob/master/third_party/xla_client/util.h + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/util.h */ #pragma once diff --git a/torch/distributed/_tools/sac_estimator.py b/torch/distributed/_tools/sac_estimator.py index 2c1f4f5e937..962f5864c22 100644 --- a/torch/distributed/_tools/sac_estimator.py +++ b/torch/distributed/_tools/sac_estimator.py @@ -50,7 
+50,7 @@ def _display_stats_tabular(headers: list[str], table_data: list[list[Any]]) -> N # Based on: -# https://github.com/fairinternal/xformers/blob/0ded5697a2ea15711ce45131002d04e72053cc6d/xformers/checkpoint.py#L62 +# https://github.com/facebookresearch/xformers/blob/main/xformers/checkpoint.py#L71 @dataclass class _SACMetadata: """ diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 5b5ff3434ad..5db84f50b5a 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -5142,9 +5142,9 @@ def new_group( group, they must be synchronized with other cuda streams by calling `work.wait()` before using another process group. - See `Using multiple NCCL communicators concurrently <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/communicators.html#using-multiple-nccl-communicators-concurrently>`_ for more details. + See `Using multiple NCCL communicators concurrently + <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/communicators.html#using-multiple-nccl-communicators-concurrently>`_ + for more details. Args: ranks (list[int]): List of ranks of group members. If ``None``, will be @@ -5163,10 +5163,9 @@ def new_group( the construction of specific process groups. i.e. for the ``nccl`` backend, ``is_high_priority_stream`` can be specified so that process group can pick up high priority cuda streams. For other availble options to config nccl, - See https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/types.html#ncclconfig-t - use_local_synchronization (bool, optional): perform a group-local - barrier at the end of the process group creation. This is different - in that non-member ranks don't need to call into API and don't + See https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/types.html#ncclconfig-t + use_local_synchronization (bool, optional): perform a group-local barrier at the end of the process group creation. + This is different in that non-member ranks don't need to call into API and don't join the barrier. group_desc (str, optional): a string to describe the process group. device_id (torch.device, optional): a single, specific device diff --git a/torch/jit/annotations.py b/torch/jit/annotations.py index 922a177b203..48d5eb8a644 100644 --- a/torch/jit/annotations.py +++ b/torch/jit/annotations.py @@ -331,7 +331,7 @@ def try_real_annotations(fn, loc): try: # Note: anything annotated as `Optional[T]` will automatically # be returned as `Union[T, None]` per - # https://github.com/python/typing/blob/master/src/typing.py#L850 + # https://github.com/python/cpython/blob/main/Lib/typing.py#L732 sig = inspect.signature(fn) except ValueError: return None diff --git a/torch/linalg/__init__.py b/torch/linalg/__init__.py index 1aa04204164..4d40718bcd0 100644 --- a/torch/linalg/__init__.py +++ b/torch/linalg/__init__.py @@ -329,7 +329,7 @@ Examples:: tensor(0, dtype=torch.int32) .. _LAPACK's getrf: - https://www.netlib.org/lapack/explore-html/dd/d9a/group__double_g_ecomputational_ga0019443faea08275ca60a734d0593e60.html + https://www.netlib.org/lapack/explore-html-3.6.1/dd/d9a/group__double_g_ecomputational_ga0019443faea08275ca60a734d0593e60.html """, ) @@ -967,7 +967,7 @@ Examples:: tensor([1, 2, 3], dtype=torch.int32) .. _LAPACK's sytrf: - https://www.netlib.org/lapack/explore-html/d3/db6/group__double_s_ycomputational_gad91bde1212277b3e909eb6af7f64858a.html + https://www.netlib.org/lapack/explore-html-3.6.1/d3/db6/group__double_s_ycomputational_gad91bde1212277b3e909eb6af7f64858a.html """, ) @@ -1025,7 +1025,7 @@ Examples:: tensor(0, dtype=torch.int32) ..
_LAPACK's sytrf: - https://www.netlib.org/lapack/explore-html/d3/db6/group__double_s_ycomputational_gad91bde1212277b3e909eb6af7f64858a.html + https://www.netlib.org/lapack/explore-html-3.6.1/d3/db6/group__double_s_ycomputational_gad91bde1212277b3e909eb6af7f64858a.html """, ) @@ -2513,7 +2513,7 @@ Returns: A named tuple `(LU, pivots, info)`. .. _LAPACK's getrf: - https://www.netlib.org/lapack/explore-html/dd/d9a/group__double_g_ecomputational_ga0019443faea08275ca60a734d0593e60.html + https://www.netlib.org/lapack/explore-html-3.6.1/dd/d9a/group__double_g_ecomputational_ga0019443faea08275ca60a734d0593e60.html """, ) diff --git a/torch/onnx/symbolic_opset9.py b/torch/onnx/symbolic_opset9.py index 371745664f4..fa295418504 100644 --- a/torch/onnx/symbolic_opset9.py +++ b/torch/onnx/symbolic_opset9.py @@ -5315,7 +5315,7 @@ def index(g: jit_utils.GraphContext, self, index): # 2. prim::Constant[value=...] or tensor output # representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. # For more info on advanced indexing, - # check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + # check https://numpy.org/doc/stable/user/basics.indexing.html#advanced-indexing # Consider a general case of # t: [x_1, y_1, y_2, ..., x_m, ..., y_n] @@ -5389,7 +5389,7 @@ def index(g: jit_utils.GraphContext, self, index): cum_adv_index_shape_tensor = _shape_as_tensor(g, cum_adv_index) # check if all advanced indices are consecutive. - # Refer to https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#combining-advanced-and-basic-indexing + # Refer to https://numpy.org/doc/stable/user/basics.indexing.html#combining-advanced-and-basic-indexing # to understand how the subarray position is decided. if adv_idx_indices == list( range(adv_idx_indices[0], adv_idx_indices[-1] + 1) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index fdaf31cbdd1..5d03deebcbc 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -20751,7 +20751,6 @@ op_db: list[OpInfo] = [ DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'), # AssertionError: Tensor-likes are not close! 
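As a side note on the advanced-indexing links fixed in `symbolic_opset9.py` above, a small sketch of my own (not from the patch) of the rule that comment relies on: when the advanced indices sit on consecutive dimensions, the broadcast result stays in place; otherwise it moves to the front.

```python
import torch

t = torch.arange(24).reshape(2, 3, 4)

# Advanced indices on consecutive dims (1 and 2): the broadcast dim replaces
# dims 1-2 in place, so the result keeps the leading slice dimension.
print(t[:, [0, 1], [2, 0]].shape)   # torch.Size([2, 2])

# Advanced indices separated by a slice (dims 0 and 2): the broadcast dim is
# moved to the front of the result, followed by the sliced dimension.
print(t[[0, 1], :, [2, 0]].shape)   # torch.Size([2, 3])
```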
# Fails in cuda11.7 - # Error Log: https://github.com/pytorch/pytorch/actions/runs/3440108478/jobs/5738475757 DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type='cuda'), DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),),), # In training mode, feature_alpha_dropout currently doesn't support inputs of complex dtype diff --git a/torch/utils/_sympy/functions.py b/torch/utils/_sympy/functions.py index 39069f3d06c..61c63ece236 100644 --- a/torch/utils/_sympy/functions.py +++ b/torch/utils/_sympy/functions.py @@ -342,9 +342,9 @@ class ModularIndexing(sympy.Function): and isinstance(term.args[0], sympy.Integer) and term.args[0] < 0 ): - # workaround for https://github.com/openai/triton/issues/619, + # workaround for https://github.com/triton-lang/triton/issues/619, # if there are negative terms, // produces wrong result - # TODO if https://github.com/openai/triton/issues/619 is fixed + # TODO if https://github.com/triton-lang/triton/issues/619 is fixed # this optimization would become valid all_positive = False break diff --git a/torch/utils/collect_env.py b/torch/utils/collect_env.py index 747a6c1249c..5fa84295767 100644 --- a/torch/utils/collect_env.py +++ b/torch/utils/collect_env.py @@ -199,8 +199,8 @@ def get_cudnn_version(run_lambda): cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) elif get_platform() == 'darwin': # CUDA libraries and drivers can be found in /usr/local/cuda/. See - # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install - # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac + # https://docs.nvidia.com/cuda/archive/9.0/cuda-installation-guide-mac-os-x/index.html#installation + # https://docs.nvidia.com/deeplearning/cudnn/installation/latest/ # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' else: diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py index c425e9edc9f..81db11fd285 100644 --- a/torch/utils/hipify/cuda_to_hip_mappings.py +++ b/torch/utils/hipify/cuda_to_hip_mappings.py @@ -16,7 +16,7 @@ from .constants import (API_BLAS, API_C10, API_CAFFE2, API_DRIVER, API_FFT, """ Mapping of CUDA functions, include files, constants, and types to ROCm/HIP equivalents This closely follows the implementation in hipify-clang -https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/src/CUDA2HipMap.cpp +https://github.com/ROCm/hip/blob/59071b895ed1c86d9698b4c859cefcdd5acda06f/hipify-clang/src/CUDA2HipMap.cpp and its structure. There are different maps for fundamental names, include files, identifies, sparse, and PyTorch specific translations. diff --git a/torch/utils/tensorboard/_pytorch_graph.py b/torch/utils/tensorboard/_pytorch_graph.py index 0e9e453183d..85427162fc7 100644 --- a/torch/utils/tensorboard/_pytorch_graph.py +++ b/torch/utils/tensorboard/_pytorch_graph.py @@ -341,7 +341,7 @@ def graph(model, args, verbose=False, use_strict_trace=True): # and pass it correctly to TensorBoard. 
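To make the `ModularIndexing` hunk above easier to follow, a minimal sketch of the expression it guards; `torch.utils._sympy.functions` is a private Inductor module and may change, so this is illustrative only.

```python
import sympy
from torch.utils._sympy.functions import ModularIndexing

i = sympy.Symbol("i", integer=True, positive=True)

# ModularIndexing(a, b, c) models (a // b) % c in Inductor index expressions.
expr = ModularIndexing(i + 16, 1, 8)
# With every additive term positive, multiples of b*c (here 8) can be dropped
# from the sum; the all_positive guard in the hunk above skips that rewrite
# when a negative term is present, to stay clear of the Triton '//' bug.
print(expr)
```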
# # Definition of StepStats and DeviceStepStats can be found at - # https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/graph/tf_graph_common/test/graph-test.ts + # https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/graph/tf_graph_common/proto.ts # and # https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/step_stats.proto stepstats = RunMetadata( diff --git a/torch/utils/tensorboard/writer.py b/torch/utils/tensorboard/writer.py index a6792c5b8ab..129281cb8ac 100644 --- a/torch/utils/tensorboard/writer.py +++ b/torch/utils/tensorboard/writer.py @@ -472,7 +472,7 @@ class SummaryWriter: values (torch.Tensor, numpy.ndarray, or string/blobname): Values to build histogram global_step (int): Global step value to record bins (str): One of {'tensorflow','auto', 'fd', ...}. This determines how the bins are made. You can find - other options in: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html + other options in: https://numpy.org/doc/stable/reference/generated/numpy.histogram.html walltime (float): Optional override default walltime (time.time()) seconds after epoch of event diff --git a/torchgen/_autoheuristic/mixed_mm/get_mixedmm_dataset.sh b/torchgen/_autoheuristic/mixed_mm/get_mixedmm_dataset.sh index fd50b2e79fb..531b698bde6 100644 --- a/torchgen/_autoheuristic/mixed_mm/get_mixedmm_dataset.sh +++ b/torchgen/_autoheuristic/mixed_mm/get_mixedmm_dataset.sh @@ -1,6 +1,6 @@ #!/bin/bash -base_url='https://github.com/AlnisM/autoheuristic-datasets/raw/main/' +base_url='https://github.com/AlnisM/autoheuristic-datasets/raw/main/' # @lint-ignore a100_data='mixedmm_a100_data.zip' h100_data='mixedmm_h100_data.zip' datasets=("${a100_data}" "${h100_data}") diff --git a/torchgen/_autoheuristic/mm/get_mm_dataset.sh b/torchgen/_autoheuristic/mm/get_mm_dataset.sh index 7461dec41dd..60280104ebf 100644 --- a/torchgen/_autoheuristic/mm/get_mm_dataset.sh +++ b/torchgen/_autoheuristic/mm/get_mm_dataset.sh @@ -1,6 +1,6 @@ #!/bin/bash -base_url='https://github.com/AlnisM/autoheuristic-datasets/raw/main/' +base_url='https://github.com/AlnisM/autoheuristic-datasets/raw/main/' # @lint-ignore a100_data='a100_mm.zip' h100_data='h100_mm.zip' datasets=("${a100_data}" "${h100_data}") diff --git a/torchgen/_autoheuristic/pad_mm/get_padmm_dataset.sh b/torchgen/_autoheuristic/pad_mm/get_padmm_dataset.sh index b8ab60d943e..ba8ed904660 100644 --- a/torchgen/_autoheuristic/pad_mm/get_padmm_dataset.sh +++ b/torchgen/_autoheuristic/pad_mm/get_padmm_dataset.sh @@ -1,7 +1,7 @@ #!/bin/bash a100_zip="pad_mm_a100_data.zip" -a100_data="https://github.com/AlnisM/autoheuristic-datasets/raw/main/${a100_zip}" +a100_data="https://github.com/AlnisM/autoheuristic-datasets/raw/main/${a100_zip}" # @lint-ignore rm -f ${a100_zip} wget ${a100_data} unzip -o ${a100_zip}
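Finally, a short usage sketch for the `add_histogram` docstring touched above; "runs/hist_demo" is an arbitrary log directory and the `tensorboard` package must be installed.

```python
import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/hist_demo")  # arbitrary log dir
for step in range(3):
    values = torch.randn(1000)
    # `bins` accepts 'tensorflow' plus the strategies numpy.histogram
    # understands, e.g. 'auto' or 'fd' (see the numpy.histogram link above).
    writer.add_histogram("weights", values, global_step=step, bins="auto")
writer.close()
```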