Fix broken URLs (#152237)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152237
Approved by: https://github.com/huydhn, https://github.com/malfet
Anthony Shoumikhin 2025-04-27 09:56:42 +00:00 committed by PyTorch MergeBot
parent cbcc03c2ad
commit e2f9759bd0
115 changed files with 176 additions and 181 deletions

View File

@ -128,7 +128,7 @@ def gh_fetch_json_dict(
def gh_graphql(query: str, **kwargs: Any) -> dict[str, Any]:
rc = gh_fetch_url(
"https://api.github.com/graphql",
"https://api.github.com/graphql", # @lint-ignore
data={"query": query, "variables": kwargs},
reader=json.load,
)
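
For context, a minimal standard-library sketch of what a call against this GraphQL endpoint looks like; the helper name and token handling below are assumptions for illustration, the real logic lives in gh_fetch_url:

import json
import urllib.request

def graphql_post(query: str, token: str, **variables):
    # Assumed standalone equivalent of gh_graphql/gh_fetch_url above:
    # POST the query and variables as JSON, then decode the JSON response.
    req = urllib.request.Request(
        "https://api.github.com/graphql",
        data=json.dumps({"query": query, "variables": variables}).encode(),
        headers={"Authorization": f"Bearer {token}"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)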

View File

@ -64,7 +64,7 @@ endif()
# This define is needed to preserve behavior given anticipated changes to
# cccl/thrust
# https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html
# https://nvidia.github.io/cccl/libcudacxx/standard_api/numerics_library/complex.html
string(APPEND CMAKE_CUDA_FLAGS
" -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS")

View File

@ -194,7 +194,7 @@ If you want to compile with CUDA support, [select a supported version of CUDA fr
- [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v8.5 or above
- [Compiler](https://gist.github.com/ax3l/9489132) compatible with CUDA
Note: You could refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/reference/support-matrix.html) for cuDNN versions with the various supported CUDA, CUDA driver and NVIDIA hardware
Note: You could refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/backend/latest/reference/support-matrix.html) for cuDNN versions with the various supported CUDA, CUDA driver and NVIDIA hardware
If you want to disable CUDA support, export the environment variable `USE_CUDA=0`.
Other potentially useful environment variables may be found in `setup.py`.

View File

@ -221,7 +221,7 @@ Release candidates are currently stored in the following places:
* Wheels: https://download.pytorch.org/whl/test/
* Conda: https://anaconda.org/pytorch-test
* Libtorch: https://download.pytorch.org/libtorch/test
* Libtorch: https://download.pytorch.org/libtorch/test <!-- @lint-ignore -->
Backups are stored in a non-public S3 bucket at [`s3://pytorch-backup`](https://s3.console.aws.amazon.com/s3/buckets/pytorch-backup?region=us-east-1&tab=objects)
@ -322,7 +322,7 @@ Promotion should occur in two steps:
* Promote S3 artifacts (wheels, libtorch) and Conda packages
* Promote S3 wheels to PyPI
**NOTE**: The promotion of wheels to PyPI can only be done once so take caution when attempting to promote wheels to PyPI, (see https://github.com/pypa/warehouse/issues/726 for a discussion on potential draft releases within PyPI)
**NOTE**: The promotion of wheels to PyPI can only be done once so take caution when attempting to promote wheels to PyPI, (see https://github.com/pypi/warehouse/issues/726 for a discussion on potential draft releases within PyPI)
## Additional Steps to prepare for release day

View File

@ -28,7 +28,7 @@ void ambiguous_autogradother_kernel(OperatorKernel*, const OperatorHandle& op, D
"Autograd dispatch key for the backend.\n",
"If you only want to run inference instead of training, in C++, add `c10::InferenceMode mode;` "
"before model.forward(); in Python, use `torch.inference_mode()` as a context manager (see "
"https://pytorch.org/docs/stable/generated/torch.inference_mode.html).",
"https://pytorch.org/docs/stable/generated/torch.autograd.grad_mode.inference_mode.html).",
"\nCanonical state\n~~~~~~~~~~~\n", op.dumpState(), "\n\n");
}
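
As a quick reminder of the Python usage this error message recommends, a minimal sketch:

import torch

model = torch.nn.Linear(4, 2)
x = torch.randn(1, 4)

# Run inference without recording autograd state, as the message suggests.
with torch.inference_mode():
    y = model(x)

print(y.requires_grad)  # False: the output carries no autograd history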

View File

@ -410,7 +410,7 @@ template <typename T>
__host__ __device__ T safe_max(T a, T b) {
#if defined(__HIPCC__)
// TODO: remove this special case for HIP when issue is fixed:
// https://github.com/ROCm-Developer-Tools/HIP/issues/2209
// https://github.com/ROCm/hip/issues/2209
T max = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::max<T>(a, b));
#else
T max = at::_isnan(b) ? b : std::max<T>(a, b);
@ -470,7 +470,7 @@ template <typename T>
__host__ __device__ T safe_min(T a, T b) {
#if defined(__HIPCC__)
// TODO: remove this special case for HIP when issue is fixed:
// https://github.com/ROCm-Developer-Tools/HIP/issues/2209
// https://github.com/ROCm/hip/issues/2209
T min = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::min<T>(a, b));
#else
T min = at::_isnan(b) ? b : std::min<T>(a, b);
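
A minimal Python sketch of the NaN-propagation rule both branches implement (illustrative helpers, not the ATen code); the HIP branch checks `a` first because std::max/std::min there do not reliably propagate NaN:

import math

def safe_max(a: float, b: float) -> float:
    # Return NaN if either operand is NaN, otherwise the larger value.
    if math.isnan(a):
        return a
    if math.isnan(b):
        return b
    return max(a, b)

def safe_min(a: float, b: float) -> float:
    # Same propagation rule for the minimum.
    if math.isnan(a):
        return a
    if math.isnan(b):
        return b
    return min(a, b)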

View File

@ -1680,7 +1680,7 @@ inline C10_HOST_DEVICE T calc_ndtri(T y0) {
return x;
}
/* The next function is taken from http://ab-initio.mit.edu/Faddeev */
/* The next function is taken from http://ab-initio.mit.edu/faddeeva */
/* Copyright (c) 2012 Massachusetts Institute of Technology
*

View File

@ -26,7 +26,7 @@ template <typename scalar_t>
inline C10_DEVICE scalar_t max_propagate_nan(scalar_t a, scalar_t b) {
#if defined(__HIPCC__)
// TODO: remove this special case for HIP when issue is fixed:
// https://github.com/ROCm-Developer-Tools/HIP/issues/2209
// https://github.com/ROCm/hip/issues/2209
scalar_t max = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::max(a, b));
#else
scalar_t max = at::_isnan(b) ? b : std::max(a, b);
@ -37,7 +37,7 @@ template <typename scalar_t>
inline C10_DEVICE scalar_t min_propagate_nan(scalar_t a, scalar_t b) {
#if defined(__HIPCC__)
// TODO: remove this special case for HIP when issue is fixed:
// https://github.com/ROCm-Developer-Tools/HIP/issues/2209
// https://github.com/ROCm/hip/issues/2209
scalar_t min = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::min(a, b));
#else
scalar_t min = at::_isnan(b) ? b : std::min(a, b);
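
The same rule is visible at the Python level in PyTorch's elementwise max/min ops, which are documented to propagate NaN; a small demonstration:

import torch

a = torch.tensor([1.0, float("nan")])
b = torch.tensor([2.0, 3.0])

# A NaN in either input propagates to the result.
print(torch.maximum(a, b))  # tensor([2., nan])
print(torch.minimum(a, b))  # tensor([1., nan])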

View File

@ -13,7 +13,7 @@
namespace {
// Thin wrapper around https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__SINGLE.html#group__CUDA__MATH__SINGLE_1g57a3c8313f570282a1a7bcc78743b08e,
// Thin wrapper around https://docs.nvidia.com/cuda/cuda-math-api/cuda_math_api/group__CUDA__MATH__SINGLE.html,
// to ensure the Cuda math library's isfinite is actually what gets called in
// _amp_non_finite_check_and_unscale_cuda_'s gpu_kernel lambda.
//

View File

@ -766,7 +766,7 @@ const auto sinc_string = jiterator_stringify(
); // sinc_string
const auto erfcx_string = jiterator_stringify(
/* The next function is taken from http://ab-initio.mit.edu/Faddeev */
/* The next function is taken from http://ab-initio.mit.edu/faddeeva */
/* Copyright (c) 2012 Massachusetts Institute of Technology
*

View File

@ -1865,8 +1865,6 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
// We require to perform ?geqrf_gpu again due to this bug in MAGMA:
// - ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give R properly.
// - ?geqrf2_gpu gives correct R, but doesn't allow computation of Q via ?orgqr_gpu
// Refer to the below link for more details:
// http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=1015&p=2800&hilit=geqrf_gpu#p2800
case at::LinalgBackend::Magma:
return geqrf_magma(input, tau);
case at::LinalgBackend::Cusolver:

View File

@ -347,7 +347,7 @@ struct BenchmarkCache {
// @eqy: use thread local caches as cuDNN Execution Plans are not guaranteed to
// be thread safe across all engines see Limitations in
// https://docs.nvidia.com/deeplearning/cudnn/release-notes/index.html
// https://docs.nvidia.com/deeplearning/cudnn/backend/latest/release-notes.html
thread_local BenchmarkCache<cudnn_frontend::ExecutionPlan, CacheKeyWrapper>
benchmark_cache;
thread_local BenchmarkCache<cudnn_frontend::ExecutionPlan, CacheKeyFusedWrapper>

View File

@ -296,7 +296,7 @@ struct MHAGraphCache {
// @eqy: use thread local caches as cuDNN Execution Plans are not guaranteed to
// be thread safe across all engines see Limitations in
// https://docs.nvidia.com/deeplearning/cudnn/release-notes/index.html
// https://docs.nvidia.com/deeplearning/cudnn/backend/latest/release-notes.html
thread_local MHAGraphCache<graph_and_tensors, MHACacheKeyWrapper> mhagraphcache;
thread_local MHAGraphCache<graph_and_tensors_backward, MHACacheKeyWrapper>
mhagraphbackwardcache;

View File

@ -1204,7 +1204,7 @@ cudnnRNNAlgo_t get_algo(
// Persistent algos typically don't work for packed inputs with sequence
// lengths that vary across batch elements, and will return
// CUDNN_STATUS_NOT_SUPPORTED if attempted. See
// https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#features-of-rnn-functions
// https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-890/developer-guide/index.html#features-of-rnn-functions
if (!tensors.is_input_packed()) {
auto cudnnDataType = getCudnnDataType(input);
if (cudnnDataType != CUDNN_DATA_DOUBLE) {
@ -1274,7 +1274,7 @@ int64_t _cudnn_rnn_flatten_weight_prologue(
rnn_desc = rnn.descriptor(handle);
// Why do we pad to 5 dims here (and elsewhere)?
// https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNForwardTraining
// https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-892/api/index.html#cudnnRNNForwardTraining
// expects descriptors padded to 3 dimensions.
x_desc.set(flat_buf_datatype, x_geom.sizes(), x_geom.strides(), 5);

View File

@ -213,7 +213,7 @@ INSTANTIATE_INT4MV(bfloat, 256);
* 1. Load A and B blocks (32x32 and 64x32 respectively) into shared memory.
* 2. In 4 simdgroups, calculate the outer product of the loaded blocks. Each simdgroup produces a 2x4 8x8 result.
* 2.1 For how to use outer product to perform matrix multiplication, refer to
* http://mlwiki.org/index.php/Matrix-Matrix_Multiplication#Sum_of_Outer_Products
* https://web.archive.org/web/20230521063455/http://mlwiki.org/index.php/Matrix-Matrix_Multiplication#Sum_of_Outer_Products
* 3. Repeat 1 & 2 along K axis, with K block size 32, accumulate the result in the 2x4 8x8 block.
* 4. Dequantize the final result and store it in the output matrix.
*
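
As a reference for step 2.1, a small NumPy sketch of matrix multiplication as a sum of outer products over the K axis (the decomposition the kernel tiles); the function name is illustrative:

import numpy as np

def matmul_sum_of_outer_products(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    # C = sum over k of outer(A[:, k], B[k, :]); each k contributes a rank-1 update.
    M, K = A.shape
    _, N = B.shape
    C = np.zeros((M, N), dtype=A.dtype)
    for k in range(K):
        C += np.outer(A[:, k], B[k, :])
    return C

A = np.random.rand(4, 3)
B = np.random.rand(3, 5)
assert np.allclose(matmul_sum_of_outer_products(A, B), A @ B)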

View File

@ -1923,7 +1923,7 @@ namespace {
* FBGEMM uses vpmaddubsw instruction to multiply activations (uint8_t) and
* weights (int8_t).
*
* https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16&expand=3284,3530
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16&expand=3284,3530&ig_expand=4236
*
* vpmaddubsw operates on a vector of activations and a vector of
* weights. If these vectors are
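
For reference, a NumPy sketch of the vpmaddubsw (_mm256_maddubs_epi16) semantics described here: unsigned bytes multiplied by signed bytes, adjacent pairs summed, and each pair sum saturated to int16 (illustrative values, not the FBGEMM kernel):

import numpy as np

def maddubs_epi16_reference(a_u8: np.ndarray, b_s8: np.ndarray) -> np.ndarray:
    # a_u8: uint8 activations, b_s8: int8 weights, same even length.
    a = a_u8.astype(np.int32)
    b = b_s8.astype(np.int32)
    pair_sums = a[0::2] * b[0::2] + a[1::2] * b[1::2]
    # The instruction saturates each pair sum to the int16 range.
    return np.clip(pair_sums, -32768, 32767).astype(np.int16)

a = np.array([255, 255, 1, 2], dtype=np.uint8)
b = np.array([127, 127, -3, 4], dtype=np.int8)
print(maddubs_epi16_reference(a, b))  # [32767, 5]: the first pair saturates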

View File

@ -78,10 +78,10 @@ MAX_JOBS=1 scripts/build_local.sh -DBUILD_BINARY=ON -DBUILD_PYTHON=OFF \
-DUSE_OBSERVERS=OFF -DUSE_DISTRIBUTED=OFF
# Download model weights
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb # @lint-ignore
# Download model graph
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb # @lint-ignore
# Run speed benchmark with 50 warm-up iterations and 10 measurement iterations
build/bin/speed_benchmark --net predict_net.pb --init_net init_net.pb \
@ -104,11 +104,11 @@ scripts/build_android.sh -DANDROID_TOOLCHAIN=clang -DBUILD_BINARY=ON
adb push build_android/bin/speed_benchmark /data/local/tmp/speed_benchmark
# Download model weights and copy them to Android device
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb # @lint-ignore
adb push init_net.pb /data/local/tmp/init_net.pb
# Download model graph and copy it to Android device
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb # @lint-ignore
adb push predict_net.pb /data/local/tmp/predict_net.pb
# Run speed benchmark with 50 warm-up iterations and 10 measurement iterations
@ -134,11 +134,11 @@ scripts/build_android.sh -DANDROID_ABI=arm64-v8a -DANDROID_TOOLCHAIN=clang -DBUI
adb push build_android/bin/speed_benchmark /data/local/tmp/speed_benchmark
# Download model weights and copy them to Android device
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/init_net.pb # @lint-ignore
adb push init_net.pb /data/local/tmp/init_net.pb
# Download model graph and copy it to Android device
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb
wget https://s3.amazonaws.com/download.caffe2.ai/models/mobilenet_v2_1.0_224_quant/predict_net.pb # @lint-ignore
adb push predict_net.pb /data/local/tmp/predict_net.pb
# Run speed benchmark with 50 warm-up iterations and 10 measurement iterations

View File

@ -53,7 +53,7 @@ std::unordered_map<CacheKey, cudnn_frontend::ExecutionPlan, at::native::ParamsHa
} // anonymous namespace
// TODO: we can use cudnn_frontend::ExecutionPlanCache when it supports caching
// multiple operators
// reference: https://github.com/NVIDIA/cudnn-frontend/blob/main/samples/conv_sample.cpp#L293
// reference: https://github.com/NVIDIA/cudnn-frontend/blob/main/samples/legacy_samples/conv_sample.cpp#L295
//static cudnn_frontend::ExecutionPlanCache plan_cache("sample_cache");
// the parameter quantized_output is a quantized tensor

View File

@ -79,7 +79,7 @@ std::unordered_map<CacheKey, cudnn_frontend::ExecutionPlan, at::native::ParamsHa
}
// TODO: we can use cudnn_frontend::ExecutionPlanCache when it supports caching
// multiple operators
// reference: https://github.com/NVIDIA/cudnn-frontend/blob/main/samples/conv_sample.cpp#L293
// reference: https://github.com/NVIDIA/cudnn-frontend/blob/main/samples/legacy_samples/conv_sample.cpp#L295
//static cudnn_frontend::ExecutionPlanCache plan_cache("sample_cache");
// currently we only support int8 symmetric (zero_point = 0 for inputs and output) quantized linear op

View File

@ -76,7 +76,7 @@ Tensor adaptive_avg_pool2d_quantized_cuda(
// any 3D tensors to 4D prior to using cudnn
// This implementation currently uses the v7 cudnn APIs as v8 cudnn APIs are not yet available for
// pooling operations.
// Consult https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingForward for
// Consult https://docs.nvidia.com/deeplearning/cudnn/backend/latest/api/cudnn-ops-library.html#cudnnpoolingforward for
// documentation on the APIs
// Currently, it appears there is no cudnn support for dilated pooling -- we will
// submit a feature request for this with cudnn

View File

@ -67,7 +67,7 @@ ORDER BY
workflowName, jobName
"""
ARTIFACTS_QUERY_URL = (
"https://console-api.clickhouse.cloud/.api/query-endpoints/"
"https://console-api.clickhouse.cloud/.api/query-endpoints/" # @lint-ignore
"c1cdfadc-6bb2-4a91-bbf9-3d19e1981cd4/run?format=JSON"
)
CSV_LINTER = str(

View File

@ -473,7 +473,7 @@ class TransformerModel(nn.Module):
return F.log_softmax(output, dim=-1)
# From https://github.com/pytorch/text/blob/master/torchtext/modules
# From https://github.com/pytorch/text/tree/master/torchtext/nn/modules
class MultiheadAttentionContainer(torch.nn.Module):
def __init__(self, nhead, in_proj_container, attention_layer, out_proj):
r"""A multi-head attention container

View File

@ -37,7 +37,7 @@ def numpy_random(dtype, *shapes):
Args:
shapes: int or a sequence of ints to defining the shapes of the tensor
dtype: use the dtypes from numpy
(https://docs.scipy.org/doc/numpy/user/basics.types.html)
(https://numpy.org/doc/stable/user/basics.types.html)
Return:
numpy tensor of dtype
"""

View File

@ -2,7 +2,7 @@
DATASET_ROOT_DIR=$HOME/datasets/
# wget https://storage.googleapis.com/sgk-sc2020/dlmc.tar.gz -P $DATASET_ROOT_DIR
# wget https://storage.googleapis.com/sgk-sc2020/dlmc.tar.gz -P $DATASET_ROOT_DIR # @lint-ignore
# tar -xvf $DATASET_ROOT_DIR/dlmc.tar.gz
echo "!! SPARSE SPMS TIME BENCHMARK!! "

View File

@ -1,6 +1,6 @@
# This is a copy of rnn_attention from MLPerf, with some common sizes hardcoded
# for benchmarking and some control flow stripped out.
# https://github.com/mlperf/training/blob/master/rnn_translator/pytorch/seq2seq/models/attention.py
# https://github.com/mlcommons/training/blob/master/retired_benchmarks/gnmt/pytorch/seq2seq/models/attention.py
import torch

View File

@ -48,7 +48,7 @@ class ScribeUploader:
access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")
if not access_token:
raise ValueError("Can't find access token from environment variable")
url = "https://graph.facebook.com/scribe_logs"
url = "https://graph.facebook.com/scribe_logs" # @lint-ignore
r = requests.post(
url,
data={

View File

@ -297,7 +297,7 @@ def get_pt_preprocessor_flags():
PT_PREPROCESSOR_FLAGS.append("-DENABLE_PYTORCH_NON_PRODUCTION_BUILDS")
return PT_PREPROCESSOR_FLAGS
# This needs to be kept in sync with https://github.com/pytorch/pytorch/blob/release/1.9/torchgen/gen.py#L892
# This needs to be kept in sync with https://github.com/pytorch/pytorch/blob/release/1.9/torchgen/gen.py#L892 @lint-ignore
PT_BACKEND_HEADERS = [
"CPU",
"CUDA",

View File

@ -241,7 +241,7 @@ using namespace c10::xpu;
#ifdef __HIPCC__
// Unlike CUDA, HIP requires a HIP header to be included for __host__ to work.
// We do this #include here so that C10_HOST_DEVICE and friends will Just Work.
// See https://github.com/ROCm-Developer-Tools/HIP/issues/441
// See https://github.com/ROCm/hip/issues/441
#include <hip/hip_runtime.h>
#endif

View File

@ -98,7 +98,7 @@ if(NOT __AOTRITON_INCLUDED)
"${__AOTRITON_VER}-${__AOTRITON_MANYLINUX}"
"_${__AOTRITON_ARCH}-rocm${__AOTRITON_ROCM}"
"-shared.tar.${__AOTRITON_Z}")
string(CONCAT __AOTRITON_URL "https://github.com/ROCm/aotriton/releases/download/"
string(CONCAT __AOTRITON_URL "https://github.com/ROCm/aotriton/releases/download/" # @lint-ignore
"${__AOTRITON_VER}/${__AOTRITON_FILE}")
ExternalProject_Add(aotriton_external
URL "${__AOTRITON_URL}"

View File

@ -137,7 +137,7 @@ about this by following `this
API concerns itself with scenarios in which you would like to extend
TorchScript with custom operators, which can similarly be serialized and
invoked from C++ during inference. Lastly, the `torch::jit::compile
<https://pytorch.org/cppdocs/api/function_namespacetorch_1_1jit_1a176d99fd5bf0233119a5f49c07a1d01d.html#exhale-function-namespacetorch-1-1jit-1a176d99fd5bf0233119a5f49c07a1d01d>`_
<https://pytorch.org/cppdocs/api/function_namespacetorch_1_1jit_1a8660dc13a6b82336aadac667e6dccba1.html>`_
function may be used to access the TorchScript compiler directly from C++.
C++ Extensions

View File

@ -325,13 +325,13 @@ Python Docs
PyTorch documentation is generated from python source using
`Sphinx <https://www.sphinx-doc.org/en/master/>`__. Generated HTML is
copied to the docs folder in the main branch of
`pytorch.github.io <https://github.com/pytorch/pytorch.github.io/tree/master/docs>`__,
`pytorch.org/docs <https://pytorch.org/docs/main>`__,
and is served via GitHub pages.
- Site: https://pytorch.org/docs
- GitHub: https://github.com/pytorch/pytorch/tree/main/docs
- Served from:
`https://github.com/pytorch/pytorch.github.io/tree/master/docs <https://github.com/pytorch/pytorch.github.io/tree/master/docs>`__
`https://pytorch.org/docs/main <https://pytorch.org/docs/main>`__
C++ Docs
~~~~~~~~

View File

@ -3708,7 +3708,6 @@ def process_docstring(app, what_, name, obj, options, lines):
lines (List[str]): the lines of the docstring, see above
References:
https://www.sphinx-doc.org/en/1.5.1/_modules/sphinx/ext/autodoc.html
https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html
"""
import re

View File

@ -13,4 +13,4 @@
See the `cuDNN 8 Release Notes`_ for more information.
.. _cuDNN 8 Release Notes: https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_8.html
.. _cuDNN 8 Release Notes: https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-880/release-notes/rel_8.html

View File

@ -141,7 +141,7 @@ network bandwidth. These two environment variables have been pre-tuned by NCCL
for some cloud providers, such as AWS or GCP.
For a full list of NCCL environment variables, please refer to
`NVIDIA NCCL's official documentation <https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/env.html>`_
`NVIDIA NCCL's official documentation <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html>`_
You can tune NCCL communicators even further using `torch.distributed.ProcessGroupNCCL.NCCLConfig`
and `torch.distributed.ProcessGroupNCCL.Options`. Learn more about them using `help`

View File

@ -141,7 +141,7 @@ Currently, only the "nccl" and "gloo" backends for torch.distributed are support
CUDA API to HIP API mappings in C++
-----------------------------------
Please refer: https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP_API_Guide.html
Please refer: https://rocm.docs.amd.com/projects/HIP/en/latest/reference/api_syntax.html
NOTE: The CUDA_VERSION macro, cudaRuntimeGetVersion and cudaDriverGetVersion APIs do not
semantically map to the same values as HIP_VERSION macro, hipRuntimeGetVersion and

View File

@ -24,9 +24,10 @@ MKL and MAGMA. Here are the steps to build with them.
REM 2.5.3 (CUDA 10.1 10.2 11.0) x (Debug Release)
REM 2.5.2 (CUDA 9.2 10.0 10.1 10.2) x (Debug Release)
REM 2.5.1 (CUDA 9.2 10.0 10.1 10.2) x (Debug Release)
set CUDA_PREFIX=cuda102
set CONFIG=release
curl -k https://s3.amazonaws.com/ossci-windows/magma_2.5.4_%CUDA_PREFIX%_%CONFIG%.7z -o magma.7z
set "CUDA_PREFIX=cuda102"
set "CONFIG=release"
set "HOST=https://s3.amazonaws.com/ossci-windows"
curl -k "%HOST%/magma_2.5.4_%CUDA_PREFIX%_%CONFIG%.7z" -o magma.7z
7z x -aoa magma.7z -omagma
REM Setting essential environment variables

View File

@ -27,7 +27,7 @@ Dependencies
The ONNX exporter depends on extra Python packages:
- `ONNX <https://onnx.ai>`_
- `ONNX Script <https://onnxscript.ai>`_
- `ONNX Script <https://microsoft.github.io/onnxscript>`_
They can be installed through `pip <https://pypi.org/project/pip/>`_:

View File

@ -452,7 +452,7 @@ ONNX operators that represent the function's behavior in ONNX. For example::
.. . ``torch::jit::Value::setType``). This is not required, but it can help the exporter's
.. shape and type inference for down-stream nodes. For a non-trivial example of ``setType``, see
.. ``test_aten_embedding_2`` in
.. `test_operators.py <https://github.com/pytorch/pytorch/blob/main/test/onnx/test_operators.py>`_.
.. `test_operators.py <https://github.com/pytorch/pytorch/blob/release/2.5/test/onnx/test_operators.py#L1179>`_.
.. The example below shows how you can access ``requires_grad`` via the ``Node`` object:

View File

@ -204,7 +204,7 @@ will create the ``OwnerRRef``, and returns an ACK to acknowledge ``{100, 1}``
**G2**, the ``OwnerRRef`` is a child of the ``UserRRef``, and the ``UserRRef``
is not deleted until it receives the ACK from the owner.
.. image:: https://user-images\.githubusercontent\.com/16999635/69164772-98181300-0abe-11ea-93a7-9ad9f757cd94.png
.. image:: https://user-images.githubusercontent.com/16999635/69164772-98181300-0abe-11ea-93a7-9ad9f757cd94.png
:alt: user_to_owner_ret.png
:width: 500 px

View File

@ -209,7 +209,7 @@ Diagnosing TorchInductor Errors
If the error does not occur with the ``"eager"`` backend, then the
backend compiler is the source of the error (`example
error <https://gist.github.com/mlazos/2f13681e3cc6c43b3911f336327032de%5D>`__).
error <https://gist.github.com/mlazos/2f13681e3cc6c43b3911f336327032de>`__).
There are `different choices <./torch.compiler.rst>`__
for backend compilers for TorchDynamo, with TorchInductor
fitting the needs of most users. This section focuses on TorchInductor
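
A minimal sketch of the triage step described above: compile the same function with the "eager" backend to rule TorchInductor in or out (the function here is illustrative, not from the docs):

import torch

def fn(x):
    return torch.sin(x) + torch.cos(x)

x = torch.randn(8)

# If the "eager" backend runs cleanly but "inductor" fails,
# the backend compiler is the likely source of the error.
out_eager = torch.compile(fn, backend="eager")(x)
out_inductor = torch.compile(fn, backend="inductor")(x)
print(torch.allclose(out_eager, out_inductor))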

View File

@ -15,7 +15,7 @@ torch.finfo
.. class:: torch.finfo
A :class:`torch.finfo` is an object that represents the numerical properties of a floating point
:class:`torch.dtype`, (i.e. ``torch.float32``, ``torch.float64``, ``torch.float16``, and ``torch.bfloat16``). This is similar to `numpy.finfo <https://docs.scipy.org/doc/numpy/reference/generated/numpy.finfo.html>`_.
:class:`torch.dtype`, (i.e. ``torch.float32``, ``torch.float64``, ``torch.float16``, and ``torch.bfloat16``). This is similar to `numpy.finfo <https://numpy.org/doc/stable/reference/generated/numpy.finfo.html>`_.
A :class:`torch.finfo` provides the following attributes:
@ -49,7 +49,7 @@ torch.iinfo
A :class:`torch.iinfo` is an object that represents the numerical properties of a integer
:class:`torch.dtype` (i.e. ``torch.uint8``, ``torch.int8``, ``torch.int16``, ``torch.int32``, and ``torch.int64``). This is similar to `numpy.iinfo <https://docs.scipy.org/doc/numpy/reference/generated/numpy.iinfo.html>`_.
:class:`torch.dtype` (i.e. ``torch.uint8``, ``torch.int8``, ``torch.int16``, ``torch.int32``, and ``torch.int64``). This is similar to `numpy.iinfo <https://numpy.org/doc/stable/reference/generated/numpy.iinfo.html>`_.
A :class:`torch.iinfo` provides the following attributes:
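
A short usage sketch of the two classes being documented; values are printed rather than asserted since they depend on the dtype:

import torch

fi = torch.finfo(torch.float16)
print(fi.bits, fi.eps, fi.tiny, fi.max)  # width, machine epsilon, smallest normal, largest value

ii = torch.iinfo(torch.int32)
print(ii.bits, ii.min, ii.max)           # width and representable range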

View File

@ -9,7 +9,7 @@
setup_environment(){
# The rootfs image for a Tizen target (RPi3)is located at the below webpage:
# http://download.tizen.org/releases/milestone/tizen/4.0.m1/tizen-unified_20170529.1/images/
# https://cdn.download.tizen.org/archive/releases/milestone/tizen/4.0.m1/tizen-unified_20170529.1/images/
# If you do not have a Tizen device, Please, run qemu-arm-static and chroot command.
# $ sudo chroot ~/tizen-rootfs qemu-arm-static /usr/bin/bash
@ -25,7 +25,7 @@ caffe2_lite_dep_packages(){
# You can set-up a rpm repository with zypper, yum, and dnf because Tizen
# software platform officially support rpm format such as Fedora, OpenSUSE.
# The official Tizen repository is as following:
# http://download.tizen.org/releases/milestone/tizen/4.0.m1/
# https://cdn.download.tizen.org/archive/releases/milestone/tizen/4.0.m1/
echo "Installing dependencies."
sudo zypper install \
make \
@ -69,7 +69,7 @@ caffe2_full_dep_packages(){
# You can set-up a rpm repository with zypper, yum, and dnf because Tizen
# software platform officially support rpm format such as Fedora, OpenSUSE.
# The official Tizen repository is as following:
# http://download.tizen.org/releases/milestone/tizen/4.0.m1/
# https://cdn.download.tizen.org/archive/releases/milestone/tizen/4.0.m1/
echo "Installing dependencies."
sudo zypper install \
cmake \

View File

@ -212,7 +212,9 @@ headers = {"Authorization": f"token {token}"}
def run_query(query):
request = requests.post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
"https://api.github.com/graphql", # @lint-ignore
json={"query": query},
headers=headers,
)
if request.status_code == 200:
return request.json()

View File

@ -441,7 +441,7 @@ lstm_output_to_device(
}
// This test is a port of python code introduced here:
// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
// https://medium.com/data-science/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
// Reverse forward of bidirectional GRU should act
// as regular forward of unidirectional GRU
void BidirectionalGRUReverseForward(bool cuda) {

View File

@ -594,7 +594,7 @@ class TestDTensorOps(DTensorOpTestBase):
dtensor_rs = func(*dtensor_args, **dtensor_kwargs)
# we need to skip tests containing tensors of zero elements for now.
# see issue: https://github.com/pytorch/tau/issues/470
# see issue: https://github.com/pytorch/PiPPy/issues/470
# TODO remove this once issue above fixed.
flat_args = pytree.tree_leaves(dtensor_rs)
if any(

View File

@ -192,7 +192,9 @@ class DistElementwiseOpsTest(DTensorOpTestBase):
op=torch.sigmoid,
)
@skip("testing RNG based ops is broken: https://github.com/pytorch/tau/issues/494")
@skip(
"testing RNG based ops is broken: https://github.com/pytorch/PiPPy/issues/494"
)
def test_dropout(self):
device_mesh = self.build_device_mesh()

View File

@ -5169,7 +5169,6 @@ class TestVmapOperatorsOpInfo(TestCase):
xfail("linalg.vecdot"),
# throws in vmap on CUDA
# IndexError: Dimension out of range (expected to be in range of [-1, 0], but got -2)
# https://github.com/pytorch/pytorch/runs/8110653462?check_suite_focus=true
# but it passes locally
xfail("linalg.diagonal"),
skip("linalg.matrix_norm", ""),

View File

@ -581,7 +581,7 @@ class CudaReproTests(TestCase):
"""
This UT tests autotune on an inplace kernel. The autotune should not contaminate
the input buffers when tuning with multiple configs. For more details, refer to
https://github.com/openai/triton/issues/781
https://github.com/triton-lang/triton/issues/781
https://github.com/pytorch/torchdynamo/issues/1670
"""
from torch._C import _cuda_getCurrentRawStream as get_cuda_stream
@ -1587,7 +1587,7 @@ class CudaReproTests(TestCase):
@config.patch("triton.use_block_ptr", True)
def test_selecsls42b_misaligned_address(self):
# https://github.com/openai/triton/issues/2836
# https://github.com/triton-lang/triton/issues/2836
@torch.compile(fullgraph=True)
def fn(arg207_1, arg208_1, convert_element_type_40, expand, full, mul_3):

View File

@ -431,7 +431,7 @@ class ForeachTests(TestCase):
@requires_cuda
@scalar_bin_ops
@unittest.skip(
"Triton recursion depth exceeded: https://github.com/openai/triton/issues/1763"
"Triton recursion depth exceeded: https://github.com/triton-lang/triton/issues/1763"
)
def test_kernel_split_arg_limit_scalar(self, op):
def fn(a):

View File

@ -95,7 +95,7 @@ class TestIndexingSimplification(InductorTestCase):
ModularIndexing(i0 + i1 * i2 * r3, i2, r3), ModularIndexing(i0, i2, r3)
)
# if there are negative terms, we cannot optimize away zero terms due to https://github.com/openai/triton/issues/619
# if there are negative terms, we cannot optimize away zero terms due to https://github.com/triton-lang/triton/issues/619
self.assertEqual(
ModularIndexing(-i0 + i1 * 20, 2, 10), ModularIndexing(-i0 + i1 * 20, 2, 10)
)
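
A quick numeric check of the simplification being tested, assuming ModularIndexing(a, b, c) means (a // b) % c (an assumption for this sketch): adding a multiple of b*c to a non-negative base does not change the result, which is why the i1*i2*r3 term can be dropped.

def modular_indexing(a: int, b: int, c: int) -> int:
    # Assumed semantics: floor-divide by b, then take the result modulo c.
    return (a // b) % c

i2, r3 = 4, 8
for i0 in range(64):
    for i1 in range(4):
        assert modular_indexing(i0 + i1 * i2 * r3, i2, r3) == modular_indexing(i0, i2, r3)
print("identity holds for the sampled values")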

View File

@ -166,7 +166,7 @@ class TestMaxAutotune(TestCase):
@parametrize("autotune_multi_device", (True, False))
def test_max_autotune_mm_plus_mm(self, autotune_in_subproc, autotune_multi_device):
"""
This crash previously due to a triton issue: https://github.com/openai/triton/issues/1298 .
This crash previously due to a triton issue: https://github.com/triton-lang/triton/issues/1298 .
With autotuning in subprocess, we don't crash anymore.
"""
m, n, k = 2048, 1536, 64

View File

@ -2969,7 +2969,7 @@ class CommonTemplate:
return torch.round(a), torch.round(b + 1), torch.round(a, decimals=2)
# without manual_seed, there is some chance this test fails due to:
# https://github.com/openai/triton/issues/530
# https://github.com/triton-lang/triton/issues/530
torch.manual_seed(0)
# with *100 we are always getting a number exactly at .5 which we don't do right in half
@ -7957,7 +7957,7 @@ def forward(self, arg0_1: "Sym(s77)", arg1_1: "Sym(s27)", arg2_1: "Sym(s53)", ar
torch.randint(0, 100, size=[600], dtype=torch.int64),
torch.randn([600, 256, 7, 7]),
],
# workaround for https://github.com/openai/triton/issues/558
# workaround for https://github.com/triton-lang/triton/issues/558
check_lowp=False,
)

View File

@ -52,7 +52,7 @@ class TestQuantizedFunctionalOps(QuantizationTestCase):
# Make sure the results match
# assert_array_almost_equal compares using the following formula:
# abs(desired-actual) < 1.5 * 10**(-decimal)
# (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
# (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html)
# We use decimal = 0 to ignore off-by-1 differences between reference
# and test. Off-by-1 differences arise due to the order of round and
# zero_point addition operation, i.e., if addition followed by round is

View File

@ -320,7 +320,7 @@ class TestStaticQuantizedModule(QuantizationTestCase):
# Make sure the results match
# assert_array_almost_equal compares using the following formula:
# abs(desired-actual) < 1.5 * 10**(-decimal)
# (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
# (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html)
# We use decimal = 0 to ignore off-by-1 differences between reference
# and test. Off-by-1 differences arise due to the order of round and
# zero_point addition operation, i.e., if addition followed by round is

View File

@ -5200,7 +5200,7 @@ class TestQuantizedConv(TestCase):
# Make sure the results match
# assert_array_almost_equal compares using the following formula:
# abs(desired-actual) < 1.5 * 10**(-decimal)
# (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
# (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html)
# We use decimal = 0 to ignore off-by-1 differences between
# reference and test. Off-by-1 differences arise due to the order of
# round and zero_point addition operation, i.e., if addition
@ -6935,7 +6935,7 @@ class TestQuantizedConv(TestCase):
# Make sure the results match
# assert_array_almost_equal compares using the following formula:
# abs(desired-actual) < 1.5 * 10**(-decimal)
# (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
# (https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_almost_equal.html)
# We use decimal = 0 to ignore off-by-1 differences between
# reference and test. Off-by-1 differences arise due to the order of
# round and zero_point addition operation, i.e., if addition

View File

@ -11752,7 +11752,7 @@ class TestNNDeviceType(NNTestCase):
with self.assertRaisesRegex(RuntimeError, msg):
F.nll_loss(x, t, weight=weight)
# Ref: https://github.com/pytorch/pytorch/issue/85005
# Ref: https://github.com/pytorch/pytorch/issues/85005
@onlyCUDA
@largeTensorTest("120GB", "cpu")
@largeTensorTest("45GB", "cuda")
@ -11785,7 +11785,7 @@ class TestNNDeviceType(NNTestCase):
with torch.no_grad():
self.assertTrue(torch.allclose(input.grad.cpu(), input_cpu.grad, rtol=rtol, atol=atol))
# Ref: https://github.com/pytorch/pytorch/issue/108345
# Ref: https://github.com/pytorch/pytorch/issues/108345
@onlyCUDA
@largeTensorTest("20GB", "cpu")
@largeTensorTest("20GB", "cuda")

View File

@ -36,7 +36,7 @@ class TestNumbaIntegration(common.TestCase):
version: (int) Version 0
See:
https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html
https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html
"""
types = [
@ -250,7 +250,7 @@ class TestNumbaIntegration(common.TestCase):
will use the exposed device memory.
See:
https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html
https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html
"""
dtypes = [

View File

@ -1759,7 +1759,6 @@ class TestReductions(TestCase):
# On Windows CI, the current version of `numpy` promotes all lower integers
# dtypes to int32 while `torch` promotes them to int64. Hence we skip on checking
# the exact dtype.
# Reference : https://dr.pytorch.org/api/view-log-full?build_id=122051580
# PR : https://github.com/pytorch/pytorch/pull/38628#issuecomment-655905370
if IS_WINDOWS and is_integral(dtype):
exact_dtype = False

View File

@ -547,7 +547,7 @@ class TestUnaryUfuncs(TestCase):
# sqrt Test Reference: https://github.com/pytorch/pytorch/pull/47424
x = torch.tensor(0.0 - 1.0e20j, dtype=dtype, device=device)
self.compare_with_numpy(torch.sqrt, np.sqrt, x)
# acos test reference: https://github.com/pytorch/pytorch/issue/42952
# acos test reference: https://github.com/pytorch/pytorch/issues/42952
if not (dtype == torch.cdouble and "cuda" in device):
self.compare_with_numpy(torch.acos, np.arccos, x)

View File

@ -4328,7 +4328,7 @@ class TestFromBuffer(TestCase):
@skipif(
IS_PYPY,
reason="PyPy's memoryview currently does not track exports. See: "
"https://foss.heptapod.net/pypy/pypy/-/issues/3724",
"https://github.com/pypy/pypy/issues/3723",
)
def test_mmap_close(self):
# The old buffer protocol was not safe for some things that the new

View File

@ -8,7 +8,7 @@ from urllib.request import urlretrieve
MIRRORS = [
"http://yann.lecun.com/exdb/mnist/",
"https://ossci-datasets.s3.amazonaws.com/mnist/",
"https://ossci-datasets.s3.amazonaws.com/mnist/", # @lint-ignore
]
RESOURCES = [

View File

@ -81,7 +81,7 @@ def get_external_pr_data(
response = cast(
dict[str, Any],
fetch_json(
"https://api.github.com/search/issues",
"https://api.github.com/search/issues", # @lint-ignore
params={
"q": f'repo:pytorch/pytorch is:pr is:closed \
label:"open source" label:Merged -label:Reverted closed:{period_begin_date}..{period_end_date}',

View File

@ -87,7 +87,7 @@ class OssCiUtilizationMetadataV1:
# this data model is for the time series data:
# https://github.com/pytorch/test-infra/blob/main/clickhouse_db_schema/oss_ci_utilization/oss_ci_utilization_time_series_schema.sql
# https://github.com/pytorch/test-infra/blob/main/clickhouse_db_schema/oss_ci_utilization/oss_ci_time_series_schema.sql
@dataclass
class OssCiUtilizationTimeSeriesV1:
created_at: int

View File

@ -12,7 +12,7 @@ MOCK_TEST_DATA = [
"sha": "f02f3046571d21b48af3067e308a1e0f29b43af9",
"id": 7819529276,
"conclusion": "failure",
"htmlUrl": "https://github.com/pytorch/pytorch/runs/7819529276?check_suite_focus=true",
"htmlUrl": "https://github.com/pytorch/pytorch/runs/7819529276?check_suite_focus=true", # @lint-ignore
"logUrl": "https://ossci-raw-job-status.s3.amazonaws.com/log/7819529276",
"durationS": 14876,
"failureLine": "##[error]The action has timed out.",
@ -25,7 +25,7 @@ MOCK_TEST_DATA = [
"sha": "d0d6b1f2222bf90f478796d84a525869898f55b6",
"id": 7818399623,
"conclusion": "failure",
"htmlUrl": "https://github.com/pytorch/pytorch/runs/7818399623?check_suite_focus=true",
"htmlUrl": "https://github.com/pytorch/pytorch/runs/7818399623?check_suite_focus=true", # @lint-ignore
"logUrl": "https://ossci-raw-job-status.s3.amazonaws.com/log/7818399623",
"durationS": 14882,
"failureLine": "##[error]The action has timed out.",

View File

@ -94,7 +94,7 @@ def trigger_upload_test_stats_intermediate_workflow() -> None:
# The GITHUB_TOKEN cannot trigger workflow so this isn't used for now
print("Triggering upload_test_stats_intermediate workflow")
x = requests.post(
"https://api.github.com/repos/pytorch/pytorch/actions/workflows/upload_test_stats_intermediate.yml/dispatches",
"https://api.github.com/repos/pytorch/pytorch/actions/workflows/upload_test_stats_intermediate.yml/dispatches", # noqa: B950 @lint-ignore
headers={
"Accept": "application/vnd.github.v3+json",
"Authorization": f"Bearer {os.environ.get('GITHUB_TOKEN')}",

View File

@ -1267,7 +1267,7 @@ def _should_allow_numbers_as_tensors(func_name: str) -> _bool: ...
def _group_tensors_by_device_and_dtype(nested_tensorlists: List[List[Optional[Tensor]]], with_indices: _bool = False) -> Dict[Tuple[torch.device, torch.dtype], Tuple[List[List[Optional[Tensor]]], List[_int]]]: ...
# NB: There is no Capsule type in typing, see
# https://code.activestate.com/lists/python-dev/139675/
# https://github.com/python/cpython/issues/109562
def _to_dlpack(data: Tensor) -> Any: ... # THPModule_toDLPack
def _from_dlpack(data: Any) -> Tensor: ... # THPModule_fromDLPack
def _get_cpp_backtrace(

View File

@ -41,9 +41,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
See <https://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - Windows "Known Folders": https://learn.microsoft.com/en-us/windows/win32/shell/csidl
# - macOS File System Programming Guide: https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/Introduction/Introduction.html
# - XDG spec for Un*x: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
__version__ = "1.4.4"

View File

@ -4389,8 +4389,7 @@ def should_fold(tensor1: torch.Tensor, tensor2: torch.Tensor, is_out: bool) -> b
t1_stride = t1.stride()
# Check the contiguous, we can skip the dim with size of 1
# as aten: https://github.com/pytorch/pytorch/blob/
# e201460f8aa1510b4c4686627d57b69756c4b916/aten/src/ATen/TensorGeometry.cpp#L17
# as aten: https://github.com/pytorch/pytorch/blob/e201460f8aa1510b4c4686627d57b69756c4b916/aten/src/ATen/TensorGeometry.cpp#L17
expected_stride = [1]
for size in reversed(t1_shape[1:]):
expected_stride.append(size * expected_stride[-1])
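
The loop referenced here builds the expected strides of a contiguous tensor from right to left; a standalone sketch of that computation:

def contiguous_strides(shape):
    # Expected strides of a C-contiguous tensor: the last dim has stride 1,
    # and each earlier stride is the product of the sizes to its right.
    strides = [1]
    for size in reversed(shape[1:]):
        strides.append(size * strides[-1])
    return list(reversed(strides))

print(contiguous_strides([2, 3, 4]))  # [12, 4, 1]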

View File

@ -4881,9 +4881,8 @@ class CppScheduling(BaseScheduling):
len(get_call_ranges(_node)) == node.outer_loop_fusion_depth + 1
for _node in node.get_outer_nodes()
):
# Ref to the typical case of local buffer
# in https://github.com/pytorch/pytorch/blob/
# 1115a25c36340554442f28f9570abd42f0aface2/aten/src/ATen/native/cpu/SoftMaxKernel.cpp#L159
# Ref to the typical case of local buffer in
# https://github.com/pytorch/pytorch/blob/1115a25c36340554442f28f9570abd42f0aface2/aten/src/ATen/native/cpu/SoftMaxKernel.cpp#L159 # noqa: B950
# where the buffer is with size of last dim and contiguous.
# Only support this typical case at first.
visited_scheduler_nodes = OrderedSet[str]()

View File

@ -504,7 +504,7 @@ class BlockPtrOptions:
def triton_reshape(
value: str, old_shape: Sequence[sympy.Expr], new_shape: Sequence[sympy.Expr]
) -> str:
"""Workaround https://github.com/openai/triton/issues/2836"""
"""Workaround https://github.com/triton-lang/triton/issues/2836"""
assert isinstance(old_shape, list) and isinstance(new_shape, list)
old_shape_str = [V.kernel.index_to_str(shape) for shape in old_shape]
@ -841,7 +841,7 @@ class TritonOverrides(OpOverrides):
# fp8 data type conversions has min_elem_per_thread requirements.
# Refer to Triton implementations here:
# https://github.com/openai/triton/blob/10f59d8ce04052521c1bc0cb3a3f8b98918fc7e3/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp#L10.
# https://github.com/triton-lang/triton/blob/10f59d8ce04052521c1bc0cb3a3f8b98918fc7e3/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp#L10.
fp8_dtypes = (
torch.float8_e4m3fn,
torch.float8_e5m2,
@ -1828,7 +1828,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
and len(mask_vars - dense_mask_vars) == 0
and not self.is_indirect_indexing(index)
and have_loop_vars
# workaround https://github.com/openai/triton/issues/2821
# workaround https://github.com/triton-lang/triton/issues/2821
and self.index_dtype == "tl.int32"
):
@ -2053,7 +2053,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
) -> tuple[str, str]:
check = indexing.boundary_check()
if not check:
# workaround https://github.com/openai/triton/issues/2813
# workaround https://github.com/triton-lang/triton/issues/2813
other = ""
elif other:
assert other == ", other=0.0"
@ -2114,7 +2114,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
value, indexing.final_shape, indexing.block_shape, False
)
# workaround https://github.com/openai/triton/issues/2814
# workaround https://github.com/triton-lang/triton/issues/2814
value = f"{value}.to({triton_store_type(V.graph.get_dtype(name))})"
return f"tl.store({block_ptr}, {value}{other})"
@ -2260,7 +2260,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
line += ".to(tl.float32)"
dtype = torch.float32
if dtype == torch.bool and torch.version.hip is None:
# Workaround for https://github.com/openai/triton/issues/2151
# Workaround for https://github.com/triton-lang/triton/issues/2151
# tl.load returns int8 when loading from pointer to int1
# NOTE: Currently causes hangs on bool UTs for ROCm
line += ".to(tl.int1)"
@ -2302,7 +2302,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
indexing = self.indexing(index, dense_indexing=True, block_ptr=mode is None)
# Guard against write-after-read corruption in triton.
# See # https://github.com/openai/triton/issues/1615
# See # https://github.com/triton-lang/triton/issues/1615
# This triton bug means that a load which is broadcasted over multiple
# warps may see the result of a store that happens later in the triton
# program. The workaround is to add a barrier before storing, which
@ -3655,7 +3655,7 @@ class TritonKernel(SIMDKernel[TritonCSEVariable]):
# when they are not constexpr. otherwise there may be a segfault
# during launching the Inductor-compiled Triton kernel.
# https://github.com/pytorch/pytorch/issues/120478#issuecomment-1962822307
# https://github.com/openai/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384
# https://github.com/triton-lang/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384
for arg_num in equal_1_arg_indices(signature): # type: ignore[index]
triton_meta["constants"][signature[arg_num].name] = 1 # type: ignore[index,union-attr]

View File

@ -34,7 +34,7 @@ def should_unwrap_unspec_arg(name: str):
def signature_of(arg: KernelArgType, *, size_dtype: Optional[str]) -> str:
if isinstance(arg, TensorArg):
# TODO: Remove fp8 special handling when Triton supports PyTorch fp8 dtypes.
# Related PR: https://github.com/openai/triton/pull/2279/
# Related PR: https://github.com/triton-lang/triton/pull/2279/
if arg.dtype == torch.float8_e4m3fn:
tye = "*fp8e4nv"
elif arg.dtype == torch.float8_e5m2:
@ -184,7 +184,7 @@ def config_of(
def is_aligned(x: KernelArgType, alignment: int, include_tensor: bool) -> bool:
"""
Roughly follow triton code here:
https://github.com/openai/triton/blob/5282ed890d453e10b9ee30076ef89115dd197761/python/triton/runtime/jit.py#L208-L222
https://github.com/triton-lang/triton/blob/5282ed890d453e10b9ee30076ef89115dd197761/python/triton/runtime/jit.py#L208-L222
"""
if isinstance(x, TensorArg):
if include_tensor:

View File

@ -1985,7 +1985,7 @@ class PythonWrapperCodegen(CodeGen):
# TODO(aakhundov): add None args to constants, too. currently, this
# causes CUDA errors in test_aot_inductor.test_triton_kernel_with_none_input.
# https://github.com/pytorch/pytorch/issues/120478#issuecomment-1962822307
# https://github.com/openai/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384
# https://github.com/triton-lang/triton/blob/231efe9ed2d200be0f69a07c298e4342b08efe3d/python/triton/runtime/jit.py#L384
"constants": {
**constants,
**dict.fromkeys(equal_to_1_args, 1),

View File

@ -1158,7 +1158,7 @@ class triton:
# of registers being benchmarked.
#
# NOTE: triton will always report >0 register spills for kernels using sin/cos.
# (check this issue https://github.com/openai/triton/issues/1756 )
# (check this issue https://github.com/triton-lang/triton/issues/1756 )
# So far we see a fixed 8 spilled registers for kernels using sin/cos.
# Raise the threshold to 16 to be safe.
# We should revisit this once we understand more of the source of register spills.

View File

@ -326,7 +326,7 @@ def should_exclude_padding_time(match: Match, arg_name: str) -> bool:
if not fetch_fake_tensors(match, (arg_name,))[0].is_contiguous():
return False
# TODO - see issue https://githpub.com/pytorch/pytorch/issues/128889
# TODO - see issue https://github.com/pytorch/pytorch/issues/128889
# We would only able to completely plan these out if we were only doing
# first dimension padding. non-first we would still need a copy
# because these outputs are fixed dense.

View File

@ -2185,8 +2185,7 @@ def _register_qlinear_weight_prepack():
# Step 2: register patterns from bmm
# Linear might be decomposed into bmm when input dim exceeds 2 and not contiguous
# refer to:
# https://github.com/pytorch/pytorch/blob/
# 80c07df659362a95da7cd4f3ec367abfdace38c4/torch/_decomp/decompositions.py#L3965-L3968
# https://github.com/pytorch/pytorch/blob/80c07df659362a95da7cd4f3ec367abfdace38c4/torch/_decomp/decompositions.py#L3965-L3968
# in this case, we can convert it back to qlinear
for dtype, with_bias, is_tensor_overload in itertools.product(
[torch.float32, torch.bfloat16], [True, False], [True, False]

View File

@ -620,7 +620,7 @@ def convolution(
PADDING_W=padding[1],
GROUPS=groups,
# TODO(jansel): try unroll for bigger kernels once fixed:
# https://github.com/openai/triton/issues/1254
# https://github.com/triton-lang/triton/issues/1254
UNROLL=is_ones(kernel_shape),
ALLOW_TF32=torch.backends.cudnn.allow_tf32,
num_stages=cfg.num_stages,
@ -643,7 +643,7 @@ def convolution(
PADDING_W=padding[2],
GROUPS=groups,
# TODO(jansel): try unroll for bigger kernels once fixed:
# https://github.com/openai/triton/issues/1254
# https://github.com/triton-lang/triton/issues/1254
UNROLL=is_ones(kernel_shape),
ALLOW_TF32=torch.backends.cudnn.allow_tf32,
num_stages=cfg.num_stages,

View File

@ -134,7 +134,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None):
)
):
# TODO(jansel): support different K values when this is fixed:
# https://github.com/openai/triton/issues/967
# https://github.com/triton-lang/triton/issues/967
return lowerings[aten.add](
lowerings[aten.mm](mat1, mat2), lowerings[aten.mm](mat3, mat4)
)
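
In eager terms, the fallback path above is simply two matmuls followed by an add; a minimal reference sketch (not the Inductor lowering itself):

import torch

def mm_plus_mm_reference(mat1, mat2, mat3, mat4):
    # Equivalent of the lowering fallback: aten.mm twice, then aten.add.
    return torch.mm(mat1, mat2) + torch.mm(mat3, mat4)

a, b = torch.randn(4, 3), torch.randn(3, 5)
c, d = torch.randn(4, 2), torch.randn(2, 5)
print(mm_plus_mm_reference(a, b, c, d).shape)  # torch.Size([4, 5])
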
@ -151,7 +151,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None):
if use_triton_template(layout1):
for config in mm_configs():
# see https://github.com/openai/triton/issues/1298
# see https://github.com/triton-lang/triton/issues/1298
# BLOCK_K = K causes llvm error
if V.graph.sizevars.statically_known_lt(config.kwargs["BLOCK_K"], k1):
mm_plus_mm_template.maybe_append_choice(

View File

@ -6092,7 +6092,7 @@ def div_mode(a, b, rounding_mode=None):
both_boolean = is_boolean_type(a) and is_boolean_type(b)
# floordiv and truncdiv need special handling for integer tensors on Triton,
# see the discussion at https://github.com/openai/triton/issues/605
# see the discussion at https://github.com/triton-lang/triton/issues/605
if rounding_mode == "floor":
assert not both_boolean, "floordiv operands can not be boolean at the same time"
return floordiv(a, b) if both_integer else floor(div(a, b))
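
The distinction handled here is visible at the Python level too; a small example of floor versus trunc division for integer tensors:

import torch

a = torch.tensor([7, -7])
b = torch.tensor([2, 2])

# "floor" rounds toward negative infinity, "trunc" rounds toward zero.
print(torch.div(a, b, rounding_mode="floor"))  # tensor([ 3, -4])
print(torch.div(a, b, rounding_mode="trunc"))  # tensor([ 3, -3])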

View File

@ -707,8 +707,8 @@ def register_onednn_fusion_ops():
assert x_zp.get_numel() == 1, "x_zp is incompatible with oneDNN qlinear"
# When channels less than 8, w_scale/w_zp is Pointwise instead of ConstantBuffer
# Refer to https://github.com/pytorch/pytorch/blob
# /f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577
# Refer to
# https://github.com/pytorch/pytorch/blob/f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577 # noqa: B950
if w_zp is None:
# If w_zp is None, then it's a dummy tensor created to denote the
# absence of a zero point, and thus w is int8 symmetrically quantized.
@ -1018,8 +1018,8 @@ def register_onednn_fusion_ops():
x_zp.realize()
# When channels less than 8, w_scale/w_zp is Pointwise instead of ConstantBuffer
# Refer to https://github.com/pytorch/pytorch/blob
# /f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577
# Refer to
# https://github.com/pytorch/pytorch/blob/f353d17755ed23b02924c962a86ff99a3405fe10/torch/_inductor/graph.py#L570-L577 # noqa: B950
w_scale.realize()
w_zp.realize()
if w_zp.get_dtype() != torch.int32 and isinstance(

View File

@ -44,7 +44,7 @@ if triton is not None:
return (backend, arch)
# In the latest triton, math functions were shuffled around into different modules:
# https://github.com/openai/triton/pull/3172
# https://github.com/triton-lang/triton/pull/3172
try:
from triton.language.extra import libdevice

View File

@ -1472,7 +1472,7 @@ class TritonCompileResult(CompileResult[CompiledKernel]):
"metadata",
*call_args,
]
else: # args after CompiledKernel.launch_metadata: https://github.com/openai/triton/pull/3492
else: # args after CompiledKernel.launch_metadata: https://github.com/triton-lang/triton/pull/3492
# Getting the kernel launch args is extremely perf-sensitive. Evaluating
# `bin.launch_metadata` is relatively expensive, and returns None unless a
# `launch_enter_hook` is installed. So if we don't have that hook installed,

View File

@ -4560,7 +4560,7 @@ class Scheduler:
)
return False
except CompilationError as e:
# workaround triton issue: https://github.com/openai/triton/issues/2151
# workaround triton issue: https://github.com/triton-lang/triton/issues/2151
if "Loop-carried variable" in str(e):
fusion_log.debug(
"ComboKernel benchmark: return True because of loop-carried variable"
@ -4574,7 +4574,7 @@ class Scheduler:
try:
ms2, ms2_clone, _path2_list = self.benchmark_combo_kernel(subkernel_nodes)
except CompilationError as e:
# workaround triton issue: https://github.com/openai/triton/issues/2151
# workaround triton issue: https://github.com/triton-lang/triton/issues/2151
if "Loop-carried variable" in str(e):
fusion_log.debug(
"ComboKernel benchmark: return True because of loop-carried variable"

View File

@ -390,7 +390,7 @@ def ceildiv(
def _type_of(key: Optional[torch.dtype]) -> str:
# Use the function here to get rid of dependencies on the Triton during the codegen.
# Refer to Triton implementation here:
# https://github.com/openai/triton/blob/98b5945d2aef679e00ebca8e07c35c3658ec76de/python/triton/runtime/jit.py#L238
# https://github.com/triton-lang/triton/blob/98b5945d2aef679e00ebca8e07c35c3658ec76de/python/triton/runtime/jit.py#L238
# `None` is nullptr. Implicitly convert to *i8.
if key is None:
return "*i8"
@ -1981,7 +1981,7 @@ def get_device_tflops(dtype: torch.dtype) -> int:
assert dtype in (torch.float16, torch.bfloat16, torch.float32)
if inspect.signature(get_max_simd_tflops).parameters.get("clock_rate"):
# Triton API change in https://github.com/openai/triton/pull/2293
# Triton API change in https://github.com/triton-lang/triton/pull/2293
from torch._utils_internal import max_clock_rate
sm_clock = max_clock_rate()

View File

@ -435,7 +435,7 @@ class ndarray:
def item(self, *args):
# Mimic NumPy's implementation with three special cases (no arguments,
# a flat index and a multi-index):
# https://github.com/numpy/numpy/blob/main/numpy/core/src/multiarray/methods.c#L702
# https://github.com/numpy/numpy/blob/main/numpy/_core/src/multiarray/methods.c#L702
if args == ():
return self.tensor.item()
elif len(args) == 1:

View File

@ -1262,7 +1262,7 @@ class Tensor(torch._C.TensorBase):
"""Array view description for cuda tensors.
See:
https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html
https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html
"""
if has_torch_function_unary(self):
# TODO mypy doesn't support @property, see: https://github.com/python/mypy/issues/6185
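
A small sketch of reading this property (requires a CUDA device; the dict layout follows the CUDA Array Interface spec linked above):

import torch

if torch.cuda.is_available():
    t = torch.arange(6, device="cuda", dtype=torch.float32).reshape(2, 3)
    desc = t.__cuda_array_interface__
    # Libraries such as Numba read these fields to wrap the memory zero-copy.
    print(desc["shape"], desc["typestr"], desc["version"])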

View File

@ -4163,9 +4163,9 @@ Unlike :meth:`~Tensor.expand`, this function copies the tensor's data.
.. warning::
:meth:`~Tensor.repeat` behaves differently from
`numpy.repeat <https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html>`_,
`numpy.repeat <https://numpy.org/doc/stable/reference/generated/numpy.repeat.html>`_,
but is more similar to
`numpy.tile <https://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html>`_.
`numpy.tile <https://numpy.org/doc/stable/reference/generated/numpy.tile.html>`_.
For the operator similar to `numpy.repeat`, see :func:`torch.repeat_interleave`.
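
A quick illustration of the distinction drawn in this warning:

import torch

t = torch.tensor([1, 2, 3])

# Tensor.repeat tiles the whole tensor, like numpy.tile ...
print(t.repeat(2))             # tensor([1, 2, 3, 1, 2, 3])

# ... while repeat_interleave repeats each element, like numpy.repeat.
print(t.repeat_interleave(2))  # tensor([1, 1, 2, 2, 3, 3])
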
Args:

View File

@ -767,7 +767,7 @@ This function checks if :attr:`input` and :attr:`other` satisfy the condition:
"""
+ r"""
elementwise, for all elements of :attr:`input` and :attr:`other`. The behaviour of this function is analogous to
`numpy.allclose <https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html>`_
`numpy.allclose <https://numpy.org/doc/stable/reference/generated/numpy.allclose.html>`_
Args:
input (Tensor): first tensor to compare
@ -13826,7 +13826,7 @@ Returns the indices of the buckets to which each value in the :attr:`input` belo
boundaries of the buckets are set by :attr:`boundaries`. Return a new tensor with the same size
as :attr:`input`. If :attr:`right` is False (default), then the left boundary is open. Note that
this behavior is opposite the behavior of
`numpy.digitize <https://docs.scipy.org/doc/numpy/reference/generated/numpy.digitize.html>`_.
`numpy.digitize <https://numpy.org/doc/stable/reference/generated/numpy.digitize.html>`_.
More formally, the returned index satisfies the following rules:
.. list-table::

View File

@ -219,7 +219,7 @@ def _vmap(
# The `allow_none_pass_through` argument is a temporary workaround may be removed.
# Currently it enables us to wrap the call in `autograd.grad` to the autograd engine,
# which may return None if any of the inputs are unused. See the issue discussing this:
# https://github.com/facebookresearch/functorch/issues/159.
# https://github.com/pytorch/functorch/issues/159.
@functools.wraps(func)
def wrapped(*args):
_check_out_dims_is_int_or_int_tuple(out_dims, func)

View File

@ -703,8 +703,8 @@ class X86InductorQuantizer(Quantizer):
# Once we've annotated the model with quantization configurations, we also need to annotate
# the output of quantizable operations. For example, if we annotated `maxpool2d` to quantize its inputs,
# we will quantize its output accordingly. This enables us to fuse the dq-operator-q into a quantized op.
# Refer to https://github.com/intel/intel-extension-for-pytorch/blob/
# 90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L487
# Refer to
# https://github.com/intel/intel-extension-for-pytorch/blob/90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L487 # noqa: B950
self._annotate_output_for_int8_in_int8_out_pattern_entry(model)
@ -732,8 +732,8 @@ class X86InductorQuantizer(Quantizer):
# Step2: Recipe to propagate annotation for patterns beside conv/linear.
# Go through all the nodes from start to end.
# Recipe refer to https://github.com/intel/intel-extension-for-pytorch/blob/
# 90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L538
# Recipe refer to
# https://github.com/intel/intel-extension-for-pytorch/blob/90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_recipe.py#L538 # noqa: B950
self._annotate_propagation_quantizable_pattern_entry(
model, quantization_config, filter_fn
@ -1381,9 +1381,9 @@ class X86InductorQuantizer(Quantizer):
) -> None:
r"""
Check and insert observer at output of node in int8_in_int8_out_ops if needed.
Recipe refers to https://github.com/intel/intel-extension-for-pytorch/blob/
90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_utils.py#L495
"""
Recipe refers to
https://github.com/intel/intel-extension-for-pytorch/blob/90d19323d96afc53fcc22ba5a7bb3fb07fdd6c1c/intel_extension_for_pytorch/quantization/_utils.py#L495
""" # noqa: B950
edge_or_node: tuple[Node, Node]
if (node.target in int8_in_int8_out_ops) and (_is_any_annotated([node])):
if node.target == torch.ops.aten.max_pool2d.default:
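A minimal, hedged sketch of configuring this quantizer; the surrounding export/prepare/convert steps are omitted because their entry points have varied across releases.

```python
from torch.ao.quantization.quantizer.x86_inductor_quantizer import (
    X86InductorQuantizer,
    get_default_x86_inductor_quantization_config,
)

quantizer = X86InductorQuantizer()
quantizer.set_global(get_default_x86_inductor_quantization_config())
# prepare_pt2e(...) / convert_pt2e(...) would then drive the annotation recipe above.
```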

View File

@ -635,7 +635,7 @@ static c10::ArrayRef<T> get_set_cached_attr(
// is also to <=5 elements, we don't need to reallocate.
// Note: I tried removing this optimization and tripped ASAN
// in a batchnorm kernel here:
// https://pipelinesghubeus21.actions.githubusercontent.com/mBh68xKhi8LyM7tp3vECvYXNFvuV4gyVGgmYCteuEZP9JH92QN/_apis/pipelines/1/runs/3373307/signedlogcontent/790?urlExpires=2023-09-15T21%3A13%3A51.4327798Z&urlSigningMethod=HMACV1&urlSignature=tDeX7ZqaARVU5NNwyr5yYqqkWq3A2j4z8FFdqYwGr0Q%3D
// https://pipelinesghubeus21.actions.githubusercontent.com/mBh68xKhi8LyM7tp3vECvYXNFvuV4gyVGgmYCteuEZP9JH92QN/_apis/pipelines/1/runs/3373307/signedlogcontent/790?urlExpires=2023-09-15T21%3A13%3A51.4327798Z&urlSigningMethod=HMACV1&urlSignature=tDeX7ZqaARVU5NNwyr5yYqqkWq3A2j4z8FFdqYwGr0Q%3D @lint-ignore
// We should fix this instead.
bool needs_resize = false;
// We need to resize if:

View File

@ -21,7 +21,7 @@ using namespace torch::nn::utils::rnn;
namespace torch::nn {
/// These must line up with the CUDNN mode codes:
/// https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnRNNMode_t
/// https://docs.nvidia.com/deeplearning/cudnn/backend/latest/api/cudnn-adv-library.html#cudnnrnnmode-t
enum class CuDNNMode { RNN_RELU = 0, RNN_TANH = 1, LSTM = 2, GRU = 3 };
static CuDNNMode get_cudnn_mode_for_rnn(
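For reference, the Python-side modules these mode codes correspond to (a hedged illustration; the enum itself lives in C++ and the hidden sizes are arbitrary):

```python
import torch.nn as nn

nn.RNN(8, 8, nonlinearity="relu")   # RNN_RELU (0)
nn.RNN(8, 8, nonlinearity="tanh")   # RNN_TANH (1), the default nonlinearity
nn.LSTM(8, 8)                       # LSTM (2)
nn.GRU(8, 8)                        # GRU (3)
```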

View File

@ -891,8 +891,8 @@ Tensor logcumsumexp_backward(
return grad;
}
// Reference: https://github.com/tensorflow/tensorflow/blob/
// 2a5910906a0e0f3dbc186ff9db6386d81a63448c/tensorflow/python/ops/math_grad.py#L1832-L1863
// Reference:
// https://github.com/tensorflow/tensorflow/blob/2a5910906a0e0f3dbc186ff9db6386d81a63448c/tensorflow/python/ops/math_grad.py#L1832-L1863
auto scalar_min = AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(
at::ScalarType::BFloat16,
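The referenced backward formula can be sanity-checked numerically from Python with `gradcheck` (a hedged spot check, not a substitute for the reference derivation):

```python
import torch

x = torch.randn(5, dtype=torch.double, requires_grad=True)
assert torch.autograd.gradcheck(lambda t: torch.logcumsumexp(t, dim=0), (x,))
```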

View File

@ -15,7 +15,8 @@ namespace torch::jit::fuser::onednn {
// being created for each device. The device handle passed from PyTorch allows
// oneDNN Graph implementation to work on the device specified by PyTorch, which
// is currently CPU, so we only have one engine.
// Ref: https://spec.oneapi.io/onednn-graph/latest/programming_model.html#engine
// Ref:
// https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#engine
struct Engine {
// CPU engine singleton
static dnnl::engine& getEngine();

View File

@ -1,5 +1,5 @@
# Pytorch - oneDNN Graph API Bridge
This is a PyTorch JIT graph fuser based on [oneDNN Graph API](https://spec.oneapi.io/onednn-graph/latest/programming_model.html), which provides a flexible API for aggressive fusion. Float & BFloat16 inference is supported. However, BFloat16 only performs well on Intel Xeon Cooper Lake platform & beyond, as they have native BFloat16 support. Also, currently, PyTorch has divergent AMP support in JIT & eager modes, so one should disable JIT AMP support & leverage eager mode AMP support to use BFloat16. Please refer to the BFloat16 example below.
This is a PyTorch JIT graph fuser based on [oneDNN Graph API](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model), which provides a flexible API for aggressive fusion. Float & BFloat16 inference is supported. However, BFloat16 only performs well on Intel Xeon Cooper Lake platform & beyond, as they have native BFloat16 support. Also, currently, PyTorch has divergent AMP support in JIT & eager modes, so one should disable JIT AMP support & leverage eager mode AMP support to use BFloat16. Please refer to the BFloat16 example below.
Currently, speedup is achieved only for static shapes, although we'd soon add dynamic-shape support. When oneDNN Graph is enabled, weights are cached, as they're constant during inference.
@ -29,7 +29,7 @@ We have registered optimization passes in the custom pre-passes set of PyTorch:
## Graph Executor
During runtime execution of a (re-written) PyTorch JIT graph, oneDNN graph partitions will be dispatched to the oneDNN graph JIT variadic Operator.
Inside the oneDNN graph JIT Op, input PyTorch tensors of each partition will be mapped to oneDNN graph tensors. The partition will then be [compiled](https://spec.oneapi.io/onednn-graph/latest/programming_model.html#partition) and [executed](https://spec.oneapi.io/onednn-graph/latest/programming_model.html#compiled-partition). The output oneDNN graph tensor will be mapped back to PyTorch tensors to be fed to the next operator on the PyTorch JIT graph.
Inside the oneDNN graph JIT Op, input PyTorch tensors of each partition will be mapped to oneDNN graph tensors. The partition will then be [compiled](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#partition) and [executed](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#compiled-partition). The output oneDNN graph tensor will be mapped back to PyTorch tensors to be fed to the next operator on the PyTorch JIT graph.
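As a hedged sketch of exercising this path (the model and shapes are illustrative, and only static shapes benefit, as noted above):

```python
import torch

torch.jit.enable_onednn_fusion(True)  # register the oneDNN Graph fuser pass

model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU()).eval()
example = torch.randn(8, 64)
with torch.no_grad():
    traced = torch.jit.freeze(torch.jit.trace(model, example))
    traced(example)  # warm-up runs trigger partition compilation; weights are then cached
    traced(example)
```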
## Tests

View File

@ -143,7 +143,7 @@ ExprHandle abs(const ExprHandle& v) {
}
// The default tanh is quite slow, use the Eigen version from here:
// https://bitbucket.org/eigen/eigen/src/94875feeeeb9abe5509b314197da1991ba2070f5/Eigen/src/Core/MathFunctionsImpl.h#lines-26
// https://github.com/TUW-VieVS/VieSchedpp/blob/master/Eigen/src/Core/MathFunctionsImpl.h#L26
ExprHandle fast_tanh(const ExprHandle& v) {
// TODO: use a dedicated bind-var to make sure v is not evaluated multiple
// times. Clamp the input expression to [-9, 9]
@ -205,7 +205,7 @@ ExprHandle fast_sigmoid(const ExprHandle& x) {
ExprHandle fast_log(const ExprHandle& v) {
// this implementation is taken from sleef:
// https://github.com/shibatch/sleef/blob/master/src/libm/sleefsp.c#L1131
// https://github.com/shibatch/sleef/blob/master/src/libm/sleefsimdsp.c#L1277
// to generate coefficients, this tool is provided
// https://github.com/shibatch/sleef/blob/master/src/gencoef/gencoef.txt
auto ilogb2kf = [](const ExprHandle& x) {
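The general decomposition such fast-log kernels build on (not the sleef polynomial itself) can be sketched as follows; `decomposed_log` is a hypothetical name used only for this illustration:

```python
import math

def decomposed_log(x: float) -> float:
    # split x = m * 2**e with m in [0.5, 1), then log(x) = log(m) + e * log(2);
    # a production kernel replaces log(m) with a short polynomial on that narrow range
    m, e = math.frexp(x)
    return math.log(m) + e * math.log(2.0)

assert abs(decomposed_log(123.456) - math.log(123.456)) < 1e-12
```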

View File

@ -1,6 +1,6 @@
/**
* Cache utils in this file is adapted from PyTorch/XLA
* https://github.com/pytorch/xla/blob/master/third_party/xla_client/cache.h
* https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/cache.h
*/
#pragma once

View File

@ -1,6 +1,6 @@
/**
* This file is adapted from PyTorch/XLA
* https://github.com/pytorch/xla/blob/master/third_party/xla_client/metrics.h
* https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h
*/
#pragma once

View File

@ -1,6 +1,6 @@
/**
* This file is adapted from PyTorch/XLA
* https://github.com/pytorch/xla/blob/master/third_party/xla_client/multi_wait.h
* https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/multi_wait.h
*/
#pragma once

View File

@ -1,6 +1,6 @@
/**
* This file is adapted from PyTorch/XLA
* https://github.com/pytorch/xla/blob/master/third_party/xla_client/metrics.h
* https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h
*/
#pragma once

View File

@ -1,6 +1,6 @@
/**
* Unique in this file is adapted from PyTorch/XLA
* https://github.com/pytorch/xla/blob/master/third_party/xla_client/unique.h
* https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/unique.h
*/
#pragma once

Some files were not shown because too many files have changed in this diff.