diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml
index e1c155289c5..7219a868580 100644
--- a/.github/workflows/_link_check.yml
+++ b/.github/workflows/_link_check.yml
@@ -7,29 +7,25 @@ on:
       ref:
         type: string
         required: true
-      run-url-lint:
-        type: boolean
-        required: false
-        default: false
 
 jobs:
   lint-urls:
-    if: ${{ inputs.run-url-lint }}
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 120
       runner: ${{ inputs.runner }}linux.2xlarge
-      docker-image: pytorch-linux-focal-linter
+      docker-image: ci-image:pytorch-linux-focal-linter
       fetch-depth: 0
       submodules: false
       ref: ${{ inputs.ref }}
       script: |
         ./scripts/lint_urls.sh $(
-          { [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
-          || \
-          { [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
+          else
+            echo "${{ github.event.before }}" "${{ github.sha }}"
+          fi
         ) || {
           echo
           echo "URL lint failed."
@@ -44,17 +40,17 @@ jobs:
     with:
       timeout: 60
       runner: ${{ inputs.runner }}linux.2xlarge
-      docker-image: pytorch-linux-focal-linter
+      docker-image: ci-image:pytorch-linux-focal-linter
       fetch-depth: 0
       submodules: false
       ref: ${{ inputs.ref }}
       script: |
         ./scripts/lint_xrefs.sh $(
-          { [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
-          || \
-          { [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
+          else
+            echo "${{ github.event.before }}" "${{ github.sha }}"
+          fi
         ) || {
           echo
           echo "Xref lint failed."
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index bdfa04861e2..9408365025d 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -283,6 +283,15 @@ jobs:
           # All we need to see is that it passes
           python3 torch/utils/collect_env.py
 
+  link-check:
+    name: Link checks
+    needs: get-label-type
+    uses: ./.github/workflows/_link_check.yml
+    with:
+      runner: ${{ needs.get-label-type.outputs.label-type }}
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+    secrets: inherit
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
   cancel-in-progress: true
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 89d8e2e745e..70fea3c8cc1 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -34,7 +34,6 @@ jobs:
     with:
       runner: ${{ needs.get-label-type.outputs.label-type }}
       ref: ${{ github.sha }}
-      run-url-lint: true
     secrets: inherit
 
   docs-build:
diff --git a/scripts/lint_urls.sh b/scripts/lint_urls.sh
index 48db5285907..c8f4e183e17 100755
--- a/scripts/lint_urls.sh
+++ b/scripts/lint_urls.sh
@@ -61,7 +61,7 @@ while IFS=: read -r filepath url; do
   while [ "$(running_jobs)" -ge "$max_jobs" ]; do
     sleep 1
   done
-  done < <(
+done < <(
   pattern='(?!.*@lint-ignore)(?<!git\+)(?<!\$\{)https?://(?![^\s<>\")]*[<>\{\}\$])[^[:space:]<>")\[\]\\|]+'
   excludes=(
     ':(exclude,glob)**/.*'
@@ -73,12 +73,20 @@ while IFS=: read -r filepath url; do
     ':(exclude,glob)**/third-party/**'
     ':(exclude,glob)**/third_party/**'
   )
-  if [ $# -gt 0 ]; then
-    paths=("$@")
+  if [ $# -eq 2 ]; then
+    # Two arguments are a base and head revision: only lines added between them
+    # are scanned. File names are read line by line so paths containing spaces
+    # or glob characters stay intact, and the path and pattern reach perl via
+    # the environment so neither is parsed as perl or sed program text.
+    while IFS= read -r filename; do
+      git diff --unified=0 "$1...$2" -- "$filename" "${excludes[@]}" \
+        | grep -E '^\+' \
+        | grep -Ev '^\+\+\+' \
+        | LINT_FILE="$filename" LINT_PATTERN="$pattern" perl -nle 'print "$ENV{LINT_FILE}:$_" for m/$ENV{LINT_PATTERN}/g'
+    done < <(git diff --name-only "$1...$2")
   else
-    paths=('*')
-  fi
-  git --no-pager grep --no-color -I -P -o "$pattern" -- "${paths[@]}" "${excludes[@]}" \
+    git --no-pager grep --no-color -I -P -o "$pattern" -- . "${excludes[@]}"
+  fi \
   | sed -E 's/[^/[:alnum:]]+$//' \
   | grep -Ev '://(0\.0\.0\.0|127\.0\.0\.1|localhost)([:/])' \
   | grep -Ev '://[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
diff --git a/scripts/lint_xrefs.sh b/scripts/lint_xrefs.sh
index 7a26211b010..8c36d68ba09 100755
--- a/scripts/lint_xrefs.sh
+++ b/scripts/lint_xrefs.sh
@@ -29,12 +29,20 @@ done < <(
     ':(exclude,glob)**/third-party/**'
     ':(exclude,glob)**/third_party/**'
   )
-  if [ $# -gt 0 ]; then
-    paths=("$@")
+  if [ $# -eq 2 ]; then
+    # Two arguments are a base and head revision: only lines added between them
+    # are scanned. File names are read line by line so paths containing spaces
+    # or glob characters stay intact, and the path and pattern reach perl via
+    # the environment so neither is parsed as perl or sed program text.
+    while IFS= read -r filename; do
+      git diff --unified=0 "$1...$2" -- "$filename" "${excludes[@]}" \
+        | grep -E '^\+' \
+        | grep -Ev '^\+\+\+' \
+        | LINT_FILE="$filename" LINT_PATTERN="$pattern" perl -nle 'print "$ENV{LINT_FILE}:$_" for m/$ENV{LINT_PATTERN}/g'
+    done < <(git diff --name-only "$1...$2")
   else
-    paths=('*')
-  fi
-  git --no-pager grep --no-color -I -P -o "$pattern" -- "${paths[@]}" "${excludes[@]}" \
+    git --no-pager grep --no-color -I -P -o "$pattern" -- . "${excludes[@]}"
+  fi \
   | grep -Ev 'https?://' \
   | sed -E \
     -e 's#([^:]+):\[[^]]+\]\(([^)]+)\)#\1:\2#' \