[Monitor] Enable non-perf linux test monitor (#142168)

# Overview
Enable monitoring for non-perf Linux tests.

# Other
- Move the monitoring step to right before the "Download build artifacts" step in mac_test.yml. Note that monitoring is not enabled for this test yet.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/142168
Approved by: https://github.com/huydhn, https://github.com/ZainRizvi
This commit is contained in:
Yang Wang 2024-12-11 01:10:40 +00:00 committed by PyTorch MergeBot
parent 393cf46f42
commit 2b105de2c1
8 changed files with 27 additions and 9 deletions

View File

@ -54,7 +54,7 @@ on:
since we are investigating the behaviour of the monitor script with different tests. since we are investigating the behaviour of the monitor script with different tests.
required: false required: false
type: boolean type: boolean
default: true default: false
secrets: secrets:
HUGGING_FACE_HUB_TOKEN: HUGGING_FACE_HUB_TOKEN:
required: false required: false

View File

@ -88,6 +88,14 @@ jobs:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
continue-on-error: true
run: |
${CONDA_RUN} python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
- name: Download build artifacts - name: Download build artifacts
uses: ./.github/actions/download-build-artifacts uses: ./.github/actions/download-build-artifacts
with: with:
@ -107,14 +115,6 @@ jobs:
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }} environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
continue-on-error: true
run: |
${CONDA_RUN} python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
- name: Parse ref - name: Parse ref
id: parse-ref id: parse-ref
run: .github/scripts/parse_ref.py run: .github/scripts/parse_ref.py

View File

@ -63,4 +63,6 @@ jobs:
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }} docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit

View File

@ -124,6 +124,8 @@ jobs:
test-matrix: ${{ needs.linux-jammy-aarch64-py3_10-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-jammy-aarch64-py3_10-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
timeout-minutes: 720 timeout-minutes: 720
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit

View File

@ -55,4 +55,6 @@ jobs:
# Same as the build job # Same as the build job
python-version: 3.9.12 python-version: 3.9.12
test-matrix: ${{ needs.macos-perf-py3-arm64-build.outputs.test-matrix }} test-matrix: ${{ needs.macos-perf-py3-arm64-build.outputs.test-matrix }}
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit

View File

@ -97,6 +97,8 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
timeout-minutes: 720 timeout-minutes: 720
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit
@ -112,4 +114,6 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
timeout-minutes: 720 timeout-minutes: 720
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit

View File

@ -116,6 +116,8 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
timeout-minutes: 720 timeout-minutes: 720
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit
linux-focal-cuda12_1-py3_10-gcc9-inductor-test-weekly: linux-focal-cuda12_1-py3_10-gcc9-inductor-test-weekly:
@ -130,6 +132,8 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
timeout-minutes: 1440 timeout-minutes: 1440
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit
linux-focal-cuda12_1-py3_10-gcc9-inductor-test: linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
@ -144,4 +148,6 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
timeout-minutes: 720 timeout-minutes: 720
# disable monitor in perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit

View File

@ -104,6 +104,8 @@ jobs:
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }} docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }} test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha use-gha: anything-non-empty-to-use-gha
# disable monitor in smoke perf tests for more investigation
disable-monitor: true
secrets: inherit secrets: inherit
linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build: linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build: