From 90f82426b96597ca3705c56ee7a58658eb61b69b Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Wed, 26 Jun 2024 19:06:52 +0000 Subject: [PATCH] RS migration - trymerge to upload merge records to s3 (#129503) Uploads merge records to to ossci-raw-job-status (public) bucket instead of directly to rockset The runner used by trymerge is a GH runner, so it doesn't have access to s3. Instead, I save the record as a json and upload the json to s3 in a different step that runs after the aws credentials are configured. The role is defined [here](https://togithub.com/pytorch-labs/pytorch-gha-infra/pull/421) Pull Request resolved: https://github.com/pytorch/pytorch/pull/129503 Approved by: https://github.com/huydhn, https://github.com/ZainRizvi, https://github.com/malfet --- .github/scripts/trymerge.py | 82 +++++++++++++--------------------- .github/workflows/trymerge.yml | 17 +++++++ .gitignore | 1 + 3 files changed, 48 insertions(+), 52 deletions(-) diff --git a/.github/scripts/trymerge.py b/.github/scripts/trymerge.py index f32431343ce..bdb964ed354 100755 --- a/.github/scripts/trymerge.py +++ b/.github/scripts/trymerge.py @@ -1177,7 +1177,6 @@ class GitHubPR: # Finally, upload the record to Rockset. The list of pending and failed # checks are at the time of the merge save_merge_record( - collection=ROCKSET_MERGES_COLLECTION, comment_id=comment_id, pr_num=self.pr_num, owner=self.org, @@ -1193,10 +1192,8 @@ class GitHubPR: merge_base_sha=self.get_merge_base(), merge_commit_sha=merge_commit_sha, is_failed=False, - dry_run=dry_run, skip_mandatory_checks=skip_mandatory_checks, ignore_current=bool(ignore_current_checks), - workspace=ROCKSET_MERGES_WORKSPACE, ) else: print("Missing comment ID or PR number, couldn't upload to Rockset") @@ -1503,7 +1500,6 @@ def checks_to_markdown_bullets( @retries_decorator() def save_merge_record( - collection: str, comment_id: int, pr_num: int, owner: str, @@ -1519,59 +1515,44 @@ def save_merge_record( merge_base_sha: str, merge_commit_sha: str = "", is_failed: bool = False, - dry_run: bool = False, skip_mandatory_checks: bool = False, ignore_current: bool = False, error: str = "", - workspace: str = "commons", ) -> None: """ - This saves the merge records into Rockset, so we can query them (for fun and profit) + This saves the merge records as a json, which can later be uploaded to s3 """ - if dry_run: - # Decide not to save the record to Rockset if dry-run is set to not pollute - # the collection - return - try: - import rockset # type: ignore[import] + # Prepare the record to be written into Rockset + data = [ + { + "comment_id": comment_id, + "pr_num": pr_num, + "owner": owner, + "project": project, + "author": author, + "pending_checks": pending_checks, + "failed_checks": failed_checks, + "ignore_current_checks": ignore_current_checks, + "broken_trunk_checks": broken_trunk_checks, + "flaky_checks": flaky_checks, + "unstable_checks": unstable_checks, + "last_commit_sha": last_commit_sha, + "merge_base_sha": merge_base_sha, + "merge_commit_sha": merge_commit_sha, + "is_failed": is_failed, + "skip_mandatory_checks": skip_mandatory_checks, + "ignore_current": ignore_current, + "error": error, + # This is a unique identifier for the record for deduping purposes + # in rockset. Any unique string would work + "_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}", + } + ] + repo_root = Path(__file__).resolve().parent.parent.parent - # Prepare the record to be written into Rockset - data = [ - { - "comment_id": comment_id, - "pr_num": pr_num, - "owner": owner, - "project": project, - "author": author, - "pending_checks": pending_checks, - "failed_checks": failed_checks, - "ignore_current_checks": ignore_current_checks, - "broken_trunk_checks": broken_trunk_checks, - "flaky_checks": flaky_checks, - "unstable_checks": unstable_checks, - "last_commit_sha": last_commit_sha, - "merge_base_sha": merge_base_sha, - "merge_commit_sha": merge_commit_sha, - "is_failed": is_failed, - "skip_mandatory_checks": skip_mandatory_checks, - "ignore_current": ignore_current, - "error": error, - } - ] - - client = rockset.RocksetClient( - host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"] - ) - client.Documents.add_documents( - collection=collection, - data=data, - workspace=workspace, - ) - - except ModuleNotFoundError: - print("Rockset is missing, no record will be saved") - return + with open(repo_root / "merge_record.json", "w") as f: + json.dump(data, f) @retries_decorator(rc=[]) @@ -2388,7 +2369,6 @@ def main() -> None: # list of pending and failed checks here, but they are not really # needed at the moment save_merge_record( - collection=ROCKSET_MERGES_COLLECTION, comment_id=args.comment_id, pr_num=args.pr_num, owner=org, @@ -2403,11 +2383,9 @@ def main() -> None: last_commit_sha=pr.last_commit().get("oid", ""), merge_base_sha=pr.get_merge_base(), is_failed=True, - dry_run=args.dry_run, skip_mandatory_checks=args.force, ignore_current=args.ignore_current, error=str(e), - workspace=ROCKSET_MERGES_WORKSPACE, ) else: print("Missing comment ID or PR number, couldn't upload to Rockset") diff --git a/.github/workflows/trymerge.yml b/.github/workflows/trymerge.yml index db0b80b32fa..ad0988c732e 100644 --- a/.github/workflows/trymerge.yml +++ b/.github/workflows/trymerge.yml @@ -43,6 +43,7 @@ jobs: IGNORE_CURRENT: ${{ github.event.client_payload.ignore_current }} ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }} DRCI_BOT_KEY: ${{ secrets.DRCI_BOT_KEY }} + GITHUB_RUN_ID: ${{ github.run_id }} run: | set -x if [ -n "${REBASE}" ]; then @@ -84,6 +85,22 @@ jobs: set -x python3 .github/scripts/comment_on_pr.py "${PR_NUM}" "merge" + - name: configure aws credentials + uses: aws-actions/configure-aws-credentials@v3 + continue-on-error: true + with: + role-to-assume: arn:aws:iam::308535385114:role/upload_to_ossci_raw_job_status + aws-region: us-east-1 + + - name: Upload merge record to s3 + if: always() + continue-on-error: true + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: ossci-raw-job-status + s3-prefix: merges/${{ github.repository }}/${{ github.event.client_payload.pr_num }}/${{ github.event.client_payload.comment_id }}/${{ github.run_id }} + path: merge-record.json + # We want newer merge commands to supercede old ones concurrency: group: try-merge-${{ github.event.client_payload.pr_num }} diff --git a/.gitignore b/.gitignore index bfb3013c6d1..8a1d99053a1 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,7 @@ env scripts/release_notes/*.json sccache-stats*.json lint.json +merge_record.json # These files get copied over on invoking setup.py torchgen/packaged/*