mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
fixing stress test failure (#164353)
Summary: This diff fixes a stress test failure by adding a new binary echo4.py and modifying the existing echo1.py binary. The changes are made in both fbcode and xplat directories. The api_test.py file is updated to use the new echo4.py binary, and the BUCK file is updated to include the new binary. Test Plan: ``` buck test -j 18 'fbcode//mode/opt' fbcode//caffe2/test/distributed/elastic/multiprocessing:api_test -- --exact 'caffe2/test/distributed/elastic/multiprocessing:api_test - test_binary_redirect_and_tee (api_test.StartProcessesListAsBinaryTest)' --run-disabled --stress-runs 20 --record-results ``` ``` buck test -j 18 'fbcode//mode/opt' fbcode//caffe2/test/distributed/elastic/multiprocessing:api_test -- --exact 'caffe2/test/distributed/elastic/multiprocessing:api_test - test_binary (api_test.StartProcessesListAsBinaryTest)' --run-disabled --stress-runs 20 --record-results ``` https://www.internalfb.com/intern/testinfra/testrun/17732923648474906 https://www.internalfb.com/intern/testinfra/testrun/15481123834815653 Differential Revision: D83623694 Pull Request resolved: https://github.com/pytorch/pytorch/pull/164353 Approved by: https://github.com/d4l3k
This commit is contained in:
parent
3681312ce0
commit
9ffba8a2f9
|
|
@ -559,7 +559,7 @@ if not (TEST_WITH_DEV_DBG_ASAN or IS_WINDOWS or IS_MACOS):
|
|||
FAIL = 138
|
||||
pc = start_processes(
|
||||
name="echo",
|
||||
entrypoint=bin("echo1.py"),
|
||||
entrypoint=bin("echo4.py"),
|
||||
args={0: ("--exitcode", FAIL, "foo"), 1: ("--exitcode", 0, "bar")},
|
||||
envs={0: {"RANK": "0"}, 1: {"RANK": "1"}},
|
||||
logs_specs=DefaultLogsSpecs(
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@
|
|||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -24,6 +23,5 @@ if __name__ == "__main__":
|
|||
print(f"exit {exitcode} from {rank}", file=sys.stderr)
|
||||
sys.exit(exitcode)
|
||||
else:
|
||||
time.sleep(1000)
|
||||
print(f"{args.msg} stdout from {rank}")
|
||||
print(f"{args.msg} stderr from {rank}", file=sys.stderr)
|
||||
|
|
|
|||
29
test/distributed/elastic/multiprocessing/bin/echo4.py
Executable file
29
test/distributed/elastic/multiprocessing/bin/echo4.py
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="test binary, exits with exitcode")
|
||||
parser.add_argument("--exitcode", type=int, default=0)
|
||||
parser.add_argument("msg", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
rank = int(os.environ["RANK"])
|
||||
exitcode = args.exitcode
|
||||
if exitcode != 0:
|
||||
print(f"exit {exitcode} from {rank}", file=sys.stderr)
|
||||
sys.exit(exitcode)
|
||||
else:
|
||||
time.sleep(1000)
|
||||
print(f"{args.msg} stdout from {rank}")
|
||||
print(f"{args.msg} stderr from {rank}", file=sys.stderr)
|
||||
Loading…
Reference in New Issue
Block a user