mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/43883 Check that the result of GCC coverage in OSS is reasonable and ready to ship. The number of executable lines is not the same between `gcc` and `clang` for the following reasons: * The following lines are counted in `clang` but not in `gcc`: 1. empty line or line with only “{” or “}” 2. some comments are counted in clang but not in gcc 3. `#define ...` -- not supported by gcc according to official documentation * Besides, a statement that spans more than one line will be counted as only one executable line in gcc, but several lines in clang ## Advantage of `gcc` coverage 1. Much faster - code coverage tool runtime is only **4 min** (*amazing!!*) by `gcc`, compared to **3 hours!!** by `clang`, to analyze all the tests' artifacts 2. Use less disk - `Clang`'s artifacts take up as much as 170G, while `GCC`'s take 980M Besides, also update `README.md`. Test Plan: Compare the result in OSS `clang` and OSS `gcc` with the same command: ``` python oss_coverage.py --run-only atest test_nn.py --interested-folder=aten ``` ---- ## GCC **Summary** > time: 0:15:45 summary percentage: 44.85% **Report and Log** [File Coverage Report](P140825162) [Line Coverage Report](P140825196) [Log](P140825385) ------ ## CLANG **Summary** > time: 0:21:35 summary percentage: 44.08% **Report and Log** [File Coverage Report](P140825845) [Line Coverage Report](P140825923) [Log](P140825950) ---------- # Run all tests ``` # run all tests and get coverage over Pytorch python oss_coverage.py ``` **Summary** > time: 1:27:20. ( time to run tests: 1:23:33) summary percentage: 56.62% **Report and Log** [File Coverage Report](P140837175) [Log](P140837121) Reviewed By: malfet Differential Revision: D23416772 fbshipit-source-id: a6810fa4d8199690f10bd0a4f58a42ab2a22182b
205 lines
7.0 KiB
Python
205 lines
7.0 KiB
Python
import json
|
|
import os
|
|
import time
|
|
from typing import Any, Dict, List, Set, Tuple
|
|
|
|
from ..util.setting import JSON_FOLDER_BASE_DIR, TestList, TestPlatform, TestStatusType
|
|
from ..util.utils import (
|
|
check_compiler_type,
|
|
get_cov_type,
|
|
print_error,
|
|
print_time,
|
|
related_to_test_list,
|
|
)
|
|
from .parser.coverage_record import CoverageRecord
|
|
from .parser.gcov_coverage_parser import GcovCoverageParser
|
|
from .parser.llvm_coverage_parser import LlvmCoverageParser
|
|
from .print_report import file_oriented_report, line_oriented_report
|
|
|
|
|
|
# Module-level accumulators shared by the functions below.
#
# covered_lines / uncovered_lines map a transformed (shortened) file path to
# the set of line numbers reported as covered / uncovered; update_coverage()
# fills them and update_set() removes covered lines from the uncovered sets.
covered_lines: Dict[str, Set[int]] = {}
uncovered_lines: Dict[str, Set[int]] = {}
# JSON export paths grouped by read outcome; parse_json() adds each file it
# reads to exactly one of the three buckets.
tests_type: TestStatusType = {"success": set(), "partial": set(), "fail": set()}
|
|
|
|
|
|
def transform_file_name(
    file_path: str, interested_folders: List[str], platform: TestPlatform
) -> str:
    """Shorten a path from a coverage export into the name used in reports.

    Example of the intended effect: ``remote/13223/caffe2/aten`` becomes
    ``caffe2/aten``.

    Args:
        file_path: path of a source file as recorded in the coverage export.
        interested_folders: folders the user restricted the report to. When
            one of them occurs in ``file_path`` the result starts at that
            folder.
        platform: for ``TestPlatform.OSS`` the prefix returned by
            ``get_pytorch_folder()`` is stripped when no interested folder
            matched.

    Returns:
        The shortened path, or the suffix-stripped path unchanged when
        nothing else applies.

    Raises:
        AssertionError: on ``TestPlatform.OSS`` when ``file_path`` does not
            start with the PyTorch folder.
    """
    # A tuple keeps the stripping order deterministic (the patterns do not
    # overlap, so the result is the same in any order).
    # NOTE(review): these look like per-CPU-capability build suffixes - confirm.
    for suffix in (".DEFAULT.cpp", ".AVX.cpp", ".AVX2.cpp"):
        file_path = file_path.replace(suffix, "")

    # If the user has specified interested folders, cut the path at one.
    if interested_folders:
        # First look for the folder as a directory component ("folder/"),
        # the same test is_intrested_file() applies. A bare substring match
        # would cut "/p/pytorch/aten/src/c10d.cpp" at "c10d.cpp" for folders
        # ["c10", "aten"].
        for folder in interested_folders:
            if not folder:
                # An empty folder is left to the substring pass below.
                continue
            folder_dir = folder if folder.endswith("/") else f"{folder}/"
            start = file_path.find(folder_dir)
            if start != -1:
                return file_path[start:]
        # Backward-compatible fallback: plain substring match.
        for folder in interested_folders:
            start = file_path.find(folder)
            if start != -1:
                return file_path[start:]

    # Remove the PyTorch base folder path.
    if platform == TestPlatform.OSS:
        # Imported only when the OSS branch runs, as in the original code.
        from package.oss.utils import get_pytorch_folder

        pytorch_folder = get_pytorch_folder()
        # Explicit raise instead of `assert`, so the check survives `-O`
        # while callers still see the same exception type.
        if not file_path.startswith(pytorch_folder):
            raise AssertionError(
                f"{file_path} is not under the PyTorch folder {pytorch_folder}"
            )
        # +1 also drops the path separator that follows the base folder.
        file_path = file_path[len(pytorch_folder) + 1 :]
    return file_path
|
|
|
|
|
|
def is_intrested_file(
    file_path: str, interested_folders: List[str], platform: TestPlatform
) -> bool:
    """Decide whether coverage recorded for ``file_path`` should be kept.

    Args:
        file_path: path of a source file as recorded in the coverage export.
        interested_folders: folders the user restricted the report to; an
            empty list means no restriction.
        platform: for ``TestPlatform.OSS`` the file must additionally start
            with the folder returned by ``get_pytorch_folder()``.

    Returns:
        ``False`` when the path contains an ignored pattern, lies outside the
        PyTorch folder (OSS only), or matches none of the interested folders;
        ``True`` otherwise.
    """
    ignored_patterns = ("cuda", "aten/gen_aten", "aten/aten_", "build/")
    # A generator (not a list) lets any() stop at the first matching pattern.
    if any(pattern in file_path for pattern in ignored_patterns):
        return False

    # Ignore files that do not belong to PyTorch.
    if platform == TestPlatform.OSS:
        # Imported only when the OSS branch runs, as in the original code.
        from package.oss.utils import get_pytorch_folder

        if not file_path.startswith(get_pytorch_folder()):
            return False

    # Without a user restriction every remaining file is interesting.
    if not interested_folders:
        return True

    # Match each folder as a directory component ("folder/") so that e.g.
    # "torch" does not match "torchvision/...".
    for folder in interested_folders:
        folder_dir = folder if folder.endswith("/") else f"{folder}/"
        if folder_dir in file_path:
            return True
    return False
|
|
|
|
|
|
def get_json_obj(json_file: str) -> Tuple[Any, int]:
    """Load the first line of ``json_file`` that parses as JSON.

    Sometimes llvm/gcov complains "fail to find coverage data" at the start
    of the file; such lines are skipped.

    Args:
        json_file: path of the coverage export to read.

    Returns:
        A ``(json_obj, read_status)`` tuple where ``read_status`` is:

        * ``0`` - success: the first line already held the JSON document.
        * ``1`` - partial success: one or more unparsable lines came before
          the JSON document.
        * ``2`` - failure: no line could be parsed; ``json_obj`` is ``None``.
    """
    skipped_bad_line = False
    with open(json_file) as f:
        # Stream the file instead of calling readlines(), so the whole export
        # is not loaded before the first line is examined.
        for line in f:
            try:
                json_obj = json.loads(line)
            except json.JSONDecodeError:
                # Remember that something preceded the JSON document.
                skipped_bad_line = True
                continue
            # Stop at the first parsable line; later lines are never read.
            return json_obj, (1 if skipped_bad_line else 0)
    return None, 2
|
|
|
|
|
|
def parse_json(json_file: str) -> List[CoverageRecord]:
    """Read one coverage export and turn it into coverage records.

    The file is added to the matching ``tests_type`` bucket ("success",
    "partial" or "fail") according to the status from ``get_json_obj``.

    Args:
        json_file: path of the coverage export to parse.

    Returns:
        The records produced by the parser for the active compiler type; an
        empty list when the type is neither "CLANG" nor "GCC".

    Raises:
        RuntimeError: when no JSON document could be read from ``json_file``.
    """
    print("start parse:", json_file)
    json_obj, read_status = get_json_obj(json_file)
    if read_status == 0:
        tests_type["success"].add(json_file)
    elif read_status == 1:
        tests_type["partial"].add(json_file)
    else:
        tests_type["fail"].add(json_file)
        # One formatted string: passing the path as a second positional
        # argument made the message render as a tuple.
        raise RuntimeError(
            f"Fail to do code coverage! Fail to load json file: {json_file}"
        )

    cov_type = get_cov_type()
    check_compiler_type(cov_type)
    coverage_records: List[CoverageRecord] = []
    if cov_type == "CLANG":
        # NOTE(review): "fbcode" is passed unconditionally and this function
        # has no platform parameter - confirm this is intended for OSS runs.
        coverage_records = LlvmCoverageParser(json_obj).parse("fbcode")
    elif cov_type == "GCC":
        coverage_records = GcovCoverageParser(json_obj).parse()

    return coverage_records
|
|
|
|
|
|
def parse_jsons(
    test_list: TestList, interested_folders: List[str], platform: TestPlatform
) -> None:
    """Parse every relevant ``.json`` export under ``JSON_FOLDER_BASE_DIR``.

    Each export that parses is merged into the module-level line sets via
    ``update_coverage``. Exports that raise ``RuntimeError`` are reported
    with ``print_error`` and skipped.

    Args:
        test_list: tests of the current run, used to filter exports when the
            coverage type is "CLANG".
        interested_folders: forwarded to ``update_coverage``.
        platform: forwarded to ``update_coverage``.
    """
    for dir_path, _, names in os.walk(JSON_FOLDER_BASE_DIR):
        for name in names:
            if not name.endswith(".json"):
                continue
            # With clang only exports related to the test list are analyzed;
            # with gcc every export is analyzed.
            if get_cov_type() == "CLANG" and not related_to_test_list(
                name, test_list
            ):
                continue
            export_path = os.path.join(dir_path, name)
            try:
                records = parse_json(export_path)
            except RuntimeError:
                print_error("Fail to load json file: ", export_path)
                continue
            # Collect information from this target's export and merge it
            # with what has been gathered so far.
            update_coverage(records, interested_folders, platform)
|
|
|
|
|
|
def update_coverage(
    coverage_records: List[CoverageRecord],
    interested_folders: List[str],
    platform: TestPlatform,
) -> None:
    """Merge coverage records into ``covered_lines`` / ``uncovered_lines``.

    Records whose file is rejected by ``is_intrested_file`` are ignored. For
    the rest, the line collections of the record are added to the sets stored
    under the name returned by ``transform_file_name``.

    Args:
        coverage_records: records to merge; each must provide ``to_dict()``
            with the keys "filepath", "covered_lines" and "uncovered_lines".
        interested_folders: forwarded to the filter and the name transform.
        platform: forwarded to the filter and the name transform.
    """
    for entry in coverage_records:
        info = entry.to_dict()
        raw_path = info["filepath"]
        if not is_intrested_file(raw_path, interested_folders, platform):
            continue
        newly_covered = info["covered_lines"]
        newly_uncovered = info["uncovered_lines"]
        # e.g. remote/13223/caffe2/aten -> caffe2/aten
        short_path = transform_file_name(raw_path, interested_folders, platform)

        # Create both entries on first sight of the file, even if one of the
        # line collections turns out to be None.
        covered = covered_lines.setdefault(short_path, set())
        uncovered = uncovered_lines.setdefault(short_path, set())
        if newly_covered is not None:
            covered.update(newly_covered)
        if newly_uncovered is not None:
            uncovered.update(newly_uncovered)
|
|
|
|
|
|
def update_set() -> None:
    """Remove every covered line from the matching uncovered set.

    Operates in place on the module-level ``uncovered_lines`` for each file
    present in ``covered_lines``.
    """
    for path, covered in covered_lines.items():
        uncovered_lines[path].difference_update(covered)
|
|
|
|
|
|
def summarize_jsons(
    test_list: TestList,
    interested_folders: List[str],
    coverage_only: List[str],
    platform: TestPlatform,
    program_start_time: float,
) -> None:
    """Parse all coverage exports and emit both reports.

    Steps, in order: parse the JSON exports, remove covered lines from the
    uncovered sets, call ``line_oriented_report``, call
    ``file_oriented_report``, then call ``print_time`` with the timestamp
    taken on entry.

    Args:
        test_list: tests of the current run.
        interested_folders: folders the user restricted the report to.
        coverage_only: forwarded unchanged to both report functions.
        platform: platform the tool is running on.
        program_start_time: forwarded to ``file_oriented_report``.
    """
    began_at = time.time()

    parse_jsons(test_list, interested_folders, platform)
    update_set()

    # Leading arguments shared by both report calls.
    shared_args = (test_list, tests_type, interested_folders, coverage_only)
    line_oriented_report(*shared_args, covered_lines, uncovered_lines)
    file_oriented_report(
        *shared_args, program_start_time, covered_lines, uncovered_lines
    )

    print_time("summary jsons take time: ", began_at)
|