import json
import argparse
import os
import textwrap
from common import dict_to_features, categories, topics, get_features, CommitDataCache
from commitlist import CommitList


class Categorizer:
    """Interactive CLI that walks the commits in a CommitList CSV and records a
    release-notes category and topic for each one, persisting after every choice."""

    def __init__(self, path, category='Uncategorized'):
        # Cache of per-commit GitHub metadata (title/body/labels), persisted on disk.
        self.cache = CommitDataCache()
        self.commits = CommitList.from_existing(path)

        # Special categories: 'Uncategorized'
        # All other categories must be real
        self.category = category

    def categorize(self):
        """Prompt for a category/topic pair for every commit currently filed
        under `self.category`."""
        # BUG FIX: CommitList.filter declares keyword-only parameters
        # (def filter(self, *, category=None, topic=None)), so the original
        # positional call `filter(self.category)` raised a TypeError.
        commits = self.commits.filter(category=self.category)
        i = 0
        while i < len(commits):
            cur_commit = commits[i]
            next_commit = commits[i + 1] if i + 1 < len(commits) else None
            jump_to = self.handle_commit(cur_commit, i + 1, len(commits), commits)

            # Increment counter: honor an explicit jump target if one was
            # returned, otherwise advance to the next commit (or terminate).
            if jump_to is not None:
                i = jump_to
            elif next_commit is None:
                i = len(commits)
            else:
                i = commits.index(next_commit)

    def features(self, commit):
        """Fetch (and cache) the GitHub metadata for `commit`."""
        return self.cache.get(commit.commit_hash)

    def potential_reverts_of(self, commit, commits):
        """Return {display_index: commit} for later commits whose title contains
        this commit's cleaned title, i.e. likely reverts or re-lands."""
        if 'Updating submodules' in commit.title:
            # Submodule-update commits all share one title; the substring
            # heuristic would flag every one of them, so skip it.
            # Consistency fix: always return a dict (this path returned [] before).
            return {}
        index = commits.index(commit)
        # Strip the trailing ' (#12345)'-style PR suffix (last 10 characters).
        cleaned_title = commit.title[:-10]
        # NB: the index + 2 is sketch -- it converts to the 1-based position
        # that handle_commit displays to the user.
        return {(index + 2 + delta): cand
                for delta, cand in enumerate(commits[index + 1:])
                if cleaned_title in cand.title
                and commit.commit_hash != cand.commit_hash}

    def handle_commit(self, commit, i, total, commits):
        """Render one commit and interactively read a category and topic.

        Returns an index to jump to, or None to continue sequentially."""
        potential_reverts = self.potential_reverts_of(commit, commits)
        if potential_reverts:
            potential_reverts = f'!!!POTENTIAL REVERTS!!!: {potential_reverts}'
        else:
            potential_reverts = ""

        features = self.features(commit)

        # Loud banners for labels that must not be missed in release notes.
        breaking_alarm = ""
        if 'topic: bc-breaking' in features.labels:
            breaking_alarm += "!!!!!! BC BREAKING !!!!!!"

        if 'module: deprecation' in features.labels:
            breaking_alarm += "!!!!!! DEPRECATION !!!!!!"

        os.system('clear')
        view = textwrap.dedent(f'''\
[{i}/{total}]
================================================================================
{features.title}

{features.body}

Files changed: {features.files_changed}

Labels: {features.labels}

{potential_reverts} {breaking_alarm}

Current category: {commit.category}

Select from: {', '.join(categories)}

        ''')
        print(view)
        cat_choice = None
        while cat_choice is None:
            value = input('category> ').strip()
            if len(value) == 0:
                # Empty input keeps the commit's current category.
                cat_choice = commit.category
                continue
            # Prefix matching: the typed value must match exactly one category.
            choices = [cat for cat in categories
                       if cat.startswith(value)]
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            cat_choice = choices[0]
        print(f'\nSelected: {cat_choice}')
        print(f'\nCurrent topic: {commit.topic}')
        print(f'''Select from: {', '.join(topics)}''')
        topic_choice = None
        while topic_choice is None:
            value = input('topic> ').strip()
            if len(value) == 0:
                # Empty input keeps the commit's current topic.
                topic_choice = commit.topic
                continue
            choices = [cat for cat in topics
                       if cat.startswith(value)]
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            topic_choice = choices[0]
        print(f'\nSelected: {topic_choice}')
        self.update_commit(commit, cat_choice, topic_choice)
        return None

    def update_commit(self, commit, category, topic):
        """Record the chosen pair on the commit and persist the whole list."""
        assert category in categories
        assert topic in topics
        commit.category = category
        commit.topic = topic
        self.commits.write_to_disk()


def main():
    parser = argparse.ArgumentParser(description='Tool to help categorize commits')
    parser.add_argument('--category', type=str, default='Uncategorized',
                        help='Which category to filter by. "Uncategorized", None, or a category name')
    parser.add_argument('--file', help='The location of the commits CSV',
                        default='results/commitlist.csv')

    args = parser.parse_args()
    categorizer = Categorizer(args.file, args.category)
    categorizer.categorize()


if __name__ == '__main__':
    main()
"Uncategorized", None, or a category name') + parser.add_argument('--file', help='The location of the commits CSV', + default='results/commitlist.csv') + + args = parser.parse_args() + categorizer = Categorizer(args.file, args.category) + categorizer.categorize() + + +if __name__ == '__main__': + main() diff --git a/scripts/release_notes/commitlist.py b/scripts/release_notes/commitlist.py new file mode 100644 index 00000000000..fda7c913add --- /dev/null +++ b/scripts/release_notes/commitlist.py @@ -0,0 +1,181 @@ +import argparse +from common import run, topics +from collections import namedtuple, defaultdict +import os +import csv +import pprint +from common import CommitDataCache +import re + + +""" +Example Usages + +Create a new commitlist for consumption by categorize.py. +Said commitlist contains commits between v1.5.0 and f5bc91f851. + + python commitlist.py --create_new tags/v1.5.0 f5bc91f851 + +Update the existing commitlist to commit bfcb687b9c. + + python commitlist.py --update_to bfcb687b9c + +""" + +class Commit: + def __init__(self, commit_hash, category, topic, title): + self.commit_hash = commit_hash + self.category = category + self.topic = topic + self.title = title + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + return self.commit_hash == other.commit_hash and \ + self.category == other.category and \ + self.topic == other.topic and \ + self.title == other.title + + def __repr__(self): + return f'Commit({self.commit_hash}, {self.category}, {self.topic}, {self.title})' + +class CommitList: + # NB: Private ctor. Use `from_existing` or `create_new`. 
+ def __init__(self, path, commits): + self.path = path + self.commits = commits + + @staticmethod + def from_existing(path): + commits = CommitList.read_from_disk(path) + return CommitList(path, commits) + + @staticmethod + def create_new(path, base_version, new_version): + if os.path.exists(path): + raise ValueError('Attempted to create a new commitlist but one exists already!') + commits = CommitList.get_commits_between(base_version, new_version) + return CommitList(path, commits) + + @staticmethod + def read_from_disk(path): + with open(path) as csvfile: + reader = csv.reader(csvfile) + rows = list(row for row in reader) + assert all(len(row) >= 4 for row in rows) + return [Commit(*row[:4]) for row in rows] + + def write_to_disk(self): + path = self.path + rows = self.commits + with open(path, 'w') as csvfile: + writer = csv.writer(csvfile) + for commit in rows: + writer.writerow([commit.commit_hash, commit.category, commit.topic, commit.title]) + + @staticmethod + def get_commits_between(base_version, new_version): + cmd = f'git merge-base {base_version} {new_version}' + rc, merge_base, _ = run(cmd) + assert rc == 0 + + # Returns a list of something like + # b33e38ec47 Allow a higher-precision step type for Vec256::arange (#34555) + cmd = f'git log --reverse --oneline {merge_base}..{new_version}' + rc, commits, _ = run(cmd) + assert rc == 0 + + log_lines = commits.split('\n') + hashes, titles = zip(*[log_line.split(' ', 1) for log_line in log_lines]) + return [Commit(commit_hash, 'Uncategorized', 'Untopiced', title) for commit_hash, title in zip(hashes, titles)] + + def filter(self, *, category=None, topic=None): + commits = self.commits + if category is not None: + commits = [commit for commit in commits if commit.category == category] + if topic is not None: + commits = [commit for commit in commits if commit.topic == topic] + return commits + + def update_to(self, new_version): + last_hash = self.commits[-1].commit_hash + new_commits = 
CommitList.get_commits_between(last_hash, new_version) + self.commits += new_commits + + def stat(self): + counts = defaultdict(lambda: defaultdict(int)) + for commit in self.commits: + counts[commit.category][commit.topic] += 1 + return counts + + +def create_new(path, base_version, new_version): + commits = CommitList.create_new(path, base_version, new_version) + commits.write_to_disk() + +def update_existing(path, new_version): + commits = CommitList.from_existing(path) + commits.update_to(new_version) + commits.write_to_disk() + +def to_markdown(commit_list, category): + def cleanup_title(commit): + match = re.match('(.*) \(#\d+\)', commit.title) + if match is None: + return commit.title + return match.group(1) + + cdc = CommitDataCache() + lines = [f'\n## {category}\n'] + for topic in topics: + lines.append(f'### {topic}\n') + commits = commit_list.filter(category=category, topic=topic) + for commit in commits: + result = cleanup_title(commit) + maybe_pr_number = cdc.get(commit.commit_hash).pr_number + if maybe_pr_number is None: + result = f'- {result} ({commit.commit_hash})\n' + else: + result = f'- {result} ([#{maybe_pr_number}](https://github.com/pytorch/pytorch/pull/{maybe_pr_number}))\n' + lines.append(result) + return lines + +def main(): + parser = argparse.ArgumentParser(description='Tool to create a commit list') + + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--create_new', nargs=2) + group.add_argument('--update_to') + group.add_argument('--stat', action='store_true') + group.add_argument('--export_markdown', action='store_true') + + parser.add_argument('--path', default='results/commitlist.csv') + args = parser.parse_args() + + if args.create_new: + create_new(args.path, args.create_new[0], args.create_new[1]) + return + if args.update_to: + update_existing(args.path, args.update_to) + return + if args.stat: + commits = CommitList.from_existing(args.path) + stats = commits.stat() + pprint.pprint(stats) + return 
from collections import namedtuple
from os.path import expanduser
import locale
import subprocess
import re
import requests
import os
import json

# Release-notes categories; 'Uncategorized' is the initial state for new commits.
categories = [
    'Uncategorized',
    'distributed',
    'mobile',
    'jit',
    'visualization',
    'onnx',
    'caffe2',
    'quantization',
    'amd',
    'benchmark',
    'profiler',
    'dispatcher',
    'releng',
    'fx',
    'code_coverage',
    'vulkan',
    'skip',
    'cpp_frontend',
    'python_frontend',
    'complex_frontend',
    'vmap_frontend',
    'autograd_frontend',
    'build_frontend',
    'memory_format_frontend',
    'foreach_frontend',
]

# Release-notes topics; 'Untopiced' is the initial state for new commits.
topics = [
    'bc_breaking',
    'deprecations',
    'new_features',
    'improvements',
    'bug_fixes',
    'performance',
    'docs',
    'devs',
    'Untopiced',
]


# Per-commit metadata gathered from git and the GitHub API.
Features = namedtuple('Features', [
    'title',
    'body',
    'pr_number',
    'files_changed',
    'labels',
])


def dict_to_features(dct):
    """Rebuild a Features tuple from its dict form (inverse of features_to_dict)."""
    return Features(
        title=dct['title'],
        body=dct['body'],
        pr_number=dct['pr_number'],
        files_changed=dct['files_changed'],
        labels=dct['labels'])


def features_to_dict(features):
    """Convert a Features tuple to a plain dict (for JSON serialization)."""
    return dict(features._asdict())


def run(command):
    """Returns (return-code, stdout, stderr)"""
    # NOTE(review): shell=True with interpolated refs/hashes is fine for a
    # trusted local tool, but never pass untrusted input into these commands.
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=True)
    output, err = p.communicate()
    rc = p.returncode
    enc = locale.getpreferredencoding()
    output = output.decode(enc)
    err = err.decode(enc)
    return rc, output.strip(), err.strip()


def commit_body(commit_hash):
    """Return the commit message body for `commit_hash`, or None on failure."""
    cmd = f'git log -n 1 --pretty=format:%b {commit_hash}'
    ret, out, err = run(cmd)
    return out if ret == 0 else None


def commit_title(commit_hash):
    """Return the one-line commit subject for `commit_hash`, or None on failure."""
    cmd = f'git log -n 1 --pretty=format:%s {commit_hash}'
    ret, out, err = run(cmd)
    return out if ret == 0 else None


def commit_files_changed(commit_hash):
    """Return the list of file paths touched by `commit_hash`, or None on failure."""
    cmd = f'git diff-tree --no-commit-id --name-only -r {commit_hash}'
    ret, out, err = run(cmd)
    return out.split('\n') if ret == 0 else None


def parse_pr_number(body, commit_hash, title):
    """Extract the resolved-PR number from a commit body; None if not found."""
    regex = r'Pull Request resolved: https://github.com/pytorch/pytorch/pull/([0-9]+)'
    matches = re.findall(regex, body)
    if len(matches) == 0:
        # Reverts and submodule updates legitimately lack a PR link; stay quiet.
        if 'revert' not in title.lower() and 'updating submodules' not in title.lower():
            print(f'[{commit_hash}: {title}] Could not parse PR number, ignoring PR')
        return None
    if len(matches) > 1:
        print(f'[{commit_hash}: {title}] Got two PR numbers, using the first one')
    # FIX: collapsed the duplicated `return matches[0]` into a single return.
    return matches[0]


def get_ghstack_token():
    """Read the GitHub OAuth token out of ~/.ghstackrc."""
    pattern = 'github_oauth = (.*)'
    # FIX: open read-only; the file is never written here (was 'r+').
    with open(expanduser('~/.ghstackrc'), 'r') as f:
        config = f.read()
    matches = re.findall(pattern, config)
    if len(matches) == 0:
        raise RuntimeError("Can't find a github oauth token")
    return matches[0]

# NOTE(review): this runs at import time, so merely importing common.py fails
# without a ~/.ghstackrc. Kept for backward compatibility (token/headers are
# module-level attributes), but consider lazy initialization.
token = get_ghstack_token()
headers = {"Authorization": f"token {token}"}

def run_query(query):
    """POST a GraphQL query to the GitHub API and return the parsed JSON."""
    request = requests.post('https://api.github.com/graphql', json={'query': query}, headers=headers)
    if request.status_code == 200:
        return request.json()
    else:
        raise Exception("Query failed to run by returning code of {}. {}".format(request.status_code, query))


def gh_labels(pr_number):
    """Return up to 10 label names attached to the given pytorch/pytorch PR."""
    query = f"""
    {{
      repository(owner: "pytorch", name: "pytorch") {{
        pullRequest(number: {pr_number}) {{
          labels(first: 10) {{
            edges {{
              node {{
                name
              }}
            }}
          }}
        }}
      }}
    }}
    """
    query = run_query(query)
    edges = query['data']['repository']['pullRequest']['labels']['edges']
    return [edge['node']['name'] for edge in edges]


def get_features(commit_hash, return_dict=False):
    """Gather title/body/PR-number/files/labels for one commit.

    Hits git locally and (if a PR number is found) the GitHub API."""
    title, body, files_changed = (
        commit_title(commit_hash),
        commit_body(commit_hash),
        commit_files_changed(commit_hash))
    pr_number = parse_pr_number(body, commit_hash, title)
    labels = []
    if pr_number is not None:
        labels = gh_labels(pr_number)
    result = Features(title, body, pr_number, files_changed, labels)
    if return_dict:
        return features_to_dict(result)
    return result

class CommitDataCache:
    """Disk-backed cache mapping commit hash -> Features, stored as JSON."""

    def __init__(self, path='results/data.json'):
        self.path = path
        self.data = {}
        if os.path.exists(path):
            self.data = self.read_from_disk()

    def get(self, commit):
        # FIX: membership test directly on the dict instead of `.keys()`.
        if commit not in self.data:
            # Fetch and cache the data
            self.data[commit] = get_features(commit)
            self.write_to_disk()
        return self.data[commit]

    def read_from_disk(self):
        """Load the JSON cache and revive each entry into a Features tuple."""
        with open(self.path, 'r') as f:
            data = json.load(f)
            data = {commit: dict_to_features(dct)
                    for commit, dct in data.items()}
        return data

    def write_to_disk(self):
        """Serialize the whole cache back to JSON."""
        data = {commit: features._asdict() for commit, features in self.data.items()}
        with open(self.path, 'w') as f:
            json.dump(data, f)
import unittest
import tempfile
from commitlist import CommitList


class TestCommitList(unittest.TestCase):
    """Integration tests for CommitList.

    These run real git commands and therefore require a pytorch checkout that
    contains tag v1.5.0 and the hard-coded commit hashes below."""

    def test_create_new(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            commit_list = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            self.assertEqual(len(commit_list.commits), 2143)
            self.assertEqual(commit_list.commits[0].commit_hash, '7335f079ab')
            self.assertTrue(commit_list.commits[0].title.startswith('[pt][quant] qmul and qadd'))
            self.assertEqual(commit_list.commits[-1].commit_hash, '7543e7e558')
            self.assertTrue(commit_list.commits[-1].title.startswith('Migrate minall, max, maxall'))

    def test_read_write(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            initial = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            initial.write_to_disk()

            expected = CommitList.from_existing(commit_list_path)
            expected.commits[-2].category = 'foobar'
            expected.write_to_disk()

            commit_list = CommitList.from_existing(commit_list_path)
            # BUG FIX: the loop variable was also named `expected`, shadowing
            # the CommitList being iterated; it only worked because zip()
            # evaluates `expected.commits` once up front.
            for commit, expected_commit in zip(commit_list.commits, expected.commits):
                self.assertEqual(commit, expected_commit)

    def test_update_to(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            initial = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            initial.commits[-2].category = 'foobar'
            self.assertEqual(len(initial.commits), 2143)
            initial.write_to_disk()

            commit_list = CommitList.from_existing(commit_list_path)
            commit_list.update_to('5702a28b26')
            self.assertEqual(len(commit_list.commits), 2143 + 4)
            self.assertEqual(commit_list.commits[-5], initial.commits[-1])


if __name__ == '__main__':
    unittest.main()