Add release note scripts (#47360)

Summary:
The first commit contains the initial code from Richard's branch.
The second commit contains the changes that I made during the writing process.
The third commit updates the tool to support a category/topic pair for each commit.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/47360

Reviewed By: ejguan

Differential Revision: D24741003

Pulled By: albanD

fbshipit-source-id: d0fcc6765968dc1732d8a515688d11372c7e653d
This commit is contained in:
Alban Desmaison 2020-11-05 06:40:59 -08:00 committed by Facebook GitHub Bot
parent a4ba018e57
commit 68954fe897
5 changed files with 557 additions and 0 deletions

View File

@ -0,0 +1,134 @@
import json
import argparse
import os
import textwrap
from common import dict_to_features, categories, topics, get_features, CommitDataCache
from commitlist import CommitList
class Categorizer:
    """Interactive tool that walks through commits in a given category and
    prompts the user to assign each one a (category, topic) pair.

    State is persisted to disk after every choice via CommitList, so the
    session can be interrupted and resumed.
    """

    def __init__(self, path, category='Uncategorized'):
        # Disk-backed cache of per-commit metadata (title, body, labels, ...)
        self.cache = CommitDataCache()
        self.commits = CommitList.from_existing(path)
        # Special categories: 'Uncategorized'
        # All other categories must be real
        self.category = category

    def categorize(self):
        # NB: CommitList.filter takes keyword-only arguments; the original
        # positional call `filter(self.category)` raised a TypeError.
        commits = self.commits.filter(category=self.category)
        i = 0
        while i < len(commits):
            cur_commit = commits[i]
            next_commit = commits[i + 1] if i + 1 < len(commits) else None
            jump_to = self.handle_commit(cur_commit, i + 1, len(commits), commits)
            # Increment counter
            if jump_to is not None:
                i = jump_to
            elif next_commit is None:
                i = len(commits)
            else:
                i = commits.index(next_commit)

    def features(self, commit):
        """Return cached metadata (Features) for the given commit."""
        return self.cache.get(commit.commit_hash)

    def potential_reverts_of(self, commit, commits):
        """Find later commits whose title contains this commit's cleaned title
        (likely reverts or re-lands). Returns {display_index: commit}."""
        if 'Updating submodules' in commit.title:
            return {}
        index = commits.index(commit)
        # Strip the trailing PR suffix, e.g. " (#35011)".
        # NOTE(review): a fixed [:-10] slice assumes a 5-digit PR number;
        # consider a regex if PR numbers grow.
        cleaned_title = commit.title[:-10]
        # NB: the index + 2 is sketch
        return {(index + 2 + delta): cand
                for delta, cand in enumerate(commits[index + 1:])
                if cleaned_title in cand.title and
                commit.commit_hash != cand.commit_hash}

    def handle_commit(self, commit, i, total, commits):
        """Display one commit and prompt for its category and topic.

        Returns an index to jump to, or None to advance to the next commit.
        An empty answer at either prompt keeps the commit's current value.
        """
        potential_reverts = self.potential_reverts_of(commit, commits)
        if potential_reverts:
            potential_reverts = f'!!!POTENTIAL REVERTS!!!: {potential_reverts}'
        else:
            potential_reverts = ""
        features = self.features(commit)
        breaking_alarm = ""
        if 'topic: bc-breaking' in features.labels:
            breaking_alarm += "!!!!!! BC BREAKING !!!!!!"
        if 'module: deprecation' in features.labels:
            breaking_alarm += "!!!!!! DEPRECATION !!!!!!"
        os.system('clear')
        view = textwrap.dedent(f'''\
            [{i}/{total}]
            ================================================================================
            {features.title}
            {features.body}
            Files changed: {features.files_changed}
            Labels: {features.labels}
            {potential_reverts} {breaking_alarm}
            Current category: {commit.category}
            Select from: {', '.join(categories)}
        ''')
        print(view)
        cat_choice = None
        while cat_choice is None:
            value = input('category> ').strip()
            if len(value) == 0:
                # Empty input keeps the current category
                cat_choice = commit.category
                continue
            # Prefix-match against the known categories; require uniqueness
            choices = [cat for cat in categories
                       if cat.startswith(value)]
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            cat_choice = choices[0]
        print(f'\nSelected: {cat_choice}')
        print(f'\nCurrent topic: {commit.topic}')
        print(f'''Select from: {', '.join(topics)}''')
        topic_choice = None
        while topic_choice is None:
            value = input('topic> ').strip()
            if len(value) == 0:
                topic_choice = commit.topic
                continue
            choices = [cat for cat in topics
                       if cat.startswith(value)]
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            topic_choice = choices[0]
        print(f'\nSelected: {topic_choice}')
        self.update_commit(commit, cat_choice, topic_choice)
        return None

    def update_commit(self, commit, category, topic):
        """Record the chosen pair on the commit and persist the whole list."""
        assert category in categories
        assert topic in topics
        commit.category = category
        commit.topic = topic
        self.commits.write_to_disk()
def main():
    """Entry point: parse CLI flags and launch the interactive categorizer."""
    parser = argparse.ArgumentParser(description='Tool to help categorize commits')
    parser.add_argument('--category', type=str, default='Uncategorized',
                        help='Which category to filter by. "Uncategorized", None, or a category name')
    parser.add_argument('--file', help='The location of the commits CSV',
                        default='results/commitlist.csv')
    args = parser.parse_args()
    Categorizer(args.file, args.category).categorize()


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,181 @@
import argparse
from common import run, topics
from collections import namedtuple, defaultdict
import os
import csv
import pprint
from common import CommitDataCache
import re
"""
Example Usages
Create a new commitlist for consumption by categorize.py.
Said commitlist contains commits between v1.5.0 and f5bc91f851.
python commitlist.py --create_new tags/v1.5.0 f5bc91f851
Update the existing commitlist to commit bfcb687b9c.
python commitlist.py --update_to bfcb687b9c
"""
class Commit:
    """A single commitlist row: hash, release-note category, topic, title."""

    def __init__(self, commit_hash, category, topic, title):
        self.commit_hash = commit_hash
        self.category = category
        self.topic = topic
        self.title = title

    def __eq__(self, other):
        # Only Commit instances can compare equal; all four fields must match.
        if not isinstance(other, self.__class__):
            return False
        return (self.commit_hash, self.category, self.topic, self.title) == \
            (other.commit_hash, other.category, other.topic, other.title)

    def __repr__(self):
        return f'Commit({self.commit_hash}, {self.category}, {self.topic}, {self.title})'
class CommitList:
    """An ordered list of Commit rows, backed by a CSV file on disk."""

    # NB: Private ctor. Use `from_existing` or `create_new`.
    def __init__(self, path, commits):
        self.path = path
        self.commits = commits

    @staticmethod
    def from_existing(path):
        """Load a commitlist previously written to *path*."""
        commits = CommitList.read_from_disk(path)
        return CommitList(path, commits)

    @staticmethod
    def create_new(path, base_version, new_version):
        """Build a commitlist from the git range base..new.

        Raises ValueError if *path* already exists (refuses to clobber
        an existing, possibly hand-categorized, list).
        """
        if os.path.exists(path):
            raise ValueError('Attempted to create a new commitlist but one exists already!')
        commits = CommitList.get_commits_between(base_version, new_version)
        return CommitList(path, commits)

    @staticmethod
    def read_from_disk(path):
        """Parse the CSV at *path* into Commit objects (extra columns ignored)."""
        with open(path) as csvfile:
            rows = list(csv.reader(csvfile))
        assert all(len(row) >= 4 for row in rows)
        return [Commit(*row[:4]) for row in rows]

    def write_to_disk(self):
        """Write all commits back to self.path as CSV, one row per commit."""
        with open(self.path, 'w') as csvfile:
            writer = csv.writer(csvfile)
            for commit in self.commits:
                writer.writerow([commit.commit_hash, commit.category, commit.topic, commit.title])

    @staticmethod
    def get_commits_between(base_version, new_version):
        """Return uncategorized Commit objects for merge-base(base, new)..new,
        oldest first. Shells out to git; asserts on git failure."""
        cmd = f'git merge-base {base_version} {new_version}'
        rc, merge_base, _ = run(cmd)
        assert rc == 0
        # Returns a list of something like
        # b33e38ec47 Allow a higher-precision step type for Vec256::arange (#34555)
        cmd = f'git log --reverse --oneline {merge_base}..{new_version}'
        rc, commits, _ = run(cmd)
        assert rc == 0
        log_lines = commits.split('\n')
        hashes, titles = zip(*[log_line.split(' ', 1) for log_line in log_lines])
        return [Commit(commit_hash, 'Uncategorized', 'Untopiced', title)
                for commit_hash, title in zip(hashes, titles)]

    def filter(self, *, category=None, topic=None):
        """Return the commits matching the given category and/or topic.

        Both filters are keyword-only; a None filter matches everything.
        """
        commits = self.commits
        if category is not None:
            commits = [commit for commit in commits if commit.category == category]
        if topic is not None:
            commits = [commit for commit in commits if commit.topic == topic]
        return commits

    def update_to(self, new_version):
        """Append the commits between our last known commit and *new_version*."""
        last_hash = self.commits[-1].commit_hash
        self.commits += CommitList.get_commits_between(last_hash, new_version)

    def stat(self):
        """Return nested counts: {category: {topic: count}}."""
        counts = defaultdict(lambda: defaultdict(int))
        for commit in self.commits:
            counts[commit.category][commit.topic] += 1
        return counts
def create_new(path, base_version, new_version):
    """Create a fresh commitlist CSV at *path* covering base..new."""
    commit_list = CommitList.create_new(path, base_version, new_version)
    commit_list.write_to_disk()
def update_existing(path, new_version):
    """Extend the commitlist at *path* up to *new_version* and save it."""
    commit_list = CommitList.from_existing(path)
    commit_list.update_to(new_version)
    commit_list.write_to_disk()
def to_markdown(commit_list, category):
    """Render all commits in *category*, grouped by topic, as markdown lines.

    Each commit becomes a bullet linking to its PR when the PR number is
    known, otherwise showing the raw commit hash.
    """
    def cleanup_title(commit):
        # Strip the trailing " (#12345)" PR-number suffix, if present.
        # NB: raw string -- '\(' in a plain string is an invalid escape.
        match = re.match(r'(.*) \(#\d+\)', commit.title)
        if match is None:
            return commit.title
        return match.group(1)

    cdc = CommitDataCache()
    lines = [f'\n## {category}\n']
    for topic in topics:
        lines.append(f'### {topic}\n')
        commits = commit_list.filter(category=category, topic=topic)
        for commit in commits:
            result = cleanup_title(commit)
            maybe_pr_number = cdc.get(commit.commit_hash).pr_number
            if maybe_pr_number is None:
                result = f'- {result} ({commit.commit_hash})\n'
            else:
                result = f'- {result} ([#{maybe_pr_number}](https://github.com/pytorch/pytorch/pull/{maybe_pr_number}))\n'
            lines.append(result)
    return lines
def main():
    """Entry point: dispatch to create/update/stat/export based on CLI flags."""
    parser = argparse.ArgumentParser(description='Tool to create a commit list')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--create_new', nargs=2)
    group.add_argument('--update_to')
    group.add_argument('--stat', action='store_true')
    group.add_argument('--export_markdown', action='store_true')
    parser.add_argument('--path', default='results/commitlist.csv')
    args = parser.parse_args()

    # NB: compare against None rather than truthiness so that an
    # empty-string value (e.g. `--update_to ''`) is still dispatched
    # instead of silently falling through.
    if args.create_new is not None:
        create_new(args.path, args.create_new[0], args.create_new[1])
        return
    if args.update_to is not None:
        update_existing(args.path, args.update_to)
        return
    if args.stat:
        commits = CommitList.from_existing(args.path)
        stats = commits.stat()
        pprint.pprint(stats)
        return
    if args.export_markdown:
        commits = CommitList.from_existing(args.path)
        categories = list(commits.stat().keys())
        lines = []
        for category in categories:
            lines += to_markdown(commits, category)
        filename = 'results/result.md'  # plain string; was a placeholder-free f-string
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as f:
            f.writelines(lines)
        return
    # Unreachable: the required mutually-exclusive group guarantees one branch.
    raise AssertionError('no command specified')


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,196 @@
from collections import namedtuple
from os.path import expanduser
import locale
import subprocess
import re
import requests
import os
import json
# Release-note categories a commit may be assigned to.
# 'Uncategorized' is the initial placeholder assigned by commitlist.py;
# 'skip' marks commits to omit from the notes entirely.
categories = [
    'Uncategorized',
    'distributed',
    'mobile',
    'jit',
    'visualization',
    'onnx',
    'caffe2',
    'quantization',
    'amd',
    'benchmark',
    'profiler',
    'dispatcher',
    'releng',
    'fx',
    'code_coverage',
    'vulkan',
    'skip',
    'cpp_frontend',
    'python_frontend',
    'complex_frontend',
    'vmap_frontend',
    'autograd_frontend',
    'build_frontend',
    'memory_format_frontend',
    'foreach_frontend',
]

# Release-note topics (sections) within each category.
# 'Untopiced' is the initial placeholder value; it is stored as data in the
# commitlist CSV, so do not rename it without migrating existing files.
topics = [
    'bc_breaking',
    'deprecations',
    'new_features',
    'improvements',
    'bug_fixes',
    'performance',
    'docs',
    'devs',
    'Untopiced',
]
# Metadata gathered for a single commit / pull request.
Features = namedtuple('Features', [
    'title',
    'body',
    'pr_number',
    'files_changed',
    'labels',
])


def dict_to_features(dct):
    """Build a Features record from a plain dict (extra keys are ignored)."""
    return Features(**{field: dct[field] for field in Features._fields})


def features_to_dict(features):
    """Convert a Features record into a plain dict."""
    return dict(features._asdict())
def run(command):
    """Run *command* through the shell; return (return-code, stdout, stderr).

    stdout/stderr are decoded with the locale's preferred encoding and
    stripped of surrounding whitespace.
    """
    # subprocess.run replaces the manual Popen/communicate dance.
    result = subprocess.run(command, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    enc = locale.getpreferredencoding()
    output = result.stdout.decode(enc)
    err = result.stderr.decode(enc)
    return result.returncode, output.strip(), err.strip()
def commit_body(commit_hash):
    """Return the commit message body for *commit_hash*, or None on git failure."""
    ret, out, _ = run(f'git log -n 1 --pretty=format:%b {commit_hash}')
    return out if ret == 0 else None
def commit_title(commit_hash):
    """Return the one-line commit title for *commit_hash*, or None on git failure."""
    ret, out, _ = run(f'git log -n 1 --pretty=format:%s {commit_hash}')
    return out if ret == 0 else None
def commit_files_changed(commit_hash):
    """Return the list of file paths touched by *commit_hash*, or None on git failure."""
    ret, out, _ = run(f'git diff-tree --no-commit-id --name-only -r {commit_hash}')
    return out.split('\n') if ret == 0 else None
def parse_pr_number(body, commit_hash, title):
    """Extract the PR number from a commit message body.

    Returns the first PR number (as a string) from the
    "Pull Request resolved" line, or None if no such line exists.
    """
    regex = r'Pull Request resolved: https://github.com/pytorch/pytorch/pull/([0-9]+)'
    matches = re.findall(regex, body)
    if len(matches) == 0:
        # Reverts and submodule updates routinely lack a PR link; stay quiet.
        if 'revert' not in title.lower() and 'updating submodules' not in title.lower():
            print(f'[{commit_hash}: {title}] Could not parse PR number, ignoring PR')
        return None
    if len(matches) > 1:
        print(f'[{commit_hash}: {title}] Got two PR numbers, using the first one')
    # NB: single return for both the one-match and many-match cases
    return matches[0]
def get_ghstack_token():
    """Read the GitHub OAuth token from the user's ~/.ghstackrc.

    Raises RuntimeError if no `github_oauth = ...` line is found.
    """
    pattern = 'github_oauth = (.*)'
    # Open read-only: we never write the config back ('r+' would also fail
    # on a read-only config file).
    with open(expanduser('~/.ghstackrc'), 'r') as f:
        config = f.read()
    matches = re.findall(pattern, config)
    if len(matches) == 0:
        raise RuntimeError("Can't find a github oauth token")
    return matches[0]
# NB: module-level side effect -- importing this module reads ~/.ghstackrc
# and raises RuntimeError if no GitHub OAuth token is configured there.
token = get_ghstack_token()
headers = {"Authorization": f"token {token}"}
def run_query(query):
    """POST a GraphQL *query* to the GitHub API and return the parsed JSON.

    Raises on any non-200 HTTP status.
    """
    response = requests.post('https://api.github.com/graphql', json={'query': query}, headers=headers)
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))
    return response.json()
def gh_labels(pr_number):
    """Return the label names attached to the given pytorch/pytorch PR.

    Issues a GitHub GraphQL query; only the first 10 labels are fetched.
    """
    query = f"""
    {{
      repository(owner: "pytorch", name: "pytorch") {{
        pullRequest(number: {pr_number}) {{
          labels(first: 10) {{
            edges {{
              node {{
                name
              }}
            }}
          }}
        }}
      }}
    }}
    """
    # NB: `query` is rebound here to the parsed JSON response.
    query = run_query(query)
    edges = query['data']['repository']['pullRequest']['labels']['edges']
    return [edge['node']['name'] for edge in edges]
def get_features(commit_hash, return_dict=False):
    """Gather title, body, files changed, PR number and labels for a commit.

    Returns a Features record, or a plain dict when return_dict is True.
    """
    title = commit_title(commit_hash)
    body = commit_body(commit_hash)
    files_changed = commit_files_changed(commit_hash)
    pr_number = parse_pr_number(body, commit_hash, title)
    # Labels come from GitHub and require a PR number; default to none.
    labels = gh_labels(pr_number) if pr_number is not None else []
    result = Features(title, body, pr_number, files_changed, labels)
    return features_to_dict(result) if return_dict else result
class CommitDataCache:
    """Disk-backed cache mapping commit hash -> Features.

    Persists to JSON at *path* so expensive git/GitHub lookups happen at
    most once per commit across runs.
    """

    def __init__(self, path='results/data.json'):
        self.path = path
        self.data = {}
        if os.path.exists(path):
            self.data = self.read_from_disk()

    def get(self, commit):
        # NB: membership test on the dict directly, not `.keys()`
        if commit not in self.data:
            # Fetch and cache the data
            self.data[commit] = get_features(commit)
            self.write_to_disk()
        return self.data[commit]

    def read_from_disk(self):
        """Load the JSON cache and rehydrate values into Features records."""
        with open(self.path, 'r') as f:
            data = json.load(f)
            data = {commit: dict_to_features(dct)
                    for commit, dct in data.items()}
        return data

    def write_to_disk(self):
        """Serialize the cache to JSON, creating the directory if needed."""
        data = {commit: features._asdict() for commit, features in self.data.items()}
        # Previously this failed if the results/ directory did not exist yet.
        directory = os.path.dirname(self.path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(self.path, 'w') as f:
            json.dump(data, f)

View File

@ -0,0 +1 @@
PyGithub

View File

@ -0,0 +1,45 @@
import unittest
import tempfile
from commitlist import CommitList
class TestCommitList(unittest.TestCase):
    """Integration tests for CommitList.

    NOTE(review): these shell out to git and pin exact commit counts/hashes,
    so they require a pytorch checkout containing the referenced tags.
    """

    def test_create_new(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            commit_list = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            self.assertEqual(len(commit_list.commits), 2143)
            self.assertEqual(commit_list.commits[0].commit_hash, '7335f079ab')
            self.assertTrue(commit_list.commits[0].title.startswith('[pt][quant] qmul and qadd'))
            self.assertEqual(commit_list.commits[-1].commit_hash, '7543e7e558')
            self.assertTrue(commit_list.commits[-1].title.startswith('Migrate minall, max, maxall'))

    def test_read_write(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            initial = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            initial.write_to_disk()

            expected = CommitList.from_existing(commit_list_path)
            expected.commits[-2].category = 'foobar'
            expected.write_to_disk()

            commit_list = CommitList.from_existing(commit_list_path)
            # NB: don't shadow the `expected` CommitList with the loop variable
            for commit, expected_commit in zip(commit_list.commits, expected.commits):
                self.assertEqual(commit, expected_commit)

    def test_update_to(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            initial = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            initial.commits[-2].category = 'foobar'
            self.assertEqual(len(initial.commits), 2143)
            initial.write_to_disk()

            commit_list = CommitList.from_existing(commit_list_path)
            commit_list.update_to('5702a28b26')
            self.assertEqual(len(commit_list.commits), 2143 + 4)
            self.assertEqual(commit_list.commits[-5], initial.commits[-1])


if __name__ == '__main__':
    unittest.main()