mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Add release note scripts (#47360)
Summary: First commit contains the initial code from Richard's branch. Second commit are the changes that I made during the writing process Third commit is the update to support category/topic pair for each commit Pull Request resolved: https://github.com/pytorch/pytorch/pull/47360 Reviewed By: ejguan Differential Revision: D24741003 Pulled By: albanD fbshipit-source-id: d0fcc6765968dc1732d8a515688d11372c7e653d
This commit is contained in:
parent
a4ba018e57
commit
68954fe897
134
scripts/release_notes/categorize.py
Normal file
134
scripts/release_notes/categorize.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
import json
|
||||
import argparse
|
||||
import os
|
||||
import textwrap
|
||||
from common import dict_to_features, categories, topics, get_features, CommitDataCache
|
||||
from commitlist import CommitList
|
||||
|
||||
class Categorizer:
    """Interactive CLI loop that assigns a release-note category and topic
    to each commit in a CommitList, persisting every choice to disk.

    Args:
        path: location of the commitlist CSV (see CommitList).
        category: which category of commits to walk through;
            'Uncategorized' (the default) visits commits not yet sorted.
    """
    def __init__(self, path, category='Uncategorized'):
        self.cache = CommitDataCache()
        self.commits = CommitList.from_existing(path)

        # Special categories: 'Uncategorized'
        # All other categories must be real
        self.category = category

    def categorize(self):
        """Walk every commit in the selected category, prompting for each."""
        # BUGFIX: CommitList.filter takes keyword-only arguments; the original
        # positional call `filter(self.category)` raised TypeError.
        commits = self.commits.filter(category=self.category)
        i = 0
        while i < len(commits):
            cur_commit = commits[i]
            next_commit = commits[i + 1] if i + 1 < len(commits) else None
            jump_to = self.handle_commit(cur_commit, i + 1, len(commits), commits)

            # Increment counter
            if jump_to is not None:
                i = jump_to
            elif next_commit is None:
                i = len(commits)
            else:
                i = commits.index(next_commit)

    def features(self, commit):
        """Return the (cached) GitHub/git metadata for *commit*."""
        return self.cache.get(commit.commit_hash)

    def potential_reverts_of(self, commit, commits):
        """Return {display_position: candidate} for later commits whose title
        contains this commit's title with the PR suffix stripped — i.e. likely
        reverts. Returns an empty list for submodule-update commits."""
        if 'Updating submodules' in commit.title:
            return []
        index = commits.index(commit)
        # Trim the trailing PR-number suffix, e.g. " (#35011)".  The slice is
        # -10 although the older comment said -8 — an approximate cut; a
        # too-short cleaned title only makes the substring match more lenient.
        cleaned_title = commit.title[:-10]
        # NB: the index + 2 is sketch
        return {(index + 2 + delta): cand for delta, cand in enumerate(commits[index + 1:])
                if cleaned_title in cand.title and
                commit.commit_hash != cand.commit_hash}

    def handle_commit(self, commit, i, total, commits):
        """Render one commit and prompt for its category and topic.

        Returns an index to jump to, or None to advance to the next commit
        (currently this always returns None).
        """
        potential_reverts = self.potential_reverts_of(commit, commits)
        if potential_reverts:
            potential_reverts = f'!!!POTENTIAL REVERTS!!!: {potential_reverts}'
        else:
            potential_reverts = ""

        features = self.features(commit)

        breaking_alarm = ""
        if 'topic: bc-breaking' in features.labels:
            breaking_alarm += "!!!!!! BC BREAKING !!!!!!"

        if 'module: deprecation' in features.labels:
            breaking_alarm += "!!!!!! DEPRECATION !!!!!!"

        os.system('clear')
        view = textwrap.dedent(f'''\
            [{i}/{total}]
            ================================================================================
            {features.title}

            {features.body}

            Files changed: {features.files_changed}

            Labels: {features.labels}

            {potential_reverts} {breaking_alarm}

            Current category: {commit.category}

            Select from: {', '.join(categories)}

        ''')
        print(view)
        cat_choice = None
        while cat_choice is None:
            value = input('category> ').strip()
            if len(value) == 0:
                # Empty input keeps the commit's current category.
                cat_choice = commit.category
                continue
            # Prefix matching: a unique prefix of a category name selects it.
            choices = [cat for cat in categories
                       if cat.startswith(value)]
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            cat_choice = choices[0]
        print(f'\nSelected: {cat_choice}')
        print(f'\nCurrent topic: {commit.topic}')
        print(f'''Select from: {', '.join(topics)}''')
        topic_choice = None
        while topic_choice is None:
            value = input('topic> ').strip()
            if len(value) == 0:
                topic_choice = commit.topic
                continue
            choices = [cat for cat in topics
                       if cat.startswith(value)]
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            topic_choice = choices[0]
        print(f'\nSelected: {topic_choice}')
        self.update_commit(commit, cat_choice, topic_choice)
        return None

    def update_commit(self, commit, category, topic):
        """Record the chosen category/topic on *commit* and rewrite the CSV."""
        assert category in categories
        assert topic in topics
        commit.category = category
        commit.topic = topic
        self.commits.write_to_disk()
|
||||
|
||||
def main():
    """Parse command-line flags and launch the interactive categorizer."""
    parser = argparse.ArgumentParser(description='Tool to help categorize commits')
    parser.add_argument('--category', type=str, default='Uncategorized',
                        help='Which category to filter by. "Uncategorized", None, or a category name')
    parser.add_argument('--file', help='The location of the commits CSV',
                        default='results/commitlist.csv')

    args = parser.parse_args()
    Categorizer(args.file, args.category).categorize()
|
||||
|
||||
|
||||
# Script entry point: run the interactive categorizer.
if __name__ == '__main__':
    main()
|
||||
181
scripts/release_notes/commitlist.py
Normal file
181
scripts/release_notes/commitlist.py
Normal file
|
|
@ -0,0 +1,181 @@
|
|||
import argparse
|
||||
from common import run, topics
|
||||
from collections import namedtuple, defaultdict
|
||||
import os
|
||||
import csv
|
||||
import pprint
|
||||
from common import CommitDataCache
|
||||
import re
|
||||
|
||||
|
||||
"""
|
||||
Example Usages
|
||||
|
||||
Create a new commitlist for consumption by categorize.py.
|
||||
Said commitlist contains commits between v1.5.0 and f5bc91f851.
|
||||
|
||||
python commitlist.py --create_new tags/v1.5.0 f5bc91f851
|
||||
|
||||
Update the existing commitlist to commit bfcb687b9c.
|
||||
|
||||
python commitlist.py --update_to bfcb687b9c
|
||||
|
||||
"""
|
||||
|
||||
class Commit:
    """A single row of the commitlist CSV: hash, category, topic, and title."""

    def __init__(self, commit_hash, category, topic, title):
        self.commit_hash = commit_hash
        self.category = category
        self.topic = topic
        self.title = title

    def _key(self):
        # All four fields, in CSV column order — the basis for equality/repr.
        return (self.commit_hash, self.category, self.topic, self.title)

    def __eq__(self, other):
        # Non-Commit comparands are simply unequal (matches the original,
        # which returned False rather than NotImplemented).
        if not isinstance(other, self.__class__):
            return False
        return self._key() == other._key()

    def __repr__(self):
        commit_hash, category, topic, title = self._key()
        return f'Commit({commit_hash}, {category}, {topic}, {title})'
|
||||
|
||||
class CommitList:
    """An ordered list of Commit rows backed by a CSV file on disk.

    NB: Private ctor. Use `from_existing` or `create_new`.
    """
    def __init__(self, path, commits):
        self.path = path
        self.commits = commits

    @staticmethod
    def from_existing(path):
        """Load a previously-written commit list from *path*."""
        commits = CommitList.read_from_disk(path)
        return CommitList(path, commits)

    @staticmethod
    def create_new(path, base_version, new_version):
        """Build a commit list covering base_version..new_version.

        Raises ValueError if *path* already exists (refuses to clobber).
        """
        if os.path.exists(path):
            raise ValueError('Attempted to create a new commitlist but one exists already!')
        commits = CommitList.get_commits_between(base_version, new_version)
        return CommitList(path, commits)

    @staticmethod
    def read_from_disk(path):
        """Parse the CSV at *path* into Commit objects (extra columns ignored)."""
        # newline='' is required by the csv module so quoted fields containing
        # newlines round-trip correctly.
        with open(path, newline='') as csvfile:
            rows = list(csv.reader(csvfile))
        assert all(len(row) >= 4 for row in rows)
        return [Commit(*row[:4]) for row in rows]

    def write_to_disk(self):
        """Rewrite self.path with one CSV row per commit."""
        # newline='' prevents the csv writer from emitting blank lines between
        # rows on Windows (see csv module docs).
        with open(self.path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            for commit in self.commits:
                writer.writerow([commit.commit_hash, commit.category, commit.topic, commit.title])

    @staticmethod
    def get_commits_between(base_version, new_version):
        """Return Commit stubs ('Uncategorized'/'Untopiced') for every commit
        in merge-base(base, new)..new_version, oldest first."""
        cmd = f'git merge-base {base_version} {new_version}'
        rc, merge_base, _ = run(cmd)
        assert rc == 0

        # Returns a list of something like
        # b33e38ec47 Allow a higher-precision step type for Vec256::arange (#34555)
        cmd = f'git log --reverse --oneline {merge_base}..{new_version}'
        rc, commits, _ = run(cmd)
        assert rc == 0

        # NOTE(review): assumes the range is non-empty — an empty `git log`
        # output would make the unpack below raise. Confirm callers always
        # pass distinct revisions.
        log_lines = commits.split('\n')
        hashes, titles = zip(*[log_line.split(' ', 1) for log_line in log_lines])
        return [Commit(commit_hash, 'Uncategorized', 'Untopiced', title)
                for commit_hash, title in zip(hashes, titles)]

    def filter(self, *, category=None, topic=None):
        """Return commits matching the given category and/or topic.

        Both filters are keyword-only; omitted filters match everything.
        """
        commits = self.commits
        if category is not None:
            commits = [commit for commit in commits if commit.category == category]
        if topic is not None:
            commits = [commit for commit in commits if commit.topic == topic]
        return commits

    def update_to(self, new_version):
        """Append commits from the current tail commit up to *new_version*."""
        last_hash = self.commits[-1].commit_hash
        new_commits = CommitList.get_commits_between(last_hash, new_version)
        self.commits += new_commits

    def stat(self):
        """Return nested counts: {category: {topic: number_of_commits}}."""
        counts = defaultdict(lambda: defaultdict(int))
        for commit in self.commits:
            counts[commit.category][commit.topic] += 1
        return counts
|
||||
|
||||
|
||||
def create_new(path, base_version, new_version):
    """Create a fresh commit list at *path* for base_version..new_version and save it."""
    commit_list = CommitList.create_new(path, base_version, new_version)
    commit_list.write_to_disk()
|
||||
|
||||
def update_existing(path, new_version):
    """Extend the commit list stored at *path* up to *new_version* and save it."""
    commit_list = CommitList.from_existing(path)
    commit_list.update_to(new_version)
    commit_list.write_to_disk()
|
||||
|
||||
def to_markdown(commit_list, category):
    """Render one category of *commit_list* as markdown lines.

    Emits a '## category' header, then a '### topic' section per topic with
    one bullet per commit, linking to the PR when a PR number is known and
    falling back to the raw commit hash otherwise.
    """
    def cleanup_title(commit):
        # Strip a trailing " (#12345)" PR suffix from the title, if present.
        # Raw string: '\(' / '\d' in a plain literal are invalid escape
        # sequences (SyntaxWarning on modern Python).
        match = re.match(r'(.*) \(#\d+\)', commit.title)
        if match is None:
            return commit.title
        return match.group(1)

    cdc = CommitDataCache()
    lines = [f'\n## {category}\n']
    for topic in topics:
        lines.append(f'### {topic}\n')
        commits = commit_list.filter(category=category, topic=topic)
        for commit in commits:
            result = cleanup_title(commit)
            maybe_pr_number = cdc.get(commit.commit_hash).pr_number
            if maybe_pr_number is None:
                result = f'- {result} ({commit.commit_hash})\n'
            else:
                result = f'- {result} ([#{maybe_pr_number}](https://github.com/pytorch/pytorch/pull/{maybe_pr_number}))\n'
            lines.append(result)
    return lines
|
||||
|
||||
def main():
    """CLI entry point: create, update, summarize, or export the commit list.

    Exactly one of --create_new / --update_to / --stat / --export_markdown is
    required (enforced by the mutually-exclusive argument group).
    """
    parser = argparse.ArgumentParser(description='Tool to create a commit list')

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--create_new', nargs=2)
    group.add_argument('--update_to')
    group.add_argument('--stat', action='store_true')
    group.add_argument('--export_markdown', action='store_true')

    parser.add_argument('--path', default='results/commitlist.csv')
    args = parser.parse_args()

    if args.create_new:
        create_new(args.path, args.create_new[0], args.create_new[1])
        return
    if args.update_to:
        update_existing(args.path, args.update_to)
        return
    if args.stat:
        commits = CommitList.from_existing(args.path)
        stats = commits.stat()
        pprint.pprint(stats)
        return
    if args.export_markdown:
        commits = CommitList.from_existing(args.path)
        categories = list(commits.stat().keys())
        lines = []
        for category in categories:
            lines += to_markdown(commits, category)
        # Plain literal: the original used an f-string with no placeholders.
        filename = 'results/result.md'
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as f:
            f.writelines(lines)
        return
    # Unreachable: the required mutually-exclusive group guarantees one branch.
    assert False
|
||||
|
||||
# Script entry point: dispatch on the CLI flags.
if __name__ == '__main__':
    main()
|
||||
196
scripts/release_notes/common.py
Normal file
196
scripts/release_notes/common.py
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
from collections import namedtuple
|
||||
from os.path import expanduser
|
||||
import locale
|
||||
import subprocess
|
||||
import re
|
||||
import requests
|
||||
import os
|
||||
import json
|
||||
|
||||
# Valid release-note categories for a commit.  'Uncategorized' is the initial
# value assigned by CommitList.get_commits_between; Categorizer.update_commit
# asserts membership in this list before saving a choice.
categories = [
    'Uncategorized',
    'distributed',
    'mobile',
    'jit',
    'visualization',
    'onnx',
    'caffe2',
    'quantization',
    'amd',
    'benchmark',
    'profiler',
    'dispatcher',
    'releng',
    'fx',
    'code_coverage',
    'vulkan',
    'skip',
    'cpp_frontend',
    'python_frontend',
    'complex_frontend',
    'vmap_frontend',
    'autograd_frontend',
    'build_frontend',
    'memory_format_frontend',
    'foreach_frontend',
]
|
||||
|
||||
# Valid release-note topics (sections within a category).  'Untopiced' (sic —
# spelling is load-bearing, it is written into the CSV) is the initial value
# assigned by CommitList.get_commits_between.
topics = [
    'bc_breaking',
    'deprecations',
    'new_features',
    'improvements',
    'bug_fixes',
    'performance',
    'docs',
    'devs',
    'Untopiced',
]
|
||||
|
||||
|
||||
# Per-commit metadata bundle used throughout the release-notes tooling:
#   title         - one-line commit subject (git %s)
#   body          - full commit message body (git %b)
#   pr_number     - PR number parsed from the body, or None
#   files_changed - list of paths touched by the commit
#   labels        - GitHub label names on the PR (empty when no PR was found)
Features = namedtuple('Features', [
    'title',
    'body',
    'pr_number',
    'files_changed',
    'labels',
])
|
||||
|
||||
|
||||
def dict_to_features(dct):
    """Re-hydrate a Features namedtuple from its dict form (inverse of
    features_to_dict). Extra keys in *dct* are ignored; missing ones raise
    KeyError, exactly as the original explicit-keyword version did."""
    return Features(**{field: dct[field] for field in Features._fields})
|
||||
|
||||
|
||||
def features_to_dict(features):
    """Return a plain dict copy of a Features namedtuple (for JSON dumping)."""
    return {name: value for name, value in features._asdict().items()}
|
||||
|
||||
|
||||
def run(command):
    """Returns (return-code, stdout, stderr)

    Executes *command* through the shell; stdout/stderr are decoded with the
    locale's preferred encoding and stripped of surrounding whitespace.
    """
    # subprocess.run (the recommended high-level API) replaces the original
    # Popen/communicate pair and cannot leak an unwaited process handle.
    proc = subprocess.run(command, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE, shell=True)
    enc = locale.getpreferredencoding()
    return (proc.returncode,
            proc.stdout.decode(enc).strip(),
            proc.stderr.decode(enc).strip())
|
||||
|
||||
|
||||
def commit_body(commit_hash):
    """Return the commit message body of *commit_hash*, or None if git fails."""
    rc, body, _ = run(f'git log -n 1 --pretty=format:%b {commit_hash}')
    if rc != 0:
        return None
    return body
|
||||
|
||||
|
||||
def commit_title(commit_hash):
    """Return the one-line subject of *commit_hash*, or None if git fails."""
    rc, subject, _ = run(f'git log -n 1 --pretty=format:%s {commit_hash}')
    if rc != 0:
        return None
    return subject
|
||||
|
||||
|
||||
def commit_files_changed(commit_hash):
    """Return the list of paths touched by *commit_hash*, or None if git fails."""
    rc, file_list, _ = run(f'git diff-tree --no-commit-id --name-only -r {commit_hash}')
    if rc != 0:
        return None
    return file_list.split('\n')
|
||||
|
||||
|
||||
def parse_pr_number(body, commit_hash, title):
    """Extract the PR number (as a string) from a commit message *body*.

    Returns None when no 'Pull Request resolved' link is present; in that
    case a diagnostic is printed unless the title indicates a revert or a
    submodule update (those legitimately lack a PR link).  If several links
    are present, the first one wins (with a warning).
    """
    regex = r'Pull Request resolved: https://github.com/pytorch/pytorch/pull/([0-9]+)'
    matches = re.findall(regex, body)
    if len(matches) == 0:
        if 'revert' not in title.lower() and 'updating submodules' not in title.lower():
            print(f'[{commit_hash}: {title}] Could not parse PR number, ignoring PR')
        return None
    if len(matches) > 1:
        print(f'[{commit_hash}: {title}] Got two PR numbers, using the first one')
    # Single return for both the one-match and many-match cases (the original
    # duplicated `return matches[0]` in the len>1 branch and the fallthrough).
    return matches[0]
|
||||
|
||||
|
||||
def get_ghstack_token():
    """Read the GitHub OAuth token from the `github_oauth = ...` line of
    ~/.ghstackrc.

    Raises RuntimeError if no such line exists.
    """
    pattern = 'github_oauth = (.*)'
    # 'r' rather than the original 'r+': the file is only read here, and
    # read-write mode needlessly requires write permission on the rc file.
    with open(expanduser('~/.ghstackrc'), 'r') as f:
        config = f.read()
    matches = re.findall(pattern, config)
    if len(matches) == 0:
        raise RuntimeError("Can't find a github oauth token")
    return matches[0]
|
||||
|
||||
# Module-import side effect: the token is read from ~/.ghstackrc the moment
# common.py is imported, so importing this module fails without that file.
token = get_ghstack_token()
# Shared auth header for every GitHub API request issued by run_query().
headers = {"Authorization": f"token {token}"}
|
||||
|
||||
def run_query(query):
    """POST a GraphQL *query* to the GitHub API and return the decoded JSON.

    Raises a generic Exception on any non-200 response.
    """
    response = requests.post('https://api.github.com/graphql',
                             json={'query': query}, headers=headers)
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))
    return response.json()
|
||||
|
||||
|
||||
def gh_labels(pr_number):
    """Return the label names on pytorch/pytorch PR *pr_number*.

    Only the first 10 labels are fetched (GraphQL `first: 10`).
    """
    query = f"""
    {{
      repository(owner: "pytorch", name: "pytorch") {{
        pullRequest(number: {pr_number}) {{
          labels(first: 10) {{
            edges {{
              node {{
                name
              }}
            }}
          }}
        }}
      }}
    }}
    """
    response = run_query(query)
    label_edges = response['data']['repository']['pullRequest']['labels']['edges']
    return [edge['node']['name'] for edge in label_edges]
|
||||
|
||||
|
||||
def get_features(commit_hash, return_dict=False):
    """Collect title, body, changed files, PR number, and labels for a commit.

    Returns a Features namedtuple, or its dict form when return_dict is True.
    Labels are only fetched when a PR number could be parsed from the body.
    """
    title = commit_title(commit_hash)
    body = commit_body(commit_hash)
    files_changed = commit_files_changed(commit_hash)
    pr_number = parse_pr_number(body, commit_hash, title)
    labels = gh_labels(pr_number) if pr_number is not None else []
    result = Features(title, body, pr_number, files_changed, labels)
    return features_to_dict(result) if return_dict else result
|
||||
|
||||
class CommitDataCache:
    """Disk-backed cache mapping commit hash -> Features.

    The backing store is a JSON file at *path*; it is loaded eagerly when it
    exists, and rewritten in full after every cache miss.
    """
    def __init__(self, path='results/data.json'):
        self.path = path
        self.data = {}
        if os.path.exists(path):
            self.data = self.read_from_disk()

    def get(self, commit):
        """Return Features for *commit*, fetching and persisting on a miss."""
        # Idiom fix: membership test on the dict itself, not `.keys()`.
        if commit not in self.data:
            # Fetch and cache the data
            self.data[commit] = get_features(commit)
            self.write_to_disk()
        return self.data[commit]

    def read_from_disk(self):
        """Load the JSON file and re-hydrate each entry into a Features tuple."""
        with open(self.path, 'r') as f:
            data = json.load(f)
        return {commit: dict_to_features(dct)
                for commit, dct in data.items()}

    def write_to_disk(self):
        """Serialize every cached Features tuple back to the JSON file."""
        data = {commit: features._asdict() for commit, features in self.data.items()}
        with open(self.path, 'w') as f:
            json.dump(data, f)
|
||||
|
||||
1
scripts/release_notes/requirements.txt
Normal file
1
scripts/release_notes/requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
PyGithub
|
||||
45
scripts/release_notes/test_release_notes.py
Normal file
45
scripts/release_notes/test_release_notes.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import unittest
|
||||
import tempfile
|
||||
from commitlist import CommitList
|
||||
|
||||
class TestCommitList(unittest.TestCase):
    """Integration tests for CommitList.

    NOTE(review): these must run inside a pytorch checkout — the pinned
    hashes and commit counts come from the real repository history.
    """

    def test_create_new(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            commit_list = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            self.assertEqual(len(commit_list.commits), 2143)
            self.assertEqual(commit_list.commits[0].commit_hash, '7335f079ab')
            self.assertTrue(commit_list.commits[0].title.startswith('[pt][quant] qmul and qadd'))
            self.assertEqual(commit_list.commits[-1].commit_hash, '7543e7e558')
            self.assertTrue(commit_list.commits[-1].title.startswith('Migrate minall, max, maxall'))

    def test_read_write(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            initial = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            initial.write_to_disk()

            expected = CommitList.from_existing(commit_list_path)
            expected.commits[-2].category = 'foobar'
            expected.write_to_disk()

            commit_list = CommitList.from_existing(commit_list_path)
            # Renamed the loop variable: the original reused the name
            # `expected`, shadowing the CommitList being iterated.
            for commit, expected_commit in zip(commit_list.commits, expected.commits):
                self.assertEqual(commit, expected_commit)

    def test_update_to(self):
        with tempfile.TemporaryDirectory() as tempdir:
            commit_list_path = f'{tempdir}/commitlist.csv'
            initial = CommitList.create_new(commit_list_path, 'v1.5.0', '7543e7e558')
            initial.commits[-2].category = 'foobar'
            self.assertEqual(len(initial.commits), 2143)
            initial.write_to_disk()

            commit_list = CommitList.from_existing(commit_list_path)
            commit_list.update_to('5702a28b26')
            self.assertEqual(len(commit_list.commits), 2143 + 4)
            self.assertEqual(commit_list.commits[-5], initial.commits[-1])
|
||||
|
||||
|
||||
# Script entry point: run the unittest suite.
if __name__ == '__main__':
    unittest.main()
|
||||
Loading…
Reference in New Issue
Block a user