mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: - Dump instance activations, some statistics about each neuron for model introspection visualization in flow - It is a part of minsuk's summer intern project. See the following link for high-level details: https://www.dropbox.com/s/m89rwpoomqkc9jb/aml-talk-nnvis-minsuk.pptx?dl=0 - Will combine the following two visualizations: https://our.intern.facebook.com/intern/fblearner/c2graphvis/13795371/ and https://our.intern.facebook.com/intern/fblearner/model-introspection-nn/11910201/ Differential Revision: D4303679 fbshipit-source-id: eeac699891b17cea0b29324d584937460a8d7a25
105 lines
4.6 KiB
Python
105 lines
4.6 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
import numpy as np
|
|
import json
|
|
|
|
|
|
class IntrospectVisData(object):
    """Collect per-instance data and per-class neuron sums, then dump to JSON.

    Instances are stored as plain dicts (id, labels, scores, activations plus
    any model-specific keys).  For the 'DocNN' model, activations are also
    accumulated per class so that an average per neuron/class can be written
    out by ``postprocess``.
    """

    def __init__(self, model_name, first_outputs, meta_info, lab_arr):
        """Set up label, neuron-group and summary bookkeeping.

        Args:
            model_name: Name of the model; 'DocNN' enables summaries and
                instance arrangement.
            first_outputs: Sequence whose element at index 2 holds one entry
                per output; the trailing ``len(meta_info['output_names'])``
                entries are used, and the length of each entry's first
                element becomes the neuron-group size.
            meta_info: Mapping with 'output_names' and, optionally, a truthy
                'multi_label' flag.
            lab_arr: Iterable of class names; position is used as class id.
        """
        self.model_name = model_name
        # Only instances with idx below this bound are kept for the dump.
        self.max_num_instances = 1000
        # Number of updateNeuronSummaries() calls; divisor for the averages.
        self.count = 0
        self.instances = []
        self.labels = [{"id": i, "name": c} for i, c in enumerate(lab_arr)]
        self.is_multilabel = bool(
            'multi_label' in meta_info and meta_info['multi_label'])

        output_names = meta_info['output_names']
        num_outputs = len(output_names)
        # i - num_outputs is negative, so groups are read from the tail of
        # first_outputs[2].
        self.neuron_groups = [
            {"idx": i, "name": name,
             "size": len(first_outputs[2][i - num_outputs][0])}
            for i, name in enumerate(output_names)
        ]
        # One (num_classes x group_size) accumulator per neuron group.  This
        # must be a real list: it is indexed and measured with len() later,
        # which a Python 3 ``map`` object does not support.
        self.summaries = [
            np.zeros((len(self.labels), group['size']))
            for group in self.neuron_groups
        ]

    def getInstanceActivations(self, outputs):
        """Return the rounded activations of the trailing neuron groups.

        Takes the last ``len(self.neuron_groups)`` entries of ``outputs`` and,
        for each, rounds every value of its first element to 4 decimals.
        Values are converted to built-in floats so the result stays JSON
        serializable even when the inputs are NumPy scalars.
        """
        outputs = outputs[-len(self.neuron_groups):]
        return [[round(float(_val), 4) for _val in out[0]]
                for out in outputs]

    def updateNeuronSummaries(self, activations, true_idxs):
        """Add one instance's activations to the rows of its true class(es).

        Args:
            activations: One activation vector per neuron group.
            true_idxs: Iterable of class indices when the model is
                multi-label, otherwise a single class index.
        """
        self.count += 1
        for out_idx, summary in enumerate(self.summaries):
            if self.is_multilabel:
                for true_idx in true_idxs:
                    summary[true_idx] += activations[out_idx]
            else:
                summary[true_idxs] += activations[out_idx]

    def appendInstance(self, instance):
        """Store an already-built instance dict."""
        self.instances.append(instance)

    def processInstance(self, idx, labels, scores, outputs, model_specific):
        """Update summaries for one instance and keep it if idx is in range.

        For 'DocNN' the neuron summaries are updated for every instance.  The
        instance itself is stored only when ``idx < self.max_num_instances``;
        entries of ``model_specific`` are copied into the stored dict and may
        override the standard keys.
        """
        activations = []
        if self.model_name in ['DocNN']:
            activations = self.getInstanceActivations(outputs)
            self.updateNeuronSummaries(activations, labels)

        if idx >= self.max_num_instances:
            return

        if not activations:
            # No trailing comma here: the value must be the list itself, the
            # same shape as in the 'DocNN' branch above, not a 1-tuple.
            activations = self.getInstanceActivations(outputs)
        instance = {
            "id": idx,
            "labels": labels,
            "scores": scores,
            "activations": activations,
        }
        instance.update(model_specific)
        self.appendInstance(instance)

    def updateArrangements(self):
        """Rank class scores and assign each instance a per-class position.

        Only applies to 'DocNN'.  Each instance's 'scores' list is replaced by
        a list of ``{"class_id", "score"}`` dicts sorted by descending score.
        Instances are then grouped by their (first) label and ordered by a
        margin value, largest first; the resulting rank is stored under
        'position'.
        """
        if self.model_name not in ['DocNN']:
            return

        # sort class scores based on score values
        for instance in self.instances:
            instance['scores'] = sorted(
                [{"class_id": j, "score": round(float(_s), 3)}
                 for j, _s in enumerate(instance['scores'])],
                key=lambda x: x['score'], reverse=True)

        # instance positions based on scores
        inst_sort_vals = [[] for _ in self.labels]
        for i, x in enumerate(self.instances):
            labels = x['labels']
            # if multi_label, get the first label
            if isinstance(labels, (int, np.integer)):
                label = labels
            else:
                label = labels[0]
            sort_val = self._sortValue(x['scores'], label)
            inst_sort_vals[label].append({"inst_id": i, "val": sort_val})

        for inst_vals in inst_sort_vals:
            ordered = sorted(inst_vals, key=lambda x: x['val'], reverse=True)
            for position, entry in enumerate(ordered):
                self.instances[entry['inst_id']]['position'] = position

    @staticmethod
    def _sortValue(ranked_scores, label):
        """Return the score margin used to order instances within a class.

        ``ranked_scores`` is the descending list of {"class_id", "score"}
        dicts.  If ``label`` is the top-ranked class the margin is its lead
        over the runner-up (or over 0.0 when there is only one class);
        otherwise it is the (non-positive) gap between the label's own score
        and the top score.
        """
        top = ranked_scores[0]
        if label == top['class_id']:
            # How much score difference from that of rank 2 class
            if len(ranked_scores) > 1:
                runner_up = ranked_scores[1]['score']
            else:
                runner_up = 0.0
            return top['score'] - runner_up
        # How much score difference from that of rank 1 class.  The list is
        # ordered by score, so the label's entry must be found by class_id
        # rather than by list position.
        score_by_class = {s['class_id']: s['score'] for s in ranked_scores}
        return score_by_class[label] - top['score']

    def postprocess(self, filepath):
        """Finalize summaries and arrangements and write everything as JSON.

        ``neuron_summaries`` holds, per neuron group, the accumulated sums
        divided by ``self.count`` with axes swapped (outer index: neuron,
        inner index: class) and rounded to 4 decimals; it is ``None`` when no
        summary update was recorded.

        Args:
            filepath: Path of the JSON file to (over)write.
        """
        if self.count > 0:
            self.neuron_summaries = [
                np.around(np.swapaxes(_s, 0, 1) / float(self.count),
                          4).tolist()
                for _s in self.summaries
            ]
        else:
            self.neuron_summaries = None

        self.updateArrangements()

        with open(filepath, 'w') as vf:
            json.dump({
                "model_type": self.model_name,
                "neuron_groups": self.neuron_groups,
                "classes": self.labels,
                "instances": self.instances,
                "neuron_summaries": self.neuron_summaries,
            }, vf)
|