Summary: Take user inputs for the introspection visualization: convolution output layer activations, filters based on contained phrases, and the number of samples.

Reviewed By: Mortimerp9

Differential Revision: D4603797

fbshipit-source-id: dc972dcb8ad36e30defab266d710e047b11cff73
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import json


class IntrospectVisData():
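    # Collects per-instance activations plus per-class / per-phrase-filter
    # neuron summaries for the introspection visualization, and writes the
    # result as JSON to the user-specified 'vis_file'.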

    def __init__(self, inputs, model_name, first_outputs, meta_info, lab_arr):
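        # `inputs` carries the user-supplied visualization options read below
        # ('num_instances', 'phrase_filters', 'vis_file'); `lab_arr` is the
        # ordered list of class names; `meta_info` supplies 'output_names' and
        # the optional 'conv_output_names' / 'multi_label' entries; a first
        # set of raw model outputs (`first_outputs`) is used only to size each
        # neuron group.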
        self.inputs = inputs
        self.model_name = model_name
        self.max_num_instances = inputs['num_instances']
        self.count = 0
        self.instances = []
        self.labels = [{"id": i, "name": c} for i, c in enumerate(lab_arr)]
        self.is_multilabel = True if 'multi_label' in meta_info\
            and meta_info['multi_label'] else False

        self.conv_groups = meta_info['conv_output_names']\
            if 'conv_output_names' in meta_info else []
        self.neuron_groups = [{"idx": i, "name": b,
                               "size": len(first_outputs[2][i -
                                           len(meta_info['output_names'])][0])}
                              for i, b in enumerate(meta_info['output_names'])]
        self.selections = [
            {"id": i, "label": "Class " + c, "type": "class"}
            for i, c in enumerate(lab_arr)]
        for i, sel in enumerate(inputs['phrase_filters']):
            self.selections.append({
                "id": i + len(self.labels), "label": sel, "type": "user"})

        # One (num_selections x group_size) zero accumulator per neuron group;
        # wrapped in list() so the summaries can be indexed and updated in
        # place under both Python 2 and Python 3.
        self.summaries = list(map(
            lambda x: np.array([[0. for _ in range(x['size'])]
                                for _ in range(len(self.selections))]),
            self.neuron_groups))

    def getInstanceActivations(self, outputs):
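        # The neuron-group activations are the last len(self.neuron_groups)
        # entries of `outputs`; round them to 3 decimals for the JSON payload.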
        outputs = outputs[(-1) * len(self.neuron_groups):]
        return [[round(_val, 3) for _val in out[0]] for out in outputs]

    def getInstanceConvActivations(self, outputs):
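        # The convolution outputs sit just before the neuron-group outputs at
        # the tail of `outputs`; round to 2 decimals and convert to plain
        # lists so they are JSON-serializable.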
        outputs = outputs[(-1) * (len(self.neuron_groups) +
                          len(self.conv_groups)):(-1) * len(self.neuron_groups)]
        return [np.round(out.astype(np.float64), decimals=2).tolist()
                for out in outputs]

    def updateNeuronSummaries(self, activations, true_idxs, model_specific):
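        # Accumulate this instance's activations into the running summaries:
        # one row per true class label, plus one row per user phrase filter
        # that occurs in the instance text.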
        self.count += 1
        for out_idx in range(len(self.summaries)):
            if self.is_multilabel:
                for true_idx in true_idxs:
                    self.summaries[out_idx][true_idx] += activations[out_idx]
            else:
                self.summaries[out_idx][true_idxs] += activations[out_idx]

            if "text" in model_specific:
                text = model_specific["text"]
                for sel_idx, user_sel in enumerate(self.inputs['phrase_filters']):
                    if user_sel in text:
                        self.summaries[out_idx][sel_idx + len(self.labels)] +=\
                            activations[out_idx]

    def appendInstance(self, instance):
        self.instances.append(instance)

    def processInstance(self, idx, labels, scores, outputs, model_specific):
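        # Per-example hook: collect activations for supported models
        # (currently only 'DocNN') and keep up to `max_num_instances`
        # instances for the visualization.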
        activations = []
        convActivations = None
        if self.model_name in ['DocNN']:
            activations = self.getInstanceActivations(outputs)
            convActivations = self.getInstanceConvActivations(outputs)
            self.updateNeuronSummaries(activations, labels, model_specific)
        if idx < self.max_num_instances:
            if len(activations) == 0:
                activations = self.getInstanceActivations(outputs)
            instance = {
                "id": idx,
                "labels": labels,
                "scores": scores,
                "activations": activations,
                "convout": convActivations,
            }
            for key, val in model_specific.items():
                instance[key] = val
            self.appendInstance(instance)

    def updateArrangements(self):
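        # Rank each instance within its true-class group by how strongly the
        # model separated it from the competing top-scoring class; the rank is
        # stored as the instance's 'position'.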
        if self.model_name in ['DocNN']:
            # sort class scores based on score values
            for instance in self.instances:
                instance['scores'] =\
                    sorted([{"class_id": j, "score": round(_s, 3)}
                            for j, _s in enumerate(instance['scores'])],
                           key=lambda x: x['score'], reverse=True)
            # instance positions based on scores
            inst_sort_vals = [[] for _ in range(len(self.labels))]
            for i, x in enumerate(self.instances):
                sort_val = 1.0
                # if multi_label, get the first label
                label = x['labels'] if type(x['labels']) == int\
                    else x['labels'][0]
                if label == x['scores'][0]['class_id']:
                    # How much score difference from that of rank 2 class
                    sort_val = x['scores'][0]['score'] - x['scores'][1]['score']
                else:
                    # How much score difference from that of rank 1 class
                    sort_val = x['scores'][label]['score'] -\
                        x['scores'][0]['score']
                inst_sort_vals[label].append({"inst_id": i, "val": sort_val})
            for inst_vals in inst_sort_vals:
                for i, r in enumerate(sorted(inst_vals, key=lambda x: x['val'],
                                             reverse=True)):
                    self.instances[r['inst_id']]['position'] = i

    def postprocess(self, filepath):
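        # Average the accumulated summaries over the number of processed
        # instances, compute instance arrangements, and dump everything to the
        # JSON file named in inputs['vis_file'] (the `filepath` argument is
        # not used here).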
        self.neuron_summaries = [np.around((np.swapaxes(_s, 0, 1) /
                                            float(self.count)), 4).tolist()
                                 for _s in self.summaries] if self.count > 0 else None

        self.updateArrangements()

        with open(self.inputs['vis_file'], 'w') as vf:
            json.dump({
                "model_type": self.model_name,
                "neuron_groups": self.neuron_groups,
                "conv_groups": self.conv_groups,
                "selections": self.selections,
                "classes": self.labels,
                "instances": self.instances,
                "neuron_summaries": self.neuron_summaries,
            }, vf)
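
# Example driver (a sketch; `eval_inputs`, `meta_info`, `first_outputs`, the
# class names, and `eval_results` below are hypothetical and not defined in
# this file):
#
#   vis = IntrospectVisData(eval_inputs, 'DocNN', first_outputs, meta_info,
#                           ['negative', 'positive'])
#   for idx, (labels, scores, outputs, extra) in enumerate(eval_results):
#       vis.processInstance(idx, labels, scores, outputs, extra)
#   vis.postprocess(eval_inputs['vis_file'])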