pytorch/caffe2/python/layers/build_index.py
Jiyan Yang a8695178aa Adding parameter sharing API to Dper2
Summary:
To achieve this, I modified the blob naming scheme defined in a layer.
Before, the names were scope/fc_w and scope/fc_w_auto_0 (when a second fc
existed within the same scope).
Now they are scope/fc/w and scope/fc_auto_0/w.
That is, we rely on the uniqueness of the scoped layer name to derive
names for blobs.
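
For illustration, here is a toy rendering of the two schemes (the helper
functions are hypothetical; only the resulting names come from the summary
above):

  def blob_name_old(scope, layer_type, param, dedup_idx=None):
      # Old scheme: the parameter blob itself carried the _auto_N suffix.
      name = "{}/{}_{}".format(scope, layer_type, param)
      return name if dedup_idx is None else "{}_auto_{}".format(name, dedup_idx)

  def blob_name_new(scoped_layer_name, param):
      # New scheme: the scoped layer name is already unique, so the
      # parameter simply nests under it.
      return "{}/{}".format(scoped_layer_name, param)

  assert blob_name_old("scope", "fc", "w") == "scope/fc_w"
  assert blob_name_old("scope", "fc", "w", 0) == "scope/fc_w_auto_0"
  assert blob_name_new("scope/fc", "w") == "scope/fc/w"
  assert blob_name_new("scope/fc_auto_0", "w") == "scope/fc_auto_0/w"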

I also overrode the create_param method in LayerModelHelper to let it
use the blob name resolved from the parameter sharing context.
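
A minimal, self-contained sketch of that idea (resolve_param_name and the
sharing map are hypothetical stand-ins, not the actual LayerModelHelper
API):

  def resolve_param_name(scoped_layer_name, param_name, sharing):
      # If the layer's scoped name is aliased to another scope, the
      # parameter blob is created under the alias, so both layers end up
      # sharing a single blob.
      target = sharing.get(scoped_layer_name, scoped_layer_name)
      return "{}/{}".format(target, param_name)

  # Two FC layers mapped onto the same scope share "scope/fc/w":
  sharing = {"scope/fc_auto_0": "scope/fc"}
  assert resolve_param_name("scope/fc", "w", sharing) == "scope/fc/w"
  assert resolve_param_name("scope/fc_auto_0", "w", sharing) == "scope/fc/w"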

There are still some details to finalize, such as making the initializer
more structured.

Reviewed By: kennyhorror

Differential Revision: D5435132

fbshipit-source-id: a0525f5ea0977e255dd5ea765b38913f5951d455
2017-08-03 00:33:18 -07:00


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer


class MapToRange(ModelLayer):
    """
    This layer builds a mapping from raw keys to indices within
    [0, max_index). The mapping is built continuously during training; it is
    frozen during evaluation and prediction, where unseen keys are assigned
    to index 0.
    """

    def __init__(
        self, model,
        input_record,
        max_index,
        name='map_to_range',
        **kwargs
    ):
        super(MapToRange, self).__init__(model, name, input_record, **kwargs)

        assert max_index > 0
        assert isinstance(input_record, schema.Scalar)

        self.max_index = max_index

        # The index handler is a stateful blob, not a learnable weight,
        # so it gets no optimizer.
        self.handler = self.create_param(
            param_name='handler',
            shape=None,
            initializer=('LongIndexCreate', {'max_elements': self.max_index}),
            optimizer=model.NoOptim
        )

        self.output_schema = schema.Struct(
            ('indices', schema.Scalar(
                np.int64, self.get_next_blob_reference("indices")
            )),
            ('handler', schema.Scalar(
                np.void, self.handler
            )),
        )

    def add_train_ops(self, net):
        # IndexGet expects int64 keys; cast the input if needed.
        if self.input_record.field_type().base != np.int64:
            keys = net.Cast(
                self.input_record(),
                net.NextScopedBlob("indices_before_mapping"),
                to=core.DataType.INT64
            )
        else:
            keys = self.input_record()

        # Look up keys in the index, growing it for keys not seen before.
        indices = net.IndexGet(
            [self.handler, keys],
            self.output_schema.indices()
        )

        # Indices are not differentiable; block gradient flow through them.
        net.StopGradient(indices, indices)

    def add_eval_ops(self, net):
        # Freeze the index so evaluation cannot grow the mapping; unseen
        # keys then fall back to index 0.
        net.IndexFreeze(self.handler, self.handler)
        self.add_train_ops(net)

    def add_ops(self, net):
        # Prediction uses the frozen-index behavior.
        self.add_eval_ops(net)
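

# --------------------------------------------------------------------------
# Hypothetical usage sketch (illustration only, not part of the original
# file). It assumes MapToRange is exposed on LayerModelHelper under its
# class name, which is how caffe2's layer registry normally surfaces
# ModelLayer subclasses.
#
#   import numpy as np
#   from caffe2.python import schema
#   from caffe2.python.layer_model_helper import LayerModelHelper
#
#   input_schema = schema.Struct(('ad_id', schema.Scalar(np.int64)))
#   model = LayerModelHelper('sparse_nn', input_schema, schema.Struct())
#
#   # Returns the layer's output schema: 'indices' in [0, max_index) plus
#   # the 'handler' index blob.
#   indexed = model.MapToRange(model.input_feature_schema.ad_id,
#                              max_index=100)
#
#   # Training nets call add_train_ops (the mapping grows as new keys
#   # arrive); eval and prediction nets call add_eval_ops / add_ops, which
#   # freeze the index first so unseen keys map to index 0.
# --------------------------------------------------------------------------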