Summary: There is a long-standing scoping problem that was introduced in the original Python wrappers early in H1. Each RNNCell implementation has to manually scope the outputs of every operator it emits; if an implementation forgets to do so, subtle bugs appear when cells are stacked into layers. The approach taken here is the following: the user explicitly specifies the current scope when calling apply_over_sequence (and related functions) whenever the function will be invoked several times (e.g. when stacking layers). This way we rely on Caffe2's native scoping mechanism instead of inventing an extra API people have to use (i.e. passing a scope name as an argument to the RNNCell constructor).
Closes https://github.com/caffe2/caffe2/pull/1681
Differential Revision: D6777536
Pulled By: salexspb
fbshipit-source-id: 73d860b8d4857589e04bdea5a6fcd3080d68427c
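To make the convention concrete, here is a minimal sketch of stacking two layers with explicit scopes. It assumes caffe2.python.rnn_cell.LSTM as the layer builder, that it accepts the scope keyword used below, and a particular return-tuple layout; none of this is taken from the patch itself, only from how the test helper further down invokes its create_rnn callback.

# Hedged sketch: call the same LSTM builder twice, giving each call its own
# scope so parameter and intermediate blob names ("layer_0/...", "layer_1/...")
# cannot collide. rnn_cell.LSTM's signature and return layout are assumptions.
from caffe2.python import model_helper, rnn_cell

model = model_helper.ModelHelper(name="stacked_lstm_sketch")
input_blob, seq_lengths, h0, c0, h1, c1 = model.net.AddExternalInput(
    "input_blob", "seq_lengths", "h0", "c0", "h1", "c1")

# First layer: blobs created by this call are prefixed with "layer_0".
layer0_outputs = rnn_cell.LSTM(
    model, input_blob, seq_lengths, (h0, c0),
    dim_in=8, dim_out=16, scope="layer_0",
)

# Second layer: same builder, different scope, fed by the first layer's
# per-step hidden output (assumed to be the first element of the tuple).
layer1_outputs = rnn_cell.LSTM(
    model, layer0_outputs[0], seq_lengths, (h1, c1),
    dim_in=16, dim_out=16, scope="layer_1",
)

# Graph construction only: the external input blobs above would still need to
# be fed before running the net.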
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import workspace, scope
from caffe2.python.model_helper import ModelHelper

import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def tanh(x):
    # tanh expressed via sigmoid: tanh(x) = 2 * sigmoid(2x) - 1
    return 2.0 * sigmoid(2.0 * x) - 1


def _prepare_rnn(
    t, n, dim_in, create_rnn, outputs_with_grads,
    forget_bias, memory_optim=False,
    forward_only=False, drop_states=False, T=None,
    two_d_initial_states=None, dim_out=None,
    num_states=2,
    **kwargs
):
    if dim_out is None:
        dim_out = [dim_in]
    print("Dims: ", t, n, dim_in, dim_out)

    model = ModelHelper(name='external')

    if two_d_initial_states is None:
        two_d_initial_states = np.random.randint(2)

    def generate_input_state(n, d):
        # Initial states are fed either as (n, d) or (1, n, d) tensors,
        # depending on the two_d_initial_states flag.
        if two_d_initial_states:
            return np.random.randn(n, d).astype(np.float32)
        else:
            return np.random.randn(1, n, d).astype(np.float32)

    # Create and randomly initialize num_states state blobs for every layer.
    states = []
    for layer_id, d in enumerate(dim_out):
        for i in range(num_states):
            state_name = "state_{}/layer_{}".format(i, layer_id)
            states.append(model.net.AddExternalInput(state_name))
            workspace.FeedBlob(
                states[-1], generate_input_state(n, d).astype(np.float32))

    # Due to convoluted RNN scoping logic we make sure that things
    # work from a namescope
    with scope.NameScope("test_name_scope"):
        input_blob, seq_lengths = model.net.AddScopedExternalInputs(
            'input_blob', 'seq_lengths')

        outputs = create_rnn(
            model, input_blob, seq_lengths, states,
            dim_in=dim_in, dim_out=dim_out, scope="external/recurrent",
            outputs_with_grads=outputs_with_grads,
            memory_optimization=memory_optim,
            forget_bias=forget_bias,
            forward_only=forward_only,
            drop_states=drop_states,
            static_rnn_unroll_size=T,
            **kwargs
        )

    workspace.RunNetOnce(model.param_init_net)

    # Random per-example sequence lengths in [1, t].
    workspace.FeedBlob(
        seq_lengths,
        np.random.randint(1, t + 1, size=(n,)).astype(np.int32)
    )
    return outputs, model.net, states + [input_blob]
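For context, a hedged sketch of how a test might drive this helper. The use of rnn_cell.LSTM as the create_rnn callback and the concrete sizes are illustrative assumptions, not part of this file.

# Hypothetical usage of _prepare_rnn (values are illustrative only).
from caffe2.python import rnn_cell, workspace
import numpy as np

T, N, D = 5, 4, 8  # max sequence length, batch size, input dimension

outputs, net, inputs = _prepare_rnn(
    t=T, n=N, dim_in=D,
    create_rnn=rnn_cell.LSTM,   # assumed builder; any create_rnn-style callable works
    outputs_with_grads=[0],
    forget_bias=0.0,
    dim_out=[D],                # a single layer with hidden size D
)

# The helper already fed the state and seq_lengths blobs; feed the sequence
# input it created under its name scope (last element of the returned list),
# then run the net once.
input_blob = inputs[-1]
workspace.FeedBlob(input_blob, np.random.randn(T, N, D).astype(np.float32))
workspace.RunNetOnce(net)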