First, thank you for your implementation of attention.
When I built an LSTM seq2seq chatbot using your implementation, I got an error on this line:
attn_out, attn_states = attn_layer([encoder_out, decoder_lstm])
which throws an error like this:
TypeError: __int__ returned non-int (type NoneType)
My core code is here:
embed_layer = Embedding(input_dim=vocab_size, output_dim=50, trainable=True)
embed_layer.build((None,)) embed_layer.set_weights([embedding_matrix])
LSTM_cell = Bidirectional(LSTM(128, return_sequences=True, return_state=True)) LSTM_decoder = LSTM(256, return_sequences=True, return_state=True)
dense = TimeDistributed(Dense(vocab_size, activation='softmax'))
input_context = Input(shape=(maxLen, ), dtype='int32', name='input_context') #maxLen=20
input_target = Input(shape=(maxLen, ), dtype='int32', name='input_target')
input_context_embed = embed_layer(input_context) input_target_embed = embed_layer(input_target)
encoder_out, forward_h, forward_c, backward_h, backward_c = LSTM_cell(input_context_embed) context_h = Concatenate()([forward_h, backward_h]) context_c = Concatenate()([forward_c, backward_c])
decoder_lstm, _, _ = LSTM_decoder(input_target_embed, initial_state=[context_h, context_c])
print('decoder_lstm.shape: ', decoder_lstm.shape) #(?, ?, 256) print('encoder_out.shape: ', encoder_out.shape) #(?, ?, 256)
# ***********************Start Code Here**********************
''' Attention layer ***** A '''
attn_layer = AttentionLayer(name='attention_layer') attn_out, attn_states = attn_layer([encoder_out, decoder_lstm]) merge = Concatenate(axis=-1, name='concat_layer' )([decoder_lstm, attn_out])
# ***********************End Code Here**********************
output = dense(merge) model.summary() model = Model([input_context, input_target, s0, c0], output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit([context_, final_target_], outs, epochs=2, batch_size=128, validation_split=0.2)
The full error details are below:
`---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\array_ops.py in zeros(shape, dtype, name)
1810 shape = constant_op._tensor_shape_tensor_conversion_function(
-> 1811 tensor_shape.TensorShape(shape))
1812 except (TypeError, ValueError):
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\constant_op.py in _tensor_shape_tensor_conversion_function(s, dtype, name, as_ref)
324 raise ValueError(
--> 325 "Cannot convert a partially known TensorShape to a Tensor: %s" % s)
326 s_list = s.as_list()
ValueError: Cannot convert a partially known TensorShape to a Tensor: (?, 256)
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
in
35 '''
36 attn_layer = AttentionLayer(name='attention_layer')
---> 37 attn_out, attn_states = attn_layer([encoder_out, decoder_lstm])
38 merge = Concatenate(axis=-1,
39 name='concat_layer'
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
552 # In graph mode, failure to build the layer's graph
553 # implies a user-side bug. We don't catch exceptions.
--> 554 outputs = self.call(inputs, *args, **kwargs)
555 else:
556 try:
~\AppData\Roaming\Python\Python36\site-packages\keras\layers\attention.py in call(self, inputs, verbose)
93
94 # We are not using initial states, but need to pass something to K.rnn funciton
---> 95 fake_state_c = K.zeros(shape=(encoder_out_seq.shape[0], encoder_out_seq.shape[-1]))
96 fake_state_e = K.zeros(shape=(encoder_out_seq.shape[0], encoder_out_seq.shape[1]))
97
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\keras\backend.py in zeros(shape, dtype, name)
1066 dtype = floatx()
1067 tf_dtype = dtypes_module.as_dtype(dtype)
-> 1068 v = array_ops.zeros(shape=shape, dtype=tf_dtype, name=name)
1069 if py_all(v.shape.as_list()):
1070 return variable(v, dtype=dtype, name=name)
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\array_ops.py in zeros(shape, dtype, name)
1812 except (TypeError, ValueError):
1813 # Happens when shape is a list with tensor elements
-> 1814 shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)
1815 if not shape._shape_tuple():
1816 shape = reshape(shape, [-1]) # Ensure it's a vector
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
1037 ValueError: If the value
is a tensor not of given dtype
in graph mode.
1038 """
-> 1039 return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
1040
1041
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor_v2(value, dtype, dtype_hint, name)
1095 name=name,
1096 preferred_dtype=dtype_hint,
-> 1097 as_ref=False)
1098
1099
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
1173
1174 if ret is None:
-> 1175 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1176
1177 if ret is NotImplemented:
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
302 as_ref=False):
303 _ = as_ref
--> 304 return constant(v, dtype=dtype, name=name)
305
306
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\constant_op.py in constant(value, dtype, shape, name)
243 """
244 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245 allow_broadcast=True)
246
247
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
281 tensor_util.make_tensor_proto(
282 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283 allow_broadcast=allow_broadcast))
284 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
285 const_tensor = g.create_op(
c:\users\rnn_n\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
465 else:
466 _AssertCompatible(values, dtype)
--> 467 nparray = np.array(values, dtype=np_dt)
468 # check to them.
469 # We need to pass in quantized values as tuples, so don't apply the shape
TypeError: __int__ returned non-int (type NoneType)`
Thank you for your contribution!