I am a student who wants to reproduce this code. When I ran the FW Training loop in ucla_demo, I got the following error message:
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1355 try:
-> 1356 return fn(*args)
1357 except errors.OpError as e:
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1340 return self._call_tf_sessionrun(
-> 1341 options, feed_dict, fetch_list, target_list, run_metadata)
1342
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1428 self._session, options, feed_dict, fetch_list, target_list,
-> 1429 run_metadata)
1430
InternalError: 2 root error(s) found.
(0) Internal: Blas GEMM launch failed : a.shape=(3200, 60), b.shape=(2048, 60), m=3200, n=2048, k=60
[[{{node gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul}}]]
[[Adam/update/_154]]
(1) Internal: Blas GEMM launch failed : a.shape=(3200, 60), b.shape=(2048, 60), m=3200, n=2048, k=60
[[{{node gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul}}]]
0 successful operations.
0 derived errors ignored.
During handling of the above exception, another exception occurred:
InternalError Traceback (most recent call last)
<ipython-input-21-fb710833f5b5> in <module>
5 for i in range(0,iterations+1):
6 encoder_inputs_xyz,decoder_inputs_xyz,seq_len_enc = mini_batch(dsamp_train, seq_len=50, input_size=60, batch_size=64)
----> 7 _,train_loss = model.step(sess,encoder_inputs_xyz,decoder_inputs_xyz,seq_len_enc, False)
8 if i%100 == 0:
9 loss.append(train_loss)
<ipython-input-12-adcec0763b40> in step(self, session, encoder_inputs_xyz, decoder_inputs_xyz, seq_len, forward_only)
59 self.seq_len: seq_len}
60 output_feed = [self.updates,self.loss]
---> 61 outputs = session.run(output_feed, input_feed)
62 return outputs[0], outputs[1]
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
948 try:
949 result = self._run(None, fetches, feed_dict, options_ptr,
--> 950 run_metadata_ptr)
951 if run_metadata:
952 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1171 if final_fetches or final_targets or (handle and feed_dict_tensor):
1172 results = self._do_run(handle, final_targets, final_fetches,
-> 1173 feed_dict_tensor, options, run_metadata)
1174 else:
1175 results = []
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1348 if handle is None:
1349 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1350 run_metadata)
1351 else:
1352 return self._do_call(_prun_fn, handle, feeds, fetches)
~\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1368 pass
1369 message = error_interpolation.interpolate(message, self._graph)
-> 1370 raise type(e)(node_def, op, message)
1371
1372 def _extend_graph(self):
InternalError: 2 root error(s) found.
(0) Internal: Blas GEMM launch failed : a.shape=(3200, 60), b.shape=(2048, 60), m=3200, n=2048, k=60
[[node gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul (defined at <ipython-input-12-adcec0763b40>:49) ]]
[[Adam/update/_154]]
(1) Internal: Blas GEMM launch failed : a.shape=(3200, 60), b.shape=(2048, 60), m=3200, n=2048, k=60
[[node gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul (defined at <ipython-input-12-adcec0763b40>:49) ]]
0 successful operations.
0 derived errors ignored.
Errors may have originated from an input operation.
Input Source operations connected to node gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul:
prediction/pred_xyz/pred_skel/Tensordot/Reshape_1 (defined at C:\Users\Qisr\AppData\Local\Temp\tmpu6n28dvh.py:23)
Input Source operations connected to node gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul:
prediction/pred_xyz/pred_skel/Tensordot/Reshape_1 (defined at C:\Users\Qisr\AppData\Local\Temp\tmpu6n28dvh.py:23)
Original stack trace for 'gradients/prediction/pred_xyz/pred_skel/Tensordot/MatMul_grad/MatMul':
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\traitlets\config\application.py", line 664, in launch_instance
app.start()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
self.io_loop.start()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
self.asyncio_loop.run_forever()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\asyncio\base_events.py", line 442, in run_forever
self._run_once()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\asyncio\base_events.py", line 1462, in _run_once
handle._run()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\asyncio\events.py", line 145, in _run
self._callback(*self._args)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\ioloop.py", line 688, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\ioloop.py", line 741, in _run_callback
ret = callback()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 814, in inner
self.ctx_run(self.run)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 162, in _fake_ctx_run
return f(*args, **kw)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 775, in run
yielded = self.gen.send(value)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\kernelbase.py", line 381, in dispatch_queue
yield self.process_one()
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 250, in wrapper
runner = Runner(ctx_run, result, future, yielded)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 741, in __init__
self.ctx_run(self.run)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 162, in _fake_ctx_run
return f(*args, **kw)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 775, in run
yielded = self.gen.send(value)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 162, in _fake_ctx_run
return f(*args, **kw)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 162, in _fake_ctx_run
return f(*args, **kw)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\kernelbase.py", line 545, in execute_request
user_expressions, allow_stdin,
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tornado\gen.py", line 162, in _fake_ctx_run
return f(*args, **kw)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\ipkernel.py", line 306, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\IPython\core\interactiveshell.py", line 2867, in run_cell
raw_cell, store_history, silent, shell_futures)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\IPython\core\interactiveshell.py", line 2895, in _run_cell
return runner(coro)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\IPython\core\interactiveshell.py", line 3072, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\IPython\core\interactiveshell.py", line 3263, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\IPython\core\interactiveshell.py", line 3343, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-20-bd2e281ad5d3>", line 3, in <module>
model = Seq2SeqModelFW(max_seq_len, input_size,rnn_size, batch_size, lr,train_keep_prob)
File "<ipython-input-12-adcec0763b40>", line 49, in __init__
gradients, self.pred_vars = zip(*opt.compute_gradients(self.loss))
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\training\optimizer.py", line 512, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 158, in gradients
unconnected_gradients)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\gradients_util.py", line 731, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\gradients_util.py", line 403, in _MaybeCompile
return grad_fn() # Exit early
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\gradients_util.py", line 731, in <lambda>
lambda: grad_fn(op, *out_grads))
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\math_grad.py", line 1387, in _MatMulGrad
grad_a = gen_math_ops.mat_mul(grad, b, transpose_b=True)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 6295, in mat_mul
name=name)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\framework\ops.py", line 3616, in create_op
op_def=op_def)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
self._traceback = tf_stack.extract_stack()
...which was originally created as op 'prediction/pred_xyz/pred_skel/Tensordot/MatMul', defined at:
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
[elided 35 identical lines from previous traceback]
File "<ipython-input-20-bd2e281ad5d3>", line 3, in <module>
model = Seq2SeqModelFW(max_seq_len, input_size,rnn_size, batch_size, lr,train_keep_prob)
File "<ipython-input-12-adcec0763b40>", line 42, in __init__
pred_xyz2xyz = FC(self.dec_outputs_xyz)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\layers\base.py", line 537, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 634, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 146, in wrapper
), args, kwargs)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 450, in converted_call
result = converted_f(*effective_args, **kwargs)
File "C:\Users\Qisr\AppData\Local\Temp\tmpu6n28dvh.py", line 63, in tf__call
outputs = ag__.if_stmt(cond_2, if_true_2, if_false_2, get_state_2, set_state_2)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 441, in if_stmt
return _py_if_stmt(cond, body, orelse)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 528, in _py_if_stmt
return body() if cond else orelse()
File "C:\Users\Qisr\AppData\Local\Temp\tmpu6n28dvh.py", line 23, in if_true_2
outputs = ag__.converted_call('tensordot', standard_ops, ag__.ConversionOptions(recursive=True, force_conversion=False, optional_features=(), internal_convert_user_code=True), (inputs, self.kernel, [[rank - 1], [0]]), None)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 356, in converted_call
return _call_unconverted(f, args, kwargs)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 255, in _call_unconverted
return f(*args)
File "C:\Users\Qisr\anaconda3\envs\tf114\lib\site-packages\tensorflow\python\ops\math_ops.py", line 3799, in tensordot
ab_matmul = matmul(a_reshape, b_reshape)
I have tried to find the cause of this error, but I am still not sure what kind of problem it is. I checked all of the environment settings and they are correct. Also, there is enough GPU memory available.