TFBertForSequenceClassification for multi-label classification - TensorFlow 2.0

I am trying to fine-tune a BERT model for multi-label classification.
Here is what my data looks like; I have put the entire code in this Colab notebook.
({'input_ids': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([ 2, 8318, 1379, 7892, 2791, 20630, 1, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)>,
'attention_mask': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>},
<tf.Tensor: shape=(7,), dtype=int64, numpy=array([1, 0, 0, 0, 0, 0, 0])>)
The first element contains the input_ids,
the second element corresponds to the attention_mask,
and the third one holds the labels; here I have 7 labels.
First effort:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = 7
from transformers import TFBertForSequenceClassification, BertConfig
model = TFBertForSequenceClassification.from_pretrained(
MODEL_NAME_OR_PATH,
config=BertConfig.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS, problem_type="multi_label_classification")
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
which ends with the following error:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-48-4408a1f17fbe> in <module>()
10 loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
11 model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
---> 12 history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
13
14
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-48-4408a1f17fbe>", line 12, in <module>
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1156, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 4086, in sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
[[{{node Equal}}]] [Op:__inference_train_function_187978]
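(A note on the likely cause, before the second attempt: the labels are multi-hot vectors of length 7, while SparseCategoricalCrossentropy and its paired sparse accuracy expect a single integer class index per example, which is what the Equal node trips over. A minimal sketch of the usual multi-label pairing, assuming the model emits raw logits:)
# Sketch: multi-hot labels pair with an elementwise sigmoid loss.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss,
              metrics=[tf.keras.metrics.BinaryAccuracy()])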
Second effort, inspired by this piece of code:
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
class TFBertForMultilabelClassification(TFBertPreTrainedModel):
def __init__(self, config, *inputs, **kwargs):
super(TFBertForMultilabelClassification, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, name='bert')
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(config.num_labels,
kernel_initializer='random_normal', #get_initializer(config.initializer_range),
name='classifier',
activation='sigmoid')
def call(self, inputs, **kwargs):
outputs = self.bert(inputs, **kwargs)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
logits = self.classifier(pooled_output)
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
return outputs # logits, (hidden_states), (attentions)
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = len(y_train[0])
model = TFBertForMultilabelClassification.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,epsilon=1e-08, clipnorm=1)
# we do not have one-hot vectors, we can use sparse categorical cross entropy and accuracy
loss = tf.keras.losses.BinaryCrossentropy()
metric = tf.keras.metrics.CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
which returns the following error:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-49-8aa1173bef76> in <module>()
4 metric = tf.keras.metrics.CategoricalAccuracy()
5 model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
----> 6 history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-49-8aa1173bef76>", line 6, in <module>
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1156, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 4086, in sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
[[{{node Equal}}]] [Op:__inference_train_function_214932]
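Note that both tracebacks fail inside sparse_categorical_accuracy: with metrics=['accuracy'], Keras infers which accuracy variant to use, and here it resolves to the sparse one, which cannot broadcast multi-hot (7,) labels against the predictions. Passing explicit metric objects avoids that inference; a sketch, assuming the sigmoid classifier from the second effort:
# Sketch (assumption): explicit metrics instead of the 'accuracy' string.
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.BinaryCrossentropy(),  # outputs already pass through sigmoid
    metrics=[tf.keras.metrics.BinaryAccuracy(),
             tf.keras.metrics.AUC(multi_label=True)])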
I believe that, given the major changes in both TF2 and the (TF-based) Hugging Face transformers library, the older examples no longer work as-is; I hope there is a current way to do this.
UPDATE
Here is the entire code with a dummy dataset; the whole thing is also available on this colab notebook
Load the libraries:
import os
import pandas as pd
import numpy as np
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from transformers import BertTokenizer
import tensorflow as tf
Make dummy data:
x_train = ['هان از وقتی که زفتم مدرسه',
'معاویه برادر شمر',
'وقتی که از پنجره سرشرو میاره بیرون دالی میکنه',
'هر دو سحرند این کجا و آن کجا']
y_train = [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0]]
x_test, x_valid = x_train, x_train
y_test, y_valid = y_train, y_train
Add the configs:
# general config
MAX_LEN = 128
batch_size = 32
TRAIN_BATCH_SIZE = batch_size
VALID_BATCH_SIZE = batch_size
TEST_BATCH_SIZE = batch_size
EPOCHS = 3
EEVERY_EPOCH = 1000
LEARNING_RATE = 2e-5
CLIP = 0.0
Make the data Hugging Face friendly:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
MAX_LEN = 128
def tokenize_sentences(sentences, tokenizer, max_seq_len = 128):
tokenized_sentences = []
for sentence in tqdm(sentences):
tokenized_sentence = tokenizer.encode(
sentence, # Sentence to encode.
add_special_tokens = True, # Add '[CLS]' and '[SEP]'
max_length = max_seq_len, # Truncate all sentences.
)
tokenized_sentences.append(tokenized_sentence)
return tokenized_sentences
def create_attention_masks(tokenized_and_padded_sentences):
attention_masks = []
for sentence in tokenized_and_padded_sentences:
att_mask = [int(token_id > 0) for token_id in sentence]
attention_masks.append(att_mask)
return np.asarray(attention_masks)
train_ids = tokenize_sentences(x_train, tokenizer, max_seq_len = 128)
train_ids = pad_sequences(train_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
train_masks = create_attention_masks(train_ids)
valid_ids = tokenize_sentences(x_valid, tokenizer, max_seq_len = 128)
valid_ids = pad_sequences(valid_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
valid_masks = create_attention_masks(valid_ids)
test_ids = tokenize_sentences(x_test, tokenizer, max_seq_len = 128)
test_ids = pad_sequences(test_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
test_masks = create_attention_masks(test_ids)
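(Aside: recent tokenizer versions can produce the padded ids and attention masks in one call, which would replace the three helpers above; a sketch, assuming a transformers version with the tokenizer __call__ API:)
# Sketch: one-call tokenization with padding and truncation.
enc = tokenizer(x_train, padding="max_length", truncation=True,
                max_length=MAX_LEN, return_tensors="np")
train_ids, train_masks = enc["input_ids"], enc["attention_mask"]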
Create the datasets:
def create_dataset(ids, masks, labels):
def gen():
for i in range(len(ids)):
yield (
{
"input_ids": ids[i],
"attention_mask": masks[i]
},
labels[i],
)
return tf.data.Dataset.from_generator(
gen,
({"input_ids": tf.int32, "attention_mask": tf.int32}, tf.int64),
(
{
"input_ids": tf.TensorShape([None]),
"attention_mask": tf.TensorShape([None])
},
tf.TensorShape([None]),
),
)
train_dataset = create_dataset(train_ids, train_masks, y_train)
valid_dataset = create_dataset(valid_ids, valid_masks, y_valid)
test_dataset = create_dataset(test_ids, test_masks, y_test)
This is how the data looks:
for item in train_dataset.take(1):
print(item)
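One thing worth flagging: these datasets are never batched. That matches the shape mismatch in Approach 2 below (logits shape [128,7] against labels shape [7]): an unbatched (128,)-shaped input_ids tensor gets read as a batch of 128 one-token sequences. A sketch of the presumably missing step:
# Sketch (assumption): batch so elements become (batch, 128) inputs
# and (batch, 7) labels instead of single unbatched examples.
train_dataset = train_dataset.shuffle(100).batch(TRAIN_BATCH_SIZE)
valid_dataset = valid_dataset.batch(VALID_BATCH_SIZE)
test_dataset = test_dataset.batch(TEST_BATCH_SIZE)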
Approach 1
class TFBertForMultilabelClassification(TFBertPreTrainedModel):
def __init__(self, config, *inputs, **kwargs):
super(TFBertForMultilabelClassification, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, name='bert')
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(config.num_labels,
kernel_initializer='random_normal', #get_initializer(config.initializer_range),
name='classifier',
activation='sigmoid')
def call(self, inputs, **kwargs):
outputs = self.bert(inputs, **kwargs)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
logits = self.classifier(pooled_output)
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
return outputs # logits, (hidden_states), (attentions)
NUM_LABELS = len(y_train[0])
model = TFBertForMultilabelClassification.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,epsilon=1e-08, clipnorm=1)
# we do not have one-hot vectors, we can use sparse categorical cross entropy and accuracy
loss = tf.keras.losses.BinaryCrossentropy()
metric = tf.keras.metrics.CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
which fails with the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-36-8aa1173bef76> in <module>()
4 metric = tf.keras.metrics.CategoricalAccuracy()
5 model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
----> 6 history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
AttributeError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1145, in train_step
if list(y_pred.keys())[0] == "loss":
AttributeError: 'tuple' object has no attribute 'keys'
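The trace shows transformers' overridden train_step calling y_pred.keys(), so it expects the model to return a dict-like ModelOutput rather than a plain tuple. A sketch of a call method that would satisfy it, assuming a transformers version that ships TFSequenceClassifierOutput:
from transformers.modeling_tf_outputs import TFSequenceClassifierOutput

# Sketch: return a ModelOutput (dict-like) instead of a plain tuple.
def call(self, inputs, training=False, **kwargs):
    outputs = self.bert(inputs, **kwargs)
    pooled_output = self.dropout(outputs[1], training=training)
    logits = self.classifier(pooled_output)
    return TFSequenceClassifierOutput(logits=logits)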
Approach 2:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = 7
from transformers import TFBertForSequenceClassification, BertConfig
model = TFBertForSequenceClassification.from_pretrained(
MODEL_NAME_OR_PATH,
config=BertConfig.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS, problem_type="multi_label_classification")
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
and the error:
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1151, in train_step
loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1863, in sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/keras/backend.py", line 5203, in sparse_categorical_crossentropy
labels=target, logits=output)
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [128,7] and labels shape [7]
[[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_67923]
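Putting the earlier notes together, a hedged end-to-end sketch (assuming batched datasets as above and raw-logit outputs from TFBertForSequenceClassification):
# Sketch (assumptions flagged above): binary cross-entropy over logits,
# explicit binary accuracy, and batched datasets.
model = TFBertForSequenceClassification.from_pretrained(
    MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy()])
history = model.fit(
    train_dataset.batch(TRAIN_BATCH_SIZE),
    validation_data=valid_dataset.batch(VALID_BATCH_SIZE),
    epochs=EPOCHS)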

Related

ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible

I'm having a problem with the shapes of my input to Keras/TensorFlow.
EDIT: I found that when I specify only 'accuracy' for my metrics, it works fine and I can train my model; however, when I also add precision and recall, it fails with the error below.
My model summary looks like this:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 128, 64) 2251520
_________________________________________________________________
bidirectional (Bidirectional (None, 128, 128) 66048
_________________________________________________________________
time_distributed (TimeDistri (None, 128, 18) 2322
=================================================================
Total params: 2,319,890
Trainable params: 2,319,890
Non-trainable params: 0
I'm doing NER and have padded my sentences to 128 words.
My code is as follows; the dataset is from here:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
physical_devices = tf.config.list_physical_devices()
tf.config.experimental.set_memory_growth(physical_devices[1], enable=True)
df = pd.read_csv("ner_dataset.csv", encoding="latin1")
# fill NaN - propagate non-null values forward
df = df.fillna(method="ffill")
sent_count = len(set(df["Sentence #"].values))
print(sent_count)
words = list(set(df["Word"].values))
words_count = len(words)
print(words_count)
word2idx = {}
# add the padding and unknown token
word2idx["PAD_TOKEN"] = len(word2idx)
word2idx["UNK_TOKEN"] = len(word2idx)
# add the rest
for i in range(0, len(words)):
word2idx[words[i]] = len(word2idx)
# index to word mapping
idx2word = {i: w for w, i in word2idx.items()}
# number of unique tags
tags = list(set(df["Tag"].values))
tags_count = len(tags)
print(tags_count)
tag2idx = {}
tag2idx['PAD_TAG'] = 0 # this is the tag that is assigned to the pad tokens 'PAD_TOKEN'
for i in range(0, len(tags)):
tag2idx[tags[i]] = len(tag2idx)
# index to tag mapping
idx2tag = {i: w for w, i in tag2idx.items()}
def getSentences(dataframe):
sentences = []
groups = dataframe.groupby("Sentence #")
for name, group in groups:
zipped = zip(group["Word"], group["Tag"])
sentences.append(list(zipped))
return sentences
sents = getSentences(df)
len(sents[0]), len(sents[1]) # sentences are of different lengths
max_len = 128
y = [[tag2idx[word[1]] for word in s] for s in sents]
y = pad_sequences(maxlen=max_len, sequences=y, value=tag2idx["PAD_TAG"], padding='post', truncating='post')
x = [[word2idx[word[0]] for word in s] for s in sents]
x = pad_sequences(maxlen=max_len, sequences=x, value=word2idx["PAD_TOKEN"], padding='post', truncating='post')
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
vocab_size = len(word2idx)
vocab_size
batch_size = 32
model = models.Sequential()
embeddinglayer = layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len)
model.add(embeddinglayer)
bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))
model.add(bilstm)
num_tags = len(tag2idx)
timedist = layers.TimeDistributed(layers.Dense(num_tags, activation="softmax"))
model.add(timedist)
model.summary()
METRICS = [
'accuracy',
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall')
]
model.compile(optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=METRICS)
history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
The types after splitting are all NumPy arrays:
type(x_train), type(x_test), type(y_train), type(y_test)
(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
And the shapes are
((38367, 128), (9592, 128), (38367, 128), (9592, 128))
Each training item (sentence) is an array of word indices of length 128. For example, x_train[0] looks like the array below (the y values are similar, same length 128, except they are indices of the tags/labels for each word).
array([25653, 1878, 26510, 12653, 33524, 15752, 30488, 14594, 33943,
3656, 22478, 596, 13235, 10080, 16432, 18190, 20273, 10254,
34463, 15526, 24899, 4359, 30488, 10525, 19165, 30439, 16205,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)
EDIT: Error Below
Epoch 1/25
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-35a0fcfc79ab> in <module>
111
112
--> 113 history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
869 # This is the first call of __call__, so we have to initialize.
870 initializers = []
--> 871 self._initialize(args, kwds, add_initializers_to=initializers)
872 finally:
873 # At this point we know that the initialization is complete (or less
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
724 self._concrete_stateful_fn = (
725 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 726 *args, **kwds))
727
728 def invalid_creator_scope(*unused_args, **unused_kwds):
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2967 args, kwargs = None, None
2968 with self._lock:
-> 2969 graph_function, _ = self._maybe_define_function(args, kwargs)
2970 return graph_function
2971
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3359
3360 self._function_cache.missed.add(call_context_key)
-> 3361 graph_function = self._create_graph_function(args, kwargs)
3362 self._function_cache.primary[cache_key] = graph_function
3363
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3204 arg_names=arg_names,
3205 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3206 capture_by_value=self._capture_by_value),
3207 self._function_attributes,
3208 function_spec=self.function_spec,
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
988 _, original_func = tf_decorator.unwrap(python_func)
989
--> 990 func_outputs = python_func(*func_args, **func_kwargs)
991
992 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
632 xla_context.Exit()
633 else:
--> 634 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
635 return out
636
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function *
return step_function(self, iterator)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step **
outputs = model.train_step(data)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:758 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:408 update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\utils\metrics_utils.py:90 decorated
update_op = update_state_fn(*args, **kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\metrics.py:177 update_state_fn
return ag_update_state(*args, **kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\metrics.py:1291 update_state **
sample_weight=sample_weight)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\utils\metrics_utils.py:354 update_confusion_matrix_variables
y_pred.shape.assert_is_compatible_with(y_true.shape)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible
I think that your x, y arguments to model.fit() are inconsistent: x is a list, y is a numpy array. Try:
history = model.fit(np.array(x_train), np.array(y_train))
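If the arrays are already NumPy (as the shapes above suggest), another reading of the traceback (an assumption on my part): Precision and Recall compare y_pred of shape (None, 128, 18) against integer labels of shape (None, 128), and update_confusion_matrix_variables asserts shape compatibility. One-hot encoding the labels makes the shapes line up; a sketch:
# Sketch (assumption): one-hot labels match the (None, 128, 18) predictions.
y_train_oh = tf.one_hot(y_train, depth=num_tags)
y_test_oh = tf.one_hot(y_test, depth=num_tags)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",  # matches one-hot labels
              metrics=METRICS)
history = model.fit(x_train, y_train_oh, batch_size=batch_size,
                    epochs=25, validation_data=(x_test, y_test_oh), verbose=1)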

Estimator.predict() has Shape Issues?

I can train and evaluate a TensorFlow Estimator model without any problems. When I do prediction, this error arises:
InvalidArgumentError (see above for traceback): output_shape has incorrect number of elements: 68 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]
All of the model functions use the same architecture:
def _train_model_fn(features, labels, mode, params):
features = _network_fn(features, mode, params)
outputs = _get_output(features, params["output_layer"],
params["num_classes"])
predictions = {
"outputs": outputs
}
... # loss initialization and whatnot
def _eval_model_fn(features, labels, mode, params):
features = _network_fn(features, mode, params)
outputs = _get_output(features, params["output_layer"], params["num_classes"])
predictions = {
"outputs": outputs
}
... # loss initialization and whatnot
def _predict_model_fn(features, mode, params):
features = _network_fn(features, mode, params)
outputs = _get_output(features, params["output_layer"], params["num_classes"])
predictions = {
"outputs": outputs
}
...
Here's the predict code:
def predict(params, features, checkpoint_dir):
estimator = tf.estimator.Estimator(model_fn=_predict_model_fn,
params=params,
model_dir=checkpoint_dir)
predictions = estimator.predict(input_fn=_input_fn(features))
for i, p in enumerate(predictions):
print(i, p)
I also checked the shapes printed each time the input passes through a layer during training, and did the same for prediction. They give the same shapes:
Training:
conv2d [1, 358, 358, 16]
max_pool2d [1, 179, 179, 16]
collapse_to_rnn_dims [1, 179, 2864]
birnn [1, 179, 64]
Prediction:
conv2d [1, 358, 358, 16]
max_pool2d [1, 179, 179, 16]
collapse_to_rnn_dims [1, 179, 2864]
birnn [1, 179, 64]
Here are the SparseTensors I passed to sparse_to_dense:
Training:
SparseTensor(indices=Tensor("CTCBeamSearchDecoder:0", shape=(?, 2), dtype=int64), values=Tensor("CTCBeamSearchDecoder:1", shape=(?,), dtype=int64), dense_shape=Tensor("CTCBeamSearchDecoder:2", shape=(2,), dtype=int64))
Evaluation:
SparseTensor(indices=Tensor("CTCBeamSearchDecoder:0", shape=(?, 2), dtype=int64), values=Tensor("CTCBeamSearchDecoder:1", shape=(?,), dtype=int64), dense_shape=Tensor("CTCBeamSearchDecoder:2", shape=(2,), dtype=int64))
Prediction:
SparseTensor(indices=Tensor("CTCBeamSearchDecoder:0", shape=(?, 2), dtype=int64), values=Tensor("CTCBeamSearchDecoder:1", shape=(?,), dtype=int64), dense_shape=Tensor("CTCBeamSearchDecoder:2", shape=(2,), dtype=int64))
These are all pretty much the same.
Any idea why this is happening? Shouldn't _predict_model_fn work, given that it follows the same architecture as the other model_fns?
Here's the full stacktrace:
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_log_step_count_steps': 100, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_service': None, '_save_summary_steps': 100, '_model_dir': 'checkpoint\\model-20180419-150303', '_task_id': 0, '_evaluation_master': '', '_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000091F58B3080>, '_num_ps_replicas': 0, '_master': '', '_save_checkpoints_secs': 600, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_global_id_in_cluster': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from checkpoint\model-20180419-150303\model.ckpt-1
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Process Process-2:
Traceback (most recent call last):
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1361, in _do_call
return fn(*args)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _run_fn
target_list, status, run_metadata)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 516, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: output_shape has incorrect number of elements: 68 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\asus.11\Anaconda3\lib\multiprocessing\process.py", line 249, in _bootstrap
self.run()
File "C:\Users\asus.11\Anaconda3\lib\multiprocessing\process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\train_ocr.py", line 42, in evaluate_model
evaluate(architecture_params, images, labels, checkpoint_dir)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 82, in evaluate
predict(params, features, checkpoint_dir)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 90, in predict
for i, p in enumerate(predictions):
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 492, in predict
preds_evaluated = mon_sess.run(predictions)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 546, in run
run_metadata=run_metadata)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1022, in run
run_metadata=run_metadata)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1113, in run
raise six.reraise(*original_exc_info)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1098, in run
return self._sess.run(*args, **kwargs)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1170, in run
run_metadata=run_metadata)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 950, in run
return self._sess.run(*args, **kwargs)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 905, in run
run_metadata_ptr)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1137, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1355, in _do_run
options, run_metadata)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1374, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: output_shape has incorrect number of elements: 68 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]
Caused by op 'output', defined at:
File "<string>", line 1, in <module>
File "C:\Users\asus.11\Anaconda3\lib\multiprocessing\spawn.py", line 106, in spawn_main
exitcode = _main(fd)
File "C:\Users\asus.11\Anaconda3\lib\multiprocessing\spawn.py", line 119, in _main
return self._bootstrap()
File "C:\Users\asus.11\Anaconda3\lib\multiprocessing\process.py", line 249, in _bootstrap
self.run()
File "C:\Users\asus.11\Anaconda3\lib\multiprocessing\process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\train_ocr.py", line 42, in evaluate_model
evaluate(architecture_params, images, labels, checkpoint_dir)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 82, in evaluate
predict(params, features, checkpoint_dir)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 90, in predict
for i, p in enumerate(predictions):
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 479, in predict
features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 793, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 217, in _predict_model_fn
outputs = _get_output(features, params["output_layer"], params["num_classes"])
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 134, in _get_output
return _sparse_to_dense(decoded, name="output")
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 38, in _sparse_to_dense
name=name)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\ops\sparse_ops.py", line 791, in sparse_to_dense
name=name)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_sparse_ops.py", line 2401, in _sparse_to_dense
name=name)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3271, in create_op
op_def=op_def)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1650, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): output_shape has incorrect number of elements: 68 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]
Update
I tried using the same architecture in a different training run and encountered a different shape error:
InvalidArgumentError (see above for traceback): output_shape has incorrect number of elements: 69 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]
Apparently, the problem seems to lie in the ctc_beam_search_decoder. Switching to ctc_greedy_decoder doesn't help either. Why is it doing this?
More updates
I have uploaded the reproducible example: https://github.com/selcouthlyBlue/ShapeErrorReproduce
I have finally figured out the error. The problem actually lies in the way I used sparse_to_dense: the argument order I gave was wrong, with the values coming before the shape:
return tf.sparse_to_dense(tf.to_int32(decoded[0].indices),
tf.to_int32(decoded[0].values),
tf.to_int32(decoded[0].dense_shape),
name="output")
The order should be (the shape comes first, before the values):
return tf.sparse_to_dense(tf.to_int32(decoded[0].indices),
tf.to_int32(decoded[0].dense_shape),
tf.to_int32(decoded[0].values),
name="output")

tensorflow tf.cond does not execute true_fn or false_fn for tf.reduce_mean

I am trying to condition the output of the loss function tf.reduce_mean so as to avoid NaN errors. My code is:
limit = []
for i in xrange(12):
    limit.append(10000.0)
limit = tf.constant(limit)
predictions["loss"] = tf.cond(
    tf.reduce_mean((prediction - transformed_values) ** 2, axis=-1) < limit,
    lambda: tf.reduce_mean((prediction - transformed_values) ** 2, axis=-1),
    lambda: tf.reduce_mean((prediction - transformed_values), axis=-1))
However, I get the error
INFO:tensorflow:Using default config.
WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpfnvr6j
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7eaa5bd750>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/tmpfnvr6j', '_save_summary_steps': 100}
shape: pred (12,) true_t (12,) false_t (12,)
Traceback (most recent call last):
File "/home/paul/workspace/workspace/Master/Elec_Price_Prediction/Time_Series.py", line 302, in <module>
obtain_prediction()
File "/home/paul/workspace/workspace/Master/Elec_Price_Prediction/Time_Series.py", line 212, in obtain_prediction
estimator.train(input_fn=train_input_fn, steps=10000)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 711, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 694, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/head.py", line 201, in create_estimator_spec
return self._train_ops(features)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/head.py", line 60, in _train_ops
estimator_lib.ModeKeys.TRAIN)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/state_management.py", line 67, in define_loss
return model.define_loss(features, mode)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/model.py", line 196, in define_loss
return self.get_batch_loss(features=features, mode=mode, state=start_state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/model.py", line 509, in get_batch_loss
features, mode, state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/model.py", line 609, in per_step_batch_loss
outputs=["loss"] + self._train_output_names)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/model.py", line 775, in _state_update_loop
loop_vars=initial_loop_arguments)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/model.py", line 726, in _state_update_step
state=state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/timeseries/python/timeseries/model.py", line 605, in _batch_loss_filtering_step
predictions=predictions)
File "/home/paul/workspace/workspace/Master/Elec_Price_Prediction/Time_Series.py", line 105, in _filtering_step
prediction=tf.cond(pred,lambda:true_t,lambda:false_t)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1844, in cond
p_2, p_1 = switch(pred, pred)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 305, in switch
return gen_control_flow_ops._switch(data, pred, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_control_flow_ops.py", line 562, in _switch
"Switch", data=data, pred=pred, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2958, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
require_shape_fn)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Shape must be rank 0 but is rank 1 for 'head/model/while/state_update_step/cond/Switch' (op: 'Switch') with input shapes: [12], [12].
My question is why this is impossible and how to work around it. I checked that pred, true_fn, and false_fn all have the same shape, and they do.
I prefer tf.where here. tf.cond requires a scalar (rank-0) predicate, which is exactly why the Switch op rejects your rank-1 predicate of shape [12]; tf.where, by contrast, selects between two tensors elementwise.
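A sketch using the names from the question:
# tf.where picks per element, so a rank-1 condition of shape (12,) is fine.
mse = tf.reduce_mean((prediction - transformed_values) ** 2, axis=-1)
mean = tf.reduce_mean(prediction - transformed_values, axis=-1)
predictions["loss"] = tf.where(mse < limit, mse, mean)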

Adding dropout (tf.nn.dropout) results in NaN

Being a beginner with TensorFlow and CNNs, I'm working on emotion recognition to understand them.
The following code works when the dropout layer is removed, but results in NaN when it is added. I've googled around and come across solutions such as reducing the learning rate, etc.; none have worked for me.
The net:
def cnn(self, data):
conv = tf.nn.conv2d(data, self.w_1, [1, 1, 1, 1], padding='SAME')
hidden = tf.nn.relu(conv + self.b_1)
pool = tf.nn.max_pool(hidden, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
conv = tf.nn.conv2d(norm, self.w_2, [1, 1, 1, 1], padding='SAME')
hidden = tf.nn.relu(conv + self.b_2)
pool = tf.nn.max_pool(hidden, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
list_shape = norm.get_shape().as_list()
reshape = tf.reshape(pool, [list_shape[0], list_shape[1] * list_shape[2] * list_shape[3]])
hidden = tf.nn.relu(tf.matmul(reshape, self.w_3) + self.b_3)
hidden = tf.nn.relu(tf.matmul(hidden, self.w_4) + self.b_4)
dropout = tf.nn.dropout(hidden, self.dropout_prob)
return tf.matmul(dropout, self.w_5) + self.b_5
The model:
self.tf_x = tf.placeholder(tf.float32, shape=(self.batch_size, self.image_size, self.image_size, 1))
self.tf_y = tf.placeholder(tf.float32, shape=(self.batch_size, self.num_labels))
self.dropout_prob = tf.placeholder(tf.float32)
self.w_1 = tf.Variable(tf.truncated_normal([5, 5, 1, 64], stddev=0.1))
self.b_1 = tf.Variable(tf.zeros([64]))
self.w_2 = tf.Variable(tf.truncated_normal([9, 9, 64, 128], stddev=0.04))
self.b_2 = tf.Variable(tf.constant(1.0, shape=[128]))
self.w_3 = tf.Variable(tf.truncated_normal([self.image_size//4 * self.image_size//4 * 128, 392], stddev=0.1))
self.b_3 = tf.Variable(tf.constant(1.0, shape=(392,)))
self.w_4 = tf.Variable(tf.truncated_normal([392, 196], stddev=0.1))
self.b_4 = tf.Variable(tf.constant(1.0, shape=(196,)))
self.w_5 = tf.Variable(tf.truncated_normal([196, self.num_labels], stddev=0.04))
self.b_5 = tf.Variable(tf.constant(1.0, shape=[self.num_labels]))
self.logits = self.cnn(self.tf_x)
self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.tf_y, logits=self.logits))
self.optimizer = tf.train.AdamOptimizer(1e-6).minimize(self.loss)
self.train_pred = tf.nn.softmax(self.logits)
tf.summary.histogram('weights_1', self.w_1)
tf.summary.histogram('weights_2', self.w_2)
tf.summary.histogram('weights_3', self.w_3)
tf.summary.histogram('weights_4', self.w_4)
tf.summary.scalar('loss', self.loss)
self.merged = tf.summary.merge_all()
The error:
Traceback (most recent call last):
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 75, in <module>
main()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 64, in main
emotion_cnn.train_test_validate()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\Emotion.py", line 127, in train_test_validate
_,summary, l1, predictions1 = self.session.run([self.optimizer, self.merged, self.loss, self.train_pred], feed_dict=feed_dict1)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 767, in run
run_metadata_ptr)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: weights_1
[[Node: weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](weights_1/tag, Variable/read/_81)]]
Caused by op 'weights_1', defined at:
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 75, in <module>
main()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 64, in main
emotion_cnn.train_test_validate()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\Emotion.py", line 104, in train_test_validate
self.model()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\Emotion.py", line 82, in model
tf.summary.histogram('weights_1', self.w_1)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\summary\summary.py", line 203, in histogram
tag=scope.rstrip('/'), values=values, name=scope)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py", line 139, in _histogram_summary
name=name)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 763, in apply_op
op_def=op_def)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\framework\ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\framework\ops.py", line 1226, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Nan in summary histogram for: weights_1
[[Node: weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](weights_1/tag, Variable/read/_81)]]
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_gpu_executor.cc:637] Deallocating stream with pending work
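A standard way to localize NaNs like this in a TF1 graph is tf.check_numerics / tf.add_check_numerics_ops, which fail the run at the first NaN/Inf with the offending op's name, rather than much later inside a histogram summary. A minimal, self-contained sketch (not the question's code; the tf.log here is just a stand-in that produces NaN for negative inputs):
import tensorflow as tf

# tf.check_numerics makes the run fail at the first NaN/Inf with a readable
# message, instead of inside a downstream summary op.
x = tf.placeholder(tf.float32, shape=[None])
loss = tf.reduce_mean(tf.log(x))                   # NaN for any x < 0
loss = tf.check_numerics(loss, "loss is NaN/Inf")

# tf.add_check_numerics_ops() adds the same check for every float tensor in
# the graph; running it alongside the train op pinpoints the first bad op.
check_op = tf.add_check_numerics_ops()

with tf.Session() as sess:
    print(sess.run(loss, feed_dict={x: [1.0, 2.0]}))      # ~0.347, passes
    # sess.run([loss, check_op], feed_dict={x: [-1.0]})   # InvalidArgumentError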

Tensorflow CNN Batch size error

I built a CNN model for my dataset and feed it data in batches.
With a batch size of one it works, but with any larger batch size (e.g. 128) it throws an error.
This is my code; I have attached all of it.
The data has 1623 columns.
import tensorflow as tf
import numpy as np
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))
def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    l1a = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME'))    # l1a shape=(?, 24, 60, 32)
    l1 = tf.nn.avg_pool(l1a, ksize=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding='SAME')  # l1 shape=(?, 6, 30, 32)
    l1 = tf.nn.dropout(l1, p_keep_conv)
    l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1], padding='SAME'))  # l2a shape=(?, 6, 30, 64)
    l2 = tf.nn.avg_pool(l2a, ksize=[1, 2, 3, 1], strides=[1, 2, 3, 1], padding='SAME')  # l2 shape=(?, 3, 10, 64)
    l2 = tf.nn.dropout(l2, p_keep_conv)
    l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, strides=[1, 1, 1, 1], padding='SAME'))  # l3a shape=(?, 3, 10, 128)
    l3 = tf.nn.max_pool(l3a, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='SAME')  # l3 shape=(?, 3, 5, 128)
    l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]])  # reshape to (?, 1920)
    l3 = tf.nn.dropout(l3, p_keep_conv)
    l4 = tf.nn.relu(tf.matmul(l3, w4))
    l4 = tf.nn.dropout(l4, p_keep_hidden)
    pyx = tf.matmul(l4, w_o)
    return pyx
X = tf.placeholder(tf.float32, [None, 24,60,1])
Y = tf.placeholder(tf.float32, [None, 1])
w = init_weights([4, 4, 1, 32]) # 4x4x1 conv, 32 outputs
w2 = init_weights([2, 3, 32, 64]) # 2x3x32 conv, 64 outputs
w3 = init_weights([1, 2, 64, 128]) # 1x2x64 conv, 128 outputs
w4 = init_weights([128 * 5 * 3, 625]) # FC 128 * 5 * 3 inputs, 625 outputs
w_o = init_weights([625, 1]) # FC 625 inputs, 1 outputs (labels)
#B = tf.Variable(tf.random_normal([625]))
print ("W shape:", w.get_shape())
print ("W2 shape:", w2.get_shape())
print ("W3 shape:", w3.get_shape())
print ("W4 shape:", w4.get_shape())
print ("Wo shape:", w_o.get_shape())
p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)
squared_deltas1 = tf.square(Y - py_x)
squared_deltas = tf.sqrt(squared_deltas1)
cost = tf.reduce_mean(squared_deltas)
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
cost_sum = tf.summary.scalar("cost",cost)
def read_my_file_format(filename_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)
    record_defaults = [[1], [1], [1], ......... [1], [1], [1]]
    # 1623
    record_defaults = [tf.constant([1], dtype=tf.float32),
                       tf.constant([1], dtype=tf.float32),
                       ..................
                       tf.constant([1], dtype=tf.float32),
                       tf.constant([1], dtype=tf.float32),
                       ]
    Col1, Col2, Col3, ......, Col1621, Col1622, Col1623 = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.pack([Col4, Col5, Col6, ....... Col1618, Col1619, Col1620])
    label = tf.pack([Col29])
    return features, label
def input_pipeline(batch_size, num_epochs):
    min_after_dequeue = 10000
    capacity = min_after_dequeue + 3 * batch_size
    '''
    filename_queue = tf.train.string_input_producer(["G:\CNN\1999.csv","G:\CNN\2000.csv","G:\CNN\2001.csv","G:\CNN\2002.csv",
                                                     "G:\CNN\2003.csv","G:\CNN\2004.csv","G:\CNN\2005.csv","G:\CNN\2006.csv",
                                                     "G:\CNN\2007.csv","G:\CNN\2008.csv"], num_epochs=num_epochs, shuffle=True)
    '''
    filename_queue = tf.train.string_input_producer(["test_1000.csv"], num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)
    example_batch, label_batch = tf.train.shuffle_batch([example, label],
                                                        batch_size=batch_size,
                                                        capacity=capacity,
                                                        min_after_dequeue=min_after_dequeue)
    return example_batch, label_batch
examples, labels = input_pipeline(128,1)
print (examples)
examples = tf.reshape(examples, [-1,24,60,1])
print (examples)
#examples = examples.reshape(-1, 24, 60, 1) # 28x28x1 input img
i = 0
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess = tf.Session()
merged = tf.summary.merge_all()
trainwriter =tf.summary.FileWriter("./board/custom", sess.graph)
sess.run(init_op)
print(w.eval(session = sess))
print(w2.eval(session = sess))
print(w3.eval(session = sess))
print(w4.eval(session = sess))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    while not coord.should_stop():
        i = i + 1
        example_batch, label_batch = sess.run([examples, labels])
        sess.run(train_op, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
        if i % 1 == 0:
            summary = sess.run(merged, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1})
            trainwriter.add_summary(summary, i)
            print(cost.eval(feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1}, session=sess))
            '''
            loss = tf.abs(y-y_)
            accuracy = tf.reduce_mean(loss)
            print(cross_entropy.eval(feed_dict={x: example_batch, y_: label_batch}, session = sess))
            '''
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    # When done, ask the threads to stop.
    coord.request_stop()
# Wait for threads to finish.
coord.join(threads)
sess.close()
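One aside on the cost defined above, separate from the batch-size error: tf.sqrt(tf.square(d)) is just |d|, so the model is minimizing mean absolute error, not mean squared error. A sketch of the equivalent direct form (placeholder shapes taken from the code above):
import tensorflow as tf

Y = tf.placeholder(tf.float32, [None, 1])
py_x = tf.placeholder(tf.float32, [None, 1])

# Same quantity as sqrt(square(Y - py_x)), but tf.abs also avoids sqrt's
# infinite gradient at 0, which can itself inject NaNs during training.
cost = tf.reduce_mean(tf.abs(Y - py_x))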
This is the line that sets the batch size:
examples, labels = input_pipeline(128, 1)
If I set the batch size to anything larger than one, I get this error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1020 try:
-> 1021 return fn(*args)
1022 except errors.OpError as e:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1002 feed_dict, fetch_list, target_list,
-> 1003 status, run_metadata)
1004
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
468 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 469 pywrap_tensorflow.TF_GetCode(status))
470 finally:
InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-1-d05205b7cce1> in <module>()
1866 i = i + 1
1867 example_batch, label_batch = sess.run([examples, labels])
-> 1868 sess.run(train_op , feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
1869
1870 if i % 1 == 0:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
764 try:
765 result = self._run(None, fetches, feed_dict, options_ptr,
--> 766 run_metadata_ptr)
767 if run_metadata:
768 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
962 if final_fetches or final_targets:
963 results = self._do_run(handle, final_targets, final_fetches,
--> 964 feed_dict_string, options, run_metadata)
965 else:
966 results = []
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1012 if handle is None:
1013 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1014 target_list, options, run_metadata)
1015 else:
1016 return self._do_call(_prun_fn, self._session, handle, feed_dict,
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1032 except KeyError:
1033 pass
-> 1034 raise type(e)(node_def, op, message)
1035
1036 def _extend_graph(self):
InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
Caused by op 'gradients/sub_grad/BroadcastGradientArgs', defined at:
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
app.launch_new_instance()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-d05205b7cce1>", line 51, in <module>
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 269, in minimize
grad_loss=grad_loss)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 335, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 482, in gradients
in_grads = grad_fn(op, *out_grads)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_grad.py", line 594, in _SubGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 390, in _broadcast_gradient_args
name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
...which was originally created as op 'sub', defined at:
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
[elided 18 identical lines from previous traceback]
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-d05205b7cce1>", line 48, in <module>
squared_deltas1 = tf.square(Y - py_x)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_ops.py", line 814, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 2758, in sub
result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
I want to use batching, but in this case I cannot.
How can I solve this problem?
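Tracing the static shapes suggests where the extra factor of two comes from (hedged, since parts of the code are elided). With strides=[1, 2, 2, 1] the first avg_pool maps 24x60 to 12x30, not the 6x30 the inline comments assume, so the final pooled map is (?, 6, 5, 128), i.e. 3840 values per image rather than 1920. The reshape to [-1, 128 * 5 * 3] then folds the surplus factor of 2 into the batch dimension, turning 128 examples into the 256 rows of py_x seen in the traceback. With batch size 1 the resulting [2, 1] vs [1, 1] shapes happen to broadcast, which is why that case appears to work; [256, 1] vs [128, 1] cannot. A minimal sketch of the doubling:
import tensorflow as tf

# Stand-in for the real pooled map: (?, 6, 5, 128) = 3840 values per image.
l3 = tf.zeros([128, 6, 5, 128])
flat = tf.reshape(l3, [-1, 128 * 5 * 3])   # the reshape used in model()
print(flat.get_shape())                    # (256, 1920): batch silently doubled

# Hypothetical fix: size w4 to the actual flattened length so the -1 keeps
# the batch dimension intact:
# w4 = init_weights([128 * 5 * 6, 625])    # 3840 inputs instead of 1920
Under that reading, resizing w4 (and nothing else) should keep py_x at [batch_size, 1] and let any batch size train.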