TFBertForSequenceClassification for multi-label classification - TensorFlow 2.0
I am trying to fine-tune a BERT model for multi-label classification.
Here is what my data looks like; I have put the entire code in this Colab notebook.
({'input_ids': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([ 2, 8318, 1379, 7892, 2791, 20630, 1, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)>,
'attention_mask': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>},
<tf.Tensor: shape=(7,), dtype=int64, numpy=array([1, 0, 0, 0, 0, 0, 0])>)
The first element contains the input_ids,
the second element corresponds to the attention_mask,
and the third one holds the labels - here I have 7 labels.
First effort:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = 7
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertConfig

model = TFBertForSequenceClassification.from_pretrained(
    MODEL_NAME_OR_PATH,
    config=BertConfig.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS, problem_type="multi_label_classification")
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
which ends up with the following error:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-48-4408a1f17fbe> in <module>()
10 loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
11 model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
---> 12 history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
13
14
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-48-4408a1f17fbe>", line 12, in <module>
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1156, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 4086, in sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
[[{{node Equal}}]] [Op:__inference_train_function_187978]
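My suspicion for this first attempt: SparseCategoricalCrossentropy (and the sparse_categorical_accuracy metric that the 'accuracy' string resolves to in the traceback) expects a single integer class index per example, while my labels are multi-hot vectors of length 7, so the shapes can never line up. A compile step that matches multi-hot labels would presumably look more like this (a sketch, assuming the model outputs one raw logit per label):

loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# threshold=0.0 on logits corresponds to 0.5 on probabilities
metrics = [tf.keras.metrics.BinaryAccuracy(threshold=0.0)]
model.compile(optimizer='adam', loss=loss, metrics=metrics)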
Second effort, inspired by this piece of code:
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
class TFBertForMultilabelClassification(TFBertPreTrainedModel):
    def __init__(self, config, *inputs, **kwargs):
        super(TFBertForMultilabelClassification, self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(config.num_labels,
                                                kernel_initializer='random_normal',  # get_initializer(config.initializer_range)
                                                name='classifier',
                                                activation='sigmoid')

    def call(self, inputs, **kwargs):
        outputs = self.bert(inputs, **kwargs)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
        logits = self.classifier(pooled_output)
        outputs = (logits,) + outputs[2:]  # add hidden states and attentions if they are here
        return outputs  # logits, (hidden_states), (attentions)
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = len(y_train[0])
model = TFBertForMultilabelClassification.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=1e-08, clipnorm=1)
# the labels are multi-hot vectors, so binary cross-entropy is used here
loss = tf.keras.losses.BinaryCrossentropy()
metric = tf.keras.metrics.CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
which returns the following error:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-49-8aa1173bef76> in <module>()
4 metric = tf.keras.metrics.CategoricalAccuracy()
5 model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
----> 6 history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-49-8aa1173bef76>", line 6, in <module>
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1156, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 4086, in sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
[[{{node Equal}}]] [Op:__inference_train_function_214932]
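Another thing I notice in both tracebacks: even with BinaryCrossentropy as the loss, the string metric 'accuracy' is being resolved to sparse_categorical_accuracy (keras/metrics.py line 4086 above), which again assumes integer class labels. Passing the metric object explicitly should at least remove that ambiguity, e.g. (a sketch, assuming the sigmoid outputs of the custom head above):

metric = tf.keras.metrics.BinaryAccuracy()  # element-wise accuracy over the 7 labels
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])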
I suspect these failures come down to the major changes both in TF2 and in the (TF-based) Hugging Face transformers since the code I drew on was written; I hope someone can point out what needs updating.
UPDATE
Here is the entire code with a dummy dataset; the whole thing is also available in this Colab notebook.
load the libraries
import os
import pandas as pd
import numpy as np
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from transformers import BertTokenizer
import tensorflow as tf
make a dummy dataset
x_train = ['هان از وقتی که زفتم مدرسه',
'معاویه برادر شمر',
'وقتی که از پنجره سرشرو میاره بیرون دالی میکنه',
'هر دو سحرند این کجا و آن کجا']
y_train = [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0]]
x_test, x_valid = x_train, x_train
y_test, y_valid = y_train, y_train
add the configs
# general config
MAX_LEN = 128
batch_size = 32
TRAIN_BATCH_SIZE = batch_size
VALID_BATCH_SIZE = batch_size
TEST_BATCH_SIZE = batch_size
EPOCHS = 3
EVERY_EPOCH = 1000
LEARNING_RATE = 2e-5
CLIP = 0.0
make the data huggingface friendly
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
MAX_LEN = 128
def tokenize_sentences(sentences, tokenizer, max_seq_len=128):
    tokenized_sentences = []
    for sentence in tqdm(sentences):
        tokenized_sentence = tokenizer.encode(
            sentence,                 # Sentence to encode.
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
            max_length=max_seq_len,   # Truncate all sentences.
        )
        tokenized_sentences.append(tokenized_sentence)
    return tokenized_sentences
def create_attention_masks(tokenized_and_padded_sentences):
    attention_masks = []
    for sentence in tokenized_and_padded_sentences:
        att_mask = [int(token_id > 0) for token_id in sentence]
        attention_masks.append(att_mask)
    return np.asarray(attention_masks)
train_ids = tokenize_sentences(x_train, tokenizer, max_seq_len = 128)
train_ids = pad_sequences(train_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
train_masks = create_attention_masks(train_ids)
valid_ids = tokenize_sentences(x_valid, tokenizer, max_seq_len = 128)
valid_ids = pad_sequences(valid_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
valid_masks = create_attention_masks(valid_ids)
test_ids = tokenize_sentences(x_test, tokenizer, max_seq_len = 128)
test_ids = pad_sequences(test_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
test_masks = create_attention_masks(test_ids)
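As an aside, newer tokenizer versions can produce the padded input_ids and the attention_mask in one call, which would replace tokenize_sentences, pad_sequences, and create_attention_masks above (a sketch, assuming a reasonably recent transformers release):

enc = tokenizer(x_train, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='np')
train_ids, train_masks = enc['input_ids'], enc['attention_mask']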
create the datasets
def create_dataset(ids, masks, labels):
    def gen():
        for i in range(len(ids)):
            yield (
                {
                    "input_ids": ids[i],
                    "attention_mask": masks[i]
                },
                labels[i],
            )

    return tf.data.Dataset.from_generator(
        gen,
        ({"input_ids": tf.int32, "attention_mask": tf.int32}, tf.int64),
        (
            {
                "input_ids": tf.TensorShape([None]),
                "attention_mask": tf.TensorShape([None])
            },
            tf.TensorShape([None]),
        ),
    )
train_dataset = create_dataset(train_ids, train_masks, y_train)
valid_dataset = create_dataset(valid_ids, valid_masks, y_valid)
test_dataset = create_dataset(test_ids, test_masks, y_test)
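One thing that stands out to me here: these datasets are never batched, so model.fit() receives one example at a time, with input_ids of shape (128,) and labels of shape (7,); the model then seems to treat the 128 token positions as the batch dimension, which would explain the logits shape [128,7] and labels shape [7] error further down. Batching would presumably look like:

train_dataset = train_dataset.shuffle(100).batch(TRAIN_BATCH_SIZE)
valid_dataset = valid_dataset.batch(VALID_BATCH_SIZE)
test_dataset = test_dataset.batch(TEST_BATCH_SIZE)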
this is what the data looks like:
for item in train_dataset.take(1):
    print(item)
Approach 1
class TFBertForMultilabelClassification(TFBertPreTrainedModel):
    def __init__(self, config, *inputs, **kwargs):
        super(TFBertForMultilabelClassification, self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(config.num_labels,
                                                kernel_initializer='random_normal',  # get_initializer(config.initializer_range)
                                                name='classifier',
                                                activation='sigmoid')

    def call(self, inputs, **kwargs):
        outputs = self.bert(inputs, **kwargs)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
        logits = self.classifier(pooled_output)
        outputs = (logits,) + outputs[2:]  # add hidden states and attentions if they are here
        return outputs  # logits, (hidden_states), (attentions)
NUM_LABELS = len(y_train[0])
model = TFBertForMultilabelClassification.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=1e-08, clipnorm=1)
# the labels are multi-hot vectors, so binary cross-entropy is used here
loss = tf.keras.losses.BinaryCrossentropy()
metric = tf.keras.metrics.CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
which fails with the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-36-8aa1173bef76> in <module>()
4 metric = tf.keras.metrics.CategoricalAccuracy()
5 model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
----> 6 history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
AttributeError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1145, in train_step
if list(y_pred.keys())[0] == "loss":
AttributeError: 'tuple' object has no attribute 'keys'
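If I read the traceback right, the train_step that transformers installs on its TF models assumes the model's output is dict-like (it calls y_pred.keys()), while my custom class returns a plain tuple. A possible workaround, as a sketch, would be to return a dict from call instead of a tuple:

def call(self, inputs, **kwargs):
    outputs = self.bert(inputs, **kwargs)
    pooled_output = self.dropout(outputs[1], training=kwargs.get('training', False))
    probs = self.classifier(pooled_output)  # sigmoid activations, shape (batch, num_labels)
    return {"logits": probs}  # dict-like output, which transformers' train_step expects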
Approach 2:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = 7
from transformers import TFBertForSequenceClassification, BertConfig
model = TFBertForSequenceClassification.from_pretrained(
    MODEL_NAME_OR_PATH,
    config=BertConfig.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS, problem_type="multi_label_classification")
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
and the error:
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1151, in train_step
loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1863, in sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/keras/backend.py", line 5203, in sparse_categorical_crossentropy
labels=target, logits=output)
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [128,7] and labels shape [7]
[[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_67923]
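This matches my suspicion above: because the dataset is unbatched, a single example's 128 token positions are treated as a batch of 128 (hence logits [128, 7]), and the sparse categorical loss then compares that against the 7-element multi-hot label vector. Batching the dataset and switching to a binary loss, roughly as sketched earlier, would presumably be the first thing to fix:

train_dataset = train_dataset.batch(TRAIN_BATCH_SIZE)
valid_dataset = valid_dataset.batch(VALID_BATCH_SIZE)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)  # labels may need a cast to float
model.compile(optimizer='adam', loss=loss, metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0)])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)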
Related
ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible
I'm having a problem with the shapes of my input to Keras/TensorFlow. EDIT - I found that when I just specify 'accuracy' for my metrics it works fine and I can train my model; however, when I add precision and recall it fails with the error below.

My model summary is like this:

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding (Embedding)        (None, 128, 64)           2251520
_________________________________________________________________
bidirectional (Bidirectional (None, 128, 128)          66048
_________________________________________________________________
time_distributed (TimeDistri (None, 128, 18)           2322
=================================================================
Total params: 2,319,890
Trainable params: 2,319,890
Non-trainable params: 0

I'm doing NER and padded my sentences to 128 words. My code is as follows and the dataset is from here:

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models

physical_devices = tf.config.list_physical_devices()
tf.config.experimental.set_memory_growth(physical_devices[1], enable=True)

df = pd.read_csv("ner_dataset.csv", encoding="latin1")
# fill NaN - propagate non-null values forward
df = df.fillna(method="ffill")

sent_count = len(set(df["Sentence #"].values))
print(sent_count)

words = list(set(df["Word"].values))
words_count = len(words)
print(words_count)

word2idx = {}
# add the padding and unknown tokens
word2idx["PAD_TOKEN"] = len(word2idx)
word2idx["UNK_TOKEN"] = len(word2idx)
# add the rest
for i in range(0, len(words)):
    word2idx[words[i]] = len(word2idx)
# index to word mapping
idx2word = {i: w for w, i in word2idx.items()}

# number of unique tags
tags = list(set(df["Tag"].values))
tags_count = len(tags)
print(tags_count)

tag2idx = {}
tag2idx['PAD_TAG'] = 0  # this is the tag that is assigned to the pad token 'PAD_TOKEN'
for i in range(0, len(tags)):
    tag2idx[tags[i]] = len(tag2idx)
# index to tag mapping
idx2tag = {i: w for w, i in tag2idx.items()}

def getSentences(dataframe):
    sentences = []
    groups = dataframe.groupby("Sentence #")
    for name, group in groups:
        zipped = zip(group["Word"], group["Tag"])
        sentences.append(list(zipped))
    return sentences

sents = getSentences(df)
len(sents[0]), len(sents[1])  # sentences are of different lengths

max_len = 128
y = [[tag2idx[word[1]] for word in s] for s in sents]
y = pad_sequences(maxlen=max_len, sequences=y, value=tag2idx["PAD_TAG"], padding='post', truncating='post')
x = [[word2idx[word[0]] for word in s] for s in sents]
x = pad_sequences(maxlen=max_len, sequences=x, value=word2idx["PAD_TOKEN"], padding='post', truncating='post')

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

vocab_size = len(word2idx)
batch_size = 32

model = models.Sequential()
embeddinglayer = layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len)
model.add(embeddinglayer)
bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))
model.add(bilstm)
num_tags = len(tag2idx)
timedist = layers.TimeDistributed(layers.Dense(num_tags, activation="softmax"))
model.add(timedist)
model.summary()

METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall')
]
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=METRICS)
history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)

The types after splitting are all numpy arrays:

type(x_train), type(x_test), type(y_train), type(y_test)
(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

And the shapes are:

((38367, 128), (9592, 128), (38367, 128), (9592, 128))

Each training item (sentence) is an array of word indices of length 128; for example, x_train[0] looks like the array below (the y values are similar - same length 128 - except they are indices of the tags/labels for each word):

array([25653, 1878, 26510, 12653, 33524, 15752, 30488, 14594, 33943,
       3656, 22478, 596, 13235, 10080, 16432, 18190, 20273, 10254,
       34463, 15526, 24899, 4359, 30488, 10525, 19165, 30439, 16205,
       0, 0, ..., 0], dtype=int32)

EDIT: the tail of the error is below:

Epoch 1/25
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-35a0fcfc79ab> in <module>
--> 113 history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
...
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\utils\metrics_utils.py:354 update_confusion_matrix_variables
    y_pred.shape.assert_is_compatible_with(y_true.shape)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
    raise ValueError("Shapes %s and %s are incompatible" % (self, other))

ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible
I think that your x, y arguments to model.fit() are inconsistent: x is a list, y is a numpy array. Try:

history = model.fit(np.array(x_train), np.array(y_train))
Estimator.predict() has Shape Issues?
I can train and evaluate a TensorFlow Estimator model without any problems. When I do prediction, this error arises:

InvalidArgumentError (see above for traceback): output_shape has incorrect number of elements: 68 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]

All of the model functions use the same architecture:

def _train_model_fn(features, labels, mode, params):
    features = _network_fn(features, mode, params)
    outputs = _get_output(features, params["output_layer"], params["num_classes"])
    predictions = {"outputs": outputs}
    ...  # loss initialization and whatnot

def _eval_model_fn(features, labels, mode, params):
    features = _network_fn(features, mode, params)
    outputs = _get_output(features, params["output_layer"], params["num_classes"])
    predictions = {"outputs": outputs}
    ...  # loss initialization and whatnot

def _predict_model_fn(features, mode, params):
    features = _network_fn(features, mode, params)
    outputs = _get_output(features, params["output_layer"], params["num_classes"])
    predictions = {"outputs": outputs}
    ...

Here's the predict code:

def predict(params, features, checkpoint_dir):
    estimator = tf.estimator.Estimator(model_fn=_predict_model_fn, params=params, model_dir=checkpoint_dir)
    predictions = estimator.predict(input_fn=_input_fn(features))
    for i, p in enumerate(predictions):
        print(i, p)

I also checked the shapes of the input as it passes each layer, both when training and when predicting. They give the same shapes:

Training:
conv2d [1, 358, 358, 16]
max_pool2d [1, 179, 179, 16]
collapse_to_rnn_dims [1, 179, 2864]
birnn [1, 179, 64]

Prediction:
conv2d [1, 358, 358, 16]
max_pool2d [1, 179, 179, 16]
collapse_to_rnn_dims [1, 179, 2864]
birnn [1, 179, 64]

The SparseTensors I passed to sparse_to_dense are also identical for training, evaluation, and prediction:

SparseTensor(indices=Tensor("CTCBeamSearchDecoder:0", shape=(?, 2), dtype=int64), values=Tensor("CTCBeamSearchDecoder:1", shape=(?,), dtype=int64), dense_shape=Tensor("CTCBeamSearchDecoder:2", shape=(2,), dtype=int64))

Any reason why this is happening? Shouldn't the _predict_model_fn work given that it follows the same architecture as the other model_fns? Here's the tail of the full stack trace:

File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 217, in _predict_model_fn
    outputs = _get_output(features, params["output_layer"], params["num_classes"])
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 134, in _get_output
    return _sparse_to_dense(decoded, name="output")
File "C:\Users\asus.11\Documents\Optimized_OCR\trainer\backend\tf\experiment_ops.py", line 38, in _sparse_to_dense
    name=name)
File "C:\Users\asus.11\Anaconda3\lib\site-packages\tensorflow\python\ops\sparse_ops.py", line 791, in sparse_to_dense
    name=name)
...
InvalidArgumentError (see above for traceback): output_shape has incorrect number of elements: 68 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]

Update

I tried using the same architecture in a different training run and encountered a different shape error:

InvalidArgumentError (see above for traceback): output_shape has incorrect number of elements: 69 should be: 2
[[Node: output = SparseToDense[T=DT_INT32, Tindices=DT_INT32, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ToInt32, ToInt32_1, ToInt32_2, bidirectional_rnn/bidirectional_rnn/fw/fw/time)]]

Apparently, the problem seems to lie in ctc_beam_search_decoder. Switching to ctc_greedy_decoder doesn't help either. Why is it doing this?

More updates

I have uploaded a reproducible example: https://github.com/selcouthlyBlue/ShapeErrorReproduce
I have finally figured out the error. The problem actually lies in the way I used sparse_to_dense: I passed the values before the shape,

return tf.sparse_to_dense(tf.to_int32(decoded[0].indices),
                          tf.to_int32(decoded[0].values),
                          tf.to_int32(decoded[0].dense_shape),
                          name="output")

whereas the shape must come before the values:

return tf.sparse_to_dense(tf.to_int32(decoded[0].indices),
                          tf.to_int32(decoded[0].dense_shape),
                          tf.to_int32(decoded[0].values),
                          name="output")
tensorflow tf.cond does not execute true_fn or false_fn for tf.reduce_mean
I am trying to condition the output of the loss function tf.reduce_mean so as to avoid NaN errors. My code is:

limit = []
for i in xrange(12):
    limit.append(10000.0)
limit = tf.constant(limit)

predictions["loss"] = tf.cond(
    tf.reduce_mean((prediction - transformed_values) ** 2, axis=-1) < limit,
    lambda: tf.reduce_mean((prediction - transformed_values) ** 2, axis=-1),
    lambda: tf.reduce_mean((prediction - transformed_values), axis=-1))

However, I get the error (tail of the traceback):

shape: pred (12,) true_t (12,) false_t (12,)
Traceback (most recent call last):
  File "/home/paul/workspace/workspace/Master/Elec_Price_Prediction/Time_Series.py", line 302, in <module>
    obtain_prediction()
  File "/home/paul/workspace/workspace/Master/Elec_Price_Prediction/Time_Series.py", line 212, in obtain_prediction
    estimator.train(input_fn=train_input_fn, steps=10000)
  ...
  File "/home/paul/workspace/workspace/Master/Elec_Price_Prediction/Time_Series.py", line 105, in _filtering_step
    prediction = tf.cond(pred, lambda: true_t, lambda: false_t)
  ...
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
    raise ValueError(err.message)
ValueError: Shape must be rank 0 but is rank 1 for 'head/model/while/state_update_step/cond/Switch' (op: 'Switch') with input shapes: [12], [12].

My question would be why this is impossible and how to work around it. I checked that pred, true_fn, and false_fn all have the same shape, and they do.
How about using tf.where? Unlike tf.cond, whose predicate must be a scalar, tf.where selects element-wise, so a rank-1 condition like yours works.
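For instance, applied to the snippet above (a sketch reusing the question's own tensors):

# element-wise selection; all three tensors share the shape (12,)
squared = tf.reduce_mean((prediction - transformed_values) ** 2, axis=-1)
plain = tf.reduce_mean(prediction - transformed_values, axis=-1)
predictions["loss"] = tf.where(squared < limit, squared, plain)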
Adding dropout (tf.nn.dropout) results in Nan
Being a beginner in TensorFlow and CNNs, I'm working on emotion recognition to understand them. The following code works when the dropout layer is removed; however, it results in NaN when the layer is added. I've googled around and came across solutions such as reducing the learning rate, etc. None has worked for me.

The net:

def cnn(self, data):
    conv = tf.nn.conv2d(data, self.w_1, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + self.b_1)
    pool = tf.nn.max_pool(hidden, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
    norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    conv = tf.nn.conv2d(norm, self.w_2, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + self.b_2)
    pool = tf.nn.max_pool(hidden, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
    norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    list_shape = norm.get_shape().as_list()
    reshape = tf.reshape(pool, [list_shape[0], list_shape[1] * list_shape[2] * list_shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, self.w_3) + self.b_3)
    hidden = tf.nn.relu(tf.matmul(hidden, self.w_4) + self.b_4)
    dropout = tf.nn.dropout(hidden, self.dropout_prob)
    return tf.matmul(dropout, self.w_5) + self.b_5

The model:

self.tf_x = tf.placeholder(tf.float32, shape=(self.batch_size, self.image_size, self.image_size, 1))
self.tf_y = tf.placeholder(tf.float32, shape=(self.batch_size, self.num_labels))
self.dropout_prob = tf.placeholder(tf.float32)

self.w_1 = tf.Variable(tf.truncated_normal([5, 5, 1, 64], stddev=0.1))
self.b_1 = tf.Variable(tf.zeros([64]))
self.w_2 = tf.Variable(tf.truncated_normal([9, 9, 64, 128], stddev=0.04))
self.b_2 = tf.Variable(tf.constant(1.0, shape=[128]))
self.w_3 = tf.Variable(tf.truncated_normal([self.image_size//4 * self.image_size//4 * 128, 392], stddev=0.1))
self.b_3 = tf.Variable(tf.constant(1.0, shape=(392,)))
self.w_4 = tf.Variable(tf.truncated_normal([392, 196], stddev=0.1))
self.b_4 = tf.Variable(tf.constant(1.0, shape=(196,)))
self.w_5 = tf.Variable(tf.truncated_normal([196, self.num_labels], stddev=0.04))
self.b_5 = tf.Variable(tf.constant(1.0, shape=[self.num_labels]))

self.logits = self.cnn(self.tf_x)
self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.tf_y, logits=self.logits))
self.optimizer = tf.train.AdamOptimizer(1e-6).minimize(self.loss)
self.train_pred = tf.nn.softmax(self.logits)

tf.summary.histogram('weights_1', self.w_1)
tf.summary.histogram('weights_2', self.w_2)
tf.summary.histogram('weights_3', self.w_3)
tf.summary.histogram('weights_4', self.w_4)
tf.summary.scalar('loss', self.loss)
self.merged = tf.summary.merge_all()

The error (tail of the traceback):

tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: weights_1
[[Node: weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](weights_1/tag, Variable/read/_81)]]
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_gpu_executor.cc:637] Deallocating stream with pending work
TensorFlow CNN batch size error
I've built a CNN model for my dataset and use batching to feed the data. With a batch size of one it works, but with any batch size other than one (e.g. 128) it raises an error. This is my code; I'm attaching all of it. The data has 1623 columns.

import tensorflow as tf
import numpy as np

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    l1a = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME'))           # l1a shape=(?, 24, 60, 32)
    l1 = tf.nn.avg_pool(l1a, ksize=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding='SAME')   # l1 shape=(?, 6, 30, 32)
    l1 = tf.nn.dropout(l1, p_keep_conv)
    l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1], padding='SAME'))         # l2a shape=(?, 6, 30, 64)
    l2 = tf.nn.avg_pool(l2a, ksize=[1, 2, 3, 1], strides=[1, 2, 3, 1], padding='SAME')   # l2 shape=(?, 3, 10, 64)
    l2 = tf.nn.dropout(l2, p_keep_conv)
    l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, strides=[1, 1, 1, 1], padding='SAME'))         # l3a shape=(?, 3, 10, 128)
    l3 = tf.nn.max_pool(l3a, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='SAME')   # l3 shape=(?, 3, 5, 128)
    l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]])                               # reshape to (?, 1920)
    l3 = tf.nn.dropout(l3, p_keep_conv)
    l4 = tf.nn.relu(tf.matmul(l3, w4))
    l4 = tf.nn.dropout(l4, p_keep_hidden)
    pyx = tf.matmul(l4, w_o)
    return pyx

X = tf.placeholder(tf.float32, [None, 24, 60, 1])
Y = tf.placeholder(tf.float32, [None, 1])

w = init_weights([4, 4, 1, 32])        # 4x4x1 conv, 32 outputs
w2 = init_weights([2, 3, 32, 64])      # 2x3x32 conv, 64 outputs
w3 = init_weights([1, 2, 64, 128])     # 1x2x64 conv, 128 outputs
w4 = init_weights([128 * 5 * 3, 625])  # FC 128 * 5 * 3 inputs, 625 outputs
w_o = init_weights([625, 1])           # FC 625 inputs, 1 output (label)
#B = tf.Variable(tf.random_normal([625]))

print("W shape:", w.get_shape())
print("W2 shape:", w2.get_shape())
print("W3 shape:", w3.get_shape())
print("W4 shape:", w4.get_shape())
print("Wo shape:", w_o.get_shape())

p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)

squared_deltas1 = tf.square(Y - py_x)
squared_deltas = tf.sqrt(squared_deltas1)
cost = tf.reduce_mean(squared_deltas)
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
cost_sum = tf.summary.scalar("cost", cost)

def read_my_file_format(filename_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)
    record_defaults = [[1], [1], [1], ......... [1], [1], [1]]  # 1623 columns
    record_defaults = [tf.constant([1], dtype=tf.float32),
                       tf.constant([1], dtype=tf.float32),
                       ..................
                       tf.constant([1], dtype=tf.float32),
                       tf.constant([1], dtype=tf.float32),
                       ]
    Col1, Col2, Col3, ......, Col1621, Col1622, Col1623 = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.pack([Col4, Col5, Col6, ....... Col1618, Col1619, Col1620])
    label = tf.pack([Col29])
    return features, label

def input_pipeline(batch_size, num_epochs):
    min_after_dequeue = 10000
    capacity = min_after_dequeue + 3 * batch_size
    '''
    filename_queue = tf.train.string_input_producer(["G:\CNN\1999.csv", "G:\CNN\2000.csv", "G:\CNN\2001.csv", "G:\CNN\2002.csv",
                                                     "G:\CNN\2003.csv", "G:\CNN\2004.csv", "G:\CNN\2005.csv", "G:\CNN\2006.csv",
                                                     "G:\CNN\2007.csv", "G:\CNN\2008.csv"], num_epochs=num_epochs, shuffle=True)
    '''
    filename_queue = tf.train.string_input_producer(["test_1000.csv"], num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)
    example_batch, label_batch = tf.train.shuffle_batch([example, label], batch_size=batch_size,
                                                        capacity=capacity, min_after_dequeue=min_after_dequeue)
    return example_batch, label_batch

examples, labels = input_pipeline(128, 1)
print(examples)
examples = tf.reshape(examples, [-1, 24, 60, 1])
print(examples)
#examples = examples.reshape(-1, 24, 60, 1)  # 28x28x1 input img

i = 0
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess = tf.Session()
merged = tf.summary.merge_all()
trainwriter = tf.summary.FileWriter("./board/custom", sess.graph)
sess.run(init_op)
print(w.eval(session=sess))
print(w2.eval(session=sess))
print(w3.eval(session=sess))
print(w4.eval(session=sess))

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    while not coord.should_stop():
        i = i + 1
        example_batch, label_batch = sess.run([examples, labels])
        sess.run(train_op, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})

        if i % 1 == 0:
            summary = sess.run(merged, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1})
            trainwriter.add_summary(summary, i)
            print(cost.eval(feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1}, session=sess))
        '''
        loss = tf.abs(y - y_)
        accuracy = tf.reduce_mean(loss)
        print(cross_entropy.eval(feed_dict={x: example_batch, y_: label_batch}, session=sess))
        '''
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    # When done, ask the threads to stop.
    coord.request_stop()

# Wait for threads to finish.
coord.join(threads)
sess.close()

This is the line that selects the batch size:

examples, labels = input_pipeline(128, 1)

If I set the batch size to anything bigger than one, it produces this error:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1020     try:
-> 1021       return fn(*args)
   1022     except errors.OpError as e:

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002                                  feed_dict, fetch_list, target_list,
-> 1003                                  status, run_metadata)
   1004

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\contextlib.py in __exit__(self, type, value, traceback)
     65             try:
---> 66                 next(self.gen)
     67             except StopIteration:

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
    468           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 469           pywrap_tensorflow.TF_GetCode(status))
    470     finally:

InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
   [[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-1-d05205b7cce1> in <module>()
   1866     i = i + 1
   1867     example_batch, label_batch = sess.run([examples, labels])
-> 1868     sess.run(train_op, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
   1869
   1870     if i % 1 == 0:

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
    764     try:
    765       result = self._run(None, fetches, feed_dict, options_ptr,
--> 766                          run_metadata_ptr)
    767       if run_metadata:
    768         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    962     if final_fetches or final_targets:
    963       results = self._do_run(handle, final_targets, final_fetches,
--> 964                              feed_dict_string, options, run_metadata)
    965     else:
    966       results = []

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1012     if handle is None:
   1013       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1014                            target_list, options, run_metadata)
   1015     else:
   1016       return self._do_call(_prun_fn, self._session, handle, feed_dict,

C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1032       except KeyError:
   1033         pass
-> 1034       raise type(e)(node_def, op, message)
   1035
   1036   def _extend_graph(self):

InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
   [[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]

Caused by op 'gradients/sub_grad/BroadcastGradientArgs', defined at:
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-d05205b7cce1>", line 51, in <module>
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 269, in minimize
    grad_loss=grad_loss)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 335, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 482, in gradients
    in_grads = grad_fn(op, *out_grads)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_grad.py", line 594, in _SubGrad
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 390, in _broadcast_gradient_args
    name=name)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

...which was originally created as op 'sub', defined at:
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
[elided 18 identical lines from previous traceback]
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-d05205b7cce1>", line 48, in <module>
    squared_deltas1 = tf.square(Y - py_x)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_ops.py", line 814, in binary_op_wrapper
    return func(x, y, name=name)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 2758, in sub
    result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [128,1] vs. [256,1]
   [[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]

I want to use the batch function, but in this case I can't. How can I solve this problem?
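For what it's worth, the mismatch arithmetic can be checked offline. With 'SAME' padding, each pooled dimension is ceil(input / stride), so the 24x60 input becomes 12x30 after the first pool (not 6x30 as the inline comments assume), then 6x10, then 6x5. That makes the flattened size per example 6 * 5 * 128 = 3840, twice the 1920 that w4 expects, so tf.reshape(l3, [-1, 1920]) silently doubles the leading dimension: 128 examples become 256 rows, hence [128,1] vs. [256,1]. With batch size 1, the resulting [2,1] prediction simply broadcasts against the [1,1] label, which is why that case appears to work. A small sketch of the check (the helper function is mine, not part of the code above):

import math

# 'SAME' padding output size along one axis is ceil(input / stride).
def same_out(size, stride):
    return math.ceil(size / stride)

h, w = 24, 60
h, w = same_out(h, 2), same_out(w, 2)  # first avg_pool, strides [1, 2, 2, 1] -> 12 x 30
h, w = same_out(h, 2), same_out(w, 3)  # second avg_pool, strides [1, 2, 3, 1] -> 6 x 10
h, w = same_out(h, 1), same_out(w, 2)  # max_pool, strides [1, 1, 2, 1]       -> 6 x 5

flat = h * w * 128        # elements per example entering the FC layer
assumed = 128 * 5 * 3     # what w4 is sized for
print(flat, assumed, flat // assumed)  # 3840 1920 2 -> batch inflates 128 -> 256

If that arithmetic holds for your data, sizing w4 as init_weights([6 * 5 * 128, 625]) (or reshaping with the statically known l3 shape instead of -1) should keep the batch dimension at 128 for any batch size.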