Print BERT model summary using PyTorch

Hi, I would like to print the model summary of my BERT model for text classification. I am using the command print(summary(model, input_size=(channels, height, width))). What would the dimensions of input_size be in the case of text classification?
I have used print(model) as well, but the output is confusing and I want to see it in layered form.
Below is my model summary.
BertClassifier(
(bert): BertModel(
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(28996, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(1): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(2): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(3): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(4): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(5): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(6): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(7): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(8): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(9): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(10): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(11): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
)
(dropout): Dropout(p=0.5, inplace=False)
(linear1): Linear(in_features=768, out_features=256, bias=True)
(linear2): Linear(in_features=256, out_features=141, bias=True)
(relu): ReLU()
)

I used the torch-summary module:
pip install torch-summary
summary(model, input_size=(768,), depth=1, batch_dim=1, dtypes=['torch.IntTensor'])

A minor supplement to Flash's answer above: to import the summary function, use
from torchsummary import summary
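
As a further sketch, here is the same idea with torchinfo (the maintained successor to torch-summary); treating the input as a (batch_size, sequence_length) tensor of token ids is an assumption about how BertClassifier.forward is called:

import torch
from torchinfo import summary  # pip install torchinfo

# For text classification the input is token ids, so input_size is
# (batch_size, sequence_length) rather than (channels, height, width);
# 512 is BERT's maximum sequence length and torch.long is the ids' dtype.
summary(model, input_size=(1, 512), dtypes=[torch.long], depth=4)

# If forward() also expects an attention mask, pass concrete tensors instead:
# summary(model, input_data=(input_ids, attention_mask))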

Related

Evaluate ROC AUC for Keras sequential multiclass model

I want to evaluate the ROC AUC for my multiclass sequential Keras model using the multiclass_roc_auc_score function. My code raised ValueError: Shapes (None, 1) and (None, 4) are incompatible.
I want to perform multiclass classification:
class MulticlassTruePositives(tf.keras.metrics.Metric):
    def __init__(self, name='multiclass_true_positives', **kwargs):
        super(MulticlassTruePositives, self).__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.reshape(tf.argmax(y_pred, axis=1), shape=(-1, 1))
        values = tf.cast(y_true, 'int32') == tf.cast(y_pred, 'int32')
        values = tf.cast(values, 'float32')
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, 'float32')
            values = tf.multiply(values, sample_weight)
        self.true_positives.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.true_positives

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.true_positives.assign(0.)

I compile the Keras model with the metrics:
# Report the AUC of a model outputting a probability.
hypermodel.compile(optimizer='sgd',
                   loss=tf.keras.losses.CategoricalCrossentropy(),
                   metrics=[tf.keras.metrics.AUC(), MulticlassTruePositives()])
I implement Keras callback that plots the ROC curve and Confusion Matrix to a folder:
class PerformanceVisualizationCallback(Callback):
    def __init__(self, model, test_data, image_dir):
        super().__init__()
        self.model = model
        self.test_data = test_data
        os.makedirs(image_dir, exist_ok=True)
        self.image_dir = image_dir

    def on_epoch_end(self, epoch, logs={}):
        y_pred = np.asarray(self.model.predict(self.test_data[0]))
        y_true = self.test_data[1]
        y_pred_class = np.argmax(y_pred, axis=1)

        # plot and save confusion matrix
        fig, ax = plt.subplots(figsize=(16, 12))
        plot_confusion_matrix(y_true, y_pred_class, ax=ax)
        fig.savefig(os.path.join(self.image_dir, f'confusion_matrix_epoch_{epoch}'))

        # plot and save roc curve
        fig, ax = plt.subplots(figsize=(16, 12))
        plot_roc(y_true, y_pred, ax=ax)
        fig.savefig(os.path.join(self.image_dir, f'roc_curve_epoch_{epoch}'))

performance_viz_cbk = PerformanceVisualizationCallback(
    model=model,
    test_data=X_test,
    image_dir='perorfmance_charts')

history = hypermodel.fit(x=X_train,
                         y=y_train,
                         epochs=5,
                         callbacks=[performance_viz_cbk])
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_17/963709483.py in <module>
2 y=y_train,
3 epochs=5,
----> 4 callbacks=[performance_viz_cbk])
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
922 # In this case we have not created variables on the first call. So we can
923 # run the first trace but we should fail if variables are created.
--> 924 results = self._stateful_fn(*args, **kwds)
925 if self._created_variables and not ALLOW_DYNAMIC_VARIABLE_CREATION:
926 raise ValueError("Creating variables on a non-first call to a function"
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
3036 with self._lock:
3037 (graph_function,
-> 3038 filtered_flat_args) = self._maybe_define_function(args, kwargs)
3039 return graph_function._call_flat(
3040 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3458 call_context_key in self._function_cache.missed):
3459 return self._define_function_with_shape_relaxation(
-> 3460 args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3461
3462 self._function_cache.missed.add(call_context_key)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _define_function_with_shape_relaxation(self, args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3380
3381 graph_function = self._create_graph_function(
-> 3382 args, kwargs, override_flat_arg_shapes=relaxed_arg_shapes)
3383 self._function_cache.arg_relaxed[rank_only_cache_key] = graph_function
3384
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3306 arg_names=arg_names,
3307 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3308 capture_by_value=self._capture_by_value),
3309 self._function_attributes,
3310 function_spec=self.function_spec,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1005 _, original_func = tf_decorator.unwrap(python_func)
1006
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1008
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
670
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:789 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/opt/conda/lib/python3.7/site-packages/keras/engine/compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:141 __call__
losses = call_fn(y_true, y_pred)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:1666 categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 4) are incompatible
I took the time to make your display graphs make sense with my sample data.
First of all, the output dimensions need to match what your display calculations expect; see the loss function, optimizer, metrics, and the network output below. Then you need to make sense of the prediction data: the display does not accept multiple input values, so I took the time to build a meaningful sample. Max or Sum is significant; the sum of all Max values is not, since it mixes all the priorities in my queue.
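
Concretely, the ValueError above means integer class ids of shape (None, 1) are being compared with the model's (None, 4) output, which CategoricalCrossentropy cannot do. A minimal sketch of the two usual fixes, assuming y_train holds integer ids 0..3:

import tensorflow as tf

# Option 1: one-hot encode the labels so they match the (None, 4) output.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=4)

# Option 2: keep the integer labels and switch to the sparse loss instead.
hypermodel.compile(optimizer='sgd',
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                   metrics=['accuracy'])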
import os
from os.path import exists
import tensorflow as tf
import tensorflow_text as tft
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.svm import SVC
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
list_accuracy = []
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
if not exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)
    print("Create directory: " + checkpoint_dir)
input_word = tf.constant(' \'Cause it\'s easy as an ice cream sundae Slipping outta your hand into the dirt Easy as an ice cream sundae Every dancer gets a little hurt Easy as an ice cream sundae Slipping outta your hand into the dirt Easy as an ice cream sundae Every dancer gets a little hurt Easy as an ice cream sundae Oh, easy as an ice cream sundae ')
dataset = tf.data.Dataset.from_tensors( tf.strings.bytes_split(input_word) )
window_size = 6
dataset = dataset.map( lambda x: tft.sliding_window(x, width=window_size, axis=0) ).flat_map(tf.data.Dataset.from_tensor_slices)
dataset = dataset.batch(1)
list_word = []
label = []
vocab = [ "a", "b", "c", "d", "e", "f", "g", "h", "I", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "_" ]
vocab_hot = [ "ice" ]
layer = tf.keras.layers.StringLookup(vocabulary=vocab)
layer_hot = tf.keras.layers.StringLookup(vocabulary=vocab_hot)
for example in dataset.take(200):
    sequences_mapping_string = layer(example[0])
    sequences_mapping_string = tf.constant(sequences_mapping_string, shape=(1, 6))
    list_word.append(sequences_mapping_string.numpy())
    sequences_mapping_string = tf.reduce_sum(layer_hot(example[0][0] + example[0][1] + example[0][2]))
    sequences_mapping_string = tf.constant(sequences_mapping_string, shape=(1, 1))
    label.append(sequences_mapping_string.numpy())
list_word = tf.constant(list_word, shape=(200, 1, 6, 1), dtype=tf.int64)
label = tf.constant(label, shape=(200, 1, 1, 1), dtype=tf.int64)
dataset = tf.data.Dataset.from_tensor_slices((list_word, label))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MulticlassTruePositives(tf.keras.metrics.Metric):
    def __init__(self, name='multiclass_true_positives', **kwargs):
        super(MulticlassTruePositives, self).__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.reshape(tf.argmax(y_pred, axis=1), shape=(-1, 1))
        values = tf.cast(y_true, 'int32') == tf.cast(y_pred, 'int32')
        values = tf.cast(values, 'float32')
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, 'float32')
            values = tf.multiply(values, sample_weight)
        self.true_positives.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.true_positives

    def reset_state(self):
        # The state of the metric will be reset at the start of each epoch.
        self.true_positives.assign(0.)

class MyLSTMLayer(tf.keras.layers.LSTM):
    def __init__(self, units, return_sequences, return_state):
        super(MyLSTMLayer, self).__init__(units, return_sequences=True, return_state=False)
        self.num_units = units

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]), self.num_units])

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
mycustomlayer = MyLSTMLayer( 64, True, False )
mycustomlayer_2 = MyLSTMLayer( 16, True, False )
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(6, 1)),
    tf.keras.layers.Embedding(1000, 128, input_length=1),
    tf.keras.layers.Reshape((6, 128)),
    tf.keras.layers.SpatialDropout1D(rate=0.4),
    tf.keras.layers.Conv1D(32, 6, activation="relu"),
    tf.keras.layers.MaxPooling1D(strides=1, pool_size=1),
    ### LSTM
    mycustomlayer,
    tf.keras.layers.Reshape((1, 1, 64)),
    tf.keras.layers.UpSampling2D(size=(4, 4), data_format=None, interpolation='nearest'),
    tf.keras.layers.Conv1D(16, 3, activation="relu"),
    tf.keras.layers.Reshape((8, 16)),
    tf.keras.layers.MaxPooling1D(),
    tf.keras.layers.GlobalMaxPooling1D(),
    ### LSTM
    tf.keras.layers.Reshape((1, 16)),
    mycustomlayer_2,
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1),
], name="MyModelClassification")
model.build()
model.summary()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class PerformanceVisualizationCallback(tf.keras.callbacks.Callback):
    def __init__(self, model, test_data, image_dir):
        super().__init__()
        self.model = model
        self.test_data = test_data
        self.test_data = tf.constant(self.test_data, shape=(20, 1, 6, 1))
        os.makedirs(image_dir, exist_ok=True)
        self.image_dir = image_dir

    def on_epoch_end(self, epoch, logs={}):
        y_pred = tf.constant(self.model.predict(self.test_data[0])).numpy()
        y_true = self.test_data[1]
        y_pred_class = tf.math.argmax(y_pred, axis=1).numpy()
        clf = SVC(random_state=0)
        clf.fit(tf.constant(self.test_data, shape=(20, 6)).numpy(),
                tf.cast(tf.linspace(0, 19, 20, name='linspace', axis=0), dtype=tf.int64).numpy())
        predictions = clf.predict(tf.constant(self.test_data, shape=(20, 6)).numpy())
        cm = sklearn.metrics.confusion_matrix(
            [tf.math.argmax(self.test_data[1], axis=1).numpy()[0],
             tf.math.argmax(self.test_data[2], axis=1).numpy()[0],
             tf.math.argmax(self.test_data[3], axis=1).numpy()[0],
             tf.math.argmax(self.test_data[4], axis=1).numpy()[0],
             tf.math.argmax(self.test_data[5], axis=1).numpy()[0]],
            [1, 2, 3, 4, 5], labels=clf.classes_)
        disp = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
        disp.plot()
        plt.show()
        # fig.savefig(os.path.join(self.image_dir, f'confusion_matrix_epoch_{epoch}'))
        clf = sklearn.svm.SVC(random_state=0)
        clf.fit(tf.constant(self.test_data, shape=(20, 6)).numpy(),
                tf.linspace(0, 19, 20, name='linspace', axis=0).numpy())
        if epoch <= 2:
            list_accuracy.append(logs['accuracy'])
        if epoch == 2:
            fpr, tpr, thresholds = sklearn.metrics.roc_curve(
                [0, 0, 1, 1], [0, list_accuracy[0], list_accuracy[1], list_accuracy[2]])
            roc_auc = sklearn.metrics.auc(fpr, tpr)
            display = sklearn.metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                                                      estimator_name='example estimator')
            display.plot()
            plt.show()
            # fig.savefig(os.path.join(self.image_dir, f'roc_curve_epoch_{epoch}'))
list_word = []
label = []
test_dataset = tf.data.Dataset.from_tensors( tf.strings.bytes_split(input_word) )
test_dataset = test_dataset.map( lambda x: tft.sliding_window(x, width=window_size, axis=0) ).flat_map(tf.data.Dataset.from_tensor_slices)
test_dataset = test_dataset.batch(1)
for example in test_dataset.take(20):
    sequences_mapping_string = layer(example[0])
    sequences_mapping_string = tf.constant(sequences_mapping_string, shape=(1, 6))
    list_word.append(sequences_mapping_string.numpy())
    sequences_mapping_string = tf.reduce_sum(layer_hot(example[0][0] + example[0][1] + example[0][2]))
    sequences_mapping_string = tf.constant(sequences_mapping_string, shape=(1, 1))
    label.append(sequences_mapping_string.numpy())
list_word = tf.constant(list_word, shape=(20, 1, 6, 1), dtype=tf.int64)
label = tf.constant(label, shape=(20, 1, 1, 1), dtype=tf.int64)
test_dataset = tf.data.Dataset.from_tensor_slices((list_word, label))
performance_viz_cbk = PerformanceVisualizationCallback(
    model=model,
    test_data=list_word,
    image_dir='c:\\perorfmance_charts')
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.SGD(
learning_rate=0.000001,
momentum=0.5,
nesterov=True,
name='SGD',
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Report the AUC of a model outputting a probability.
model.compile(optimizer=optimizer, loss=lossfn,
metrics=['accuracy', tf.keras.metrics.AUC(), MulticlassTruePositives()])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: FileWriter
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
if exists(checkpoint_path):
    model.load_weights(checkpoint_path)
    print("model load: " + checkpoint_path)
    input("Press Any Key!")
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, batch_size=100, epochs=3, callbacks=[performance_viz_cbk] )

How to fit model to a multidimensional output space using pandas dataframe as input?

I want to fit a dataframe to a sequential deep learning model with multiple units in the final layer of the model. I'm new to deep learning.
Code:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
# Label encode
encoder = LabelEncoder()
df["survival"] = encoder.fit_transform(df[["survival"]])
df["type"] = encoder.fit_transform(df[["type"]])
df["subtype"] = encoder.fit_transform(df[["subtype"]])
# Define Sequential model
def get_model():
    model = keras.Sequential(
        [
            layers.Dense(10, activation="relu", name="layer1"),
            layers.Dense(10, activation="relu", name="layer2"),
            layers.Dense(2, name="layer3"),
        ]
    )
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model
model = get_model()
X2 = df.iloc[:,7:9] # Retrieve 2 features
tf.convert_to_tensor(X2) # Convert to tensor
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(X2)
model.fit(X2, df[["survival"]], epochs=15, batch_size=BATCH_SIZE)
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_17/2890126290.py in <module>
3 normalizer = tf.keras.layers.Normalization(axis=-1)
4 normalizer.adapt(X2)
----> 5 model.fit(X2, df[["survival"]], epochs=15, batch_size=BATCH_SIZE)
6 model.fit(X2, df[["type"]], epochs=15, batch_size=BATCH_SIZE)
7 model.fit(X2, df[["subtype"]], epochs=15, batch_size=BATCH_SIZE)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
758 self._concrete_stateful_fn = (
759 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 760 *args, **kwds))
761
762 def invalid_creator_scope(*unused_args, **unused_kwds):
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3064 args, kwargs = None, None
3065 with self._lock:
-> 3066 graph_function, _ = self._maybe_define_function(args, kwargs)
3067 return graph_function
3068
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3461
3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
3464 self._function_cache.primary[cache_key] = graph_function
3465
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3306 arg_names=arg_names,
3307 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3308 capture_by_value=self._capture_by_value),
3309 self._function_attributes,
3310 function_spec=self.function_spec,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1005 _, original_func = tf_decorator.unwrap(python_func)
1006
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1008
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
670
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:789 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/opt/conda/lib/python3.7/site-packages/keras/engine/compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:141 __call__
losses = call_fn(y_true, y_pred)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:1809 binary_crossentropy
backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/backend.py:5000 binary_crossentropy
return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:246 sigmoid_cross_entropy_with_logits_v2
logits=logits, labels=labels, name=name)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:133 sigmoid_cross_entropy_with_logits
(logits.get_shape(), labels.get_shape()))
ValueError: logits and labels must have the same shape ((None, 2) vs (None, 1))
I can only fit the model if I set the units to 1 in the final layer.
model = keras.Sequential(
    [
        layers.Dense(10, activation="relu", name="layer1"),
        layers.Dense(10, activation="relu", name="layer2"),
        layers.Dense(1, name="layer3"),
    ]
)
How do I use multiple units in the final layer of the model? I tried reshape(-1,1) and still get the same error.
Data:
df.head().to_dict()
{'admin.disease_code': {'TCGA-2K-A9WE-01A': 'KIRP',
'TCGA-2Z-A9J1-01A': 'KIRP',
'TCGA-2Z-A9J2-01A': 'KIRP',
'TCGA-2Z-A9J3-01A': 'KIRP',
'TCGA-2Z-A9J5-01A': 'KIRP'},
'days_to_death': {'TCGA-2K-A9WE-01A': nan,
'TCGA-2Z-A9J1-01A': nan,
'TCGA-2Z-A9J2-01A': nan,
'TCGA-2Z-A9J3-01A': 1771.0,
'TCGA-2Z-A9J5-01A': nan},
'vital_status': {'TCGA-2K-A9WE-01A': 'alive',
'TCGA-2Z-A9J1-01A': 'alive',
'TCGA-2Z-A9J2-01A': 'alive',
'TCGA-2Z-A9J3-01A': 'dead',
'TCGA-2Z-A9J5-01A': 'alive'},
'age_at_initial_pathologic_diagnosis': {'TCGA-2K-A9WE-01A': 53.0,
'TCGA-2Z-A9J1-01A': 71.0,
'TCGA-2Z-A9J2-01A': 71.0,
'TCGA-2Z-A9J3-01A': 67.0,
'TCGA-2Z-A9J5-01A': 80.0},
'gender': {'TCGA-2K-A9WE-01A': 'male',
'TCGA-2Z-A9J1-01A': 'male',
'TCGA-2Z-A9J2-01A': 'female',
'TCGA-2Z-A9J3-01A': 'male',
'TCGA-2Z-A9J5-01A': 'male'},
'karnofsky_performance_score': {'TCGA-2K-A9WE-01A': nan,
'TCGA-2Z-A9J1-01A': nan,
'TCGA-2Z-A9J2-01A': nan,
'TCGA-2Z-A9J3-01A': nan,
'TCGA-2Z-A9J5-01A': nan},
'survival': {'TCGA-2K-A9WE-01A': 'lts',
'TCGA-2Z-A9J1-01A': 'lts',
'TCGA-2Z-A9J2-01A': 'lts',
'TCGA-2Z-A9J3-01A': 'lts',
'TCGA-2Z-A9J5-01A': 'lts'},
'cg00000029': {'TCGA-2K-A9WE-01A': 0.461440642939772,
'TCGA-2Z-A9J1-01A': 0.595894468074615,
'TCGA-2Z-A9J2-01A': 0.481304782143526,
'TCGA-2Z-A9J3-01A': 0.553849599144766,
'TCGA-2Z-A9J5-01A': 0.184349035247422},
'cg00000165': {'TCGA-2K-A9WE-01A': 0.143910373119058,
'TCGA-2Z-A9J1-01A': 0.0807243779293262,
'TCGA-2Z-A9J2-01A': 0.437447195378987,
'TCGA-2Z-A9J3-01A': 0.0642332527783939,
'TCGA-2Z-A9J5-01A': 0.126118535539944},
'cg00000236': {'TCGA-2K-A9WE-01A': 0.847164847154162,
'TCGA-2Z-A9J1-01A': 0.867305510246114,
'TCGA-2Z-A9J2-01A': 0.898927359292032,
'TCGA-2Z-A9J3-01A': 0.917290578229414,
'TCGA-2Z-A9J5-01A': 0.928017823091886},
'cg00000289': {'TCGA-2K-A9WE-01A': 0.737361955793681,
'TCGA-2Z-A9J1-01A': 0.70680600651273,
'TCGA-2Z-A9J2-01A': 0.758108726247342,
'TCGA-2Z-A9J3-01A': 0.675537604266578,
'TCGA-2Z-A9J5-01A': 0.677846427070521},
'cg00000292': {'TCGA-2K-A9WE-01A': 0.716794733144112,
'TCGA-2Z-A9J1-01A': 0.217862460492399,
'TCGA-2Z-A9J2-01A': 0.868604834806246,
'TCGA-2Z-A9J3-01A': 0.543087013952312,
'TCGA-2Z-A9J5-01A': 0.850473788130218},
'cg00000321': {'TCGA-2K-A9WE-01A': 0.351877113536983,
'TCGA-2Z-A9J1-01A': 0.169408257004071,
'TCGA-2Z-A9J2-01A': 0.577744851436078,
'TCGA-2Z-A9J3-01A': 0.85044433769089,
'TCGA-2Z-A9J5-01A': 0.44473521937132},
'cg00000363': {'TCGA-2K-A9WE-01A': 0.248986769373366,
'TCGA-2Z-A9J1-01A': 0.173115013795265,
'TCGA-2Z-A9J2-01A': 0.567241575633452,
'TCGA-2Z-A9J3-01A': 0.470810530680518,
'TCGA-2Z-A9J5-01A': 0.204529155293748},
'cg00000622': {'TCGA-2K-A9WE-01A': 0.0121360989202765,
'TCGA-2Z-A9J1-01A': 0.0108902025634162,
'TCGA-2Z-A9J2-01A': 0.0122683781097633,
'TCGA-2Z-A9J3-01A': 0.0125681212511168,
'TCGA-2Z-A9J5-01A': 0.0122330126903632},
'cg00000658': {'TCGA-2K-A9WE-01A': 0.876303885229884,
'TCGA-2Z-A9J1-01A': 0.813866558997356,
'TCGA-2Z-A9J2-01A': 0.881366097769717,
'TCGA-2Z-A9J3-01A': 0.870735609192125,
'TCGA-2Z-A9J5-01A': 0.906102120405464},
'cg00000721': {'TCGA-2K-A9WE-01A': 0.944311384947134,
'TCGA-2Z-A9J1-01A': 0.938576461648791,
'TCGA-2Z-A9J2-01A': 0.936584647488041,
'TCGA-2Z-A9J3-01A': 0.956356142020249,
'TCGA-2Z-A9J5-01A': 0.938145301973259},
'cg00000734': {'TCGA-2K-A9WE-01A': 0.0490407302658151,
'TCGA-2Z-A9J1-01A': 0.0426568318037534,
'TCGA-2Z-A9J2-01A': 0.0428379760439674,
'TCGA-2Z-A9J3-01A': 0.0577007291016598,
'TCGA-2Z-A9J5-01A': 0.0491650645308977},
'cg00000769': {'TCGA-2K-A9WE-01A': 0.0200484962577958,
'TCGA-2Z-A9J1-01A': 0.0133187057875756,
'TCGA-2Z-A9J2-01A': 0.0193220859926812,
'TCGA-2Z-A9J3-01A': 0.017072120017994,
'TCGA-2Z-A9J5-01A': 0.0184242706692516},
'cg00000905': {'TCGA-2K-A9WE-01A': 0.0623434271852525,
'TCGA-2Z-A9J1-01A': 0.0540543120983417,
'TCGA-2Z-A9J2-01A': 0.0551810635627895,
'TCGA-2Z-A9J3-01A': 0.055021036675329,
'TCGA-2Z-A9J5-01A': 0.0565152834168852},
'cg00000924': {'TCGA-2K-A9WE-01A': 0.489865398138095,
'TCGA-2Z-A9J1-01A': 0.317547629906197,
'TCGA-2Z-A9J2-01A': 0.5065017863301,
'TCGA-2Z-A9J3-01A': 0.504135768615145,
'TCGA-2Z-A9J5-01A': 0.466643054300025},
'cg00000948': {'TCGA-2K-A9WE-01A': 0.920994933496615,
'TCGA-2Z-A9J1-01A': 0.89911570032979,
'TCGA-2Z-A9J2-01A': 0.855015243009544,
'TCGA-2Z-A9J3-01A': 0.911116565506201,
'TCGA-2Z-A9J5-01A': 0.934397425301759},
'cg00000957': {'TCGA-2K-A9WE-01A': 0.92663932531651,
'TCGA-2Z-A9J1-01A': 0.525131175543627,
'TCGA-2Z-A9J2-01A': 0.86481442167794,
'TCGA-2Z-A9J3-01A': 0.855796126141919,
'TCGA-2Z-A9J5-01A': 0.907979957948096},
'cg00001245': {'TCGA-2K-A9WE-01A': 0.0149191766670711,
'TCGA-2Z-A9J1-01A': 0.0152198596492253,
'TCGA-2Z-A9J2-01A': 0.0154433022292077,
'TCGA-2Z-A9J3-01A': 0.0158006072782886,
'TCGA-2Z-A9J5-01A': 0.0149090955954903},
'type': {'TCGA-2K-A9WE-01A': 'tumor',
'TCGA-2Z-A9J1-01A': 'tumor',
'TCGA-2Z-A9J2-01A': 'tumor',
'TCGA-2Z-A9J3-01A': 'tumor',
'TCGA-2Z-A9J5-01A': 'tumor'},
'subtype': {'TCGA-2K-A9WE-01A': 'KIRP',
'TCGA-2Z-A9J1-01A': 'KIRP',
'TCGA-2Z-A9J2-01A': 'KIRP',
'TCGA-2Z-A9J3-01A': 'KIRP',
'TCGA-2Z-A9J5-01A': 'KIRP'}}
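
The traceback boils down to a shape mismatch: BinaryCrossentropy(from_logits=True) compares the Dense(2) logits of shape (None, 2) with a single label column of shape (None, 1). A minimal sketch of one common fix, assuming survival is a binary label (BATCH_SIZE stands in for the constant the snippet uses but never defines):

import tensorflow as tf

BATCH_SIZE = 32  # assumption; not defined in the snippet above

# One-hot encode the binary label so y matches the (None, 2) logits.
y2 = tf.keras.utils.to_categorical(df[["survival"]], num_classes=2)
model.fit(X2, y2, epochs=15, batch_size=BATCH_SIZE)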

Fusing the ResNet-50 and YOLO algorithms for enhanced deep learning object detection

I am attempting to build a custom object detector that detects weeds in images taken by UAS (details have been spared to simplify the question). I have been experimenting with the YOLO algorithms as well as using the models in the TensorFlow 2 Detection Model Zoo, as there are many options available for the CNNs to be constructed.
Doing a literature review, I have come across a very interesting paper (Abdulsalam and Aouf 2020) on this topic, presented at a conference in France. The researchers did something very interesting when they fused the YOLO algorithm with the ResNet-50 algorithm:
"This function approach involves adopting the outcome of one of the layers from ResNet-50 as an input to YOLOv2. This network layer from the ResNet-50 is specified as for feature extraction in YOLOv2. In this work, we used the ReLU (activation_49_relu) layer for the feature extraction layer. This layer will now be the input of YOLOv2."
With this fusion approach, the researchers were able to obtain a very high classification accuracy when mapping weed infestations. Looking at the TensorFlow Zoo, there is an option to use a pre-trained model (Faster R-CNN ResNet50 V1 1024x1024) that uses the ResNet-50 architecture. The config is below:
# Faster R-CNN with Resnet-50 (v1)
# Trained on COCO, initialized from Imagenet classification checkpoint
# This config is TPU compatible.
model {
faster_rcnn {
num_classes: 90
image_resizer {
fixed_shape_resizer {
width: 1024
height: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet50_keras'
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_static_shapes: true
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_static_balanced_label_sampler: true
use_matmul_gather_in_matcher: true
}
}
train_config: {
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 100000
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED"
fine_tune_checkpoint_type: "classification"
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true # works only on TPUs
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
What exactly would I have to do in order to integrate this ResNet-50 algorithm structure in the YOLO algorithm, as was done in this example?
Here is the backbone file for YOLOv4, which I was able to get by doing a pip install yolov4:
import tensorflow as tf
from tensorflow.keras import layers, Model, Sequential
from .common import YOLOConv2D
class _ResBlock(Model):
def __init__(
self,
filters_1: int,
filters_2: int,
activation: str = "mish",
kernel_regularizer=None,
):
super(_ResBlock, self).__init__()
self.conv1 = YOLOConv2D(
filters=filters_1,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv2 = YOLOConv2D(
filters=filters_2,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.add = layers.Add()
def call(self, x):
ret = self.conv1(x)
ret = self.conv2(ret)
x = self.add([x, ret])
return x
class ResBlock(Model):
def __init__(
self,
filters_1: int,
filters_2: int,
iteration: int,
activation: str = "mish",
kernel_regularizer=None,
):
super(ResBlock, self).__init__()
self.iteration = iteration
self.sequential = Sequential()
for _ in range(self.iteration):
self.sequential.add(
_ResBlock(
filters_1=filters_1,
filters_2=filters_2,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
)
def call(self, x):
return self.sequential(x)
class CSPResNet(Model):
"""
Cross Stage Partial connections(CSP)
"""
def __init__(
self,
filters_1: int,
filters_2: int,
iteration: int,
activation: str = "mish",
kernel_regularizer=None,
):
super(CSPResNet, self).__init__()
self.pre_conv = YOLOConv2D(
filters=filters_1,
kernel_size=3,
strides=2,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
# Do not change the order of declaration
self.part2_conv = YOLOConv2D(
filters=filters_2,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.part1_conv1 = YOLOConv2D(
filters=filters_2,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.part1_res_block = ResBlock(
filters_1=filters_1 // 2,
filters_2=filters_2,
iteration=iteration,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.part1_conv2 = YOLOConv2D(
filters=filters_2,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat1_2 = layers.Concatenate(axis=-1)
self.post_conv = YOLOConv2D(
filters=filters_1,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
def call(self, x):
x = self.pre_conv(x)
part2 = self.part2_conv(x)
part1 = self.part1_conv1(x)
part1 = self.part1_res_block(part1)
part1 = self.part1_conv2(part1)
x = self.concat1_2([part1, part2])
x = self.post_conv(x)
return x
class SPP(Model):
"""
Spatial Pyramid Pooling layer(SPP)
"""
def __init__(self):
super(SPP, self).__init__()
self.pool1 = layers.MaxPooling2D((13, 13), strides=1, padding="same")
self.pool2 = layers.MaxPooling2D((9, 9), strides=1, padding="same")
self.pool3 = layers.MaxPooling2D((5, 5), strides=1, padding="same")
self.concat = layers.Concatenate(axis=-1)
def call(self, x):
return self.concat([self.pool1(x), self.pool2(x), self.pool3(x), x])
class CSPDarknet53(Model):
def __init__(
self,
activation0: str = "mish",
activation1: str = "leaky",
kernel_regularizer=None,
):
super(CSPDarknet53, self).__init__(name="CSPDarknet53")
self.conv0 = YOLOConv2D(
filters=32,
kernel_size=3,
activation=activation0,
kernel_regularizer=kernel_regularizer,
)
self.res_block1 = CSPResNet(
filters_1=64,
filters_2=64,
iteration=1,
activation=activation0,
kernel_regularizer=kernel_regularizer,
)
self.res_block2 = CSPResNet(
filters_1=128,
filters_2=64,
iteration=2,
activation=activation0,
kernel_regularizer=kernel_regularizer,
)
self.res_block3 = CSPResNet(
filters_1=256,
filters_2=128,
iteration=8,
activation=activation0,
kernel_regularizer=kernel_regularizer,
)
self.res_block4 = CSPResNet(
filters_1=512,
filters_2=256,
iteration=8,
activation=activation0,
kernel_regularizer=kernel_regularizer,
)
self.res_block5 = CSPResNet(
filters_1=1024,
filters_2=512,
iteration=4,
activation=activation0,
kernel_regularizer=kernel_regularizer,
)
self.conv72 = YOLOConv2D(
filters=512,
kernel_size=1,
activation=activation1,
kernel_regularizer=kernel_regularizer,
)
self.conv73 = YOLOConv2D(
filters=1024,
kernel_size=3,
activation=activation1,
kernel_regularizer=kernel_regularizer,
)
self.conv74 = YOLOConv2D(
filters=512,
kernel_size=1,
activation=activation1,
kernel_regularizer=kernel_regularizer,
)
self.spp = SPP()
self.conv75 = YOLOConv2D(
filters=512,
kernel_size=1,
activation=activation1,
kernel_regularizer=kernel_regularizer,
)
self.conv76 = YOLOConv2D(
filters=1024,
kernel_size=3,
activation=activation1,
kernel_regularizer=kernel_regularizer,
)
self.conv77 = YOLOConv2D(
filters=512,
kernel_size=1,
activation=activation1,
kernel_regularizer=kernel_regularizer,
)
def call(self, x):
x = self.conv0(x)
x = self.res_block1(x)
x = self.res_block2(x)
x = self.res_block3(x)
route1 = x
x = self.res_block4(x)
route2 = x
x = self.res_block5(x)
x = self.conv72(x)
x = self.conv73(x)
x = self.conv74(x)
x = self.spp(x)
x = self.conv75(x)
x = self.conv76(x)
x = self.conv77(x)
route3 = x
return (route1, route2, route3)
class CSPDarknet53Tiny(Model):
def __init__(
self,
activation: str = "leaky",
kernel_regularizer=None,
):
super(CSPDarknet53Tiny, self).__init__(name="CSPDarknet53Tiny")
self.conv0 = YOLOConv2D(
filters=32,
kernel_size=3,
strides=2,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv1 = YOLOConv2D(
filters=64,
kernel_size=3,
strides=2,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv2 = YOLOConv2D(
filters=64,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv3 = YOLOConv2D(
filters=32,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv4 = YOLOConv2D(
filters=32,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat3_4 = layers.Concatenate(axis=-1)
self.conv5 = YOLOConv2D(
filters=64,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat2_5 = layers.Concatenate(axis=-1)
self.maxpool5 = layers.MaxPool2D((2, 2), strides=2, padding="same")
self.conv6 = YOLOConv2D(
filters=128,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv7 = YOLOConv2D(
filters=64,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv8 = YOLOConv2D(
filters=64,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat7_8 = layers.Concatenate(axis=-1)
self.conv9 = YOLOConv2D(
filters=128,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat6_9 = layers.Concatenate(axis=-1)
self.maxpool9 = layers.MaxPool2D((2, 2), strides=2, padding="same")
self.conv10 = YOLOConv2D(
filters=256,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv11 = YOLOConv2D(
filters=128,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.conv12 = YOLOConv2D(
filters=128,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat11_12 = layers.Concatenate(axis=-1)
self.conv13 = YOLOConv2D(
filters=256,
kernel_size=1,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
self.concat10_13 = layers.Concatenate(axis=-1)
self.maxpool13 = layers.MaxPool2D((2, 2), strides=2, padding="same")
self.conv14 = YOLOConv2D(
filters=512,
kernel_size=3,
activation=activation,
kernel_regularizer=kernel_regularizer,
)
def call(self, x):
x1 = self.conv0(x)
x1 = self.conv1(x1)
x1 = self.conv2(x1)
_, x2 = tf.split(x1, 2, axis=-1)
x2 = self.conv3(x2)
x3 = self.conv4(x2)
x3 = self.concat3_4([x3, x2])
x3 = self.conv5(x3)
x3 = self.concat2_5([x1, x3])
x1 = self.maxpool5(x3)
x1 = self.conv6(x1)
_, x2 = tf.split(x1, 2, axis=-1)
x2 = self.conv7(x2)
x3 = self.conv8(x2)
x3 = self.concat7_8([x3, x2])
x3 = self.conv9(x3)
x3 = self.concat6_9([x1, x3])
x1 = self.maxpool9(x3)
x1 = self.conv10(x1)
_, x2 = tf.split(x1, 2, axis=-1)
x2 = self.conv11(x2)
x3 = self.conv12(x2)
x3 = self.concat11_12([x3, x2])
route1 = self.conv13(x3)
x3 = self.concat10_13([x1, route1])
x1 = self.maxpool13(x3)
route2 = self.conv14(x1)
return route1, route2
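
As the backbone file shows, CSPDarknet53.call hands the detection head three routes (feature maps at strides 8, 16 and 32). So one concrete integration path is to build a ResNet-50 "backbone" that exposes three feature maps with the same stride pattern and swap it in where CSPDarknet53 is constructed. A hedged sketch follows; the block names are standard tf.keras ResNet50 outputs, the channel counts will differ from CSPDarknet53's, and whether the yolov4 package accepts a custom backbone this cleanly is an assumption:

import tensorflow as tf

def resnet50_routes(input_shape=(416, 416, 3)):
    """Return a model mapping an image to three ResNet-50 feature maps."""
    base = tf.keras.applications.ResNet50(include_top=False,
                                          weights='imagenet',
                                          input_shape=input_shape)
    route1 = base.get_layer('conv3_block4_out').output  # stride 8
    route2 = base.get_layer('conv4_block6_out').output  # stride 16
    route3 = base.get_layer('conv5_block3_out').output  # stride 32
    return tf.keras.Model(base.input, [route1, route2, route3])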

How to split an image dataset in X_train, y_train, X_test, y_test by tensorflow?

How can I split the image data into X_train, Y_train, X_test and Y_test?
I am using Keras with the TensorFlow backend.
Thanks.
For example, suppose you have a folder structured like this:
full_dataset
|--horse (40 images)
|--donkey (30 images)
|--cow (50 images)
|--zebra (70 images)
FIRST WAY
import glob
import numpy as np
import tensorflow as tf

horse = glob.glob('full_dataset/horse/*.*')
donkey = glob.glob('full_dataset/donkey/*.*')
cow = glob.glob('full_dataset/cow/*.*')
zebra = glob.glob('full_dataset/zebra/*.*')

data = []
labels = []

for i in horse:
    image = tf.keras.preprocessing.image.load_img(i, color_mode='rgb',
                                                  target_size=(280, 280))
    data.append(np.array(image))
    labels.append(0)
for i in donkey:
    image = tf.keras.preprocessing.image.load_img(i, color_mode='rgb',
                                                  target_size=(280, 280))
    data.append(np.array(image))
    labels.append(1)
for i in cow:
    image = tf.keras.preprocessing.image.load_img(i, color_mode='rgb',
                                                  target_size=(280, 280))
    data.append(np.array(image))
    labels.append(2)
for i in zebra:
    image = tf.keras.preprocessing.image.load_img(i, color_mode='rgb',
                                                  target_size=(280, 280))
    data.append(np.array(image))
    labels.append(3)

data = np.array(data)
labels = np.array(labels)

from sklearn.model_selection import train_test_split
X_train, X_test, ytrain, ytest = train_test_split(data, labels, test_size=0.2,
                                                  random_state=42)
SECOND WAY
from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2)
train_dataset = image_generator.flow_from_directory(batch_size=32,
                                                    directory='full_dataset',
                                                    shuffle=True,
                                                    target_size=(280, 280),
                                                    subset="training",
                                                    class_mode='categorical')
validation_dataset = image_generator.flow_from_directory(batch_size=32,
                                                         directory='full_dataset',
                                                         shuffle=True,
                                                         target_size=(280, 280),
                                                         subset="validation",
                                                         class_mode='categorical')
The main drawback of the second way is that you can't use it to display a picture directly: writing validation_dataset[1] will raise an error, whereas the first way works, e.g. X_test[1].
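
On newer TensorFlow versions there is a third way that keeps both conveniences: tf.keras.utils.image_dataset_from_directory splits by folder like the generator does, and its batches are plain tensors you can index and display. A sketch:

import tensorflow as tf

train_ds = tf.keras.utils.image_dataset_from_directory(
    'full_dataset', validation_split=0.2, subset='training',
    seed=42, image_size=(280, 280), batch_size=32)
val_ds = tf.keras.utils.image_dataset_from_directory(
    'full_dataset', validation_split=0.2, subset='validation',
    seed=42, image_size=(280, 280), batch_size=32)

# Batches are ordinary tensors, so displaying an image works:
images, labels = next(iter(val_ds))
first_image = images[0]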
You don't have to use tensorflow or keras to divide your dataset.
If you have the sklearn package installed, then you can simply use it:
from sklearn.model_selection import train_test_split
X = ...
Y = ...
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
You can also use numpy for the same purpose:
import numpy
X = ...
Y = ...
test_size = 0.2
train_nsamples = int((1 - test_size) * len(Y))  # slice indices must be integers
x_train, x_test = X[:train_nsamples, :], X[train_nsamples:, :]
y_train, y_test = Y[:train_nsamples], Y[train_nsamples:]
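
One caveat with plain slicing: it preserves the original row order, so shuffle first if the data is sorted by class. A small sketch:

import numpy as np

# Shuffle X and Y together before slicing so the split is random,
# matching train_test_split's default behavior.
perm = np.random.permutation(len(Y))
X, Y = X[perm], Y[perm]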
Good Luck!

Training model on yolov3-tiny, but average loss always equals -nan

I'm experimenting with yolov3-tiny using darknet on Windows 10 with a CPU. However, I keep getting an average loss of -nan. I have followed all the directions at https://github.com/AlexeyAB/darknet.git. I edited my cfg file with all three filters for both yolo heads set to 21 (since I only have two classes), and set the subdivisions to 8 and the batch to 64. I'm using a little over 500 images that I made myself, and I'm trying to do custom detection: I want YOLO to determine whether the image is a thumbs up or a thumbs down. I have run the train command numerous times, but I never get past 100 iterations.
#config file:
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=21
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=21
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 3,4,5
anchors = 38, 93, 55,120, 66,156, 90,259, 110,239, 118,283
classes=2
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=21
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=21
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 0,1,2
anchors = 38, 93, 55,120, 66,156, 90,259, 110,239, 118,283
classes=2
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
Try setting random=0 in both [yolo] sections of the cfg; it works for me when training yolov3-tiny.