How to write a custom resize layer that takes a resize value from an Input layer? - tensorflow

I am trying to add a custom resize layer that does not have a fixed resize value; instead, it takes a scale value from an input layer.
I found this, but it uses a fixed resize value: Add a resizing layer to a keras sequential model
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow.keras.backend as K

class Resize(Layer):
    def __init__(self):
        super(Resize, self).__init__()

    def build(self, input_shape):
        super(Resize, self).build(input_shape)

    def call(self, x, size):
        out = tf.image.resize(x, size=size)
        return out

    def get_output_shape_for(self, input_shape):
        return (None, None, 3)

inp = Input((10,10,3))
size = Input((1,), dtype='int32')
out = Resize()(inp, size=(100,100))  # works; (inp, size=(size,size)) does not
model = Model([inp,size], out)
model.summary()
When I try this:
inp = Input((10,10,3))
size = Input((1,), dtype='int32')
out = Resize()(inp, size=(size,size))
model = Model([inp,size], out)
model.summary()
error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/keras/api/_v1/keras/models/__init__.py in <module>()
2 size = Input((1,), dtype='int32')
3
----> 4 out = Resize()(inp, size=(size,size)) #(inp, size=(size,size))
5
6 model = Model([inp,size], out)
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
634 outputs = base_layer_utils.mark_as_return(outputs, acd)
635 else:
--> 636 outputs = call_fn(inputs, *args, **kwargs)
637
638 except TypeError as e:
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
147 except Exception as e: # pylint:disable=broad-except
148 if hasattr(e, 'ag_error_metadata'):
--> 149 raise e.ag_error_metadata.to_exception(type(e))
150 else:
151 raise
ValueError: in converted code:
<ipython-input-1-ab7021ffbc7d>:14 call *
out = tf.image.resize(x,size=size)
/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/image_ops_impl.py:1182 resize_images
skip_resize_if_same=True)
/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/image_ops_impl.py:1045 _resize_images_common
raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
ValueError: 'size' must be a 1-D Tensor of 2 elements: new_height, new_width
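The message spells out the mismatch: size here is Input((1,)), i.e. a symbolic tensor of shape (None, 1), so the tuple (size, size) is two such tensors rather than a 1-D tensor with exactly 2 elements. For contrast, a valid size argument looks like this (a standalone illustration, not code from the question):

size_ok = tf.constant([100, 100], dtype=tf.int32)  # 1-D, 2 elements: new_height, new_width
out = tf.image.resize(x, size=size_ok)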

One workaround is to set size=Input(tensor=K.variable([2,2], dtype=tf.int32)).
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow.keras.backend as K

class Resize(Layer):
    def __init__(self):
        super(Resize, self).__init__()

    def build(self, input_shape):
        super(Resize, self).build(input_shape)

    def call(self, inputs):
        x = inputs[0]
        size = inputs[1]
        out = tf.image.resize(x, size=size)
        return out

    def get_output_shape_for(self, input_shape):
        return (None, None, 3)

inp = Input((10,10,3))
var_size = K.variable([2,2], dtype=tf.int32)
size = Input(tensor=var_size, name='size')
out = Resize()([inp, size])
model = Model([inp,size], out)
model.summary()
# Model: "model"
# __________________________________________________________________________________________________
# Layer (type)                    Output Shape          Param #     Connected to
# ==================================================================================================
# input_1 (InputLayer)            [(None, 10, 10, 3)]   0
# __________________________________________________________________________________________________
# input_2 (InputLayer)            [(2,)]                0
# __________________________________________________________________________________________________
# resize (Resize)                 (None, None, None, 3) 0           input_1[0][0]
#                                                                   input_2[0][0]
# ==================================================================================================
# Total params: 0
# Trainable params: 0
# Non-trainable params: 0

input_mat = np.random.randn(100,10,10,3)

K.set_value(var_size, [5,5])
res = model.predict(input_mat)  # the tensor-backed size input is not feedable, so only the image input is passed
# res.shape (100,5,5,3)

K.set_value(var_size, [3,3])
res = model.predict(input_mat)
# res.shape (100,3,3,3)
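If you are on TF 2.x with eager execution, a variant that skips the non-feedable Input(tensor=...) trick is to pass the size through as an ordinary second input and read a scalar from it inside call. A minimal sketch, assuming every sample in a batch shares the same target size (tf.image.resize takes one size per call):

class DynamicResize(Layer):
    def call(self, inputs):
        x, size = inputs                    # size has shape (batch, 1), dtype int32
        s = tf.cast(size[0, 0], tf.int32)   # use the first sample's value as the side length
        return tf.image.resize(x, size=tf.stack([s, s]))

inp = Input((10, 10, 3))
size = Input((1,), dtype='int32')
out = DynamicResize()([inp, size])
model = Model([inp, size], out)
# model.predict([imgs, np.full((len(imgs), 1), 5)])  -> images resized to (5, 5)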

Related

ValueError: Input 0 of layer is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(224, 224, 3), what is the problem?

I am trying to build a machine learning model using pre-trained VGG16 with tensorflow, but I keep getting the same problem with the shape of the input. Compared to other public examples, the only difference is that I use a tf.data.Dataset to feed the data, instead of the DirectoryIterator of tf.image.
Here is my code:
import os
from zipfile import ZipFile

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.utils import image_dataset_from_directory

zip_ref = ZipFile(zip_file, 'r')
zip_ref.extractall(repository_dir)
zip_ref.close()

train_dir = os.path.join(repository_dir, "seg_train", "seg_train")
test_dir = os.path.join(repository_dir, "seg_test", "seg_test")
os.system(f"rm -r {os.path.join(repository_dir, 'seg_pred')}")

# load variables
validation_percentage = 0.2
label_mode = "int"

# for our model purposes
img_size = (224, 224)
color_mode = 'rgb'

data_train, data_val = image_dataset_from_directory(
    train_dir,
    batch_size=None,
    label_mode=label_mode,
    color_mode=color_mode,
    image_size=img_size,
    validation_split=validation_percentage,
    subset="both",
    seed=123,
)
data_test = image_dataset_from_directory(
    test_dir,
    batch_size=None,
    label_mode=label_mode,
    color_mode=color_mode,
    image_size=img_size,
)

classes = data_train.class_names
print(classes)

scale = 1.0/255
normalization_layer = tf.keras.layers.Rescaling(scale)
data_train_norm = data_train.map(lambda x,y: (normalization_layer(x), y))
data_val_norm = data_val.map(lambda x,y: (normalization_layer(x), y))
data_test_norm = data_test.map(lambda x,y: (normalization_layer(x), y))

input_size = None
for img, label in data_train_norm.take(1).as_numpy_iterator():
    input_size = img.shape
print(input_size)

base_model = VGG16(
    input_shape=input_size,  # Shape of our images
    include_top=False,       # Leave out the last fully connected layer
    weights='imagenet'
)

# we do not train the parameters
for layer in base_model.layers:
    layer.trainable = False

# Flatten the output layer to 1 dimension
x = layers.Flatten()(base_model.output)
# https://medium.com/analytics-vidhya/car-brand-classification-using-vgg16-transfer-learning-f219a0f09765
# FC layer very simple and with a softmax activation unit
x = layers.Dense(len(classes), activation="softmax")(x)

landscapeModel01 = Model(inputs=base_model.input, outputs=x, name="landscapeModel01")

loss = "sparse_categorical_crossentropy"
optimizer = "adam"

landscapeModel01.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=["loss","accuracy"]
)

# fit data
shuffle = True   # variable
epochs = 50      # variable, depending on whether it is able to converge
batch_size = 200

print(landscapeModel01.input)

landscapeModel01.fit(
    data_train_norm,
    validation_data=data_val_norm,
    epochs=epochs,
    shuffle=shuffle,
    batch_size=batch_size
)
and this is the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [10], line 8
4 batch_size = 200
6 print(landscapeModel01.input)
----> 8 landscapeModel01.fit(
9 data_train_norm,
10 validation_data=data_val_norm,
11 epochs=epochs,
12 shuffle=shuffle,
13 batch_size=batch_size
14 )
File ~/anaconda3/envs/faa/lib/python3.10/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /tmp/__autograph_generated_file8y_bf523.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 1160, in train_function *
return step_function(self, iterator)
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 1146, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 1135, in run_step **
outputs = model.train_step(data)
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 993, in train_step
y_pred = self(x, training=True)
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/input_spec.py", line 295, in assert_input_compatibility
raise ValueError(
ValueError: Input 0 of layer "landscapeModel01" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(224, 224, 3)
What can I fix to make the code work?
versions:
tensorflow==2.10.0
EDIT
I just found the solution: I was loading images with batch_size=None, but the trained model demanded that the images have a batch dimension, even if it is 1.
Solution
I just needed to load images in image_dataset_from_directory with a batch_size different from None. Since my investigation did not consider data augmentation at this stage, I chose 1.
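A minimal sketch of that fix, reusing the settings from the question; any non-None batch_size gives every element the leading batch dimension the model expects:

data_train, data_val = image_dataset_from_directory(
    train_dir,
    batch_size=1,           # was None; 1 is enough to add the batch dimension
    label_mode=label_mode,
    color_mode=color_mode,
    image_size=img_size,
    validation_split=validation_percentage,
    subset="both",
    seed=123,
)
# equivalently, keep batch_size=None and batch afterwards:
# data_train = data_train.batch(1)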

tf.data multi output model has labels with incompatible shapes

I am trying to convert a workbook I did some time ago on Colab (using ImageDataGenerator) to one that uses tf.data.Dataset, as I now have a multi-GPU setup and am trying to learn how to do faster training. The model trains on the age/gender/race dataset from Kaggle, but in this instance we're interested in just the sex and age prediction. Sex will either be 0 or 1 and the loss function is binary crossentropy, while age is an integer between 0 and about 120 and the loss function is mse, as it is regression.
import tensorflow as tf
import os

AUTOTUNE = tf.data.AUTOTUNE
batch_size = 64

# Load datasets from directories
train_gen = tf.data.Dataset.list_files(os.listdir(training_dir), shuffle=False)
valid_gen = tf.data.Dataset.list_files(os.listdir(validation_dir), shuffle=False)

def decode_img(img):
    # Convert compressed string into a 3D tensor
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Resize the image to the desired size
    return tf.image.resize(img, [128,128])

def get_label(file):
    gender = get_sex(file)  # returns either 0 or 1
    age = get_age(file)     # returns integer between 0 and about 120
    return gender, age

def process_path(file):
    file = file.numpy()
    file_path = str(bytes.decode(file))
    file = file_path.split(' ')[-1].split("\\")[-1]
    labels = get_label(file)
    # Load data from file as a String
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    img = img / 255.0
    return img, labels

def _set_shapes(t1, t2):
    t1.set_shape((128,128,3))
    t2.set_shape((2,))
    return (t1, t2)

train_gen = train_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32]), num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32]), num_parallel_calls=AUTOTUNE)

train_gen = train_gen.map(_set_shapes, num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(_set_shapes, num_parallel_calls=AUTOTUNE)

train_gen = train_gen.batch(batch_size)
valid_gen = valid_gen.batch(batch_size)

train_gen
Output: <BatchDataset shapes: ((None, 128, 128, 3), (None, 2)), types: (tf.float32, tf.int32)>

# configure for performance
def config_for_performance(ds):
    ds = ds.cache()
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

train_gen = config_for_performance(train_gen)
valid_gen = config_for_performance(valid_gen)
The model itself:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Input, Activation, Flatten, BatchNormalization, PReLU
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import BinaryCrossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import mixed_precision

mixed_precision.set_global_policy('mixed_float16')

gpus = tf.config.list_logical_devices('GPU')
#print(gpus)
strategy = tf.distribute.MirroredStrategy(gpus, cross_device_ops=tf.distribute.ReductionToOneDevice())

with strategy.scope():
    # Define the convolution layers
    inp = Input(shape=(128,128,3))
    cl1 = Conv2D(32,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(inp)
    bn1 = BatchNormalization()(cl1)
    pr1 = PReLU(alpha_initializer='he_uniform')(bn1)
    cl2 = Conv2D(32,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(pr1)
    bn2 = BatchNormalization()(cl2)
    pr2 = PReLU(alpha_initializer='he_uniform')(bn2)
    mp1 = MaxPool2D((2,2))(pr2)
    cl3 = Conv2D(64,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp1)
    bn3 = BatchNormalization()(cl3)
    pr3 = PReLU(alpha_initializer='he_uniform')(bn3)
    cl4 = Conv2D(64,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(pr3)
    bn4 = BatchNormalization()(cl4)
    pr4 = PReLU(alpha_initializer='he_uniform')(bn4)
    mp2 = MaxPool2D((2,2))(pr4)
    cl5 = Conv2D(128,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp2)
    bn5 = BatchNormalization()(cl5)
    pr5 = PReLU(alpha_initializer='he_uniform')(bn5)
    mp3 = MaxPool2D((2,2))(pr5)
    cl6 = Conv2D(256,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp3)
    bn6 = BatchNormalization()(cl6)
    pr6 = PReLU(alpha_initializer='he_uniform')(bn6)
    mp4 = MaxPool2D((2,2))(pr6)
    cl7 = Conv2D(512,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp4)
    bn7 = BatchNormalization()(cl7)
    pr7 = PReLU(alpha_initializer='he_uniform')(bn7)
    mp5 = MaxPool2D((2,2))(pr7)
    flt = Flatten()(mp5)

    # This layer predicts age
    agelayer = Dense(128, activation='relu', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(flt)
    agelayer = BatchNormalization()(agelayer)
    agelayer = Dropout(0.6)(agelayer)
    agelayer = Dense(1, activation='relu', name='age_output', kernel_initializer='he_uniform', dtype='float32')(agelayer)

    # This layer predicts gender
    glayer = Dense(128, activation='relu', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(flt)
    glayer = BatchNormalization()(glayer)
    glayer = Dropout(0.5)(glayer)
    glayer = Dense(1, activation='sigmoid', name='gender_output', kernel_initializer='he_uniform', dtype='float32')(glayer)

    modelA = Model(inputs=inp, outputs=[glayer, agelayer])

model_folder = 'C:/Users/mm/OneDrive/Documents/Age estimation & gender classification/models'
if not os.path.exists(model_folder):
    os.mkdir(model_folder)

# Callback to control learning rate during training. Reduces learning rate by 5% after 3 epochs of no improvement on validation loss
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=3, min_lr=0.000005)
# Callback to stop training after 100 epochs of no improvement, restoring the best weights
es_callback = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True, min_delta=0.001)

# Compile Model A
modelA.compile(optimizer='Adam', loss={'gender_output': BinaryCrossentropy(), 'age_output': 'mse'}, metrics={'gender_output': 'accuracy', 'age_output': 'mae'})

# Training Model A
history = modelA.fit(train_gen, epochs=100, validation_data=valid_gen, callbacks=[es_callback, lr_callback])
The error message:
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Epoch 1/100
INFO:tensorflow:Error reported to Coordinator: logits and labels must have the same shape ((None, 1) vs (None, 2))
Traceback (most recent call last):
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\ops\nn_impl.py", line 130, in sigmoid_cross_entropy_with_logits
labels.get_shape().assert_is_compatible_with(logits.get_shape())
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\framework\tensor_shape.py", line 1161, in assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 2) and (None, 1) are incompatible
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\training\coordinator.py", line 297, in stop_on_exception
yield
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\distribute\mirrored_run.py", line 346, in run
self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\autograph\impl\api.py", line 692, in wrapper
return converted_call(f, args, kwargs, options=options)
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\autograph\impl\api.py", line 382, in converted_call
return _call_unconverted(f, args, kwargs, options)
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\autograph\impl\api.py", line 463, in _call_unconverted
return f(*args, **kwargs)
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 835, in run_step
outputs = model.train_step(data)
...
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\util\dispatch.py", line 206, in wrapper
return target(*args, **kwargs)
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\ops\nn_impl.py", line 132, in sigmoid_cross_entropy_with_logits
raise ValueError("logits and labels must have the same shape (%s vs %s)" %
ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2))
Managed to resolve this with a bit of research and trial and error. The main issues are:
1. The labels are being fed to the model as a tuple instead of being separated. With multiple output heads this is necessary:

def process_path(file):
    file = file.numpy()
    file_path = str(bytes.decode(file))
    file = file_path.split("\\")[-1]
    gender, age = get_label(file)
    # Load data from file as a String
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    img = img / 255.0
    return img, gender, age

NB: I made a modification to how the labels are extracted from the filename, as it wasn't getting it right all the time:

file = file_path.split("\\")[-1]

2. Due to the change in 1, the map functions need an additional dtype for the other label, so they become:

train_gen = train_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32, tf.int32]), num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32, tf.int32]), num_parallel_calls=AUTOTUNE)

3. Each label needs to be reshaped:

def _set_shapes(t1, t2, t3):
    t1.set_shape((128,128,3))
    t2.set_shape((1,))
    t3.set_shape((1,))
    t2 = tf.reshape(t2, [-1,1])
    t3 = tf.reshape(t3, [-1,1])
    return (t1, t2, t3)
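As a side note, Keras also accepts the labels for a multi-output model as a dict keyed by the output layer names, which removes any ambiguity about ordering. A small sketch of that alternative (my own variation on the fix above, using the gender_output/age_output names defined in the model):

def _to_multi_output(img, gender, age):
    # the keys must match the names of the model's output layers
    return img, {"gender_output": gender, "age_output": age}

train_gen = train_gen.map(_to_multi_output, num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(_to_multi_output, num_parallel_calls=AUTOTUNE)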

InvalidArgumentError: Matrix size-incompatible: In[0]: [256,3], In[1]: [65,1] [[{{node dense_51/BiasAdd}}]]

As I am new, please help me resolve this; I have been trying for months. Please ignore any small mistakes and focus on the error. Thanks in advance. I have been reshaping since I got this code and have now reached the bottom-most layer, but I am still getting the error mentioned above in the heading.
If you need any HDF5 files, please let me know your email ID.
from __future__ import print_function, division

import tensorflow as tf
tf.compat.v1.disable_v2_behavior()

import numpy as np
import h5py
import scipy.io
import random
import sys, os
import itertools
import numbers
from collections import Counter
from warnings import warn
from abc import ABCMeta, abstractmethod
from tensorflow import keras, reshape

np.random.seed(1337)

import keras
from keras.optimizers import RMSprop, SGD
from keras.models import Sequential, model_from_yaml
from keras.layers.core import Dense, Dropout, Activation, Flatten
import keras.layers.core as core
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Input, multiply, Reshape
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.layers.wrappers import Bidirectional
from keras.constraints import maxnorm
from keras.layers.recurrent import LSTM, GRU
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Embedding
from sklearn.metrics import fbeta_score, roc_curve, auc, roc_auc_score, average_precision_score
import matplotlib.pyplot as plt
from keras.regularizers import l2, l1, l1_l2
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from keras.engine.topology import Layer
from keras import activations, initializers, regularizers, constraints
from keras.layers import Input
from keras.layers import ActivityRegularization
class Attention(tf.keras.layers.Layer):
    def __init__(self, hidden, init='glorot_uniform', activation='linear',
                 W_regularizer=None, b_regularizer=None, W_constraint=None, **kwargs):
        self.init = initializers.get(init)
        self.activation = activations.get(activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.hidden = hidden
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.input_length = input_shape[1]
        self.W0 = self.add_weight(name='{}_W1'.format(self.name),
                                  shape=(input_dim, self.hidden),
                                  initializer='glorot_uniform', trainable=True)  # Keras 2 API
        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(self.hidden, 1),
                                 initializer='glorot_uniform', trainable=True)
        self.b0 = K.zeros((self.hidden,), name='{}_b0'.format(self.name))
        self.b = K.zeros((1,), name='{}_b'.format(self.name))
        # self.trainable_weights = [self.W0, self.W, self.b, self.b0]
        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)
        self.constraints = {}
        if self.W_constraint:
            self.constraints[self.W0] = self.W_constraint
            self.constraints[self.W] = self.W_constraint
        super(Attention, self).build(input_shape)

    def call(self, x, mask=None):
        attmap = self.activation(K.dot(x, self.W0) + self.b0)
        print("self.b.shape=", self.b.shape)
        attmap = K.dot(attmap, self.W) + self.b
        print("attmap.shape=", attmap.shape)
        # till now it was for attention fully connected network/dot product
        attmap = K.reshape(attmap, (-1, self.input_length))
        attmap = K.softmax(attmap)
        print("attmap.shape1=", attmap.shape)
        print("x.shape1=", x.shape)
        dense_representation = K.batch_dot(attmap, x, axes=(1, 1))
        print("dense_representation.shape=", dense_representation.shape)
        out = K.concatenate([dense_representation, attmap], axis=1)
        print("out.shape=", out.shape)
        return out

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1] + input_shape[1])

    def get_config(self):
        config = {'init': 'glorot_uniform',
                  'activation': self.activation.__name__,
                  'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
                  'hidden': self.hidden if self.hidden else None}
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class attention_flatten(tf.keras.layers.Layer):
    def __init__(self, keep_dim, **kwargs):
        self.keep_dim = keep_dim
        super(attention_flatten, self).__init__(**kwargs)

    def build(self, input_shape):
        pass

    def compute_output_shape(self, input_shape):
        if not all(input_shape[1:]):
            raise Exception('The shape of the input to "Flatten" '
                            'is not fully defined '
                            '(got ' + str(input_shape[1:]) + '. '
                            'Make sure to pass a complete "input_shape" '
                            'or "batch_input_shape" argument to the first '
                            'layer in your model.')
        return (input_shape[0], self.keep_dim)

    def call(self, x, mask=None):
        x = x[1:, :self.keep_dim]
        return (x)
def set_up_model_up():
    print('building model')
    seq_input_shape = (2000, 4,)
    nb_filter = 64
    filter_length = 6
    input_shape = (2000, 4,)
    attentionhidden = 256

    seq_input = Input(shape=seq_input_shape, name='seq_input')
    convul1 = tf.keras.layers.Convolution1D(filters=nb_filter,
                                            kernel_size=filter_length,
                                            padding='valid',
                                            activation='relu',
                                            kernel_constraint=maxnorm(3),
                                            )
    # modil = Sequential()
    pool_ma1 = keras.layers.MaxPooling1D(pool_size=3)
    dropout1 = Dropout(0.5977908689086315)
    dropout2 = Dropout(0.30131233477637737)
    decoder = Attention(hidden=attentionhidden, activation='linear')
    dense1 = tf.keras.layers.Dense(1)
    dense2 = tf.keras.layers.Dense(256)

    output_1 = pool_ma1(convul1(seq_input))
    output_2 = (output_1)
    output_6 = tf.reshape(output_2, [1, 1995, 64])
    print("output_2's'shap[2]e=", output_2.shape[2])
    print("output_2's'shape=", output_2.shape)
    att_decoder = decoder(output_6)
    # tf.slice(output_6, begin, size, name=None)
    output_3 = attention_flatten(3)(att_decoder)
    output_2 = tf.reshape(output_2, [1, 3, 42560])
    output_4 = dense1(((output_2)))
    output_4 = tf.keras.layers.Flatten()(output_4)
    print("output_3p.shape=", output_3.shape)
    print("output_4p.shape=", output_4.shape)
    all_outp = K.concatenate([output_3, output_4], axis=0)
    print(all_outp)
    print("all_outp.shape", all_outp.shape)
    output_5 = dense2(all_outp)
    tf.keras.layers.Add()
    output_f = tf.keras.layers.Activation('sigmoid')(output_5)
    output_c = tf.keras.layers.Add()(output_f)

    modil = tf.keras.models.Model(inputs=seq_input, outputs=output_f)
    modil.build(input_shape=input_shape)
    modil.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])
    print(modil.summary())
    print("len(modil number of layers)", len(modil.layers))
    return modil
def test(n_estimators=16):
    model = set_up_model_up()
    model.save_weights('Sequential_model_weights.h5')
    model.load_weights('Sequential_model_weights.h5', by_name=True)
    X_test = np.load('/content/drive/MyDrive/X_test.npy', mmap_mode='r')
    y_test = np.load('/content/drive/MyDrive/y_test.npy', mmap_mode='r')
    ensemble = np.zeros(len(X_test))
    for i in range(n_estimators):
        print('testing', i, 'model')
        print('model shape is', model.summary)
        model.load_weights('/content/drive/MyDrive/model/bestmodel_split_chr_GD_' + str(i) + '.hdf5')
        print('model shape after loading is', model.summary)
        print('Predicting...')
        print('testing', X_test.shape)
        print(len(model.layers))
        # y_score = model.predict(np.expand_dims(np.array(X_test, dtype=np.float32), 0), verbose = 1, batch_size = 256)
        formatmul = np.empty((3, 2000, 4), dtype=object)
        for x in range(0, 2):
            for y in range(0, 1999):
                for z in range(0, 3):
                    formatmul[x][y][z] = X_test[x][y][z]
        # y_score = model.predict(X_test).reshape(665,-1), verbose = 1, batch_size = 256)
        print("model.output_shape", model.output_shape)
        print("model.input_shape", model.input_shape)
        # y_score = model.predict(formatmul, batch_size=42560)
        y_score = model.predict(np.array(formatmul, dtype=np.float32), batch_size=64)
        y_pred = []
        for item in y_score:
            y_pred.append(item[0])
        y_pred = np.array(y_pred)
        ensemble += y_pred
    ensemble /= n_estimators
    np.save('/content/drive/MyDrive/test_result/y_test', y_test)
    np.save('/content/drive/MyDrive/test_result/y_pred', ensemble)
    auroc = roc_auc_score(y_test, ensemble)
    aupr = average_precision_score(y_test, ensemble)
    print('auroc', auroc)
    print('aupr', aupr)

test(n_estimators=16)
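A hedged observation, based only on the shapes printed below: the model hard-codes batch-dependent sizes into the graph (tf.reshape(output_2, [1, 1995, 64]) and [1, 3, 42560], where 1995 = 3 × 665 fuses three samples into one), so a predict call whose batch does not match those constants produces exactly this kind of size-incompatible matmul/bias-add. A batch-agnostic reshape keeps the leading dimension symbolic, for example:

output_6 = tf.reshape(output_2, [-1, output_2.shape[1], output_2.shape[2]])  # (batch, 665, 64), no hard-coded batch constant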
Stack trace:
WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
building model
output_2's'shap[2]e= 64
output_2's'shape= (?, 665, 64)
self.b.shape= (1,)
attmap.shape= (1, 1995, 1)
attmap.shape1= (1, 1995)
x.shape1= (1, 1995, 64)
dense_representation.shape= (1, 64)
out.shape= (1, 2059)
output_3p.shape= (0, 3)
output_4p.shape= (1, 3)
Tensor("concat:0", shape=(1, 3), dtype=float32)
all_outp.shape (1, 3)
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
conv1d (Conv1D) (None, 1995, 64) 1600 seq_input[0][0]
__________________________________________________________________________________________________
tf_op_layer_Reshape (TensorFlow [(1, 1995, 64)] 0 max_pooling1d[0][0]
__________________________________________________________________________________________________
tf_op_layer_Reshape_1 (TensorFl [(1, 3, 42560)] 0 max_pooling1d[0][0]
__________________________________________________________________________________________________
attention (Attention) (1, 2059) 16897 tf_op_layer_Reshape[0][0]
__________________________________________________________________________________________________
dense (Dense) (1, 3, 1) 42561 tf_op_layer_Reshape_1[0][0]
__________________________________________________________________________________________________
attention_flatten (attention_fl (0, 3) 0 attention[0][0]
__________________________________________________________________________________________________
flatten (Flatten) (1, 3) 0 dense[0][0]
__________________________________________________________________________________________________
tf_op_layer_concat (TensorFlowO [(1, 3)] 0 attention_flatten[0][0]
flatten[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (1, 256) 1024 tf_op_layer_concat[0][0]
__________________________________________________________________________________________________
activation (Activation) (1, 256) 0 dense_1[0][0]
==================================================================================================
Total params: 62,082
Trainable params: 62,082
Non-trainable params: 0
__________________________________________________________________________________________________
None
len(modil number of layers) 10
testing 0 model
model shape is <bound method Model.summary of <tensorflow.python.keras.engine.functional.Functional object at 0x7f5faf959350>>
model shape after loading is <bound method Model.summary of <tensorflow.python.keras.engine.functional.Functional object at 0x7f5faf959350>>
Predicting...
testing (172832, 2000, 4)
10
model.output_shape (1, 256)
model.input_shape (None, 2000, 4)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:2426: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
warnings.warn('`Model.state_updates` will be removed in a future version. '
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-1-b9a05eeb9fa1> in <module>()
240 # if __name__ == '__main__':
241 # set_up_model_up()
--> 242 test(n_estimators = 16)
5 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1480 ret = tf_session.TF_SessionRunCallable(self._session._session,
1481 self._handle, args,
-> 1482 run_metadata_ptr)
1483 if run_metadata:
1484 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

Tensorflow nonsense reshape values

When using tensorflow.keras.layers.Reshape I've been getting strange errors. Where is it getting the 47409408 value from? 207936 corresponds to the correct size (69312*3).
A weird aspect is that if I put a Flatten layer before the Reshape, it works.
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 304, 228, 3) 30
_________________________________________________________________
reshape (Reshape) (None, 69312, 3) 0
=================================================================
Total params: 30
Trainable params: 30
Non-trainable params: 0
____________________________________
(0) Invalid argument: Input to reshape is a tensor with 207936 values, but the requested shape has 47409408
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from PIL import Image
from tensorflow.keras import datasets, layers, models, preprocessing
import os
from natsort import natsorted
from tensorflow.keras.models import Model

BATCH_SIZE = 32
EPOCHS = 15
LEARNING_RATE = 1e-4

# jpegs with values from 0 to 255
img_dir = ".../normalized_imgs"
# .npy files of size (69312,3)
pts_dir = ".../normalized_pts"

img_files = [os.path.join(img_dir, f)
             for f in natsorted(os.listdir(img_dir))]
pts_files = [os.path.join(pts_dir, f)
             for f in natsorted(os.listdir(pts_dir))]

img = Image.open(img_files[0])
pts = np.load(pts_files[0])

def parse_img_input(img_file, pts_file):
    def _parse_input(img_file, pts_file):
        # get image
        d_filepath = img_file.numpy().decode()
        d_image_decoded = tf.image.decode_jpeg(tf.io.read_file(d_filepath), channels=1)
        d_image = tf.cast(d_image_decoded, tf.float32) / 255.0
        # get numpy data
        pts_filepath = pts_file.numpy().decode()
        pts = np.load(pts_filepath, allow_pickle=True)
        print("d_image ", d_image.shape)
        return d_image, pts
    return tf.py_function(_parse_input,
                          inp=[img_file, pts_file],
                          Tout=[tf.float32, tf.float32])

class SimpleCNN(Model):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        input_shape = (img.size[0], img.size[1], 1)
        self.model = model = models.Sequential()
        model.add(tf.keras.Input(shape=input_shape))
        model.add(layers.Conv2D(3, (3,3), padding='same'))
        model.add(layers.Reshape((pts.shape[0], pts.shape[1])))

# split input data into train, test sets
X_train_file, X_test_file, y_train_file, y_test_file = train_test_split(img_files, pts_files,
                                                                        test_size=0.2,
                                                                        random_state=0)

model = SimpleCNN()

dataset_train = tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
dataset_train = dataset_train.map(parse_img_input)

dataset_test = tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
dataset_test = dataset_test.map(parse_img_input)

model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
              loss=tf.losses.MeanSquaredError(),
              metrics=[tf.keras.metrics.get('accuracy')])
model.fit(dataset_train, epochs=EPOCHS, shuffle=True, validation_data=dataset_test)
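A hedged diagnosis, not from the original thread: the datasets are never batched, so fit hands the model single, unbatched images and a spurious batch dimension gets inferred inside the graph; the Reshape target is then multiplied by that inferred batch (228 × 207936 = 47409408), while the incoming tensor still only has 207936 values. A Flatten "works" only because it collapses everything regardless of shape. Batching the datasets before fit is the usual fix:

dataset_train = dataset_train.batch(BATCH_SIZE)
dataset_test = dataset_test.batch(BATCH_SIZE)

model.fit(dataset_train, epochs=EPOCHS, shuffle=True, validation_data=dataset_test)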

I am getting OOM while running a pre-trained BERT model on a new dataset of 20k tweets

I have a pre-trained model with an accuracy of 96% after 2 epochs, and I am trying to use that model on a new dataset of 20k tweets for sentiment analysis. While doing that I get the error below.
I haven't faced any issues while training the model with the same amount of data, so I am not sure why I am getting this while using the model for inference.
ResourceExhaustedError: OOM when allocating tensor with shape[1079190,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResourceGather]
Code:
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import InputExample,InputFeatures
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model.summary()
Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
bert (TFBertMainLayer) multiple 109482240
_________________________________________________________________
dropout_37 (Dropout) multiple 0
_________________________________________________________________
classifier (Dense) multiple 1538
=================================================================
Total params: 109,483,778
Trainable params: 109,483,778
Non-trainable params: 0
train = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train', batch_size=30000, validation_split=0.2,
                                                           subset='training', seed=123)
test = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train', batch_size=30000, validation_split=0.2,
                                                          subset='validation', seed=123)

Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.

import pandas as pd  # needed for the DataFrame conversion below

for data in train.take(1):
    train_feat = data[0].numpy()
    train_lab = data[1].numpy()

train = pd.DataFrame([train_feat, train_lab]).T
train.columns = ['DATA_COLUMN', 'LABEL_COLUMN']
train['DATA_COLUMN'] = train['DATA_COLUMN'].str.decode('utf-8')

for data in test.take(1):
    test_feat = data[0].numpy()
    test_lab = data[1].numpy()

test = pd.DataFrame([test_feat, test_lab]).T
test.columns = ['DATA_COLUMN', 'LABEL_COLUMN']
test['DATA_COLUMN'] = test['DATA_COLUMN'].str.decode('utf-8')
test.head()
def convert_data_to_examples(train, test, DATA_COLUMN, LABEL_COLUMN):
    train_InputExamples = train.apply(lambda x: InputExample(guid=None,  # Globally unique ID for bookkeeping, unused in this case
                                                             text_a=x[DATA_COLUMN],
                                                             text_b=None,
                                                             label=x[LABEL_COLUMN]), axis=1)
    validation_InputExamples = test.apply(lambda x: InputExample(guid=None,  # Globally unique ID for bookkeeping, unused in this case
                                                                 text_a=x[DATA_COLUMN],
                                                                 text_b=None,
                                                                 label=x[LABEL_COLUMN]), axis=1)
    return train_InputExamples, validation_InputExamples

train_InputExamples, validation_InputExamples = convert_data_to_examples(train,
                                                                         test,
                                                                         'DATA_COLUMN',
                                                                         'LABEL_COLUMN')

def convert_examples_to_tf_dataset(examples, tokenizer, max_length=128):
    features = []  # -> will hold InputFeatures to be converted later
    for e in examples:
        # Documentation is really strong for this method, so please take a look at it
        input_dict = tokenizer.encode_plus(
            e.text_a,
            add_special_tokens=True,
            max_length=max_length,  # truncates if len(s) > max_length
            return_token_type_ids=True,
            return_attention_mask=True,
            pad_to_max_length=True,  # pads to the right by default # CHECK THIS for pad_to_max_length
            truncation=True
        )
        input_ids, token_type_ids, attention_mask = (input_dict["input_ids"],
                                                     input_dict["token_type_ids"], input_dict['attention_mask'])
        features.append(
            InputFeatures(
                input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label
            )
        )

    def gen():
        for f in features:
            yield (
                {
                    "input_ids": f.input_ids,
                    "attention_mask": f.attention_mask,
                    "token_type_ids": f.token_type_ids,
                },
                f.label,
            )

    return tf.data.Dataset.from_generator(
        gen,
        ({"input_ids": tf.int32, "attention_mask": tf.int32, "token_type_ids": tf.int32}, tf.int64),
        (
            {
                "input_ids": tf.TensorShape([None]),
                "attention_mask": tf.TensorShape([None]),
                "token_type_ids": tf.TensorShape([None]),
            },
            tf.TensorShape([]),
        ),
    )

DATA_COLUMN = 'DATA_COLUMN'
LABEL_COLUMN = 'LABEL_COLUMN'

# We can call the functions we created above with the following lines:
train_InputExamples, validation_InputExamples = convert_data_to_examples(train, test, DATA_COLUMN, LABEL_COLUMN)

train_data = convert_examples_to_tf_dataset(list(train_InputExamples), tokenizer)
train_data = train_data.shuffle(100).batch(32).repeat(2)

validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples), tokenizer)
validation_data = validation_data.shuffle(100).batch(32)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])

model.fit(train_data, epochs=2, validation_data=validation_data)
# this is my new data with 20k rows on which I want to run the pretrained model:
tweets_list = statement_df['sentiment'].tolist()

# this part of the code is serving that purpose
tf_batch = tokenizer(tweets_list, max_length=128, padding=True, truncation=True, return_tensors='tf')
#print(tf_batch)
tf_outputs = model(tf_batch)  # this line throws the OOM issue
tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
labels = ['Negative','Positive']
label = tf.argmax(tf_predictions, axis=1)
label = label.numpy()
for i in range(len(tweets_list)):
    print(tweets_list[i], ": \n", labels[label[i]])
Error:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py in gather_v2(params, indices, validate_indices, axis, batch_dims, name)
4830 name=name,
4831 axis=axis,
-> 4832 batch_dims=batch_dims)
4833
4834
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py in gather(***failed resolving arguments***)
4811 # TODO(apassos) find a less bad way of detecting resource variables
4812 # without introducing a circular dependency.
-> 4813 return params.sparse_read(indices, name=name)
4814 except AttributeError:
4815 return gen_array_ops.gather_v2(params, indices, axis, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/resource_variable_ops.py in sparse_read(self, indices, name)
701 variable_accessed(self)
702 value = gen_resource_variable_ops.resource_gather(
--> 703 self._handle, indices, dtype=self._dtype, name=name)
704
705 if self._dtype == dtypes.variant:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_resource_variable_ops.py in resource_gather(resource, indices, dtype, batch_dims, validate_indices, name)
547 return _result
548 except _core._NotOkStatusException as e:
--> 549 _ops.raise_from_not_ok_status(e, name)
550 except _core._FallbackException:
551 pass
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
6860 message = e.message + (" name: " + name if name is not None else "")
6861 # pylint: disable=protected-access
-> 6862 six.raise_from(core._status_to_exception(e.code, message), None)
6863 # pylint: enable=protected-access
6864
/usr/local/lib/python3.7/dist-packages/six.py in raise_from(value, from_value)
ResourceExhaustedError: OOM when allocating tensor with shape[1079190,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResourceGather]
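The shape in the message is the giveaway: a 1079190 × 768 float tensor is roughly 3.3 GB, the embedding lookup for every token of all ~20k tweets at once. Training worked because train_data was batched with .batch(32); the inference call pushes the whole tokenized set through the model in one go. A hedged sketch of batched inference (statement_df, tokenizer and model come from the question; the batch size of 32 is an assumption to tune to your GPU memory):

dataset = tf.data.Dataset.from_tensor_slices(dict(tf_batch)).batch(32)

probs = []
for batch in dataset:
    out = model(batch)                            # forward pass on 32 tweets at a time
    probs.append(tf.nn.softmax(out[0], axis=-1))  # out[0] is the logits tensor
tf_predictions = tf.concat(probs, axis=0)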