Related
I am trying to set up a ConvLSTM2D architecture that takes as input and output the data with the following shapes:
Input shape: (1000, 10, 100, 100, 1)
Output shape: (1000, 5, 100, 100, 1)
As you can notice, the difference between these input and output shapes are timesteps. Is there any way I can set up such an architecture?
The following is the one that I am using right now, which doesn't allow me to specify input / output timesteps separately.
inputs = layers.Input(shape=(None, x_train.shape[2], x_train.shape[2], 1 ))
x = layers.ConvLSTM2D(filters=64, kernel_size=(5,5), padding="same", return_sequences=True, activation="relu")(inputs)
x = layers.BatchNormalization()(x)
x = layers.ConvLSTM2D(filters=64, kernel_size=(3,3), padding="same", return_sequences=True, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.ConvLSTM2D(filters=64, kernel_size=(1,1), padding="same", return_sequences=True, activation="relu")(x)
outputs = layers.Conv3D(filters=1, kernel_size=(3,3,3), activation="sigmoid", padding="same")(x)
model = keras.models.Model(inputs, outputs)
model.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.Adam())
model.summary()
Appreciate any help!
I've been trying to translate some PyTorch code to TensorFlow 2, but the TF2 code is around 10 times slower. I've tried looking at where this might come from, and as far as I can tell it comes from the tape.gradient call (performance was the same with keras' .fit function). I've tried to use different data loaders, ways of declaring the model, installations, etc... and the results have been consistent.
Any explanation / solution as to why this is happening would be much appreciated.
Here is a minimalist version of the TF2 code:
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18, 1)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)
# Create a small model
model = tf.keras.Sequential([
layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
layers.Conv1D(256, kernel_size=1, strides=1, padding="same", activation="relu"),
layers.GlobalAveragePooling2D(),
layers.Flatten(),
layers.Dense(128, use_bias=True, activation="relu"),
layers.Dense(32, use_bias=True, activation="relu"),
layers.Dense(1, activation='sigmoid', use_bias=True),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)
#tf.function
def train_step(data_batch, label_batch):
with tf.GradientTape() as tape:
y_pred = model(data_batch)
loss = tf.keras.losses.MSE(labels_batch, y_pred)
gradients = tape.gradient(loss, model.trainable_weights)
optimizer.apply_gradients(zip(gradients, model.trainable_weights))
step_times = []
for epoch in range(20):
for data_batch, labels_batch in train_dataset:
step_start_time = time.perf_counter()
train_step(data_batch, labels_batch)
if epoch != 0:
step_times.append(time.perf_counter()-step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
And the PyTorch equivalent:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.backends.cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 18, 120)
# Create a small model
class Model(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv1d(18, 64, kernel_size=7, stride=3, padding=3)
self.conv2 = nn.Conv1d(64, 64, kernel_size=5, stride=2, padding=2)
self.conv3 = nn.Conv1d(64, 128, kernel_size=5, stride=2, padding=2)
self.conv4 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
self.conv5 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
self.conv6 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
self.fc1 = nn.Linear(256, 128)
self.fc2 = nn.Linear(128, 32)
self.fc3 = nn.Linear(32, 1)
def forward(self, inputs):
x = F.relu(self.conv1(inputs))
x = F.relu(self.conv2(x))
x = F.relu(self.conv3(x))
x = F.relu(self.conv4(x))
x = F.relu(self.conv5(x))
x = F.relu(self.conv6(x))
x = x.mean(2)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = torch.sigmoid(self.fc3(x))
return x
model = Model()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
loss_fn = torch.nn.MSELoss()
batch_size = 256
train_steps_per_epoch = train_data.shape[0] // batch_size
step_times = []
for epoch in range(20):
for step in range(train_steps_per_epoch):
batch_start, batch_end = step * batch_size, (step+1) * batch_size
data_batch = torch.FloatTensor(train_data[batch_start:batch_end]).to(device)
labels_batch = torch.FloatTensor(train_labels[batch_start:batch_end]).to(device)
step_start_time = time.perf_counter()
optimizer.zero_grad()
y_pred = model(data_batch)
loss = loss_fn(labels_batch, torch.squeeze(y_pred))
loss.backward()
optimizer.step()
if epoch != 0:
step_times.append(time.perf_counter()-step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
You're using tf.GradientTape correctly, but both your models and data are different in the snippets you provided.
Here is the TF code that uses the same data and model architecture as your Pytorch model.
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)
model = tf.keras.Sequential([
layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
layers.Conv1D(256, kernel_size=3, strides=1, padding="same", activation="relu"),
layers.GlobalAveragePooling1D(),
layers.Dense(128, use_bias=True, activation="relu"),
layers.Dense(32, use_bias=True, activation="relu"),
layers.Dense(1, activation='sigmoid', use_bias=True),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)
#tf.function
def train_step(data_batch, label_batch, model):
with tf.GradientTape() as tape:
y_pred = model(data_batch, training=True)
loss = tf.keras.losses.MSE(labels_batch, y_pred)
gradients = tape.gradient(loss, model.trainable_weights)
optimizer.apply_gradients(zip(gradients, model.trainable_weights))
step_times = []
for epoch in range(20):
for data_batch, labels_batch in train_dataset:
step_start_time = time.perf_counter()
train_step(data_batch, labels_batch, model)
if epoch != 0:
step_times.append(time.perf_counter()-step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
So, in reality, TF is 3 times faster than Pytorch: 0.035s vs 0.112s
I keep on getting the same error I think I have the problem with the input shapes, Please help me
X = Features.iloc[: ,:-1].values
Y = Features['labels'].values
As this is a multiclass classification problem onehotencoding our Y.
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(Y).reshape(-1,1)).toarray()
# splitting data
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# scaling our data with sklearn's Standard scaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# making our data compatible to model.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_train.shape[1]
The model
model=Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
#history=model.fit(x_train, y_train, callbacks=[rlrp])
I'm getting the error when trying to fit the model.
Here is your code running.
Since you did not provide any sample data, I had to fake some data and I will explain what is the issue.
Your y_train must have a depth of 8 since your softmax layer is 8.
If you want to get the same error in my code, change
y_train = tf.one_hot(tf.random.uniform(shape=[1000],minval=0, maxval=2, dtype=tf.int32),8) #change the depth to 7 and you will see your error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from matplotlib import pyplot as plt
import numpy as np
x_train = tf.random.normal(shape=(1000,10,1), dtype = tf.float32)
x_test = tf.random.normal(shape=(100,10,1), dtype = tf.float32)
y_train = tf.one_hot(tf.random.uniform(shape=[1000],minval=0, maxval=2, dtype=tf.int32),8)
y_test = tf.one_hot(tf.random.uniform(shape=[100],minval=0, maxval=2, dtype=tf.int32),8)
tf.print(y_train)
model=tf.keras.Sequential()
model.add(layers.Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Dropout(0.2))
model.add(layers.Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Flatten())
model.add(layers.Dense(units=32, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
rlrp = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
#history=model.fit(x_train, y_train, callbacks=[rlrp])
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(Y).reshape(-1,1)).toarray()
# splitting data
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# scaling our data with sklearn's Standard scaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# making our data compatible to model.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_train.shape[1]
My goal is to develop an app that does image classification. Keras model seems to work good but after i try to convert to Tensorflow Lite, output is completely wrong.
These are the layers of the model
model = Sequential([
layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])
This is how i save and then convert the model.
model.save('/content/drive/My Drive/my_model.h5')
TF_LITE_MODEL_NAME = "tf_lite_model.tflite"
tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = tf_lite_converter.convert()
tflite_model_name = TF_LITE_MODEL_NAME
open(tflite_model_name, "wb").write(tflite_model)
Output is like this:
[[ 1.4364377 -0.02920453 -0.149581 -0.7537567 ]]
Why this problem occur?
Edit (Code that gives random input to the model)
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="/content/tf_lite_model.tflite")
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test model on random input data.
input_shape = input_details[0]['shape']
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
Below is the code i am using for training some gestures. the directory for training data is as follows
'E:\build\set_1\training\palm\seq_01','E:\build\set_1\training\palm\seq_02' and so on.
The error i am follwong is on the last lines. I have tried both of the two lines as provided but they are giving error as Invalid Argument error. I am running this code on jupyter notebook.
import tensorflow as tf
from tensorflow import keras
from keras_preprocessing.image import ImageDataGenerator
path = 'E:\build\set_1\training'
training_datagen = ImageDataGenerator(rescale = 1./255)
TRAINING_DIR = 'E:/build/set_1/training/'
train_generator = training_datagen.flow_from_directory(
TRAINING_DIR,
target_size = (150,150),
class_mode= 'categorical',
batch_size=64
)
VALIDATION_DIR = "E:/build/set_1/test/"
validation_datagen = ImageDataGenerator(rescale = 1./255)
validation_generator = training_datagen.flow_from_directory(
VALIDATION_DIR,
target_size=(150,150),
class_mode='categorical',
batch_size=64
)
model = tf.keras.models.Sequential([
# Note the input shape is the desired size of the image 150x150 with 3 bytes color
# This is the first convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(150, 150, 3)),
tf.keras.layers.MaxPooling2D(2, 2),
# The second convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The third convolution
tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The fourth convolution
tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# Flatten the results to feed into a DNN
tf.keras.layers.Flatten(),
tf.keras.layers.Dropout(0.5),
# 512 neuron hidden layer
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dense(3, activation='softmax')
])
model.summary()
model.compile(loss='categorical_crossentropy',optimizer = 'rmsprop',
metrics= ['accuracy'])
history = model.fit_generator(train_generator,steps_per_epoch = train_generator.samples//train_generator.batch_size,epochs = 30,validation_data = validation_generator,validation_steps=validation_generator.samples//validation_generator.batch_size)
history = model.fit(train_generator, epochs=25, validation_data = validation_generator, verbose = 1)