Everything was going well with these CNNs for classifying MarkovTransitionField sequences while I was using tensorflow, but when I switched to tensorflow-gpu all predictions return the same value, so the model isn't learning (but it is fast).
# imports assumed for the snippets below
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

# model 1
def model_1_signal():
    model = Sequential()
    model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
                     padding = 'same', input_shape = (145,5,5),
                     kernel_initializer = 'he_normal',
                     bias_initializer = 'zeros'))
    model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
                     padding = 'same', kernel_initializer = 'he_normal',
                     bias_initializer = 'zeros'))
    model.add(Flatten())
    model.add(Dense(2, activation = 'sigmoid',
                    kernel_initializer = 'glorot_uniform',
                    bias_initializer = 'zeros'))
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = 'adam',
                  metrics = ['accuracy'])
    return model
# model 2
def model_2_signal():
    model = Sequential()
    model.add(Conv2D(73, (5,5), activation = 'relu',
                     padding = 'same', input_shape = (145,5,5)))
    model.add(Dropout(0.2))
    model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
                     padding = 'same'))
    model.add(Dropout(0.2))
    model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
                     padding = 'same'))
    model.add(Dropout(0.2))
    model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
                     padding = 'same'))
    model.add(MaxPooling2D(pool_size = (1,1), strides = 3))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(64, activation = 'relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation = 'sigmoid'))
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = 'adam',
                  metrics = ['accuracy'])
    return model
EPOCHS = 300
BATCH_SIZE = 15

train_X, val_X, train_y, val_y, train_date, val_date = train_test_split(
    bullish_episodes_img, y, date, test_size = 0.13, shuffle = False)
val_X, test_X, val_y, test_y, val_date, test_date = train_test_split(
    val_X, val_y, val_date, test_size = 0.38, shuffle = False)

model_1 = model_1_signal()
model_1.fit(train_X, train_y, validation_data = (val_X, val_y),
            epochs = EPOCHS, batch_size = BATCH_SIZE, verbose = 2,
            shuffle = True)
yhat_model1 = model_1.predict(test_X)
yhat_model1 = np.where(yhat_model1 >= 0.5, 1, 0)
df1 = pd.DataFrame({'signal': yhat_model1[:,1], 'test': test_y[:,1], 'time': test_date})
df1 = df1.sort_values(by='time')
#model_1.save('/home/f320x/Documents/AT/Py (1)/final_project/new_standard/spyder/spyder/EURCAD/signal_model/signal_model1')

model_2 = model_2_signal()
model_2.fit(train_X, train_y, validation_data = (val_X, val_y),
            epochs = EPOCHS, batch_size = BATCH_SIZE, verbose = 2,
            shuffle = True)
yhat_model2 = model_2.predict(test_X)
yhat_model2 = np.where(yhat_model2 >= 0.5, 1, 0)
df2 = pd.DataFrame({'signal': yhat_model2[:,1], 'test': test_y[:,1], 'time': test_date})
df2 = df2.sort_values(by='time')
Before transforming the data to MarkovTransitionField images, I scaled all values between 0 and 1, and there are no NaNs in the dataset.
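For context, the preprocessing was roughly along these lines (a minimal sketch assuming pyts' MarkovTransitionField and sklearn's MinMaxScaler; raw_series and mtf_images are placeholder names, not my exact variables):

from pyts.image import MarkovTransitionField
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# raw_series: placeholder array of shape (n_samples, n_timestamps)
raw_series = np.random.rand(100, 50)

# scale every value into [0, 1]
scaler = MinMaxScaler()
scaled = scaler.fit_transform(raw_series)

# one 5x5 Markov Transition Field per series
mtf = MarkovTransitionField(image_size=5, n_bins=5)
mtf_images = mtf.fit_transform(scaled)   # shape (n_samples, 5, 5)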
Does somebody have a hint?
BATCH_SIZE = 32 # ADVISED NOT TO CHANGE THIS
N_PAST = 10 # DO NOT CHANGE THIS
N_FUTURE = 10 # DO NOT CHANGE THIS
SHIFT = 1
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                           strides=1, padding="causal",
                           activation="relu",
                           input_shape=[None, 1]),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(N_FEATURES)
])
I built a time-series forecasting model, but I can't understand how to handle this.
Which layer should I use to make this work?
The model input shape must be (BATCH_SIZE, N_PAST = 10, N_FEATURES = 1).
The model output shape must be (BATCH_SIZE, N_FUTURE = 10, N_FEATURES = 1).
The batch_size is not specified in the model.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                           strides=1, padding="causal",
                           activation="relu",
                           input_shape=[N_PAST, 1]),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(N_FUTURE)
])
You don't need to specify batch_size in the input layer. Just change the input shape as follows:
input_shape=[N_PAST,1]
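You can verify this with a dummy batch (just a quick sketch): Keras leaves the batch dimension flexible and accepts any batch size at call time.

import tensorflow as tf

dummy = tf.random.normal((4, N_PAST, 1))   # any batch size works here
print(model(dummy).shape)                  # batch dimension is taken from the input
model.summary()                            # the batch dimension appears as None in every layer shape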
I've been trying to translate some PyTorch code to TensorFlow 2, but the TF2 code is around 10 times slower. I've tried looking at where this might come from, and as far as I can tell it comes from the tape.gradient call (performance was the same with keras' .fit function). I've tried to use different data loaders, ways of declaring the model, installations, etc... and the results have been consistent.
Any explanation / solution as to why this is happening would be much appreciated.
Here is a minimalist version of the TF2 code:
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18, 1)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)

# Create a small model
model = tf.keras.Sequential([
    layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
    layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(256, kernel_size=1, strides=1, padding="same", activation="relu"),
    layers.GlobalAveragePooling2D(),
    layers.Flatten(),
    layers.Dense(128, use_bias=True, activation="relu"),
    layers.Dense(32, use_bias=True, activation="relu"),
    layers.Dense(1, activation='sigmoid', use_bias=True),
])

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)

#tf.function
def train_step(data_batch, labels_batch):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

step_times = []
for epoch in range(20):
    for data_batch, labels_batch in train_dataset:
        step_start_time = time.perf_counter()
        train_step(data_batch, labels_batch)
        if epoch != 0:
            step_times.append(time.perf_counter() - step_start_time)

print(f"Average training step time: {np.mean(step_times):.3f}s.")
And the PyTorch equivalent:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.backends.cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 18, 120)

# Create a small model
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(18, 64, kernel_size=7, stride=3, padding=3)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=5, stride=2, padding=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, stride=2, padding=2)
        self.conv4 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, inputs):
        x = F.relu(self.conv1(inputs))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = x.mean(2)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

model = Model()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
loss_fn = torch.nn.MSELoss()

batch_size = 256
train_steps_per_epoch = train_data.shape[0] // batch_size

step_times = []
for epoch in range(20):
    for step in range(train_steps_per_epoch):
        batch_start, batch_end = step * batch_size, (step + 1) * batch_size
        data_batch = torch.FloatTensor(train_data[batch_start:batch_end]).to(device)
        labels_batch = torch.FloatTensor(train_labels[batch_start:batch_end]).to(device)

        step_start_time = time.perf_counter()
        optimizer.zero_grad()
        y_pred = model(data_batch)
        loss = loss_fn(labels_batch, torch.squeeze(y_pred))
        loss.backward()
        optimizer.step()
        if epoch != 0:
            step_times.append(time.perf_counter() - step_start_time)

print(f"Average training step time: {np.mean(step_times):.3f}s.")
You're using tf.GradientTape correctly, but your model and your data are different between the two snippets you provided.
Here is TF code that uses the same data and model architecture as your PyTorch model.
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)

model = tf.keras.Sequential([
    layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
    layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(256, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, use_bias=True, activation="relu"),
    layers.Dense(32, use_bias=True, activation="relu"),
    layers.Dense(1, activation='sigmoid', use_bias=True),
])

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)

#tf.function
def train_step(data_batch, labels_batch, model):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch, training=True)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

step_times = []
for epoch in range(20):
    for data_batch, labels_batch in train_dataset:
        step_start_time = time.perf_counter()
        train_step(data_batch, labels_batch, model)
        if epoch != 0:
            step_times.append(time.perf_counter() - step_start_time)

print(f"Average training step time: {np.mean(step_times):.3f}s.")
So, in reality, TF is 3 times faster than PyTorch here: 0.035s vs 0.112s per step.
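The main data difference is the channel layout: PyTorch's Conv1d expects channels-first (batch, channels, steps), while Keras' Conv1D defaults to channels-last (batch, steps, channels). A minimal sketch of the correspondence, using the shapes from the snippets above:

import numpy as np

train_data_torch = np.random.rand(1000, 18, 120)            # channels-first, as in the PyTorch snippet
train_data_tf = np.transpose(train_data_torch, (0, 2, 1))    # channels-last, shape (1000, 120, 18)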
I'm trying to use the VGG-19 model as a semantic segmentation model, i.e. pixel-wise classification. I have the following dataset ready:
x_trn.shape, y_trn.shape, x_val.shape, y_val.shape
((3883, 128, 128, 3),
(3883, 128, 128, 10),
(1237, 128, 128, 3),
(1237, 128, 128, 10))
The input image has 3 channels and the output has 10 possible classes; for each class the value can be 0 or 1, so it's one-hot encoded already.
I'm using the following model architecture:
model = VGG19(include_top=False,
              weights=None,
              input_tensor=Input(shape=(128,128,3)))

headModel = model.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(128, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(10, activation="softmax")(headModel)

combined_model = Model(inputs=model.input, outputs=headModel)
combined_model.compile(Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
(Model summary and input/output screenshots omitted.)
I'm not sure what is wrong here, but this gives a shape mismatch error.
combined_model.fit(x=x_trn,
                   y=y_trn,
                   batch_size=10,
                   epochs=10,
                   verbose=1,
                   callbacks=callbacks,
                   validation_data=(x_val, y_val),
                   shuffle=True)
/home/ubuntu/anaconda3/envs/tensorflow2_latest_p37/gpu/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/home/ubuntu/anaconda3/envs/tensorflow2_latest_p37/gpu/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:4687 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/home/ubuntu/anaconda3/envs/tensorflow2_latest_p37/gpu/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 128, 128, 10) and (None, 10) are incompatible
There is a conceptual error in the code: pixel-wise classification requires the output size to match the input size, i.e. the output layer should be 128x128x10. Using Conv2DTranspose we can upsample back to 128x128.
I made a few changes like these:
model = VGG19(include_top=False,
              weights=None,
              input_tensor=Input(shape=(128,128,3)))

headModel = model.output
headModel = Conv2DTranspose(512, (3, 3), strides=(2, 2), padding="same")(headModel)
headModel = Dropout(0.2)(headModel)
headModel = Conv2DTranspose(256, (3, 3), strides=(2, 2), padding="same")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Conv2DTranspose(128, (3, 3), strides=(2, 2), padding="same")(headModel)
headModel = Dropout(0.7)(headModel)
headModel = Conv2DTranspose(64, (3, 3), strides=(2, 2), padding="same")(headModel)
headModel = Dropout(0.8)(headModel)
headModel = Conv2DTranspose(32, (3, 3), strides=(2, 2), padding="same")(headModel)
headModel = Conv2D(10, (1,1), activation='softmax')(headModel)

combined_model = Model(inputs=model.input, outputs=headModel)
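Then compile and fit the same way as before; a minimal sketch reusing the optimizer, loss, and data from the question (I left out your callbacks list):

combined_model.compile(Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
combined_model.fit(x=x_trn, y=y_trn,
                   batch_size=10, epochs=10, verbose=1,
                   validation_data=(x_val, y_val), shuffle=True)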
I keep on getting the same error. I think I have a problem with the input shapes; please help me.
X = Features.iloc[: ,:-1].values
Y = Features['labels'].values
As this is a multiclass classification problem, we one-hot encode Y.
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(Y).reshape(-1,1)).toarray()
# splitting data
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# scaling our data with sklearn's Standard scaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# making our data compatible to model.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_train.shape[1]
The model
model=Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
#history=model.fit(x_train, y_train, callbacks=[rlrp])
I'm getting an error when trying to fit the model.
Here is your code running. Since you did not provide any sample data, I had to fake some, and I will explain what the issue is.
Your y_train must have a depth of 8 since your softmax layer has 8 units.
If you want to reproduce the same error in my code, change
y_train = tf.one_hot(tf.random.uniform(shape=[1000],minval=0, maxval=2, dtype=tf.int32),8) #change the depth to 7 and you will see your error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from matplotlib import pyplot as plt
import numpy as np
x_train = tf.random.normal(shape=(1000,10,1), dtype = tf.float32)
x_test = tf.random.normal(shape=(100,10,1), dtype = tf.float32)
y_train = tf.one_hot(tf.random.uniform(shape=[1000],minval=0, maxval=2, dtype=tf.int32),8)
y_test = tf.one_hot(tf.random.uniform(shape=[100],minval=0, maxval=2, dtype=tf.int32),8)
tf.print(y_train)
model=tf.keras.Sequential()
model.add(layers.Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Dropout(0.2))
model.add(layers.Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(layers.MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(layers.Flatten())
model.add(layers.Dense(units=32, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
rlrp = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
#history=model.fit(x_train, y_train, callbacks=[rlrp])
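On your real data, a quick sanity check (a small sketch, using the OneHotEncoder output from your question) is to confirm the one-hot depth matches the final Dense layer:

num_classes = y_train.shape[-1]   # depth produced by OneHotEncoder
print(num_classes)                # must equal the units of the final softmax Dense layer (8 here)
print(model.output_shape)         # should be (None, 8)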