PyTorch model performing worse than TensorFlow

I have the following TensorFlow model that tries to predict a time series based on lagging values.
I then translated it to PyTorch; it runs fine but performs significantly worse. Is there any obvious difference between the two models that could explain why the PyTorch version performs worse? Any suggestions are greatly appreciated.
TensorFlow model:
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=1e-3,
                           patience=3,
                           verbose=2, mode='auto')
tbCallBack = PlotLossesKeras()

model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(train_x, train_y,
          epochs=2000,
          batch_size=20, verbose=1)
PyTorch model:
class LSTMForecaster(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        lstm_out, _ = self.lstm(input_seq)
        dropout = self.dropout(lstm_out)
        predictions = self.linear(dropout)
        return predictions

torch.manual_seed(45)  # seed before building the model so weight init is reproducible
model_1 = LSTMForecaster(input_size=3, hidden_size=50, num_layers=1, output_size=1)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model_1.parameters())  # the model must exist before its parameters go to Adam

epochs = 2000
batch_size = 20
num_batches = len(train_x) // batch_size

train_x = train_x.to(device)
test_x = test_x.to(device)
train_y = train_y.to(device)
test_y = test_y.to(device)

for epoch in range(epochs):
    for i in range(num_batches):
        # Get the current batch of data
        start = i * batch_size
        end = start + batch_size
        x_batch = train_x[start:end]
        y_batch = train_y[start:end]

        pred_y = model_1(x_batch)
        loss = loss_fn(pred_y, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

model_1.eval()
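Two structural differences between the snippets above are worth calling out (this is a sketch under assumptions, not a verified diagnosis): Keras's LSTM(50) returns only the last time step, while nn.LSTM returns the output for every step, so the Linear layer above is applied to the whole sequence; and nn.LSTM expects (seq_len, batch, features) ordering unless batch_first=True, whereas Keras puts the batch dimension first. The Keras model also sees look_back steps of a single feature, while the PyTorch model is built with input_size=3, which may or may not match how train_x is shaped. A minimal sketch of a closer equivalent, assuming train_x has shape (batch, look_back, 1) with one feature per lag (the class name KerasLikeLSTM is made up for illustration):

import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"

class KerasLikeLSTM(nn.Module):
    """Batch-first LSTM head that, like Keras's LSTM layer, only feeds the
    last time step to the dense layer."""
    def __init__(self, input_size=1, hidden_size=50, output_size=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):                     # (batch, look_back, input_size)
        lstm_out, _ = self.lstm(input_seq)            # (batch, look_back, hidden_size)
        last_step = lstm_out[:, -1, :]                # keep only the final time step
        return self.linear(self.dropout(last_step))   # (batch, output_size)

model_1 = KerasLikeLSTM().to(device)
optimizer = torch.optim.Adam(model_1.parameters())    # build the optimizer after the model
model_1.train()                                        # dropout is only active in train mode

Calling model_1.train() before the loop and model_1.eval() only for evaluation also keeps the Dropout behaviour in line with Keras, which applies dropout during fit and disables it at predict time.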

Related

PyTorch and TensorFlow loss functions

I have tried computing the same cross-entropy loss in TensorFlow and PyTorch, but they return different values and I don't know why. I found a solution for this problem (solution link), but I can't get my two models to match.
Please help me.
My TensorFlow model (feed-forward neural network):
model = keras.Sequential()
model.add(keras.layers.Input(shape=x_train[0].shape))
model.add(keras.layers.Dense(units=256, activation="relu", use_bias=True))
model.add(keras.layers.Dense(units=128, activation="relu", use_bias=True))
model.add(keras.layers.Dense(units=64, activation="relu", use_bias=True))
model.add(keras.layers.Dense(units=10, activation="softmax"))

epochs = 15

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),  # Utilize optimizer
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'])

# Train the network
history1 = model.fit(
    x_train,
    y_train,
    batch_size=64,
    validation_split=0.1,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir="logs/image")])
My PyTorch model (feed-forward neural network):
input_size = 784
hidden_sizes = [256, 128, 64]
output_size = 10

model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], hidden_sizes[2]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[2], output_size),
                      nn.Softmax())
criterion = nn.CrossEntropyLoss()

images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)

logps = model(images)            # log probabilities
loss = criterion(logps, labels)  # calculate the NLL loss

optimizer = optim.Adam(model.parameters(), lr=0.0001)
time0 = time()
epochs = 15

for e in range(epochs):
    running_loss = 0
    running_loss_val = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {} - validation loss: {}".format(
            e, running_loss/len(trainloader), running_loss_val/len(valloader)))

sklearn classification_report ValueError: Found input variables with inconsistent numbers of samples: [18, 576]

I'm working on a CNN classification problem. I used Keras and a pre-trained model. Now I want to evaluate my model and need the precision, recall and F1-score. When I use sklearn.metrics' classification_report I get the above error. I know where the numbers come from: the first is the length of my test dataset in batches and the second is the number of actual samples (predictions) in it. However, I don't know how to "convert" them.
See my code down below:
# load train_ds
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory='/gdrive/My Drive/Flies_dt/224x224',
    image_size=(224, 224),
    validation_split=0.40,
    subset="training",
    seed=123,
    shuffle=True)

# load val_ds
val_ds = tf.keras.utils.image_dataset_from_directory(
    directory='/gdrive/My Drive/Flies_dt/224x224',
    image_size=(224, 224),
    validation_split=0.40,
    subset="validation",
    seed=123,
    shuffle=True)

# move some batches of val_ds to test_ds
test_ds = val_ds.take((1*len(val_ds)) // 2)
print('test_ds =', len(test_ds))
val_ds = val_ds.skip((1*len(val_ds)) // 2)
print('val_ds =', len(val_ds))  # test_ds = 18, val_ds = 18

# Load model
base_model = keras.applications.vgg19.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3)
)

# Freeze base_model
base_model.trainable = False

inputs = keras.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)  # apply data augmentation

# Preprocessing
x = tf.keras.applications.vgg19.preprocess_input(x)

# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here.
x = base_model(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
outputs = keras.layers.Dense(5, activation="softmax")(x)
model = keras.Model(inputs, outputs)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="Adam",
    metrics=['acc']
)
model.fit(train_ds, epochs=8, validation_data=val_ds, callbacks=[tensorboard_callback])

# Unfreeze the base_model. Note that it keeps running in inference mode
# since we passed `training=False` when calling it. This means that
# the batchnorm layers will not update their batch statistics.
# This prevents the batchnorm layers from undoing all the training
# we've done so far.
base_model.trainable = True
model.summary()

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.000001),  # Low learning rate
    loss="sparse_categorical_crossentropy",
    metrics=['acc']
)
model.fit(train_ds, epochs=5, validation_data=val_ds)

# Evaluate
from sklearn.metrics import classification_report

y_pred = model.predict(test_ds, batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)

print(classification_report(test_ds, y_pred_bool))
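For reference, the 18-vs-576 mismatch comes from passing the batched dataset itself as y_true (18 batches) while y_pred_bool has one entry per image (576). A sketch of one way to line them up, assuming test_ds yields (image, label) batches:

import numpy as np
from sklearn.metrics import classification_report

# classification_report needs per-sample labels, not the batched dataset
# object, so collect the true labels from test_ds and compare them with
# the argmax of the predictions.
y_true = np.concatenate([labels.numpy() for _, labels in test_ds], axis=0)

y_pred = model.predict(test_ds, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)

print(classification_report(y_true, y_pred_bool))

One caveat: because the directories were loaded with shuffle=True, the dataset can reshuffle between iterations, so a safer variant iterates test_ds once and runs the model on each batch inside the same loop that collects its labels.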
I also tried something like this, but I'm not sure if this gives me the correct values for multiclass classification.
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc', f1_m, precision_m, recall_m])

# fit the model
history = model.fit(Xtrain, ytrain, validation_split=0.3, epochs=10, verbose=0)

# evaluate the model
loss, accuracy, f1_score, precision, recall = model.evaluate(Xtest, ytest, verbose=0)
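For a 5-class problem with integer labels and softmax outputs, the K.round-based metrics above implicitly treat targets and predictions as 0/1 values, so they will not in general match macro-averaged multiclass precision and recall. A sketch of computing those once on held-out data with scikit-learn instead, reusing y_true and y_pred_bool as built in the earlier snippet:

from sklearn.metrics import precision_recall_fscore_support

# Macro-averaged multiclass precision/recall/F1 on the held-out set.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred_bool, average="macro", zero_division=0
)
print(f"macro precision={precision:.3f} recall={recall:.3f} f1={f1:.3f}")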
This is a lot, sorry. I hope somebody can help.

Why does a pure TensorFlow autoencoder not converge, when the Keras one fits well?

I'm a newbie at machine learning (and at Stack Overflow too), and I want to ask for help.
I have two implementations of the same two-layer autoencoder for MNIST.
The first one fits well:
import tensorflow as tf, numpy as np

def in_pics(pics):
    return np.array(pics, np.float32)/255.

def out_pics(pics):
    npformed = np.array(pics, np.float32)
    samples = np.shape(npformed)[0]
    return np.reshape(npformed, (samples, 784))/255.

(train_data, test_data) = tf.keras.datasets.mnist.load_data()

train_dataset = tf.data.Dataset.from_tensor_slices((in_pics(train_data[0]), out_pics(train_data[0]))).batch(100)
test_dataset = tf.data.Dataset.from_tensor_slices((in_pics(test_data[0]), out_pics(test_data[0]))).batch(100)

model = tf.keras.Sequential(
    [
        tf.keras.layers.InputLayer(input_shape=(28, 28)),
        tf.keras.layers.Reshape(target_shape=(784,)),
        tf.keras.layers.Dense(128, activation="sigmoid",
                              kernel_initializer=tf.keras.initializers.truncated_normal(stddev=0.1),
                              bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.1)),
        tf.keras.layers.Dense(784, activation="sigmoid",
                              kernel_initializer=tf.keras.initializers.truncated_normal(stddev=0.1),
                              bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.1)),
    ]
)

model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer="adam")
model.fit(x=train_dataset, epochs=20, verbose=1, validation_data=test_dataset)
Keras result
And I have (almost) the same model with the same learning procedure, but written in terms of pure TensorFlow:
import tensorflow as tf, numpy as np

def prep_pics(pics):
    npformed = np.array(pics, np.float32)
    samples = np.shape(npformed)[0]
    return np.reshape(npformed, (samples, 784))/255.

batch_size, num_batches = 100, 12000

(train_data, test_data) = tf.keras.datasets.mnist.load_data()
(train_pics_prepared, test_pics_prepared) = (prep_pics(train_data[0]), prep_pics(test_data[0]))

train_dataset = tf.data.Dataset.from_tensor_slices((train_pics_prepared, train_pics_prepared))
test_dataset = tf.data.Dataset.from_tensor_slices((test_pics_prepared, test_pics_prepared)).batch(batch_size)

encoder_W = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[784, 128], dtype=tf.float32))
encoder_b = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[128], dtype=tf.float32))
decoder_W = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[128, 784], dtype=tf.float32))
decoder_b = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[784], dtype=tf.float32))

optimizer = tf.keras.optimizers.Adam()
bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=True)

@tf.function
def trainstep(X_pack, Y_pack):
    with tf.GradientTape() as tape:
        tape.watch([encoder_W, encoder_b, decoder_W, decoder_b])
        encoded = tf.nn.sigmoid(tf.matmul(X_pack, encoder_W) + encoder_b)
        decoded = tf.nn.sigmoid(tf.matmul(encoded, decoder_W) + decoder_b)
        loss = bin_cross(decoded, Y_pack)
    gradients = tape.gradient(loss, [encoder_W, encoder_b, decoder_W, decoder_b])
    optimizer.apply_gradients(zip(gradients, [encoder_W, encoder_b, decoder_W, decoder_b]))

num_samples = len(train_data[0])
epochs = num_batches // (num_samples // batch_size)

for i in range(epochs):
    print(f"Epoch num {i+1}:")
    dataset = train_dataset.shuffle(num_samples).batch(batch_size)
    for x, y in dataset:
        trainstep(x, y)
The loss never gets below 0.64 (the first one ended around 0.07 at the end of training).
TensorFlow result.
What I tried:
1.) Changing the optimizer to Adagrad and even to SGD.
2.) Using another loss: sigmoid_cross_entropy_with_logits.
3.) Training longer, especially with SGD.
I haven't been able to find the mistake or typo for almost a week. Please help!
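One detail worth checking (a sketch of a possible fix, not a verified diagnosis): tf.keras loss objects are called as loss(y_true, y_pred), so bin_cross(decoded, Y_pack) passes the reconstruction as the target and the target batch as the prediction, which is the reverse of what Keras's model.fit does. A trainstep variant with the arguments in Keras order, reusing the variables defined above (the explicit tape.watch can also be dropped, since tf.Variables are watched automatically):

import tensorflow as tf

@tf.function
def trainstep(X_pack, Y_pack):
    # Same step as above, but with the loss arguments in Keras order:
    # the target batch goes first, the reconstruction second.
    with tf.GradientTape() as tape:
        encoded = tf.nn.sigmoid(tf.matmul(X_pack, encoder_W) + encoder_b)
        decoded = tf.nn.sigmoid(tf.matmul(encoded, decoder_W) + decoder_b)
        loss = bin_cross(Y_pack, decoded)   # y_true first, y_pred second
    gradients = tape.gradient(loss, [encoder_W, encoder_b, decoder_W, decoder_b])
    optimizer.apply_gradients(zip(gradients, [encoder_W, encoder_b, decoder_W, decoder_b]))
    return loss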

Finding RMSE and MAE with Keras

I have a problem with my code and would be very happy if you could help.
The goal is to get the mean absolute error and root mean squared error for different epochs and batch sizes. I'm very new to deep learning, so I tried to do it like this, but I'm very confused.
How can I fix or rewrite this code? Thank you so much.
# Reading the file
df = pd.read_csv('data.csv')
df = df[df.columns.difference(['Unnamed: 0'])]

input_data = df.iloc[:, :100].values
label_MOS = df['MOS'].values

train_X, val_X, train_y, val_y = train_test_split(input_data, label_MOS,
                                                  test_size=0.25, random_state=14)
x_train = train_X
y_train = train_y
x_test = val_X
y_test = val_y

def create_model():
    model = Sequential()
    model.add(Dense(32, input_dim=100, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    adam = Adam(learning_rate=0.1)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    return model

# Create the model
model = KerasClassifier(build_fn=create_model, verbose=0)

# Define the grid search parameters
batch_size = [20]
epochs = [500, 1000]

# Make a dictionary of the grid search parameters
param_grid = dict(batch_size=batch_size, epochs=epochs)

# Build and fit the GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(), verbose=0)
grid_result = grid.fit(x_train, y_train)

NNpredictions = model.predict(x_test)
MAE = mean_absolute_error(val_y, NNpredictions)
RMSE = mean_squared_error(val_y, NNpredictions, squared=False)

# Summarize the results
print(' MAE {}, RMSE {}'.format(MAE.best_score_, RMSE.best_params_))
mae = MAE.cv_results_['mae']
rmse = RMSE.cv_results_['rmse']
# params = grid_result.cv_results_['params']
for mean, stdev in zip(mae, rmse):
    print("mae %f rmse (%f) " % (mean, stdev))
I would do something like this:
batch_size = [20]
epochs = [500, 1000]

result_list = list()
for batch_value in batch_size:
    for epoch_value in epochs:
        model = create_model()
        model.fit(x=x_train, y=y_train, epochs=epoch_value, batch_size=batch_value)
        metrics = model.evaluate(x=x_test, y=y_test)
        ord_dic = collections.OrderedDict()
        ord_dic['batch_size'] = batch_value
        ord_dic['epochs'] = epoch_value
        ord_dic['metrics'] = metrics
        result_list.append(ord_dic)

print(result_list)
I have put the results in a list of ordered dictionaries, but you can easily change that part.
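If the goal is specifically MAE and RMSE per configuration, the values returned by evaluate can also be unpacked directly, since create_model compiles with loss='mean_squared_error' and metrics=['mae']. A sketch of what the inner loop body could record instead of the whole metrics list:

import math

# With compile(loss='mean_squared_error', metrics=['mae']),
# model.evaluate returns [mse, mae]; RMSE is the square root of the MSE.
mse, mae = model.evaluate(x=x_test, y=y_test, verbose=0)
rmse = math.sqrt(mse)
print(f"batch_size={batch_value} epochs={epoch_value} MAE={mae:.4f} RMSE={rmse:.4f}")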
I updated your code:
from keras import backend as K

def create_model(losses='mse'):
    model = Sequential()
    model.add(Dense(32, input_dim=100, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    adam = Adam(learning_rate=0.1)
    model.compile(optimizer=adam, loss=losses, metrics=['accuracy'])
    return model

def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

batch_size = [20]
epochs = [500, 1000]
losses = ['mse', root_mean_squared_error]

neural_network = KerasClassifier(build_fn=create_model, verbose=1)
param_grid = dict(losses=losses, epochs=epochs, batch_size=batch_size)
grid = GridSearchCV(estimator=neural_network, param_grid=param_grid)
grid_result = grid.fit(X_train, y_train)
print(grid_result.best_params_)
The create_model function needs to have a losses parameter; that is where the grid will pass each loss.
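A side note, offered as a sketch rather than a drop-in fix: predicting MOS is a regression task, so KerasClassifier (and the accuracy metric) is an awkward fit; the regression wrapper with an error-based scorer keeps the grid search aligned with MAE/RMSE. This assumes the question's original create_model (without the losses parameter), and the import path depends on the installed versions (older Keras ships keras.wrappers.scikit_learn, newer setups use the separate scikeras package):

from keras.wrappers.scikit_learn import KerasRegressor  # or: from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV, KFold

regressor = KerasRegressor(build_fn=create_model, verbose=0)
param_grid = dict(batch_size=[20], epochs=[500, 1000])

# Score each configuration by (negative) MAE so GridSearchCV maximises it.
grid = GridSearchCV(estimator=regressor, param_grid=param_grid,
                    cv=KFold(), scoring="neg_mean_absolute_error")
grid_result = grid.fit(x_train, y_train)
print(grid_result.best_params_, -grid_result.best_score_)

The sigmoid on the final Dense layer is also worth revisiting: it caps predictions to (0, 1), which only makes sense if the MOS labels are scaled into that range; a linear output is the usual choice for regression.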

PyCharm: changes made have no effect, how to explain this behaviour?

This problem only happens in PyCharm:
I made a very simple NN based on the TF 2.0 website tutorial. The weird thing is that when I change batch_size, it keeps going with the old one as if I had done nothing. In fact, everything I do is irrelevant.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255

class Prototype(tf.keras.models.Model):
    def __init__(self, **kwargs):
        super(Prototype, self).__init__(**kwargs)
        self.l1 = layers.Dense(64, activation='relu', name='dense_1')
        self.l2 = layers.Dense(64, activation='relu', name='dense_2')
        self.l3 = layers.Dense(10, activation='softmax', name='predictions')

    def call(self, ip):
        x = self.l1(ip)
        x = self.l2(x)
        return self.l3(x)

model = Prototype()
model.build(input_shape=(None, 784,))

optimizer = keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy()

batch_size = 250
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)

def train_one_epoch():
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        print(x_batch_train.shape)
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)  # Logits for this minibatch
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
I run train_one_epoch() and it trains for one epoch. Then I change the batch size (and consequently rebuild the dataset object to get new chunk sizes), BUT when I run train_one_epoch() again, it keeps going with the old batch_size.
Proof:
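One common explanation (offered as a sketch, not confirmed from the screenshot): train_one_epoch closes over the module-level train_dataset, which was built once from the old batch_size, so editing batch_size alone in a long-lived PyCharm console never rebuilds the batches unless the dataset-creation line is re-run too. A variant that makes the dependency explicit, reusing model, optimizer, loss_fn, x_train and y_train from above (the 500 below is a made-up example value):

import tensorflow as tf

def make_dataset(batch_size):
    # Rebuild the dataset every time the batch size changes.
    return tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)

def train_one_epoch(dataset):
    for step, (x_batch_train, y_batch_train) in enumerate(dataset):
        print(x_batch_train.shape)            # should now reflect the new batch size
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

train_one_epoch(make_dataset(500))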