I have a multilabel classification problem. I used the following code, but the validation accuracy is not logical: it is less than 20%, while classical machine learning on the same data gives more than 80%. What could the problem be here?
This is the code.
data_path = "TestData.csv"
data_raw = pd.read_csv(data_path)
data_raw.shape #(156004, 9)
categories = list(data_raw.columns.values)
categories = categories[1:]#for the labels
data = data_raw
data.shape #(156004, 9)
num_words = 20000
max_features = 150000
max_len = 200
embedding_dims = 128
num_epochs = 5
X_train = data["text"].values
X_test = data["text"].values
#Tokenization
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(list(X_train))
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
X_train = sequence.pad_sequences(X_train, max_len)
X_test = sequence.pad_sequences(X_test, max_len)
y_train = data[categories].values
y_test = data[categories].values
X_tra, X_val, y_tra, y_val = train_test_split(X_train, y_train)#, test_size =0.2, random_state=0)
CNN_model = Sequential([
    Embedding(input_dim=max_features, input_length=max_len, output_dim=embedding_dims),
    SpatialDropout1D(0.5),
    Conv1D(filters=20, kernel_size=8, padding='same', activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    GlobalMaxPool1D(),
    Dense(8, activation='sigmoid')
])
def mean_pred(y_true, y_pred):
    return K.mean(y_pred)
from tensorflow.python.keras.optimizer_v2.adam import Adam
adam = Adam()
CNN_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
for category in categories:
    print('**Processing {} ...**'.format(category))
    pred = CNN_model.fit(X_tra, y_tra, batch_size=128, epochs=5, validation_data=(X_val, y_val))
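One quick check for the "not logical" 20% is to confirm which notion of accuracy is being compared: for multilabel targets, exact-match (subset) accuracy is usually far lower than per-label accuracy, and the 80% from the classical model may be the latter. A small diagnostic sketch with sklearn, reusing X_val, y_val, and the trained CNN_model from above:
from sklearn.metrics import accuracy_score, hamming_loss

probs = CNN_model.predict(X_val)
preds = (probs > 0.5).astype(int)

# Subset accuracy: all 8 labels of a sample must be correct for it to count
print("subset accuracy:", accuracy_score(y_val, preds))
# Per-label accuracy: fraction of individual label decisions that are correct
print("per-label accuracy:", 1 - hamming_loss(y_val, preds))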
I have the following TensorFlow model that tries to predict a time series based on lagged values.
I then tried to translate it to PyTorch; it works, but performs significantly worse. Is there any obvious difference between the two models that could contribute to PyTorch performing worse? Any suggestions are greatly appreciated.
TensorFlow model:
early_stop = EarlyStopping(monitor='val_loss',
min_delta=1e-3,
patience=3,
verbose=2, mode='auto')
tbCallBack = PlotLossesKeras()
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_x, train_y,
epochs=2000,
batch_size=20, verbose=1)
PyTorch model:
class LSTMForecaster(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        lstm_out, _ = self.lstm(input_seq)
        dropout = self.dropout(lstm_out)
        predictions = self.linear(dropout)
        return predictions
model_1 = LSTMForecaster(input_size=3, hidden_size=50, num_layers=1, output_size=1)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model_1.parameters())
torch.manual_seed(45)
epochs = 2000
batch_size = 20
num_batches = len(train_x) // batch_size
train_x = train_x.to(device)
test_x = test_x.to(device)
train_y = train_y.to(device)
test_y = test_y.to(device)
for epoch in range(epochs):
    for i in range(num_batches):
        # Get the current batch of data
        start = i * batch_size
        end = start + batch_size
        x_batch = train_x[start:end]
        y_batch = train_y[start:end]
        pred_y = model_1(x_batch)
        loss = loss_fn(pred_y, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
model_1.eval()
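One place the two models can diverge is the output that reaches the loss: Keras' LSTM(50) without return_sequences emits only the last timestep, while the PyTorch forward above applies the linear layer to every timestep. A minimal sketch of a forward pass that mirrors the Keras behaviour, assuming inputs shaped (batch, seq_len, features) and hence batch_first=True; the class name is hypothetical:
import torch.nn as nn

class LSTMForecasterLastStep(nn.Module):
    """Hypothetical variant that, like Keras' LSTM(50) + Dense(1), uses only the last timestep."""
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # batch_first=True so inputs are (batch, seq_len, features), matching the Keras layout
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        lstm_out, _ = self.lstm(input_seq)   # (batch, seq_len, hidden_size)
        last_step = lstm_out[:, -1, :]       # keep only the final timestep
        return self.linear(self.dropout(last_step))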
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X1_train)
X1_train = tokenizer.texts_to_sequences(X1_train)
X1_val = tokenizer.texts_to_sequences(X1_val)
X1_test = tokenizer.texts_to_sequences(X1_test)
vocab_size = len(tokenizer.word_index) + 1
maxlen = 5000
X1_train = pad_sequences(X1_train, padding='post', maxlen=maxlen)
X1_val = pad_sequences(X1_val, padding='post', maxlen=maxlen)
X1_test = pad_sequences(X1_test, padding='post', maxlen=maxlen)
embeddings_dictionary = dict()
df_g = pd.read_csv('gs://----------/glove.6B.100d.txt', sep=" ", quoting=3, header=None, index_col=0)
embeddings_dictionary = {key: val.values for key, val in df_g.T.items()}
embedding_matrix = zeros((vocab_size, 100))
for word, index in tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
input_2_col_list= [x1,x2,...................., x30]
X2_train = X_train[input_2_col_list].values
X2_val = X_val[input_2_col_list].values
X2_test = X_test[input_2_col_list].values
input_1 = Input(shape=(maxlen,))
input_2 = Input(shape=(30,))
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix], trainable=False)(input_1)
Bi_layer= Bidirectional(LSTM(128, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(embedding_layer) # Dimn shd be (None,200,128)
con_layer = Conv1D(64, kernel_size=3, padding='valid', kernel_initializer='glorot_uniform')(Bi_layer)
avg_pool = GlobalAveragePooling1D()(con_layer)
max_pool = GlobalMaxPooling1D()(con_layer)
dense_layer_1 = Dense(64, activation='relu')(input_2)
dense_layer_2 = Dense(64, activation='relu')(dense_layer_1)
concat_layer = Concatenate()([avg_pool,max_pool, dense_layer_2])
dense_layer_3 = Dense(50, activation='relu')(concat_layer)
output = Dense(2, activation='softmax')(dense_layer_3)
model = Model(inputs=[input_1, input_2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc',f1_m,precision_m, recall_m])
print(model.summary())
history = model.fit(x=[X1_train, X2_train], y=y_train, batch_size=30, epochs=10, verbose=1, validation_data=([X1_val,X2_val],y_val))
loss, accuracy, f1_score, precision, recall = model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=0)
model.save('gs://----------/Tuned_hybrid_GCP_5000_CASETYPE_8_9.tf')
##################################################
loaded_model=tf.keras.models.load_model( 'gs://----------/Tuned_hybrid_GCP_5000_CASETYPE_8_9.tf', custom_objects={"f1_m": f1_m , "recall_m": recall_m, "precision_m": precision_m } )
loss, accuracy, f1_score, precision, recall = loaded_model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=0) ###This is getting no error BUT the predictions are wrong
y_pred = loaded_model.predict(x=[X1_test, X2_test], batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1) ###This is getting no error BUT the predictions are wrong
##################################################################
import tensorflow_hub as hub
x=[X1_test, X2_test]
loaded_model_2 = tf.keras.Sequential([hub.KerasLayer('gs:---------------/Tuned_hybrid_GCP_100_CASETYPE_8_11_save.tf')])
loaded_model_2.build(x.shape) #### Getting an error
y_pred_2 = loaded_model_2.predict(x=[X1_test, X2_test], batch_size=64, verbose=1)
y_pred_bool_2 = np.argmax(y_pred_2, axis=1)
###################################################
#### Inside of the model folder the files and dirs are: assets/, variables/, saved_model.pb, keras_metadata.pb
#### Using 'us-docker.pkg.dev/vertex-ai/training/tf-gpu.2-8:latest' to train the model on Vertex AI
I have tried multiple save and load functions with custom objects, but none of them are working properly.
The loaded model does predict, but the outputs are not accurate. I tested similar TEST data in another test script, and the predictions do not match after I load the model.
Similar issue on StackOverflow: https://stackoverflow.com/questions/68937973/how-can-i-fix-the-problem-of-loading-the-model-to-get-new-predictions
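If the separate test script builds and fits its own Tokenizer, the word indices will differ from the ones the embedding matrix was built with, and the loaded model will see scrambled inputs even though load_model succeeds. A minimal sketch of persisting the fitted tokenizer next to the model and reusing it at prediction time; the pickle path and raw_test_texts are hypothetical names:
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences

# At training time: save the fitted tokenizer alongside the model
with open('tokenizer.pickle', 'wb') as f:   # hypothetical local path
    pickle.dump(tokenizer, f)

# In the test script: load the same tokenizer instead of fitting a new one
with open('tokenizer.pickle', 'rb') as f:
    tokenizer = pickle.load(f)

X1_test_seq = pad_sequences(tokenizer.texts_to_sequences(raw_test_texts),  # raw_test_texts: your raw strings
                            padding='post', maxlen=5000)
y_pred = loaded_model.predict(x=[X1_test_seq, X2_test], batch_size=64, verbose=1)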
I wanted to fit a simple LSTM model to perform binary classification on multivariate time series data. Since my data is severely imbalanced, I passed class weights computed with sklearn into the model via the class_weight argument. However, I got a pretty high loss value that did not decrease with each epoch, and my F1 score was 0.018, which is extremely low as well. I appreciate your suggestions!
Sample data:
sequence_length = 10
def generate_data(X, y, sequence_length=10, step=1):
    X_local = []
    y_local = []
    for start in range(0, len(X) - sequence_length, step):
        end = start + sequence_length
        X_local.append(X[start:end])
        y_local.append(y[end - 1])
    return np.array(X_local), np.array(y_local)
X_sequence, y = generate_data(data.loc[:, "V1":"V4"].values, data.Class)
model = keras.Sequential()
model.add(LSTM(100, input_shape = (10, 4)))
model.add(Dropout(0.5))
model.add(Dense(1, activation="sigmoid"))
model.compile(loss="binary_crossentropy"
, metrics=[keras.metrics.binary_accuracy]
, optimizer="adam")
model.summary()
training_size = int(len(X_sequence) * 0.7)
X_train, y_train = X_sequence[:training_size], y[:training_size]
X_test, y_test = X_sequence[training_size:], y[training_size:]
from sklearn.utils import class_weight
class_weights = dict(zip(np.unique(y_train),
                         class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)))
model.fit(X_train, y_train, batch_size=64, epochs=50,class_weight=class_weights)
model.evaluate(X_test, y_test)
y_test_prob = model.predict(X_test, verbose=1)
y_test_pred = np.where(y_test_prob > 0.5, 1, 0)
from sklearn.metrics import f1_score
f1_score(y_test, y_test_pred)
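With this degree of imbalance, a single 0.5 threshold and the raw F1 number hide what the network is actually doing; the confusion matrix and a quick threshold sweep usually show whether the model only ever predicts the majority class. A small diagnostic sketch with standard sklearn metrics, reusing y_test and y_test_prob from above:
from sklearn.metrics import classification_report, confusion_matrix, f1_score

y_test_pred = (y_test_prob.ravel() > 0.5).astype(int)
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=3))

# Sweeping the decision threshold shows whether the scores carry any signal
# beyond always predicting the majority class
for threshold in (0.3, 0.5, 0.7):
    pred = (y_test_prob.ravel() > threshold).astype(int)
    print(threshold, f1_score(y_test, pred))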
I have a problem with my code and would be very happy if you can help.
The purpose is to get the mean absolute error and root mean squared error for different epochs and batch sizes. I'm very new to deep learning, so I tried to do it like this, but I am very confused.
How can I fix or rewrite this code? Thank you so much.
# Reading the file
df = pd.read_csv('data.csv')
df = df[df.columns.difference(['Unnamed: 0'])]
input_data = df.iloc[:,:100].values
label_MOS = df['MOS'].values
train_X, val_X, train_y, val_y = train_test_split(input_data,
label_MOS, test_size = 0.25, random_state = 14)
x_train = train_X
y_train = train_y
x_test = val_X
y_test = val_y
def create_model():
    model = Sequential()
    model.add(Dense(32, input_dim=100, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    adam = Adam(learning_rate=0.1)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    return model
# Create the model
model = KerasClassifier(build_fn = create_model,verbose = 0)
# Define the grid search parameters
batch_size = [20]
epochs = [500,1000]
# Make a dictionary of the grid search parameters
param_grid = dict(batch_size = batch_size,epochs = epochs)
# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model,param_grid = param_grid,cv = KFold(),verbose = )
grid_result = grid.fit(x_train,y_train)
NNpredictions = model.predict(x_test)
MAE = mean_absolute_error(val_y , NNpredictions)
RMSE = mean_squared_error(val_y , NNpredictions, squared = False)
# Summarize the results
print(' MAE {}, RMSE {}'.format(MAE.best_score_,RMSE.best_params_))
mae = MAE.cv_results_['mae']
rmse = RMSE.cv_results_['rmse']
# params = grid_result.cv_results_['params']
for mean, stdev in zip(mae, rmse):
    print("mae %f rmse (%f) " % (mean, stdev))
I would do something like this:
import collections

batch_size = [20]
epochs = [500, 1000]
result_list = list()
for batch_value in batch_size:
    for epoch_value in epochs:
        model = create_model()
        model.fit(x=x_train, y=y_train, epochs=epoch_value, batch_size=batch_value)
        metrics = model.evaluate(x=x_test, y=y_test)
        ord_dic = collections.OrderedDict()
        ord_dic['batch_size'] = batch_value
        ord_dic['epochs'] = epoch_value
        ord_dic['metrics'] = metrics
        result_list.append(ord_dic)
print(result_list)
I have put the results in a list of Ordered Dictionaries, but you can easily change that part
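Since the goal is MAE and RMSE specifically, the loop body could also compute them from predictions instead of relying on the compiled metrics; a small addition that would sit right after model.evaluate above, using the same sklearn functions the question calls:
from sklearn.metrics import mean_absolute_error, mean_squared_error

preds = model.predict(x_test)
ord_dic['MAE'] = mean_absolute_error(y_test, preds)
# squared=False makes this the root mean squared error
ord_dic['RMSE'] = mean_squared_error(y_test, preds, squared=False)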
I updated your code
from keras import backend as K
def create_model(losses='mse'):
    model = Sequential()
    model.add(Dense(32, input_dim=100, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    adam = Adam(learning_rate=0.1)
    model.compile(optimizer=adam, loss=losses, metrics=['accuracy'])
    return model

def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

batch_size = [20]
epochs = [500, 1000]
losses = ['mse', root_mean_squared_error]
neural_network = KerasClassifier(build_fn=create_model, verbose=1)
param_grid = dict(losses=losses, epochs=epochs, batch_size=batch_size)
grid = GridSearchCV(estimator=neural_network, param_grid=param_grid )
grid_result = grid.fit(X_train, y_train)
print(grid_result.best_params_)
The create_model function needs a losses parameter; that is where the grid passes in the loss it is currently evaluating.
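For reference, after the fit the per-combination scores can be read back from the grid; a short sketch using standard GridSearchCV attributes:
# grid_result is the fitted GridSearchCV from above
for params, mean_score in zip(grid_result.cv_results_['params'],
                              grid_result.cv_results_['mean_test_score']):
    print(params, mean_score)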
I'm trying to work with an LSTM in TensorFlow, but I have reached the point where I can't get a simple IMDB sentiment model to converge.
I took a Keras model and tried to duplicate the exact same model in raw TensorFlow; in Keras it trains and converges, but in TensorFlow it just gets stuck at some point (0.69 loss).
I tried to make them as equal as possible; the only difference I can tell is that in Keras the padding is before the sequence, while in TensorFlow I use 'post' padding due to TensorFlow conventions.
Any idea what's wrong with my TensorFlow model?
from __future__ import print_function
import random
import numpy as np
from tensorflow.contrib.keras.python.keras.preprocessing import sequence
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.python.keras.layers import Embedding
from tensorflow.contrib.keras.python.keras.layers import LSTM
from tensorflow.contrib.keras.python.keras.layers import Conv1D, MaxPooling1D
from tensorflow.contrib.keras.python.keras.datasets import imdb
import tensorflow as tf
# Embedding
max_features = 30000
maxlen = 2494
embedding_size = 128
# Convolution
kernel_size = 5
filters = 64
pool_size = 4
# LSTM
lstm_output_size = 70
# Training
batch_size = 30
epochs = 2
class TrainData:
    def __init__(self, batch_sz=batch_size):
        (x_train, y_train), (_, _) = imdb.load_data(num_words=max_features)

        y_train = [[int(x == 1), int(x != 1)] for x in y_train]

        self._batch_size = batch_sz
        self._train_data = sequence.pad_sequences(x_train, padding='pre')
        self._train_labels = y_train

    def next_batch(self):
        if len(self._train_data) < self._batch_size:
            self.__init__()
        batch_x, batch_y = self._train_data[:self._batch_size], self._train_labels[:self._batch_size]
        self._train_data = self._train_data[self._batch_size:]
        self._train_labels = self._train_labels[self._batch_size:]
        return batch_x, batch_y

    def batch_generator(self):
        while True:
            if len(self._train_data) < self._batch_size:
                self.__init__()
            batch_x, batch_y = self._train_data[:self._batch_size], self._train_labels[:self._batch_size]
            self._train_data = self._train_data[self._batch_size:]
            self._train_labels = self._train_labels[self._batch_size:]
            yield batch_x, batch_y

    def get_num_batches(self):
        return int(len(self._train_data) / self._batch_size)
def length(sequence):
    used = tf.sign(tf.abs(sequence))
    length = tf.reduce_sum(used, reduction_indices=1)
    length = tf.cast(length, tf.int32)
    return length
def get_model(x, y):
    embedding = tf.get_variable("embedding", [max_features, embedding_size], dtype=tf.float32)
    embedded_x = tf.nn.embedding_lookup(embedding, x)
    print(x)
    print(embedded_x)
    print(length(x))

    cell_1 = tf.contrib.rnn.BasicLSTMCell(lstm_output_size)
    output_1, state_1 = tf.nn.dynamic_rnn(cell_1, embedded_x, dtype=tf.float32, scope="rnn_layer1",
                                          sequence_length=length(x))

    # Select last output.
    last_index = tf.shape(output_1)[1] - 1
    # reshaping to [seq_length, batch_size, num_units]
    output = tf.transpose(output_1, [1, 0, 2])
    last = tf.gather(output, last_index)

    # Softmax layer
    with tf.name_scope('fc_layer'):
        weight = tf.get_variable(name="weights", shape=[lstm_output_size, 2])
        bias = tf.get_variable(shape=[2], name="bias")

    logits = tf.matmul(last, weight) + bias
    loss = tf.losses.softmax_cross_entropy(y, logits=logits)

    optimizer = tf.train.AdamOptimizer()
    optimize_step = optimizer.minimize(loss=loss)

    return loss, optimize_step
def tf_model():
    x_holder = tf.placeholder(tf.int32, shape=[None, maxlen])
    y_holder = tf.placeholder(tf.int32, shape=[None, 2])

    loss, opt_step = get_model(x_holder, y_holder)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        for epoch in range(10):
            cost_epochs = []
            train_data = TrainData()
            cost_batch = 0
            for batch in range(train_data.get_num_batches()):
                x_train, y_train = train_data.next_batch()
                _, cost_batch = sess.run([opt_step, loss],
                                         feed_dict={x_holder: x_train,
                                                    y_holder: y_train})
                cost_epochs.append(cost_batch)
                step += 1

            # if step % 100 == 0:
            print("Epoch: " + str(epoch))
            print("\tcost: " + str(np.mean(cost_epochs)))
def keras_model():
    # print('Loading data...')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    y_test = [[int(x == 1), int(x != 1)] for x in y_test]
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen, padding='pre')

    model = Sequential()
    model.add(Embedding(max_features, embedding_size, input_length=maxlen))
    model.add(LSTM(lstm_output_size))
    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    print('Train...')
    data = TrainData()
    model.fit_generator(data.batch_generator(), steps_per_epoch=data.get_num_batches(),
                        epochs=epochs,
                        validation_data=(x_test, y_test))


if __name__ == '__main__':
    # keras_model()
    tf_model()
EDIT
When I limit the sequence length to 100, both models converge, so I assume there is something different in the LSTM layer.
Check the initial values of your operations. In my case the Adadelta optimizer in Keras had an initial learning rate of 1.0, while in tf.keras it was 0.001, so on the MNIST dataset it converged much more slowly.
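Following that advice, one low-effort sanity check is to pin the optimizer hyperparameters explicitly on both sides instead of relying on defaults. A minimal sketch for the two models above; the Keras import path just mirrors the contrib-era paths used in the question, so adjust it to your TensorFlow version:
# Raw TensorFlow graph (inside get_model): spell out the learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

# Keras model: use an explicit optimizer object instead of the 'adam' string
from tensorflow.contrib.keras.python.keras.optimizers import Adam
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['accuracy'])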