I just dove into my first deep-learning project to get familiar with keras and tf.
It is a multi-label classification problem where I'm using a simple convolutional network to identify individual chemical compounds from XRD-patterns.
It took me some time, but the results really blew my mind. I did not expect my simple network to perform that well in terms of accuracy.
Then I realized that I've been using theano as backend.
So I gave it another try with TensorFlow, and the very same input data gave terrible results, even worse than a random forest classifier.
I'm pretty much clueless about what is going on there.
I guess I'm doing something wrong, but I just couldn't figure out what.
Anyway, here's the code (the input data is provided below...)
Theano_vs_TF.py:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ['KERAS_BACKEND']='theano'
from keras.models import Model
from keras.models import Sequential
import keras as K
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
X=np.loadtxt('X.txt')
Y=np.loadtxt('Y.txt')
X3d = np.expand_dims(X, axis=-1)
# start with TF-model
tf_model = keras.Sequential()
wreg = tf.keras.regularizers.l2(l=0) # don't use for now
tf_model.add(layers.Conv1D(32, 8,strides=8, padding='same',
input_shape=(180,1), activation='relu',kernel_regularizer=wreg))
tf_model.add(layers.Conv1D(32, 5,strides=5, padding='same',
activation='relu',kernel_regularizer=wreg))
tf_model.add(layers.Conv1D(16, 3,strides=3, padding='same',
activation='relu',kernel_regularizer=wreg))
tf_model.add(layers.Flatten())
tf_model.add(layers.Dense(512,activation='relu',kernel_regularizer=wreg))
tf_model.add(layers.Dense(29, activation='sigmoid',kernel_regularizer=wreg))
tf_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
tf_model.compile(loss='binary_crossentropy',
optimizer=tf_optimizer,
metrics=['accuracy'],
)
tf_model.summary()
# train tf-model
tf_history = tf_model.fit(X3d,Y,batch_size=128,
epochs=50, verbose=1,validation_split=0.3,
)
plt.plot(tf_history.history['accuracy'])
plt.plot(tf_history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
# now compare to theano model
th_model = Sequential() #old way
wreg = K.regularizers.l2(l=0.0) # 1e-3 was way too much, no hints for overfitting so far
th_model.add(K.layers.Conv1D(32, 8,strides=8, padding='same',
input_shape=(180,1), activation='relu',
kernel_regularizer=wreg))
th_model.add(K.layers.Conv1D(32, 5,strides=5,
padding='same', activation='relu',kernel_regularizer=wreg))
th_model.add(K.layers.Conv1D(16, 3,strides=3, padding='same', activation='relu',kernel_regularizer=wreg))
th_model.add(K.layers.Flatten())
th_model.add(K.layers.Dense(512,activation='relu',kernel_regularizer=wreg))
th_model.add(K.layers.Dense(29, activation='sigmoid'))
#Define optimizer
optimizer = K.optimizers.Adam(lr=1e-3)
# Compile model
th_model.compile(loss='binary_crossentropy',
optimizer=optimizer,
metrics=['accuracy'],
)
th_model.summary()
# train th model and plot metrics
th_history = th_model.fit(X3d,Y,batch_size=128,
epochs=50, verbose=1,validation_split=0.3,
)
plt.plot(th_history.history['acc'])
plt.plot(th_history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
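In case it matters, here is a minimal sketch (the seed value is arbitrary) of how one could double-check which backend the multi-backend Keras actually picked up and pin the NumPy seed, so that the two runs at least start from comparable weight initialisations:
import os
os.environ['KERAS_BACKEND'] = 'theano'  # must be set before keras is first imported
import numpy as np
import keras
# Multi-backend Keras reports the backend it loaded; if keras was already
# imported earlier in the session, the environment variable is ignored.
print(keras.backend.backend())  # expected: 'theano'
# Fix the NumPy seed so weight initialisation is comparable between runs.
np.random.seed(0)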
Uguu links (will expire after 24h)
X.tar.gz
Y.tar.gz
Tensorflow learning curve:
Theano learning curve:
Related
I am predicting classes, but there is something I don't get. In the simplified example below, I train a model to predict MNIST handwritten digits. My test set has an accuracy of 95% when I use
model.evaluate(test_image, test_label)
However, when I use
model.predict(test_image)
and then extract the predicted labels using np.argmax(), this accuracy drops. When I run all the code again and again, this accuracy changes a lot.
I suspect now that the classes in the model are not ordered 0, 1 ... 9. Is there a way to see the class labels of a model? Or did I make another mistake?
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
import numpy as np
# Load data
(train_image, train_label), (test_image, test_label) = load_data()
# Train
model = Sequential([
Flatten(input_shape=(28,28)),
Dense(100, activation="relu"),
Dense(100, activation="relu"),
Dense(10, activation="sigmoid")
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics='accuracy')
history = model.fit(train_image, train_label,
batch_size=32, epochs=50,
validation_data=(test_image, test_label),
verbose = 0)
eval = model.evaluate(test_image, test_label)
print('Accuracy (auto):', eval[1]) # This is always high
# Predict and evaluate manually
predictions = model.predict(test_image)
pred = np.array([np.argmax(pred) for pred in predictions])
true = test_label
print('Accuracy (manually):', np.mean(pred == true)) # This varies a lot
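For completeness, a minimal sketch (assuming TF >= 2.7; the seed value is arbitrary) of pinning the random seeds before building the model, so that repeated runs are at least comparable, together with the argmax convention for sparse integer labels:
import numpy as np
import tensorflow as tf
# Pin the Python, NumPy and TensorFlow seeds in one call so that retraining
# the model in a fresh run produces the same weights (TF >= 2.7).
tf.keras.utils.set_random_seed(0)
# With sparse integer labels, the argmax position of a prediction row is the
# class label itself; the model does not store any extra label mapping.
probs = np.array([[0.1, 0.7, 0.2]])  # hypothetical prediction for one sample
print(np.argmax(probs, axis=1))      # -> [1], i.e. class 1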
Here are the accuracy and loss plots for the class-weighted version:
Here are the accuracy and loss plots for the unweighted version:
Here is the code. The only difference between the two versions above is that one passes the class-weights dictionary and one doesn't. (General advice about how this is set up is also welcome -- as you can see I am very new to this!)
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Rescaling, Conv2D, MaxPool2D, Flatten
#Create datasets
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
'/content/drive/MyDrive/Colab Notebooks/train/All classes/',
labels="inferred",
label_mode="int",
validation_split=0.2,
seed=1337,
subset="training",
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
'/content/drive/MyDrive/Colab Notebooks/train/All classes/',
labels="inferred",
label_mode="int",
validation_split=0.2,
seed=1337,
subset="validation",
)
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
'/content/drive/MyDrive/Colab Notebooks/test/All classes/',
labels="inferred",
label_mode="int",
)
#Import ResNet
base_model = ResNet50V2(weights='imagenet', include_top=False)
#Create basic network to append to ResNet above
x = base_model.output
x = Rescaling(1.0 / 255)(x)
x = Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(256,256,3), padding="same")(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)
x = GlobalAveragePooling2D()(x)
predictions = Dense(units=5, activation='softmax')(x)
#Merge the models
model = Model(inputs=base_model.input, outputs=predictions)
#Freeze ResNet layers
for layer in base_model.layers:
layer.trainable = False
#Compile
model.compile(optimizer=keras.optimizers.Adam(1e-3), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#These are the weights. They are derived here from their numbers in the train dataset -- there are 25,811
#files in class 0, 2444 files in class 1, etc. This dictionary was not called for the unweighted version.
class_weight = {0: 1.0,
1: 25811.0/2444.0,
2: 25811.0/5293.0,
3: 25811.0/874.0,
4: 25811.0/709.0}
#Training the model
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath='/content/drive/MyDrive/Colab Notebooks/ResNet/',
save_weights_only=False,
mode='auto',
save_best_only=True,
save_freq= 'epoch')
history = model.fit(
x=train_ds,
epochs=30,
class_weight=class_weight,
validation_data=val_ds,
callbacks=[model_checkpoint_callback]
)
#Evaluating
loss, acc = model.evaluate(test_ds)
print("Accuracy", acc)
Also, some other questions:
Should the metrics=['accuracy'] actually be metrics=['sparse_categorical_accuracy']?
Should class_weight=class_weight actually be sample_weight=sample_weight? I couldn't tell the difference in the documentation, although most examples seem to use class_weight.
I only used padding in one Conv2D layer, and this was a bodge to force the whole thing to actually compile. Should I have been more consistent and used it for the other one too?
On that note, are there other ways my simple appended CNN model (it was called 'predictions') could be laid out to make better sense?
Ah yes, before I forget: as you can see from the above code, I didn't preprocess the data in accordance with the Keras guidance for ResNet. I figured it probably wouldn't make that much of a difference (but also because I was having trouble implementing it). Would that be worth looking into? I suppose the unweighted model shows a very high accuracy... probably too high now that I'm looking at it... oh, dear. (A sketch of that preprocessing step follows below.)
I shall be so very thankful for any advice!
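On the preprocessing point, a minimal sketch, assuming the datasets defined above, of applying the ResNetV2 preprocessing with a dataset map instead of the Rescaling layer:
import tensorflow as tf
from tensorflow.keras.applications.resnet_v2 import preprocess_input
# preprocess_input for ResNetV2 scales pixels to [-1, 1], so the
# Rescaling(1.0 / 255) layer inside the model would no longer be needed.
train_ds = train_ds.map(lambda x, y: (preprocess_input(x), y))
val_ds = val_ds.map(lambda x, y: (preprocess_input(x), y))
test_ds = test_ds.map(lambda x, y: (preprocess_input(x), y))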
With the aim of training a model with a known performance, I run the same model twice. The first run follows a 90/10 split, so I can measure the performance of the model on the test set. The second one uses the same parameters as the former, but now trains on the entire dataset for deployment, which I call the "full model" (a common approach with shallow ML algorithms).
I'm using an MLP from the Keras/TensorFlow package running on a GPU. I also apply a callback, EarlyStopping, to stop once the result on the validation dataset (10% of the training set) has not improved for 50 epochs, and to restore the best configuration once the fit is complete.
What has been weird to see is that the training of the first model usually runs until the end of the epochs (around 300, depending on the run because of the GPU random seeds), but the second model, the "full model", stops after 40-60 epochs and gives back a very poor performance.
My question is whether this is due to the callback function being shared by the two models. Is it possible that the ~50 epochs of patience of the "full model" end up being compared to the best case of the first model, and therefore end training early? (A small sketch of how the callbacks are created follows right after the code.)
Code below:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from keras.layers import Activation
from keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# from tensorflow.keras.layers import Dropout
from scikeras.wrappers import KerasRegressor
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
def create_model():
model = Sequential()
model.add(Dense(400, input_dim=len(X_train.columns)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(400))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(400))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(400))
model.add(BatchNormalization())
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='linear'))
# compile the keras model
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.01), metrics=['mean_squared_error','mean_absolute_error'])
return model
callback_model = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
callback_fullmodel = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
model_rf = Pipeline([
('scaler', StandardScaler()),
('estimator', KerasRegressor(model=create_model, epochs=300, batch_size=1024, verbose=1,validation_split=0.1, callbacks=callback_model)) #
])
full_model_rf = Pipeline([
('scaler', StandardScaler()),
('estimator', KerasRegressor(model=create_model, epochs=3000, batch_size=1024, verbose=1, validation_split=0.1, callbacks=callback_fullmodel)) #
])
model = model_rf.fit(X_train, y_train)
full_model = full_model_rf.fit(X, y)
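To make the setup easier to see, here is a minimal sketch of how the two callbacks are created; the factory function name is just illustrative. Each pipeline gets its own EarlyStopping instance, and as far as I can tell EarlyStopping also resets its wait/best counters when a new fit starts, so a fresh object per model removes any doubt about shared state:
import tensorflow as tf
def make_early_stopping():
    # A fresh callback object per model, so no state is shared between
    # the 90/10 model and the full model.
    return tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50,
                                            restore_best_weights=True)
callback_model = make_early_stopping()
callback_fullmodel = make_early_stopping()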
We have a GitHub project for classification of heart sounds (link), with the following README content:
Technology can play a role in addressing the above problem. The
Phonocardiogram (PCG) is the method of retrieving the sound of the
heart. This sound can be captured through a simple stethoscope. In this
work, we are proposing an artificial intelligence model which has the
potential to detect heart abnormalities from heart sounds.
The dataset can be downloaded from https://physionet.org. This data is
also available in the link below
https://drive.google.com/open?id=13ehWqXt8YDrmmjQc7XAUqcCk6Dwb69hy The
data was gathered from two sources: (A) from the public via the
iStethoscope Pro iPhone app, and (B) from a clinic trial in hospitals
using the digital stethoscope DigiScope. There were two tasks
associated with this data:
Heart Sound Feature Extraction: the first task is to extract the features from the heart sounds within the audio data.
Heart Sound Classification: the second task is to produce a method that can classify a real heart sound into one of four categories (Normal, Murmur,
Extra-Heart Sound and Artifact).
So, if possible, I am asking here for ideas to improve the validation accuracy of
the deep-learning model for classification of heart sounds; the code and its building blocks can be seen below (link):
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv1D, MaxPool1D, GlobalAvgPool1D, Dropout, BatchNormalization, Dense
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping
from keras.utils import np_utils
from keras.regularizers import l2
from scipy.signal import decimate
#new_labels = np.array(new_labels, dtype='int')
Y_train = np_utils.to_categorical(y_train)
Y_test=np_utils.to_categorical(y_test)
model = Sequential()
model.add(Conv1D(filters=4, kernel_size=9, activation='relu', input_shape = x_train.shape[1:],kernel_regularizer = l2(0.025)))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=4, kernel_size=(9), activation='relu',
kernel_regularizer = l2(0.05)))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=8, kernel_size=(9), activation='relu',
kernel_regularizer = l2(0.1)))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=16, kernel_size=(9), activation='relu'))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Conv1D(filters=64, kernel_size=(4), activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Conv1D(filters=32, kernel_size=(1), activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.75))
model.add(GlobalAvgPool1D())
model.add(Dense(3, activation='softmax'))
def batch_generator(x_train, y_train, batch_size):
"""
Rotates the time series randomly in time
"""
x_batch = np.empty((batch_size, x_train.shape[1], x_train.shape[2]), dtype='float32')
y_batch = np.empty((batch_size, y_train.shape[1]), dtype='float32')
full_idx = range(x_train.shape[0])
while True:
batch_idx = np.random.choice(full_idx, batch_size)
x_batch = x_train[batch_idx]
y_batch = y_train[batch_idx]
for i in range(batch_size):
sz = np.random.randint(x_batch.shape[1])
x_batch[i] = np.roll(x_batch[i], sz, axis = 0)
yield x_batch, y_batch
weight_saver = ModelCheckpoint('set_a_weights.h5', monitor='val_loss',
save_best_only=True, save_weights_only=True)
model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.8**x)
x_train.shape
hist = model.fit_generator(batch_generator(x_train, Y_train, 8),
epochs=10, steps_per_epoch=1000,
validation_data=(x_test, Y_test),
callbacks=[weight_saver, annealer],
verbose=2)
model.load_weights('set_a_weights.h5')
import matplotlib.pyplot as plt
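For completeness, the plotting part that follows the import above would look like this, mirroring the plots from the first snippet; hist is the History object returned by fit_generator, and older Keras versions use the 'acc'/'val_acc' keys while newer ones use 'accuracy'/'val_accuracy':
# Plot training and validation accuracy, then loss, from the History object.
plt.plot(hist.history['acc'])
plt.plot(hist.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()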
Thanks.
Try having a look at related publications. For example, work based on
the Heart Sounds Shenzhen Corpus may be of use to you?
Otherwise, I agree with the above that this may not be a suitable question for Stack Overflow.
I am working on a stock prediction project and I just want to predict gain and drop labels with an LSTM net. It is a binary classification problem.
However, my LSTM net struggles to converge even when I reduce the training set a lot. Technically, it should overfit easily, but my prediction accuracy is still only 60% and the loss stays around 0.7 even when I feed it just 90 training samples. So I suspect I made some mistakes in building the network, but due to my limited experience I cannot find the reason. I really hope someone can take a look at my code and point out the problem! I would appreciate it a lot!
My code is given below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Dense, LSTM, Dropout, Activation, Flatten ,BatchNormalization
from keras.utils import to_categorical, np_utils
from keras.optimizers import SGD
data = pd.read_csv("EURUSD_M5_201910210000_201910251140.csv", sep="\t")
train_cut = int(data.shape[0] * 0.8)
dataset_train = data[0:train_cut]
training_set = dataset_train["<OPEN>"].values
sc = MinMaxScaler(feature_range=(0, 1))
train_sec_scaled = sc.fit_transform(training_set.reshape(-1, 1))
X_train = []
y_train = []
step_size = 60
predic_days = 1
for i in range(step_size, len(train_sec_scaled) - predic_days):
X_train.append(train_sec_scaled[i - step_size : i, 0])
y_value = train_sec_scaled[i : i + predic_days, 0]
last_day_value = train_sec_scaled[i - 1, 0]
# 1 ==> up, 0 ==> down
if y_value[0] > last_day_value:
y_train.append([1])
else:
y_train.append([0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1])
y_train = np_utils.to_categorical(y_train, 2)
print(y_train)
print("train data generated!")
print(X_train.shape, y_train.shape)
def train():
model = Sequential()
model.add(Dropout(0.2))
model.add(LSTM(40))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.02))
model.add(Dropout(0.2))
model.add(Dense(30, kernel_initializer='glorot_normal'))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.02))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-4, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(X_train,
y_train,batch_size=32,epochs=10000)
model.save("trend_analysis.h5")
print("model saved!")
if __name__ == "__main__":
train()
Also, here is the dataset I used:
https://drive.google.com/open?id=1r_0Ko1F6i0F1pToTSsQF1xGt_FTtpUux
Thanks in advance!
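For reference, the same up/down target can also be expressed with a single sigmoid unit and binary_crossentropy instead of the two-unit softmax used above; a minimal sketch, where the layer sizes and the 60-step input shape mirror the code above but are otherwise illustrative and not tuned:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
model = Sequential()
# Give the first layer an explicit input shape: (timesteps, features).
model.add(LSTM(40, input_shape=(60, 1)))
model.add(Dropout(0.2))
# One sigmoid unit with binary_crossentropy is equivalent to a 2-unit softmax
# with categorical_crossentropy for two classes; labels stay 0/1 and no
# to_categorical call is needed.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()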