I want to get reproducible results for a CNN. I use Keras and Google Colab with GPU.
Besides inserting the code snippets that are commonly recommended for making results reproducible, I also added seeds to the layers.
###### This is the first code snipped to run #####
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
###### This is the second code snipped to run #####
from __future__ import print_function
import numpy as np
import tensorflow as tf
import random as rn
import os
# The variable is spelled PYTHONHASHSEED; the previous 'PYTHONASHSEED' is not
# a name the interpreter looks at. NOTE: the hash seed is read when the
# interpreter starts, so for the current kernel it has to be exported before
# Python is launched; setting it here only helps processes started later.
os.environ['PYTHONHASHSEED'] = '0'
# Put the NumPy and core-Python random generators into a well-defined state.
np.random.seed(42)
rn.seed(12345)
# Run TensorFlow ops on a single thread so that op scheduling on the CPU
# cannot reorder floating-point reductions between runs.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
###### This is the third code snippet to run #####
from keras import backend as K
# Graph-level seed for TensorFlow's own random ops; set before the session
# is created.
tf.set_random_seed(1234)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
# Register the session with Keras; without this call Keras keeps using its
# own default session and `session_conf` has no effect.
K.set_session(sess)
###### This is the fourth code snipped to run #####
def model_cnn():
    """Build and compile the seeded CNN classifier.

    The network takes 28x28x1 inputs and produces a 10-way softmax. Every
    kernel initializer and dropout layer is given an explicit seed.

    Returns:
        The compiled ``Sequential`` model (Adam, categorical cross-entropy,
        accuracy metric).
    """
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     kernel_initializer=initializers.glorot_uniform(seed=1),
                     input_shape=(28, 28, 1)))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Dropout(0.25, seed=1))
    # Dense only transforms the last axis; flatten the feature maps so the
    # network output is (batch, 10) and matches one-hot labels.
    model.add(Flatten())
    model.add(Dense(512, activation='relu',
                    kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Dropout(0.5, seed=1))
    # categorical_crossentropy expects class probabilities, hence softmax.
    model.add(Dense(10, activation='softmax',
                    kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model
def split_data(X, y):
    """Split ``X``/``y`` into a stratified 80/20 train/validation pair.

    Args:
        X: samples.
        y: labels, also used as the stratification key.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)`` from ``train_test_split``
        with a fixed ``random_state`` of 42.
    """
    # 0.2 instead of 1/5: the file enables only `print_function` from
    # __future__, so under Python 2 `1/5` would be integer division (== 0).
    X_train_val, X_val, y_train_val, y_val = train_test_split(
        X, y, random_state=42, test_size=0.2, stratify=y)
    return X_train_val, X_val, y_train_val, y_val
def train_model_with_EarlyStopping(model, X, y):
    """Fit ``model`` on a train split of ``X``/``y`` with early stopping.

    Args:
        model: a compiled Keras model.
        X: samples.
        y: labels.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # make train and validation data
    X_tr, X_val, y_tr, y_val = split_data(X, y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    # The callback monitors 'val_loss', so validation data must be supplied
    # and the callback must actually be passed to fit().
    # NOTE(review): the tail of this call was lost; batch_size=64 and
    # epochs=20 follow the figures quoted in the accompanying answer —
    # TODO confirm against the original notebook.
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=20,
                        validation_data=(X_val, y_val),
                        callbacks=[es])
    return history
###### This is the fifth code snipped to run #####
train_model_with_EarlyStopping(model_cnn(), X, y)
Every time I run the above code I get different results.
Does the reason lie in the code, or is it simply not possible to obtain reproducible results in Google Colab with GPU support?
The complete code (there are unneccessary parts in the code, such as libraries which are not used):
import os
# Local directory that mirrors the Drive folder. exist_ok=True already makes
# the call a no-op when the directory exists, so no exception handler is
# needed; the previous bare `except: pass` had no matching `try`.
local_root_path = os.path.expanduser("~/data/data")
os.makedirs(local_root_path, exist_ok=True)
def ListFolder(google_drive_id, destination):
    """Recursively copy a Google Drive folder into a local directory.

    Uses the module-level PyDrive client ``drive``.

    Args:
        google_drive_id: id of the Drive folder whose children are listed.
        destination: local directory that receives the files.
    """
    file_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % google_drive_id}).GetList()
    counter = 0
    for f in file_list:
        # If it is a directory, create it locally and recurse into it
        if f['mimeType'] == 'application/vnd.google-apps.folder':
            folder_path = os.path.join(destination, f['title'])
            os.makedirs(folder_path, exist_ok=True)
            print('creating directory {}'.format(folder_path))
            ListFolder(f['id'], folder_path)
        else:
            # Regular file: fetch its content. Previously the handle was
            # created but nothing was ever written to `fname`, and folder
            # entries were counted as files too.
            fname = os.path.join(destination, f['title'])
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)
            counter += 1
    print('{} files were uploaded in {}'.format(counter, destination))
ListFolder("1DyM_D2ZJ5UHIXmXq4uHzKqXSkLTH-lSo", local_root_path)
import glob
import h5py
from time import time
from keras import initializers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD, Adam, RMSprop, Adagrad, Adadelta, Adamax, Nadam
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.regularizers import l2
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras import backend as K
import numpy as np
import pickle as pkl
from matplotlib import pyplot as plt
%matplotlib inline
import gzip
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.datasets import fashion_mnist
from numpy import mean, std
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.optimizers import SGD, Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc, average_precision_score, f1_score
import time
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from google.colab import files
from PIL import Image
The problem isn't limited to Colab, and is reproducible locally. The behavior, however, may be inevitable.
Code at bottom is a minimally-reproducible version of your code, with fit parameters tweaked for faster testing. What I observed is, the maximum difference for loss is only 0.0144% for 468 iterations per run, across 5 runs. This is pretty good. With batch_size=64, 60000 samples, and 20 epochs, you'll have 18750 iterations - which will amplify this figure substantially.
Regardless, GPU parallelism is the most likely culprit driving the randomness - and the small differences do accumulate over time to yield a substantial difference - demo below. If 1e-8 seems small, try adding random noise to half your weights w/ magnitude clipped at 1e-8, and witness its life philosophy change.
The role of the seeds becomes dramatically pronounced if you don't use them - try it, all your metrics will fly rampant within the first 10 iterations. Also, loss is better for measuring runtime differences, as accuracy is a lot more sensitive to numeric precision errors: the difference between 60% accuracy and 70% accuracy on a 10-sample batch is a prediction that differs by 0.000001 w.r.t. 0.5 - but loss will barely budge.
Lastly, note that your hyperparameter choice will have a far greater impact upon model performance than randomness; no matter how many seeds you throw, they won't magic a model into SOTA. -- I recommend this fine clip.
Your code - is fine. You've taken all practical steps to ensure reproducibility, with an exception: PYTHONHASHSEED must be set before your Python kernel starts.
What can you do to reduce randomness?
Repeat runs, average results. Understandably that's expensive, but note that even a perfectly reproducible run isn't perfectly informative, as model variance w.r.t. train & validation sets is likely to be much greater than noise-induced randomness
K-Fold Cross-Validation: can mitigate both data & noise variance significantly
Larger validation set: extracted features can differ only so much due to noise; the larger the validation set, the less small perturbations in weights should reflect in metrics
GPU Parallelism: amplifying float error
print(2. * 11. / 9.) # 2.4444444444444446
print(2. / 9. * 11.) # 2.444444444444444
Order of operations matters, and by exploiting multithreading, GPU parallelism gives no guarantee whatsoever of operations being executed in the same order. On a first look, the difference may look innocent - but give it enough iterations ...
one = 1
for _ in range(int(1e8)):
one *= (2. / 9. * 11.) / (2. * 11. / 9.)
print(one) # 0.9999999777955395
print(1 - one) # 1.8167285897874308e-08
... and a "one" is a typical small weight value of 1e-08 away from being its original self. If 100 million iterations seems to be a stretch, consider that the operation completed in ~half a minute, whereas your model can train over an hour, and former runs entirely on CPU.
Minimal reproducible experimentation:
import tensorflow as tf
import random as rn
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import MaxPooling2D, Conv2D
from keras.optimizers import Adam
def model_cnn():
    """Build and compile the reduced CNN used for the run-to-run comparison.

    Returns:
        The compiled ``Sequential`` model for 28x28x1 inputs and 10 outputs.
    """
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     kernel_initializer='he_uniform'))
    # Flatten so the final output is (batch, 10), matching the label arrays.
    model.add(Flatten())
    model.add(Dense(512, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax', kernel_initializer='he_uniform'))
    # The compile call was cut off after the optimizer; the logged runs
    # report `acc`, so the accuracy metric is restored here.
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001),
                  metrics=['accuracy'])
    return model
X_train = np.random.randn(30000, 28, 28, 1)
y_train = np.random.randint(0, 2, (30000, 10))
X_val = np.random.randn(30000, 28, 28, 1)
y_val = np.random.randint(0, 2, (30000, 10))
model = model_cnn()
history = model.fit(X_train, y_train, batch_size=64,shuffle=True,
epochs=1, verbose=1, validation_data=(X_val,y_val))
Run differences:
loss: 12.5044 - acc: 0.0971 - val_loss: 11.5389 - val_acc: 0.1051
loss: 12.5047 - acc: 0.0958 - val_loss: 11.5369 - val_acc: 0.1018
loss: 12.5055 - acc: 0.0955 - val_loss: 11.5382 - val_acc: 0.0980
loss: 12.5042 - acc: 0.0961 - val_loss: 11.5382 - val_acc: 0.1179
loss: 12.5062 - acc: 0.0960 - val_loss: 11.5366 - val_acc: 0.1082
I have created this custom CNN, trained it, and now wish to try and pass frames from my webcam in real-time for testing the predictions.
The webcam video playback starts to capture the frame by frame, however, I am unsure what to do to the frame in order to get it working with the CNN model
Any advice would be appreciated
I have provided the full code of what I am trying to achieve
#imported necessities
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
from matplotlib.image import imread
from IPython.display import clear_output
import time
import PIL.Image
from io import StringIO
import IPython.display
import numpy as np
from io import BytesIO
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Conv2D, MaxPool2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping
#Data Paths
data_dir = 'C:\\Users\\User\\Desktop\\DATAWeather'
test_path = data_dir+'\\Test\\'
train_path = data_dir+'\\Train\\'
#Variable to resize all of the images
image_shape = (224,224,3) #224*224*3 = 150528 Data Points : thats why we need image batch
#Apply a generator so it does not always get the same format of picture (recognizes different things)
image_gen = ImageDataGenerator(rotation_range=20, width_shift_range=0.1, height_shift_range=0.1, rescale=1/255, shear_range=0.1, zoom_range=0.1,horizontal_flip=True,fill_mode='nearest')
#setting up a base convolutional layer
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(filters=64, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(filters=64, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['accuracy']), #model.summary()
#Create an early EPOCH stoppage based on the validation loss based off TWO epochs
early_stop=EarlyStopping(monitor='val_loss', patience=2)
#TRAINING MODEL - use two to the power
#TWO generators
train_image_gen = image_gen.flow_from_directory(train_path, target_size=image_shape[:2], color_mode='rgb', batch_size = batch_size, class_mode='categorical', shuffle=True)
test_image_gen = image_gen.flow_from_directory(test_path, target_size=image_shape[:2], color_mode='rgb', batch_size = batch_size, class_mode='categorical', shuffle=False)
results = model.fit_generator(train_image_gen, epochs=1, validation_data=test_image_gen, callbacks=[early_stop])
def showarray(a, fmt='jpeg'):
    """Show image array ``a`` inline in the notebook.

    Args:
        a: image array accepted by ``PIL.Image.fromarray``.
        fmt: image format used for encoding (default ``'jpeg'``).
    """
    f = BytesIO()
    PIL.Image.fromarray(a).save(f, fmt)
    # The encoded bytes used to be written to the buffer and then dropped;
    # hand them to IPython so the frame is actually rendered.
    IPython.display.display(IPython.display.Image(data=f.getvalue()))
def get_frame(cam):
    """Read one frame from ``cam`` and mirror it horizontally.

    Args:
        cam: capture object whose ``read()`` returns ``(ok, frame)``.

    Returns:
        The frame flipped around the vertical axis for natural viewing.

    Raises:
        RuntimeError: if ``cam.read()`` reports that no frame was captured.
    """
    # Capture frame-by-frame
    ret, frame = cam.read()
    # A failed read yields no usable frame; fail with a clear message rather
    # than passing it on to cv2.flip.
    if not ret:
        raise RuntimeError("Could not read a frame from the camera")
    # flip image for natural viewing
    return cv2.flip(frame, 1)
cam = cv2.VideoCapture(0)
def make_1080p():
    """Request a 1920x1080 capture resolution from the module-level ``cam``."""
    # Property ids 3 and 4 are the width and height, as used by change_res().
    # The previous values (224, 224) contradicted the function name.
    cam.set(3, 1920)
    cam.set(4, 1080)
def change_res(width, height):
    """Set capture properties 3 and 4 of the module-level ``cam``.

    Args:
        width: value written to property id 3.
        height: value written to property id 4.
    """
    for prop_id, value in ((3, width), (4, height)):
        cam.set(prop_id, value)
change_res(224, 224)
# Grab, convert and show frames until the user interrupts the kernel.
# The `try`/`while` framing was missing, leaving a dangling `except`.
try:
    while True:
        t1 = time.time()
        frame = get_frame(cam)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        showarray(frame)
        t2 = time.time()
        print("%f FPS" % (1/(t2-t1)))
        # Display the frame until new frame is available
        clear_output(wait=True)
        Weather_Prediction_Cell = (frame)
        #Weather_Prediction_Cell /= 255
except KeyboardInterrupt:
    # Free the capture device so it can be reopened later.
    cam.release()
    print("Stream stopped")
Keras model has a method called "predict". It takes a single np.array or a list of np.arrays as input (which should have the exact same shape as your neural net. input, including the batch part: (batch_count, width, height, channels) for example). You input this to model.predict, then it returns you back the result as an np.array again, with the shape your neural network output layer has. I'm not used to webcam applications with opencv, but if you get the frame data in np.array somehow, you can feed it to your neural net. as well. Just be sure about its shape, and reshape it if needed.
I'm using a StackingClassifier on 5 scikit-learn classifiers and a Keras one. It doesn't seem to recognize the Keras one as a classifier however.
Relevant code:
from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
def create_model():
    """Build the binary classifier wrapped by ``KerasClassifier``.

    Reads the tuned hyper-parameters (``best_neurons``, ``best_init_mode``,
    ``best_learn_rate``) and ``X_train`` from module scope.

    Returns:
        The compiled ``Sequential`` model with a single sigmoid output.
    """
    # create model
    model = Sequential()
    # The call below was left unclosed, which made the function a syntax error.
    model.add(Dense(best_neurons, input_shape=(X_train.shape[1],),
                    kernel_initializer=best_init_mode, activation='relu'))
    model.add(Dense(units=1, kernel_initializer=best_init_mode, activation='sigmoid'))
    # `keras` is the tensorflow.keras module imported with this snippet, the
    # same namespace the metrics below come from.
    optimizer = keras.optimizers.RMSprop(lr=best_learn_rate)
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
    return model
NN_clf=KerasClassifier(build_fn=create_model, epochs=best_epochs, batch_size= best_batch_size)
RF_clf =RandomForestClassifier(max_depth=best_max_depth_rf, n_estimators=best_n_estimators_rf,
min_samples_leaf=best_min_samples_leaf_rf, max_features=best_max_features_rf,
class_weight=best_class_weight_rf, max_samples=best_max_samples_rf,
random_state=42, oob_score=True)
KN_clf =KNeighborsClassifier(n_neighbors=best_n_neighbors, p=best_p, leaf_size=best_leaf_size )
#DT_clf = DecisionTreeClassifier(max_depth=best_max_depth_dt, min_samples_leaf=best_min_samples_leaf_dt)
SV_clf = SVC(gamma=best_gamma_sv, C=best_c_sv, kernel=best_kernel_sv, random_state=42, probability=True)
GBC_clf = xgb.XGBClassifier(learning_rate=best_learning_rate_gbc, random_state=42, colsample_bytree=best_colsample_bytree_gbc,
max_depth=best_max_depth_gbc, n_estimators=best_n_estimators_gbc,
gamma=best_gamma_gbc, subsample=best_subsample_gbc)
EX_clf= ExtraTreesClassifier(max_depth=best_max_depth_ex, n_estimators=best_n_estimators_ex,
min_samples_leaf=best_min_samples_leaf_ex, max_features=best_max_features_ex,
warm_start=False, oob_score=True, bootstrap=True, random_state=42)
LR_clf=LogisticRegression(random_state=42, solver=best_solver, penalty=best_penalty, class_weight=best_class_weight, C=best_log_C)
estimators= [('RF', RF_clf), ('GBC', GBC_clf), ('EX', EX_clf), ('LR',LR_clf), ('KN', KN_clf),
('SV', SV_clf), ('NN', NN_clf) ]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
clf.fit(X_train, y_train.values.ravel())
print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))
ValueError Traceback (most recent call last)
<ipython-input-41-272df6aa838e> in <module>
2 ('SV', SV_clf), ('NN', NN_clf) ]
3 clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
----> 4 clf.fit(X_train, y_train.values.ravel())
5 print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))
~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
411 self._le = LabelEncoder().fit(y)
412 self.classes_ = self._le.classes_
--> 413 return super().fit(X, self._le.transform(y), sample_weight)
415 #if_delegate_has_method(delegate='final_estimator_')
~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
129 # all_estimators contains all estimators, the one to be fitted and the
130 # 'drop' string.
--> 131 names, all_estimators = self._validate_estimators()
132 self._validate_final_estimator()
~\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py in _validate_estimators(self)
247 raise ValueError(
248 "The estimator {} should be a {}.".format(
--> 249 est.__class__.__name__, is_estimator_type.__name__[3:]
250 )
251 )
ValueError: The estimator KerasClassifier should be a classifier.
I am using Sci-kit learn versions 2.2, TF ver 2.x. I've seen a similar error here but didn't want to rewrite my code and use the MLextend library.
This problem is because of the similar issue reported here for VotingClassifier.
The solution is just adding this _estimator_type='classifier' to KerasClassifier.
Note: please provide just the minimum code to reproduce your issue.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
def create_model():
    """Build the small binary classifier used in the stacking example.

    Returns:
        The compiled ``Sequential`` model: 20 inputs, one hidden layer of 20
        ReLU units and a single sigmoid output.
    """
    # create model
    model = Sequential()
    model.add(Dense(20, input_dim=20, activation='relu'))
    model.add(Dense(units=1, activation='sigmoid'))
    optimizer = keras.optimizers.RMSprop(lr=0.001)
    # Compile model. The opening of this call (with the loss) was missing;
    # binary cross-entropy matches the single sigmoid output.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
    return model
# SVC is used below but is not among the imports of this example.
from sklearn.svm import SVC

NN_clf = KerasClassifier(build_fn=create_model, epochs=15, batch_size=32)
# StackingClassifier validates every base estimator via its estimator type;
# the Keras wrapper does not declare one, so mark it as a classifier.
NN_clf._estimator_type = "classifier"
RF_clf = RandomForestClassifier(random_state=42, oob_score=True)
KN_clf = KNeighborsClassifier()
SV_clf = SVC(random_state=42, probability=True)
EX_clf = ExtraTreesClassifier(random_state=42)
# `LR_clf` is listed in `estimators` but was never created in this example.
LR_clf = LogisticRegression(random_state=42)
estimators = [('RF', RF_clf), ('EX', EX_clf), ('LR', LR_clf), ('KN', KN_clf),
              ('SV', SV_clf), ('NN', NN_clf)]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
X, y = make_classification()
from sklearn.model_selection import train_test_split
X_train, X_test, y_train , y_test = train_test_split(X, y, test_size=0.3)
clf.fit(X_train, y_train)
print("Stacking model score: %.3f" % clf.score(X_test, y_test))
# Stacking model score: 0.967
I run the following code in Google Colab(with GPU):
import random
import numpy as np
from numpy.random import seed
from tensorflow import set_random_seed
import pandas as pd
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Flatten, Dense, Lambda, SimpleRNN
from keras.optimizers import *
from keras.utils import np_utils
from keras.initializers import *
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, auc, precision_recall_curve
from sklearn.metrics import confusion_matrix
from keras.callbacks import EarlyStopping
from keras import backend as K
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
##Loading dataset train and validation files, the files are same for every run
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
def make_model():
    """Build and compile the seeded Conv2D -> Dense -> SimpleRNN classifier.

    Returns:
        The compiled ``Sequential`` model for (22, 10, 1) inputs with a
        single sigmoid output.
    """
    model = Sequential()
    model.add(Conv2D(10, (5, 5), kernel_initializer=glorot_uniform(seed=1), input_shape=(22, 10, 1),
                     use_bias=True, activation="relu", strides=1, padding="valid"))
    # Flatten the feature maps: the Dense layer then yields (batch, 20), and
    # expand_dims below turns that into the 3-D (batch, 1, 20) input that
    # SimpleRNN requires. Without it the RNN would receive a 5-D tensor.
    model.add(Flatten())
    model.add(Dense(20, kernel_initializer=glorot_uniform(seed=1), activation="relu"))
    model.add(Lambda(lambda x: tf.expand_dims(x, axis=1)))
    model.add(SimpleRNN(20, kernel_initializer=glorot_uniform(seed=1), activation="relu", return_sequences=False))
    model.add(Dense(1, kernel_initializer=glorot_uniform(seed=1), activation="sigmoid"))
    opti = SGD(lr=0.01)
    model.compile(loss="binary_crossentropy", optimizer=opti, metrics=["accuracy"])
    return model
model = make_model()
model.fit(x_train, y_train, validation_data = (x_validation,y_validation), epochs = 50, batch_size = 20, verbose = 2, callbacks=[es])
Despite setting all seed values, my prediction results of the model are different on subsequent runs. The training and testing of the model happens in the same Colab cell.
You are dealing with floating point numbers that are multiplied and added on different threads, so the operations can happen in a different order on each run. Floating point additions and multiplications are not associative, i.e. (a + b) + c is not guaranteed to equal a + (b + c), so a different order gives slightly different results. See What Every Computer Scientist Should Know About Floating-Point Arithmetic.
I have created a project using keras and tensorflow. I used the NSL KDD dataset and coded my project in python. I also used the SGD optimizer.
I would like to fit a model then evaluate it and then check its accuracy. (So I can compare it to the results with machine learning).
Here is my complete code below, please review it.
import tensorflow as tf
from keras import backend as K
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import tag_constants, signature_constants, signature_def_utils_impl
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
sess = tf.Session()
model_version = "2"
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('KDD_Dataset.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 41:42].values
# Encoding categorical data X
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X[:,0] = labelencoder_X.fit_transform(X[:,0])
X[:,1] = labelencoder_X.fit_transform(X[:,1])
X[:,2] = labelencoder_X.fit_transform(X[:,2])
from sklearn.preprocessing import OneHotEncoder
onehotencoder_0 = OneHotEncoder(categorical_features=[0])
onehotencoder_1 = OneHotEncoder(categorical_features=[1])
onehotencoder_2 = OneHotEncoder(categorical_features=[2])
X = onehotencoder_0.fit_transform(X).toarray()
X = onehotencoder_1.fit_transform(X).toarray()
X = onehotencoder_2.fit_transform(X).toarray()
# Encoding categorical data y
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
# Splitting the dataset into the Training set and Test set
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size = 0.2,
random_state = 0)
# create the model
model = Sequential()
# Take the input width from the data: after the one-hot encoding above, X no
# longer has the 8 columns that were hard-coded here.
model.add(Dense(41, input_dim=X_train.shape[1], init='uniform', activation='relu'))
model.add(Dense(20, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))
# compile the model
# `sgd` was never defined; select the optimizer by its string name instead.
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=200, batch_size=5, verbose=0)
See Dense(41, input_dim=8, init='uniform', activation='relu')
The model you defined with 8 features, however your inputs have 45 features. They are not matching. You have to either make your model with 45 features to match the input, or cut the length of input feature to 8 to match your model.
Change line
model.add(Dense(41, input_dim=8, init='uniform', activation='relu'))
model.add(Dense(42, input_dim=42, init='uniform', activation='relu'))
optimizer=sgd to optimizer='sgd'
This is a keras model for sentiment analysis i need to convert it to tensorflow i couldn’t build embedding layer with tensorflow and using confusion matrix to evaluate this model? And I asked if tf-learn is the-same as tensorflow
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import set_random_seed
from nltk.tokenize import word_tokenize
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers import Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Dense,Activation
from keras.layers import Dropout
from keras.callbacks import TensorBoard, ModelCheckpoint
import re
import string
import collections
import time
seed = 10
Read CSV Files
df=pd.read_csv('tweets-pos-neg.csv', usecols = ['text','airline_sentiment'])
df = df.reindex(['text','airline_sentiment'], axis=1) #reorder columns
df=df.apply(lambda x: x.astype(str).str.lower())
Normalize Text
# Punctuation characters and digits that normalize() replaces with a space.
# Compiled once at import time because normalize() is called for every row.
_PUNCTUATION_RE = re.compile(r'[!"#$%&()*+,-./:;<=>?#[\]^_`{|}~|0-9]')


def normalize(text):
    """Clean one tweet for tokenization.

    Steps, in order: drop URLs (``http...`` up to the next whitespace), drop
    hashtags, replace the listed punctuation and digits with a space, and
    collapse any character repeated three or more times to a single one.

    Args:
        text: the raw text as a ``str``.

    Returns:
        The cleaned text. Whitespace is not stripped, and runs of two spaces
        are left as they are.
    """
    text = re.sub(r"http\S+", r'', text)
    text = re.sub(r"#\S+", r'', text)
    text = _PUNCTUATION_RE.sub(' ', text)
    text = re.sub(r'(.)\1\1+', r'\1', text)
    return text
Cleaned Text
def prepareDataSets(df):
    """Return a copy of ``df`` whose 'text' column has been normalized.

    Args:
        df: DataFrame with a 'text' column.

    Returns:
        A new DataFrame; the input is left unmodified.
    """
    # The original loop computed the normalized text and discarded it, then
    # returned a name that was never assigned.
    # NOTE(review): the lost lines may have built `df_sentences` differently
    # (e.g. other columns) — TODO confirm against the original.
    df_sentences = df.copy()
    df_sentences['text'] = [normalize(r['text']) for index, r in df.iterrows()]
    return df_sentences
Split reviews to tokens
max_features = 50000
tokenizer = Tokenizer(num_words=max_features, split=' ')
#convert review tokens to integers
X_seq = tokenizer.texts_to_sequences(X)
Padding Sequence to make all vectors with the same size according to MAX-length of reviews
X_pad = pad_sequences(X_seq,maxlen=seq_len)
Convert target value from string to integer
X_train, X_test, Y_train, Y_test = train_test_split(X_pad,Y_le_oh, test_size
= 0.33, random_state = 42)
X_train, X_Val, Y_train, Y_Val = train_test_split(X_train,Y_train, test_size
= 0.1, random_state = 42)
Create the model
embedding_vecor_length = 32 #no of vector columns
model_cnn = Sequential()
# The Embedding call was cut off; the inputs are padded to `seq_len`, so that
# is the sequence length declared here.
model_cnn.add(Embedding(max_features, embedding_vecor_length,
                        input_length=seq_len))
model_cnn.add(Conv1D(filters=100, kernel_size=2, padding='valid',
                     activation='relu', strides=1))
# Collapse the (steps, filters) axes so the Dense head outputs (batch, 2),
# matching the one-hot targets.
model_cnn.add(Flatten())
model_cnn.add(Dense(256, activation='relu'))
model_cnn.add(Dense(2, activation='softmax'))
opt=tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
# The compile call was cut off after `metrics=`; the evaluation below prints
# an accuracy, so that metric is restored.
model_cnn.compile(loss='binary_crossentropy', optimizer=opt, metrics=
                  ['accuracy'])
Evaluate model
history=model_cnn.fit(X_train, Y_train, epochs=3, batch_size=32, callbacks=[tensorboard], validation_data=(X_Val, Y_Val))
scores = model_cnn.evaluate(X_test, Y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[-1]*100))
If you just need to use Tensorflow APIs to train / evaluate, you can build an Estimator using model_to_estimator function.
Here's the documentation with an example.