I have a problem with loading self pretrained keras model.
When i make predictions directly after training model everything works fine.
My code:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
import matplotlib.pyplot as plt
import numpy as np
import os
from keras.models import load_model
"""
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import load_model
"""
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import matplotlib.pyplot as plt
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Reshaping the array to 4-dims so that it can work with the Keras API
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
# Making sure that the values are float so that we can get decimal points after division
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Normalizing the RGB codes by dividing it to the max RGB value.
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print('Number of images in x_train', x_train.shape[0])
print('Number of images in x_test', x_test.shape[0])
# Importing the required Keras modules containing model and layers
def train():
# Creating a Sequential Model and adding the layers
model = Sequential()
model.add(Conv2D(28, kernel_size=(3,3), input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
model.add(Dense(128, activation=tf.nn.relu))
model.add(Dropout(0.2))
model.add(Dense(10, activation=tf.nn.softmax))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x=x_train,y=y_train, epochs=10)
print(model.summary())
model.save('x.h5')
def eval():
model = load_model('x.h5')
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
train()
eval()
And i got an error:
ValueError: Unknown activation function:softmax_v2
I've tried to use different tensorflow versions (1.15, 2.0, 1.5) but this changes nothing.
Any ideas what is wrong with it?
Edit:
This problem occurs only when i try to load model.
Related
I am currently studying how CNNs can be used in text classification and found some code on stack overflow that had worked with the use of a keras embedding layer.
I ran the code with the keras embedding but now want to test out what would happen with a pre-trained embedding, I have downloaded the word2vec api from gensim but dont know how to adapt the code from there?
My question is how can I replace the keras embedding layer with a pre-trained embedding like the word2vec model or Glove?
heres is the code
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Convolution1D, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import TensorBoard
# Using keras to load the dataset with the top_words
top_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# Pad the sequence to the same length
max_review_length = 1600
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# Using embedding from Keras
embedding_vecor_length = 300
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
# Convolutional model (3x conv, flatten, 2x dense)
model.add(Convolution1D(64, 3, padding='same'))
model.add(Convolution1D(32, 3, padding='same'))
model.add(Convolution1D(16, 3, padding='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180,activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid'))
# Log to tensorboard
tensorBoardCallback = TensorBoard(log_dir='./logs', write_graph=True)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=3, callbacks=[tensorBoardCallback], batch_size=64)
# Evaluation on the test set
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
This reads the text file containing the weights, stores the words and their weights in a dictionary, then maps them into a new matrix using the vocabulary of your fit tokenizer.
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Convolution1D, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import TensorBoard
from tensorflow import keras
import itertools
import numpy as np
# Using keras to load the dataset with the top_words
top_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
word_index = keras.datasets.imdb.get_word_index()
embedding_vecor_length = 300 # same as the embeds to be loaded below
embeddings_dictionary = dict()
glove_file = open('./embeds/glove.6B.300d.txt', 'rb')
for line in glove_file:
records = line.split() # seperates each line by a white space
word = records[0] # the first element is the word
vector_dimensions = np.asarray(
records[1:], dtype='float32') # the rest are the weights
# storing in dictionary
embeddings_dictionary[word] = vector_dimensions
glove_file.close()
# len_of_vocab = len(word_index)
embeddings_matrix = np.zeros((top_words, embedding_vecor_length))
# mapping to a new matrix, using only the words in your tokenizer's vocabulary
for word, index in word_index.items():
if index>=top_words:
continue
# the weights of the individual words in your vocabulary
embedding_vector = embeddings_dictionary.get(bytes(word, 'utf-8'))
if embedding_vector is not None:
embeddings_matrix[index] = embedding_vector
# Pad the sequence to the same length
max_review_length = 1600
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# Using embedding from Keras
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length,
input_length=max_review_length, name="embeddinglayer", weights=[embeddings_matrix], trainable=True))
# Convolutional model (3x conv, flatten, 2x dense)
model.add(Convolution1D(64, 3, padding='same'))
model.add(Convolution1D(32, 3, padding='same'))
model.add(Convolution1D(16, 3, padding='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
# Log to tensorboard
tensorBoardCallback = TensorBoard(log_dir='./logs', write_graph=True)
model.compile(loss='binary_crossentropy',
optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=3, callbacks=[
tensorBoardCallback], batch_size=64)
# Evaluation on the test set
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
i have imported data as following
from keras.datasets import cifar10
import matplotlib.pyplot as plt
(X_train,y_train),(X_test,y_test) =cifar10.load_data()
for i in range(9):
plt.subplot(330+1+i)
plt.imshow(X_train[i])
plt.show()
it works fine as it is given in result :
next i have defined following convolution neural network structure
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import MaxPooling2D
from keras.utils import np_utils
model =Sequential()
model.add(Convolution2D(filters=32,kernel_size=3,input_shape=(3,32,32),padding='same',activation='relu',data_format='channels_first',kernel_constraint=max_norm(3)))
model.add(Dropout(0.2))
model.add(Convolution2D(filters=32,kernel_size=3,activation='relu',padding='same',kernel_constraint=max_norm(3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dense(units=512,activation='relu',kernel_constraint=max_norm(3)))
model.add(Dropout(0.5))
model.add(Dense(num_classes,activation='softmax'))
i have done all necessary compile procedures
Epochs=25
lrate =0.01
decay =lrate /epochs
sgd =SGD(learning_rate=decay,momentum=0.9,nesterov=False)
model.compile(loss='categorical_crossentropy' , optimizer='sgd', metrics=['accuracy' ])
print(model.summary())
and result is here:
after runing following code
model.fit(X_train, y_train, validation_data=(X_test, y_test),epochs=Epochs,batch_size=32, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
it gives me error :
ValueError: Input 0 of layer sequential_7 is incompatible with the layer: expected axis 1 of input shape to have value 3 but received input with shape [None, 32, 32, 3]
please help me to fix this error
The input shape in your first convolutional layer should be input_shape=(32,32,3) instead of input_shape=(3,32,32). You should also set the data_format to the default "channels_last". The reason for setting it up like this is that the shape of cifar10 images is (32,32,3) and not (3,32,32)
you should always be careful about the input shape of the first layer of CNN using Conv2D. In tensorflow the input shape parameters are given as (batch_size, img_h, img_w, channels).
In contrary to pytorch it is (batch_size, channels, img_h, img_w). So, it should be (32,32,3) not (3, 32, 32).
I am working on a Speaker recognition problem, I have very big number of classes so I need to use, tf.nn.sampled_softmax_loss to speed up training time. The problem is I am using Keras with Tensorflow as Backend, but Keras doesnt implement Sampled_softmax so I need to use Tensorflow function, but its unclear which should be the inputs of the tf.nn.sampled_softmax_loss() function.
My model and input are as below:
from preprocess import *
import keras
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D,AveragePooling2D
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import load_model
from keras import regularizers
import numpy as np
from keras.callbacks import EarlyStopping
from keras import metrics
import os
import tensorflow as tf
from tensorflow.python.framework import dtypes
from importance_sampling.training import ImportanceTraining
epochs = 50
batch_size = 100
verbose = 1
labels= get_labels(big_numpy_files_path)
num_classes = len(labels)
#save_data_as_numpy_array(max_len = feature_dim_1, max_len2 = feature_dim_2,origin_path=data_set_path,destination_path=numpy_files_path)
#Get X & Y
X_train, X_valid, X_test ,y_train,y_valid,y_test = get_x_y_data(split_ratio=0.8, random_state=42,maxsamples=20,path=big_numpy_files_path)
#reshape X for input into CNN
X_train, X_valid, X_test = reshape_prepare_for_input(X1=X_train, X2=X_valid, X3=X_test,channel=1)
#Dimensions
dim_1 = X_train.shape[1]
dim_2 = X_train.shape[2]
dim_3 = X_train.shape[3]
#one hot encoding of Y
y_train_hot = to_categorical(y_train)
y_valid_hot = to_categorical(y_valid)
y_test_hot = to_categorical(y_test)
#Model
model = Sequential()
model.add(Conv2D(128, kernel_size=(6, 6),strides=2, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3])))
model.add(Conv2D(64, kernel_size=(2, 2),strides=1, activation='relu'))
model.add(Conv2D(32, kernel_size=(2, 2), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(256, activation='relu', use_bias=True,kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu',use_bias=True,kernel_regularizer=regularizers.l2(0.02)))
model.add(Dropout(0.3))
model.add(Dense(256, activation='relu',use_bias=True,kernel_regularizer=regularizers.l2(0.02)))
model.add(Dropout(0.3))
model.add(Dense(num_classes, activation='softmax'))
loss=keras.losses.categorical_crossentropy
model.compile(loss=loss,optimizer='adamax',metrics=['accuracy'])
earlystopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=15, verbose=verbose, mode='auto')
model.fit(X_train, y_train_hot,batch_size=batch_size,callbacks=[earlystopping],epochs=epochs, verbose=verbose, validation_data=(X_valid, y_valid_hot))
If I want to replace the above loss function with Samled softmax, If I'm trying to do something like below, what should be the inputs give my above architecture and should I add tf.reduce_mean?
for more details on the code, is https://github.com/selimelawwa/Speaker_Verification
I am using keras from tensorflow backend to detect potholes and garbage in my trained model. The output generated for detecting potholes or garbage works fine but when i give a random image of a car or bike or cat or human or building it identifies everything as a garbage class.what should i do now? what should be the optimal output for such cases. I am providig my code here.
#train
import numpy as np
np.random.seed(123) # for reproducibility
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from dataset_pothole import pothole
from keras.models import model_from_json
# 4. Load pre-shuffled MNIST data into train and test sets
(X_train, y_train), (X_test, y_test) = pothole.load_data()
# 5. Preprocess input data
X_train = X_train.reshape(X_train.shape[0], 50, 50, 3)
X_test = X_test.reshape(X_test.shape[0], 50, 50, 3)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
# 6. Preprocess class labels
Y_train = np_utils.to_categorical(y_train, 2)
Y_test = np_utils.to_categorical(y_test, 2)
# 7. Define model architecture
model = Sequential()
model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(50, 50, 3)))
model.add(Convolution2D(32, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
# 8. Compile model
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
# 9. Fit model on training data
model.fit(X_train, Y_train,
batch_size=32, nb_epoch=20, verbose=1)
# 10. Evaluate model on test data
score = model.evaluate(X_test, Y_test, verbose=0)
model_json = model.to_json()
with open("model.json", "w") as json_file:
json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")
print('Test loss: ', score[0])
print('Test accuracy: ', score[1])
#evaluation
import numpy as np
np.random.seed(123) # for reproducibility
import keras
from keras.utils import np_utils
from keras.models import model_from_json
import os
from PIL import Image
from numpy import *
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
img = input("Please enter the test filename:")
test = array(array(Image.open(img)).flatten())
print(test.shape)
X_test = test.reshape((1, 50, 50, 3))
print(X_test.shape)
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
#loaded_model.compile(loss='categorical_crossentropy',
# optimizer='adam',
# metrics=['accuracy'])
prediction = loaded_model.predict_classes(X_test)
print(prediction)
print(loaded_model.predict(X_test))
if prediction==[1]:
print("Pothole")
elif prediction==[0]:
print("Garbage")
else:
print("Invalid Image!")
I'm converting student-teacher model in below url to keras one.
https://github.com/chengshengchan/model_compression/blob/master/teacher-student.py
How can I give input to two model(student, teacher) and get one output from only student in keras?
I'll set teacher's all tensors with trainable=false, and loss function as difference between student and teacher's output like below :
tf_loss = tf.nn.l2_loss(teacher - student)/batch_size
As I know, it is possible to give input to only one model when defining model.fit. But in this cases, I should it to both of teacher and student model.
Thank in advance!
Below is very simple student-teacher model in keras.
I hope it might be helpful to someone like me.
Good job!
import keras
from keras.datasets import mnist
from keras.layers import Input, Embedding, LSTM, Dense, Lambda
from keras.models import Model
import numpy as np
from keras.utils import np_utils
from keras.layers.core import Dense, Dropout, Activation
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
from keras.models import Sequential
from keras.layers import Dense, Merge
from keras.optimizers import SGD, Adam, RMSprop
batch_size = 128
nb_classes = 10
nb_epoch = 3
teacher = Sequential()
teacher.add(Dense(10, input_shape=(784,)))
teacher.add(Dense(10))
teacher.add(Activation('softmax'))
teacher.summary()
teacher.compile(loss='categorical_crossentropy',
optimizer=RMSprop(),
metrics=['accuracy'])
history = teacher.fit(X_train, Y_train,
batch_size=batch_size, nb_epoch=nb_epoch,
verbose=1, validation_data=(X_test, Y_test))
score = teacher.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
for i in range(len(teacher.layers)):
setattr(teacher.layers[i], 'trainable', False)
Y_train = np.zeros((60000, 10))
student = Sequential()
student.add(Dense(10, input_dim=784))
student.add(Activation('softmax'))
student.compile(loss='mean_squared_error', optimizer='Adam', metrics=['accuracy'])
from keras.layers import *
def negativeActivation(x):
return -x
negativeRight = Activation(negativeActivation)(student.output)
diff = Add()([teacher.output,negativeRight])
model = Model(inputs=[teacher.input, student.input], outputs=[diff])
model.compile(loss='mean_squared_error', optimizer='Adam', metrics=['acc'])
model.summary(line_length=150)
model.fit([X_train, X_train], [Y_train], batch_size=128, nb_epoch=5)
print student.evaluate(X_test, Y_test)
The only implementation I have seen in Keras involves building 2 separate functions which either widen or deepen weight layers from the teacher model as initial weights for the student model.
I am not sure if it is precisely that Hinton et al. (2015) distillation to be honest, but it is teacher-student.
https://github.com/fchollet/keras/issues/3491