I need help making my Neural Network make itself smarter - numpy

Recently i followed a video from a channel from NeuralNine on creating a AI Chatbot, i Followed the video and vaguely understand the code, i'm a beginner to coding and didnt understand all of it however i managed to resolve the multiple problems that arose on my own, However the training code doesnt make the bot smarter i essentially want a way to load the previous training model and then run the code again and improve the accuracy each time. However at the moment it seems to just be overwriting the previous training model and not improving itself (i think please correct me if im wrong), i just cant see any code that is overwriting and making the model "chatbotmodel.h5" "smarter".
Here is "my" (NeuralNine's) Code:
|Training.py|
import random
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
lemmatizer = WordNetLemmatizer()
intents = json.loads(open('intents.json').read())
words = []
classes = []
documents = []
ignore_letters = ['?', ',', '.', '!', "'"]
for intent in intents['intents']:
for pattern in intent['patterns']:
word_list = nltk.word_tokenize(pattern)
words.extend(word_list)
documents.append((word_list, intent['tag']))
if intent['tag'] not in classes:
classes.append(intent['tag'])
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))
classes = sorted(set(classes))
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))
training = []
output_empty = [0] * len(classes)
for document in documents:
bag = []
word_patterns = document[0]
word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
for word in words:
bag.append(1) if word in word_patterns else bag.append(0)
output_row = list(output_empty)
output_row[classes.index(document[1])] = 1
training.append([bag, output_row])
random.shuffle(training)
training = np.array(training)
train_x = list(training[:, 0])
train_y = list(training[:, 1])
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics='accuracy')
hist = model.fit(np.array(train_x), np.array(train_y), epochs=300, batch_size=8, verbose=1)
model.save('chatbotmodel.h5', hist)
print("Done")

Your problem is not the code. One fundamental principle to understand in machine learning is that training more your model doesn't necessary make it more efficient.
I suggest that you take a look at overfitting problem. The data distribution is also very important.
Moreover, if NeuralNine didn't train more the model, maybe there are some reasons.

Related

How to use the pretrained model in an application?

I dont understand how to use the trained model.
For example I trained the model using the code from https://towardsdatascience.com/what-is-a-recurrent-neural-network-and-implementation-of-simplernn-gru-and-lstm-models-in-keras-f7247e97c405.
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
imdb, info = tfds.load("imdb_reviews",with_info=True, as_supervised=True)
train_data, test_data = imdb['train'], imdb['test']
training_sentences = []
training_labels = []
testing_sentences = []
testing_labels = []
for s,l in train_data:
training_sentences.append(str(s.numpy()))
training_labels.append(l.numpy())
for s,l in test_data:
testing_sentences.append(str(s.numpy()))
testing_labels.append(l.numpy())
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)
vocab_size = 10000
embedding_dim=16
max_length = 120
trunc_type= 'post'
oov_tok="<OOV>"
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, maxlen=max_length,
truncating = trunc_type)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length)
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, embedding_dim,
input_length=max_length),
tf.keras.layers.SimpleRNN(32),
tf.keras.layers.Dense(10, activation='relu'),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.summary()
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
num_epochs=30
history=model.fit(padded, training_labels_final, epochs=num_epochs, validation_data = (testing_padded, testing_labels_final))
It is all from this link.
Then I added the last line of code
model.save('name.model')
Then I open the new script where I want to use this model. I suggests that I will put a sentence ("He is a bad cat.") and I will get 0 or 1 depending wether it represents good sentiment or bad sentiment (I think I must get 0).
import tensorflow as tf
from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
model=tf.keras.models.load_model('name.model')
print(model.output_shape)
prediction=model.predict(["He is a bad cat."])
print(prediction)
And I get error.
QUESTION 1: In which format I must put this sentance? If I give this model to my friend how does he know in which format he must put the sentence to this model?
QUESTION 2: I noticed that the output will have (None,1) format. But I hoped to see one number (0 or 1) but not strange vector. What is going and how to get 0 or 1?
Before you make predictions you have to convert your text to input format.
For example, to make a prediction of "He is a bad cat." you have to tokenize the sentence.
#tokenizing the prediction data
sentence=['He is a bad cat.']
sequences=tokenizer.texts_to_sequences(sentence)
Then you have to add padding to it
#padding the tokenized sentence
padded=pad_sequences(sequences,maxlen=max_length,truncating=trunc_type)
Then you can make predictions on the padded sequence
model1.predict(padded) #output: array([[0.00079787]], dtype=float32)
The output will be a float not exactly 0 or 1 because the model will give the value based on the given sentence close to the negative or positive side on the embedding.
For more details please refer to this working gist. Thank You.

Strange results from a neural network build using Keras

I build an sentiment classifier using Keras to predict if a sentence has a sentiment score of 1, 2, 3, 4 or 5. However I am getting some strange results. I will first show my code:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import numpy as np
# the data only reflects the structure of the actual data
# the real data has way larger text and more entries
X_train = ['i am glad i heard about that', 'that is one ugly bike']
y_train = pd.Series(np.array([1, 4])) # pandas series
X_test = ['that hurted me']
y_test = pd.Series(np.array([1, 4])) # pandas series
# tokenizing
tokenizer = Tokenizer(num_words = 5)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
# performing some padding
padding_len = 4
X_train_seq_padded = pad_sequences(X_train_seq, maxlen = padding_len)
X_test_seq_padded = pad_sequences(X_test_seq, maxlen = padding_len)
# building the model
model = Sequential()
model.add(Dense(16, input_dim = padding_len, activation = 'relu', name = 'hidden-1'))
model.add(Dense(16, activation = 'relu', name = 'hidden-2'))
model.add(Dense(16, activation = 'relu', name = 'hidden-3'))
model.add(Dense(6, activation='softmax', name = 'output_layer'))
# compiling the model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
# training the model
callbacks = [EarlyStopping(monitor = 'accuracy', patience = 5, mode = 'max')]
my_model = model.fit(X_train_seq_padded, to_categorical(y_train), epochs = 100, batch_size = 1000, callbacks = callbacks, validation_data = (X_test, to_categorical(y_test)))
Using the actual data I keep getting results around 0.67xx (xx random numbers) which are reached after 1/2 epochs, no matter what changes to the code I introduce (and some are extreme).
I tried changing the padding to 1, 10, 100, 1000.
I tried removing the layer hidden-2 and hidden-3.
I tried adding stop word removal before tokenizing.
I tried using the tahn activation function in the hidden layers.
I used the sgd optimizer.
Example output of one setup:
Now my question is, is there something wrong with my code or are these actual possible results?

degraded accuracy performance with overfitting when downgrading from tensorflow 2.3.1 to tensorflow 1.14 or 1.15 on multiclass categorization

I made a script in tensorflow 2.x but I had to downconvert it to tensorflow 1.x (tested in 1.14 and 1.15). However, the tf1 version performs very differently (10% accuracy lower on the test set). See also the plot for train and validation performance (diagram is attached below).
Looking at the operations needed for the migration from tf1 to tf2 it seems that only the Adam learning rate may be a problem but I'm defining it explicitly tensorflow migration
I've reproduced the same behavior both locally on GPU and CPU and on colab. The keras used was the one built-in in tensorflow (tf.keras). I've used the following functions (both for train,validation and test), using a sparse categorization (integers):
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
horizontal_flip=horizontal_flip,
#rescale=None, #not needed for resnet50
preprocessing_function=None,
validation_split=None)
train_dataset = train_datagen.flow_from_directory(
directory=train_dir,
target_size=image_size,
class_mode='sparse',
batch_size=batch_size,
shuffle=True)
And the model is a simple resnet50 with a new layer on top:
IMG_SHAPE = img_size+(3,)
inputs = Input(shape=IMG_SHAPE, name='image_input',dtype = tf.uint8)
x = tf.cast(inputs, tf.float32)
# not working in this version of keras. inserted in imageGenerator
x = preprocess_input_resnet50(x)
base_model = tf.keras.applications.ResNet50(
include_top=False,
input_shape = IMG_SHAPE,
pooling=None,
weights='imagenet')
# Freeze the pretrained weights
base_model.trainable = False
x=base_model(x)
# Rebuild top
x = GlobalAveragePooling2D(data_format='channels_last',name="avg_pool")(x)
top_dropout_rate = 0.2
x = Dropout(top_dropout_rate, name="top_dropout")(x)
outputs = Dense(num_classes,activation="softmax", name="pred_out")(x)
model = Model(inputs=inputs, outputs=outputs,name="ResNet50_comp")
optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
loss="sparse_categorical_crossentropy",
metrics=['accuracy'])
And then I'm calling the fit function:
history = model.fit_generator(train_dataset,
steps_per_epoch=n_train_batches,
validation_data=validation_dataset,
validation_steps=n_val_batches,
epochs=initial_epochs,
verbose=1,
callbacks=[stopping])
I've reproduced the same behavior for example with the following full script (applied to my dataset and changed to adam and removed intermediate final dense layer):
deep learning sandbox
The easiest way to replicate this behavior was to enable or disable the following line on a tf2 environment with the same script and add the following line to it. However, I've tested also on tf1 environments (1.14 and 1.15):
tf.compat.v1.disable_v2_behavior()
Sadly I cannot provide the dataset.
Update 26/11/2020
For full reproducibility I've obtained a similar behaviour by means of the food101 (101 categories) dataset enabling tf1 behaviour with 'tf.compat.v1.disable_v2_behavior()'. The following is the script executed with tensorflow-gpu 2.2.0:
#%% ref https://medium.com/deeplearningsandbox/how-to-use-transfer-learning-and-fine-tuning-in-keras-and-tensorflow-to-build-an-image-recognition-94b0b02444f2
import os
import sys
import glob
import argparse
import matplotlib.pyplot as plt
import tensorflow as tf
# enable and disable this to obtain tf1 behaviour
tf.compat.v1.disable_v2_behavior()
from tensorflow.keras import __version__
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
# since i'm using resnet50 weights from imagenet, i'm using food101 for
# similar but different categorization tasks
# pip install tensorflow-datasets if tensorflow_dataset not found
import tensorflow_datasets as tfds
(train_ds,validation_ds),info= tfds.load('food101', split=['train','validation'], shuffle_files=True, with_info=True)
assert isinstance(train_ds, tf.data.Dataset)
print(train_ds)
#%%
IM_WIDTH, IM_HEIGHT = 224, 224
NB_EPOCHS = 10
BAT_SIZE = 32
def get_nb_files(directory):
"""Get number of files by searching directory recursively"""
if not os.path.exists(directory):
return 0
cnt = 0
for r, dirs, files in os.walk(directory):
for dr in dirs:
cnt += len(glob.glob(os.path.join(r, dr + "/*")))
return cnt
def setup_to_transfer_learn(model, base_model):
"""Freeze all layers and compile the model"""
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
def add_new_last_layer(base_model, nb_classes):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top
nb_classes: # of classes
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
#x = Dense(FC_SIZE, activation='relu')(x) #new FC layer, random init
predictions = Dense(nb_classes, activation='softmax')(x) #new softmax layer
model = Model(inputs=base_model.input, outputs=predictions)
return model
def train(nb_epoch, batch_size):
"""Use transfer learning and fine-tuning to train a network on a new dataset"""
#nb_train_samples = train_ds.cardinality().numpy()
nb_train_samples=info.splits['train'].num_examples
nb_classes = info.features['label'].num_classes
classes_names = info.features['label'].names
#nb_val_samples = validation_ds.cardinality().numpy()
nb_val_samples = info.splits['validation'].num_examples
#nb_epoch = int(args.nb_epoch)
#batch_size = int(args.batch_size)
def preprocess(features):
#print(features['image'], features['label'])
image = tf.image.resize(features['image'], [224,224])
#image = tf.divide(image, 255)
#print(image)
# data augmentation
image=tf.image.random_flip_left_right(image)
image = preprocess_input(image)
label = features['label']
# for categorical crossentropy
#label = tf.one_hot(label,101,axis=-1)
#return image, tf.cast(label, tf.float32)
return image, label
#pre-processing the dataset to fit a specific image size and 2D labelling
train_generator = train_ds.map(preprocess).batch(batch_size).repeat()
validation_generator = validation_ds.map(preprocess).batch(batch_size).repeat()
#train_generator=train_ds
#validation_generator=validation_ds
#fig = tfds.show_examples(validation_generator, info)
# setup model
base_model = ResNet50(weights='imagenet', include_top=False) #include_top=False excludes final FC layer
model = add_new_last_layer(base_model, nb_classes)
# transfer learning
setup_to_transfer_learn(model, base_model)
history = model.fit(
train_generator,
epochs=nb_epoch,
steps_per_epoch=nb_train_samples//BAT_SIZE,
validation_data=validation_generator,
validation_steps=nb_val_samples//BAT_SIZE)
#class_weight='auto')
#execute
history = train(nb_epoch=NB_EPOCHS, batch_size=BAT_SIZE)
And the performance on food101 dataset:
update 27/11/2020
It's possible to see the discrepancy also in the way smaller oxford_flowers102 dataset:
(train_ds,validation_ds,test_ds),info= tfds.load('oxford_flowers102', split=['train','validation','test'], shuffle_files=True, with_info=True)
Nb: the above plot shows confidences given by running the same training multiple times and evaluatind mean and std to check for the effects on random weights initialization and data augmentation.
Moreover I've tried some hyperparameter tuning on tf2 resulting in the following picture:
changing optimizer (adam and rmsprop)
not applying horizontal flipping aumgentation
deactivating keras resnet50 preprocess_input
Thanks in advance for every suggestion. Here are the accuracy and validation performance on tf1 and tf2 on my dataset:
Update 14/12/2020
I'm sharing the colab for reproducibility on oxford_flowers at the clic of a button:
colab script
I came across something similar, when doing the opposite migration (from TF1+Keras to TF2).
Running this code below:
# using TF2
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50
fe = ResNet50(include_top=False, pooling="avg")
out = fe.predict(np.ones((1,224,224,3))).flatten()
sum(out)
>>> 212.3205274187726
# using TF1+Keras
import numpy as np
from keras.applications.resnet50 import ResNet50
fe = ResNet50(include_top=False, pooling="avg")
out = fe.predict(np.ones((1,224,224,3))).flatten()
sum(out)
>>> 187.23898954353717
you can see the same model from the same library on different versions does not return the same value (using sum as a quick check-up). I found the answer to this mysterious behavior in this other SO answer: ResNet model in keras and tf.keras give different output for the same image
Another recommendation I'd give you is, try using pooling from inside applications.resnet50.ResNet50 class, instead of the additional layer in your function, for simplicity, and to remove possible problem-generators :)

Keras predicting different output for same input image

am working on a classification problem for binary classes, I have finished the training and testing the model in single images now using the below code
import warnings
import time
from urllib.request import urlopen
import os
import urllib.request
start_time = time.time()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
import numpy as np
from keras.preprocessing.image import img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.applications.vgg16 import VGG16
import tensorflow as tf
import logging
logging.getLogger('tensorflow').disabled = True
img_size = 224
class PersonPrediction:
def __init__(self):
self.class_dictionary = np.load(
'class_indices_vgg.npy',
allow_pickle=True).item()
self.top_model_weights_path = 'v2/weights/bottleneck_fc_model_2020-10-10-05.h5'
self.num_classes = len(self.class_dictionary)
self.model = self.create_model(self.num_classes)
self.graph = tf.compat.v1.get_default_graph()
def create_model(self, num_of_cls):
model = Sequential()
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=(img_size, img_size, 3))
for layer in vgg_model.layers[:-4]:
layer.trainable = False
model.add(vgg_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
return model
def predict(self, path=None, file_name=None):
if path:
image_path = path
path = self.url_to_image(image_path)
else:
path = os.path.join('imgs', file_name)
print("[INFO] loading and preprocessing image...")
image = load_img(path, target_size=(224, 224))
image = img_to_array(image)
# important! otherwise the predictions will be '0'
image = image / 255
image = np.expand_dims(image, axis=0)
label_idx = self.model.predict_classes(image)[0][0]
probability = self.model.predict(image)[0]
inv_map = {v: k for k, v in self.class_dictionary.items()}
label = inv_map[label_idx]
return label, probability[0]
path = 'temp.jpg'
tax_model = PersonPrediction()
label, proba = tax_model.predict(
file_name='frame303.jpg')
print(label, proba)
Problem is I keep getting chaning predictions of both label and accuracy every time I rerun the code, am not sure what is causing that
There are a number of sources that create randomness in the results when training a model. First the weights are randomly initialized so your model is starting from a different point in N space (N is the number of trainable parameters). Second layers like dropout have randomness in terms of which nodes will be nodes will be selected. Some GPU processes particularly with multi-processing can also have some degree of randomness. I have seen a number of posts on getting repeatable results in tensorflow but I have not found one that seems to really work. In general though the results should be reasonably close if your model is working correctly and you run enough epochs. Now once the model is trained and you use it for predictions as long as you use the same trained model you should get identical prediction results.

Keras vs. TensorFlow code comparison sources

This isn't really a question that's code-specific, but I haven't been able to find any answers or resources.
I'm currently trying to teach myself some "pure" TensorFlow rather than just using Keras, and I felt that it would be very helpful if there were some sources where they have TensorFlow code and the equivalent Keras code side-by-side for comparison.
Unfortunately, most of the results I find on the Internet talk about performance-wise differences or have very simple comparison examples (e.g. "and so this is why Keras is much simpler to use"). I'm not so much interested in those details as much as I am in the code itself.
Does anybody know if there are any resources out there that could help with this?
Here you have two models, in Tensorflow and in Keras, that are correspondent:
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
Tensorflow
X = tf.placeholder(dtype=tf.float64)
Y = tf.placeholder(dtype=tf.float64)
num_hidden=128
# Build a hidden layer
W_hidden = tf.Variable(np.random.randn(784, num_hidden))
b_hidden = tf.Variable(np.random.randn(num_hidden))
p_hidden = tf.nn.sigmoid( tf.add(tf.matmul(X, W_hidden), b_hidden) )
# Build another hidden layer
W_hidden2 = tf.Variable(np.random.randn(num_hidden, num_hidden))
b_hidden2 = tf.Variable(np.random.randn(num_hidden))
p_hidden2 = tf.nn.sigmoid( tf.add(tf.matmul(p_hidden, W_hidden2), b_hidden2) )
# Build the output layer
W_output = tf.Variable(np.random.randn(num_hidden, 10))
b_output = tf.Variable(np.random.randn(10))
p_output = tf.nn.softmax( tf.add(tf.matmul(p_hidden2, W_output), b_output) )
loss = tf.reduce_mean(tf.losses.mean_squared_error(
labels=Y,predictions=p_output))
accuracy=1-tf.sqrt(loss)
minimization_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
feed_dict = {
X: x_train.reshape(-1,784),
Y: pd.get_dummies(y_train)
}
with tf.Session() as session:
session.run(tf.global_variables_initializer())
for step in range(10000):
J_value = session.run(loss, feed_dict)
acc = session.run(accuracy, feed_dict)
if step % 100 == 0:
print("Step:", step, " Loss:", J_value," Accuracy:", acc)
session.run(minimization_op, feed_dict)
pred00 = session.run([p_output], feed_dict={X: x_test.reshape(-1,784)})
Keras
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from keras.models import Model
l = tf.keras.layers
model = tf.keras.Sequential([
l.Flatten(input_shape=(784,)),
l.Dense(128, activation='relu'),
l.Dense(128, activation='relu'),
l.Dense(10, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
model.summary()
model.fit(x_train.reshape(-1,784),pd.get_dummies(y_train),nb_epoch=15,batch_size=128,verbose=1)
You can take a look to this toy example, but it may be too simple.