Replicating results using Keras - TensorFlow

I am trying to replicate the results of my experiments using TensorFlow and Keras (with the TF backend). When I use TF, I set the random seeds for numpy and the TensorFlow graph right at the top of the script. I am not using dropout layers or any other method that could introduce randomness (that I can think of).
Running such models, regardless of the network size, always yields the same results.
TF Experiment 1:
('Epoch ', 99, ' completed out of ', 100, ' loss: ', 289.8982433080673, 'accuracy: ', 0.6875)
TF Experiment 2:
('Epoch ', 99, ' completed out of ', 100, ' loss: ', 289.8982433080673, 'accuracy: ', 0.6875)
When I tried to replicate these results in Keras with the same configuration, I failed. On top of that, each separate run yields different performance.
My TF code, which can replicate results, looks like this:
Snippet Reference: https://www.youtube.com/watch?v=BhpvH5DuVu8&list=PLQVvvaa0QuDfKTOs3Keq_kaG2P55YRn5v&index=46
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
## import system modules
#
import os
import sys
## import ML modules
#
import tensorflow as tf
import numpy as np
from keras.utils import to_categorical
from sklearn import preprocessing
logs_path = '../logs/'
## Default constants
#
NO_OF_CLASSES = 2
BATCH_SIZE = 32
FEAT_DIM = 26
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30
## define the network architecture
#
## This model is a simple multilayer perceptron network with 3 hidden layers.
## Input to the layer has the dimensions equal to feature dimensions.
## We create a complete graph in this method with input placeholder as an input argument and
## output placeholder as an returning argument
#
def neural_network_model(data):
    ## defining dictionaries specifying the specification of each layer.
    #
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([FEAT_DIM, N_nodes_hl1]), name='w1'),
                      'biases': tf.Variable(tf.random_normal([N_nodes_hl1]), name='b1')}
    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([N_nodes_hl1, N_nodes_hl2]), name='w2'),
                      'biases': tf.Variable(tf.random_normal([N_nodes_hl2]), name='b2')}
    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([N_nodes_hl2, N_nodes_hl3]), name='w3'),
                      'biases': tf.Variable(tf.random_normal([N_nodes_hl3]), name='b3')}
    output_layer = {'weights': tf.Variable(tf.random_normal([N_nodes_hl3, NO_OF_CLASSES]), name='w4'),
                    'biases': tf.Variable(tf.random_normal([NO_OF_CLASSES]), name='b4')}
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'], name="last_layer")
    ## return the final layer's output gracefully
    #
    return output
## end of method
#
## This method trains a neural network along with collecting statistics related to
## the graphs.
#
def train_neural_network(xtrain, ytrain, odir):
    learning_rate = 0.0008
    epoch_iter = 100
    ## input/output placeholders where data would be plugged in...
    #
    x = tf.placeholder('float', [None, FEAT_DIM], name="input")
    y_ = tf.placeholder('float', name="output")
    ## define the network
    #
    logits = neural_network_model(x)
    prediction = tf.nn.softmax(logits, name="op_to_restore")  ## softmax normalizes the output results
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))
    ## Major OP for the training procedure. The "train" op defined here tries to minimize loss
    #
    with tf.name_scope('ADAM'):
        # Gradient descent
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train = optimizer.minimize(loss)
    with tf.name_scope('Accuracy'):
        ## Accuracy calculation by comparing the predicted and detected labels
        #
        acc = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))
    ## summary and display variables
    #
    loss_sum = tf.summary.scalar("loss", loss)
    acc_sum = tf.summary.scalar("accuracy", acc)
    ## Merge all summaries into a single variable. These summaries will be displayed using Tensorboard
    #
    merged_summary_op = tf.summary.merge([loss_sum, acc_sum])
    ## create a session for the graph (graph initialization)
    #
    with tf.Session() as sess:
        ## initialize all the variables. Note that before this point, all the variables were empty buckets !!
        #
        sess.run(tf.global_variables_initializer())
        ## initialize the summary writer (for Tensorboard)
        #
        summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        ## iterate over epochs (complete forward-backward for the entire training set)
        #
        for epoch in range(epoch_iter):
            ## initialize some variables to keep track of progress during training
            #
            epoch_loss = 0
            epoch_accuracy = 0
            ## minibatch training. Splitting input data into smaller chunks is better
            #
            for i in range(int(len(xtrain) / BATCH_SIZE)):
                epoch_x = xtrain[i * BATCH_SIZE : i * BATCH_SIZE + BATCH_SIZE]
                epoch_y = ytrain[i * BATCH_SIZE : i * BATCH_SIZE + BATCH_SIZE]
                ## run the session and collect the intermediate stats. The feed_dict kwarg takes input/output
                ## placeholder names as keys and features/labels as values
                #
                _, ac, ls, summary = sess.run([train, acc, loss, merged_summary_op], feed_dict={x: epoch_x, y_: epoch_y})
                ## write the summary to the logs to visualize it later
                #
                summary_writer.add_summary(summary, epoch * int(len(xtrain) / BATCH_SIZE) + i)
                ## update stats
                #
                epoch_loss += ls
                epoch_accuracy += ac
            print("Epoch ", epoch, " completed out of ", epoch_iter, " loss: ", epoch_loss, "accuracy: ", ac)
    ## saver module to save tf graph variables.. etc....
My Keras script to replicate results looks as follows:
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
## import system modules
#
import os
import sys
## import ML and datatype modules
#
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn import preprocessing
## Default constants
#
NO_OF_CLASSES = 2
BATCH_SIZE = 32
FEAT_DIM = 26
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30
## This method defines the NN architecture as well as performs training and saves the model info
#
def train_neural_network(xtrain, ytrain, odir):
    learning_rate = 0.009
    ## Define the network (MLP)
    #
    model = Sequential()
    model.add(Dense(N_nodes_hl1, input_dim=FEAT_DIM, activation="relu"))
    model.add(Dense(N_nodes_hl2, activation="relu"))
    model.add(Dense(N_nodes_hl3, activation="relu"))
    model.add(Dense(NO_OF_CLASSES, activation="softmax"))
    ## optimizer
    #
    sgd = SGD(lr=learning_rate)
    model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=['accuracy'])
    print model.summary()
    ## train the model
    model.fit(x=xtrain, y=ytrain, epochs=100)
Keras experiment1:
loss: 0.5964 - acc: 0.6725
Keras experiment2:
loss: 0.5974 - acc: 0.6712
The only difference between the two scripts is the optimizer, and I don't think that should introduce any randomness during training. I also believe the same NN architecture should yield the same results, up to float64 precision on CPUs (and float32 on GPUs, due to hardware capabilities).
What am I missing in my Keras script? Also, correct me if my understanding is wrong anywhere in this question.
In addition, references (other than the following) on how to replicate NN results would be highly appreciated:
https://machinelearningmastery.com/reproducible-results-neural-networks-keras/
How to get stable results with TensorFlow, setting random seed
Getting reproducible results using tensorflow-gpu
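For reference, the recipe those links suggest goes further than just seeding numpy and TensorFlow: it also pins Python's hash seed and forces a single-threaded session, since multi-threaded op scheduling can change the order of floating-point reductions between runs. A minimal sketch for TF 1.x with the Keras backend (the seed values are arbitrary):
import os
os.environ['PYTHONHASHSEED'] = '0'  # strictly, this must be set before the interpreter starts

import random
import numpy as np
import tensorflow as tf
from keras import backend as K

random.seed(1)
np.random.seed(1)
tf.set_random_seed(2)

# Single-threaded execution avoids nondeterministic reduction order.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
K.set_session(tf.Session(graph=tf.get_default_graph(), config=session_conf))
Seeding the layer initializers explicitly (e.g. kernel_initializer=keras.initializers.glorot_uniform(seed=1)) is another commonly suggested step.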

Related

Keras/TensorFlow MNIST DCGAN: why does the generator have almost zero loss from the start?

I have constructed a DCGAN (deep convolutional generative adversarial network) inspired by this GitHub repository. It is written in fairly low-level TensorFlow code, which I tried to translate into Keras syntax.
The network is quite heavy, I think (around 4 million parameters), and during training I run into the problem that the generator network beats the discriminator network by a lot. I have not found similar posts about this, since most of the time it is the discriminator that beats the generator (while in fact being fooled), or there is mode collapse. So I suspect there is something wrong in the code (maybe the discriminator is training when it shouldn't, or the loss function is wrong, etc.). I have tried to spot the mistake but failed.
My code follows below:
from keras.models import Sequential
from keras.layers import Dense, Reshape, ReLU, LeakyReLU, BatchNormalization as BN#, tanh, sigmoid
from keras.layers.core import Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D, Conv2D, MaxPooling2D, Conv2DTranspose
from keras.optimizers import SGD, Adam
from keras.datasets import mnist
import time
import numpy as np
import math
from utils import load_mnist, load_lines, load_celebA
class dcgan(object):
    def __init__(self, config):
        """
        Args:
            batch_size: The size of batch. Should be specified before training.
            y_dim: (optional) Dimension of dim for y. [None]
            z_dim: (optional) Dimension of dim for Z. [100]
            gf_dim: (optional) Dimension of G filters in first conv layer. [64]
            df_dim: (optional) Dimension of D filters in first conv layer. [64]
            gfc_dim: (optional) Dimension of G units for fully connected layer. [1024]
            dfc_dim: (optional) Dimension of D units for fully connected layer. [1024]
            c_dim: (optional) Dimension of image color. For grayscale input, set to 1. [3]
        """
        self.build_model(config)

    def build_model(self, config):
        self.D = self.discriminator(config)
        self.G = self.generator(config)
        self.GAN = Sequential()
        self.GAN.add(self.G)
        self.D.trainable = False
        self.GAN.add(self.D)

    def discriminator(self, config):
        input_shape = (config.x_h, config.x_w, config.x_d)
        D = Sequential()
        D.add(Conv2D(filters=config.df_dim, strides=2, padding='same', kernel_size=5, input_shape=input_shape))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Conv2D(filters=config.df_dim*2, strides=2, padding='same', kernel_size=5))
        D.add(BN(momentum=0.9, epsilon=1e-5))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Conv2D(filters=config.df_dim*4, strides=2, padding='same', kernel_size=5))
        D.add(BN(momentum=0.9, epsilon=1e-5))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Conv2D(filters=config.df_dim*8, strides=2, padding='same', kernel_size=5))
        D.add(BN(momentum=0.9, epsilon=1e-5))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Flatten())
        D.add(Dense(1))
        D.add(Activation('sigmoid'))
        print('D:')
        D.summary()
        return D

    def generator(self, config):
        G = Sequential()
        G.add(Dense(input_dim=config.z_dim, units=config.gf_dim*8*4*4))
        G.add(Reshape((4, 4, config.gf_dim*8)))
        G.add(BN(momentum=0.9, epsilon=1e-5))
        G.add(ReLU())
        G.add(Conv2DTranspose(filters=config.gf_dim*4, strides=2, padding='same', kernel_size=5))
        G.add(BN(momentum=0.9, epsilon=1e-5))
        G.add(ReLU())
        G.add(Conv2DTranspose(filters=config.gf_dim*2, strides=2, padding='same', kernel_size=5))
        G.add(BN(momentum=0.9, epsilon=1e-5))
        G.add(ReLU())
        if config.dataset not in ['mnist', 'lines']:
            # more layers could (and should) be added in order to get the correct output size of G
            G.add(Conv2DTranspose(filters=config.gf_dim, strides=2, padding='same', kernel_size=5))
            G.add(BN(momentum=0.9, epsilon=1e-5))
            G.add(ReLU())
        G.add(Conv2DTranspose(filters=config.c_dim, strides=2, padding='same', kernel_size=5))
        G.add(Activation('tanh'))
        print('G:')
        G.summary()
        return G
    def train(self, config):
        if config.dataset == 'mnist':
            (X_train, y_train), (X_test, y_test) = load_mnist()
            X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        elif config.dataset == 'lines':
            (X_train, y_train), (X_test, y_test) = load_lines()
        elif config.dataset == 'celebA':
            (X_train, y_train), (X_test, y_test) = load_celebA()
        D_optim = Adam(learning_rate=config.learning_rate, beta_1=config.beta_1)
        G_optim = Adam(learning_rate=config.learning_rate, beta_1=config.beta_1)
        loss_f = 'binary_crossentropy'
        # Compile models
        self.D.compile(loss=loss_f, optimizer=D_optim)
        self.D.trainable = True
        self.G.compile(loss=loss_f, optimizer=G_optim)
        self.GAN.compile(loss=loss_f, optimizer=G_optim)
        batches = int(len(X_train) / config.batch_size)  # int always rounds down --> no problem with running out of data
        counter = 1
        print('\n' * 1)
        print('=' * 42)
        print('-' * 10, 'Training initialized.', '-' * 10)
        print('=' * 42)
        print('\n' * 2)
        start_time = time.time()
        for epoch in range(config.epochs):
            for batch in range(batches):
                batch_X_real = X_train[int(batch*config.batch_size/2):int((batch+1)*config.batch_size/2)][np.newaxis].transpose(1, 2, 3, 0)
                batch_z = np.random.normal(0, 1, size=(config.batch_size, config.z_dim))
                batch_X_fake = self.G.predict(batch_z[0:int(config.batch_size/2)])
                batch_X = np.concatenate((batch_X_real, batch_X_fake), axis=0)
                batch_yd = np.concatenate((np.ones(int(config.batch_size/2)), np.zeros(int(config.batch_size/2))))
                batch_yg = np.ones((config.batch_size))
                # maybe normalize values in X?
                # Update D network
                self.D.trainable = True
                D_loss = self.D.train_on_batch(batch_X, batch_yd)
                # Update G network
                self.D.trainable = False
                G_loss = self.GAN.train_on_batch(batch_z, batch_yg)
                # Update G network again according to https://github.com/carpedm20/DCGAN-tensorflow.git
                # G_loss = self.GAN.train_on_batch(batch_z, batch_yg)
                # Time the run
                # print("[%8d Epoch:[%2d/%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                #       % (counter, epoch, config.epoch, idx, batch_idxs,
                #          time.time() - start_time, errD_fake+errD_real, errG))
                # Save losses to vectors in order to plot
                # Print status and save images every config.sample_freq iterations
                if np.mod(counter, config.sample_freq) == 0:
                    print('Epoch: {}/{} | Batch: {}/{} | D-loss {} | G-loss {} | Time: {}'.format(epoch+1, config.epochs, batch+1, batches, D_loss, G_loss, time.time() - start_time))
                counter += 1
        print('\n' * 2)
        print('=' * 38)
        print('-' * 10, 'Training complete.', '-' * 10)
        print('=' * 38)
The program runs slowly, but if you run it with this chunk of code:
# import model
from setup import model_config

# create configuration object
config = model_config(dataset='mnist', loadmodel=False, interpolation=False, epochs=20, batch_size=64,
                      z_dim=100, gf_dim=64, df_dim=64, gfc_dim=1024, dfc_dim=1024,
                      c_dim=1, sample_freq=10)  # >> model=None << new parameter!
if config.loadmodel:
    # Pass model to the model parameter in config, not sure how to do this
    # model1 = LoadModel('Generator')
    # model2 = LoadModel('Discriminator')
    # model3 = LoadModel('DG')
    # load existing model
    pass
else:
    dcgan = dcgan(config)
    dcgan.train(config)
if config.interpolation:
    # do interpolation
    pass
it will start printing out progress and losses. I am certain there is some obvious error somewhere! If I have missed something, let me know what I can add in order to make this a better post!
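Not necessarily the bug here, but one Keras detail worth double-checking with this kind of alternation: the trainable flag is captured when a model is compiled, so toggling self.D.trainable between train_on_batch calls does not change what the already-compiled models update. The usual pattern (a sketch with generic G and D, not a fix verified against the code above) is to compile D on its own while it is trainable, then freeze it and compile the stacked model once:
# D is trained on its own, so compile it while it is trainable.
D.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

# Freeze D *before* compiling the stacked model; this compile captures the frozen state.
D.trainable = False
gan = Sequential()
gan.add(G)
gan.add(D)
gan.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

# The training loop then alternates without touching .trainable again:
# d_loss = D.train_on_batch(batch_X, batch_yd)    # updates D only
# g_loss = gan.train_on_batch(batch_z, batch_yg)  # updates G only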

I am running out of 25 GB of RAM on Google Colab

Below is my code. I am making a multilabel classification model using 8000 x-ray images; can someone here help me? Most of the RAM is used up while loading the images themselves, and only 10 epochs are able to run.
Can someone tell me what changes I need to make to this code for it to run and generate the model?
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import *
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os
# construct the argument parser and parse the arguments
# initialize the initial learning rate, number of epochs to train for,
# and batch size
INIT_LR = 1e-3
EPOCHS = 40
BS = 66
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class images
print("[INFO] loading images...")
imagePaths = list(paths.list_images('/content/drive/My Drive/testset/'))
data = []
labels = []
# loop over the image paths
for imagePath in imagePaths:
    # extract the class label from the filename
    label = imagePath.split(os.path.sep)[-2]
    # load the image, swap color channels, and resize it to be a fixed
    # 224x224 pixels while ignoring aspect ratio
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    # update the data and labels lists, respectively
    data.append(image)
    labels.append(label)
# convert the data and labels to NumPy arrays while scaling the pixel
# intensities to the range [0, 1]
data = np.array(data) / 255.0
labels = np.array(labels)
# perform one-hot encoding on the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
test_size=0.20, stratify=labels, random_state=42)
# initialize the training data augmentation object
trainAug = ImageDataGenerator(
rotation_range=15,
fill_mode="nearest")
# load the VGG16 network, ensuring the head FC layer sets are left
# off
baseModel = VGG16(weights="imagenet", include_top=False,
input_tensor=Input(shape=(224, 224, 3)))
# construct the head of the model that will be placed on top of the
# the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(64, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(3, activation="softmax")(headModel)
# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
# loop over all layers in the base model and freeze them so they will
# *not* be updated during the first training process
for layer in baseModel.layers:
    layer.trainable = False
# compile our model
print("[INFO] compiling model...")
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
# train the head of the network
print("[INFO] training head...")
H = model.fit(
trainAug.flow(trainX, trainY, batch_size=BS),
steps_per_epoch=len(trainX) // BS,
validation_data=(testX, testY),
validation_steps=len(testX) // BS,
epochs=EPOCHS)
# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)
# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
predIdxs = np.argmax(predIdxs, axis=1)
# show a nicely formatted classification report
print(classification_report(testY.argmax(axis=1), predIdxs,
target_names=lb.classes_))
# compute the confusion matrix and use it to derive the raw
# accuracy, sensitivity, and specificity
cm = confusion_matrix(testY.argmax(axis=1), predIdxs)
total = sum(sum(cm))
acc = (cm[0, 0] + cm[1, 1]) / total
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
# show the confusion matrix, accuracy, sensitivity, and specificity
print(cm)
print("acc: {:.4f}".format(acc))
print("sensitivity: {:.4f}".format(sensitivity))
print("specificity: {:.4f}".format(specificity))
# plot the training loss and accuracy
N = EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc",color='green')
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy on COVID-19 Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig('/content/drive/My Drive/setcovid/plot2.png')
# serialize the model to disk
print("[INFO] saving COVID-19 detector model...")
model.save('/content/drive/My Drive/setcovid/model2',save_format="h5" )
You can try generating TFRecords from this data, storing them in your Drive, and then feeding them to your model in batches instead of loading everything into memory at once. I would recommend the Hvass Laboratories YouTube channel (TensorFlow tutorials playlist, tutorial number 18, "TFRecords and Dataset API") and TensorFlow's official Dataset API documentation.
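As a rough sketch of that idea (assuming TF 2.x, JPEG files, and the hypothetical file path and int_labels list below; adapt the feature keys and label encoding to your own data):
import tensorflow as tf

# Write the TFRecord file once, offline, from (path, integer label) pairs.
def make_example(image_path, label):
    image_bytes = tf.io.gfile.GFile(image_path, 'rb').read()
    return tf.train.Example(features=tf.train.Features(feature={
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label)])),
    }))

with tf.io.TFRecordWriter('/content/drive/My Drive/xrays.tfrecord') as writer:  # hypothetical path
    for path, label in zip(imagePaths, int_labels):  # int_labels: your labels as integers
        writer.write(make_example(path, label).SerializeToString())

# Read side: each batch is decoded on the fly, so the full dataset never sits in RAM.
def parse(serialized):
    feats = tf.io.parse_single_example(serialized, {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    })
    img = tf.image.decode_jpeg(feats['image'], channels=3)
    img = tf.image.resize(img, (224, 224)) / 255.0
    return img, tf.one_hot(feats['label'], 3)

dataset = (tf.data.TFRecordDataset('/content/drive/My Drive/xrays.tfrecord')
           .map(parse)
           .shuffle(1024)
           .batch(BS)
           .prefetch(tf.data.experimental.AUTOTUNE))
model.fit(dataset, epochs=EPOCHS)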

Why am I getting "ValueError: No gradients provided for any variable: ['Variable:0']." error?

I'm extremely new to TensorFlow, and I'm trying to build a style transfer model. I understand the concept of the model but am having difficulty actually implementing it, since I don't yet fully understand what is going on in TensorFlow. When I try to run the optimization for the generated image I get the "No gradients provided" error, which I don't understand, since my code has:
loss = total_loss(content_feats, style_feats, output_feats)
grad = tape.gradient(loss, output_processado)
optimizer.apply_gradients(zip([grad],[output_processado]))
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>()
      8
      9 grad = tape.gradient(loss, output_processado)
---> 10 optimizer.apply_gradients(zip([grad],[output_processado]))
     11
     12 clip = tf.clip_by_value(output_processado, min_value, max_value)

1 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py in _filter_grads(grads_and_vars)
   1217   if not filtered:
   1218     raise ValueError("No gradients provided for any variable: %s." %
-> 1219                      ([v.name for _, v in grads_and_vars],))
   1220   if vars_with_empty_grads:
   1221     logging.warning(

ValueError: No gradients provided for any variable: ['Variable:0'].
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
import numpy as np
from PIL import Image
import requests
from io import BytesIO
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import Model
import keras.backend as K
from matplotlib import pyplot as plt
from numpy import expand_dims
from tensorflow import GradientTape
ITERATIONS = 10
CHANNELS = 3
IMAGE_SIZE = 500
IMAGE_WIDTH = IMAGE_SIZE
IMAGE_HEIGHT = IMAGE_SIZE
CONTENT_WEIGHT = 0.02
STYLE_WEIGHT = 4.5
MEAN = np.array([103.939, 116.779, 123.68])
CONTENT_LAYERS = ['block4_conv2']
STYLE_LAYERS = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
input_image_path = "input.png"
style_image_path = "style.png"
output_image_path = "output.png"
combined_image_path = "combined.png"
san_francisco_image_path = "https://www.economist.com/sites/default/files/images/print-edition/20180602_USP001_0.jpg"
tytus_image_path = "http://meetingbenches.com/wp-content/flagallery/tytus-brzozowski-polish-architect-and-watercolorist-a-fairy-tale-in-warsaw/tytus_brzozowski_13.jpg"
input_image = Image.open(BytesIO(requests.get(san_francisco_image_path).content))
input_image = input_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
input_image.save(input_image_path)
#input_image
# Style visualization
style_image = Image.open(BytesIO(requests.get(tytus_image_path).content))
style_image = style_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
style_image.save(style_image_path)
#style_image
def obter_modelo():
    modelo = VGG19(include_top=False, weights='imagenet', input_tensor=None)
    c_layer = CONTENT_LAYERS
    s_layers = STYLE_LAYERS
    output_layers = [modelo.get_layer(layer).output for layer in (c_layer + s_layers)]
    return Model(modelo.inputs, output_layers)

def processar_imagem(img):
    imagem = img.resize((IMAGE_HEIGHT, IMAGE_WIDTH))
    imagem = img_to_array(imagem)
    imagem = preprocess_input(imagem)
    imagem = expand_dims(imagem, axis=0)
    return imagem

def desprocessar_imagem(img):
    imagem = img
    mean = MEAN
    imagem[..., 0] += mean[0]
    imagem[..., 1] += mean[1]
    imagem[..., 2] += mean[2]
    imagem = imagem[..., ::-1]
    return imagem.astype(int)

def content_loss(c_mat, out_mat):
    return 0.5 * K.sum(K.square(out_mat - c_mat))

def matriz_gram(mat):
    return K.dot(mat, K.transpose(mat))

def style_loss(s_mat, out_mat):
    style_feat = K.batch_flatten(K.permute_dimensions(s_mat, (2, 0, 1)))
    output_feat = K.batch_flatten(K.permute_dimensions(out_mat, (2, 0, 1)))
    style_gram = matriz_gram(style_feat)
    output_gram = matriz_gram(output_feat)
    return K.sum(K.square(style_gram - output_gram)) / (4.0 * (CHANNELS ** 2) * (IMAGE_SIZE ** 2))

def total_loss(c_layer, s_layers, out_layers):
    content_layer = c_layer[0]
    out_content = out_layers[0]
    style_layers = s_layers[1:]
    out_style = out_layers[1:]
    c_loss = content_loss(content_layer[0], out_content[0])
    s_loss = None
    for i in range(len(style_layers)):
        if s_loss is None:
            s_loss = style_loss(style_layers[i][0], out_style[i][0])
        else:
            s_loss += style_loss(style_layers[i][0], out_style[i][0])
    return CONTENT_WEIGHT * c_loss + (STYLE_WEIGHT * s_loss) / len(style_layers)
modelo = obter_modelo()
#content image
content_processado = processar_imagem(input_image)
content_feats = modelo(K.variable(content_processado))
#style image
style_processado = processar_imagem(style_image)
style_feats = modelo(K.variable(style_processado))
#output image
output_processado = preprocess_input(np.random.uniform(0,250,(IMAGE_HEIGHT, IMAGE_WIDTH,CHANNELS)))
output_processado = expand_dims(output_processado, axis=0)
output_processado = K.variable(output_processado)
optimizer = tf.optimizers.Adam(5,beta_1=.99,epsilon=1e-3)
epochs=200
melhor_loss = K.variable(2000000.0)
melhor_imagem = None
min_value = MEAN
max_value = 255 + MEAN
loss = K.variable(0.0)
for e in range(epochs):
    with tf.GradientTape() as tape:
        tape.watch(output_processado)
        output_feats = modelo(output_processado)
        loss = total_loss(content_feats, style_feats, output_feats)
    grad = tape.gradient(loss, output_processado)
    optimizer.apply_gradients(zip([grad], [output_processado]))
    clip = tf.clip_by_value(output_processado, min_value, max_value)
    output_processado.assign(clip)
    print("Epoch: " + str(e))
For tape.gradient, you have to pass (loss, model.trainable_weights), but you are passing tape.gradient(loss, output_processado). Likewise, for optimizer.apply_gradients you have to pass zip(grads, model.trainable_variables), but you are passing zip([grad], [output_processado]).
Calling a model inside a GradientTape scope enables you to retrieve the gradients of the trainable weights of the layer with respect to a loss value. Using an optimizer instance, you can use these gradients to update these variables (which you can retrieve using model.trainable_weights).
TensorFlow provides the tf.GradientTape API for automatic differentiation - computing the gradient of a computation with respect to its input variables. Tensorflow "records" all operations executed inside the context of a tf.GradientTape onto a "tape". Tensorflow then uses that tape and the gradients associated with each recorded operation to compute the gradients of a "recorded" computation using reverse mode differentiation.
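As a minimal illustration (TF 2.x eager execution; constants must be watched explicitly, while tf.Variables are watched automatically):
x = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x)
    y = x * x
dy_dx = tape.gradient(y, x)  # 6.0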
If you want to process the gradients before applying them you can instead use the optimizer in three steps:
Compute the gradients with tf.GradientTape.
Process the gradients as you wish.
Apply the processed gradients with apply_gradients().
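Step 2 could be as simple as clipping the gradient norms before the update, for example (a sketch reusing the names from the example below):
grads = tape.gradient(loss_value, model.trainable_weights)
grads = [tf.clip_by_norm(g, 1.0) for g in grads]  # the "process" step
optimizer.apply_gradients(zip(grads, model.trainable_weights))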
Here is a simple example for mnist data. The comments are present in the code to explain better.
Code-
import tensorflow as tf
print(tf.__version__)
from tensorflow import keras
from tensorflow.keras import layers
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Preprocess the data (these are Numpy arrays)
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')
# Reserve 10,000 samples for validation
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
# Get the model.
inputs = keras.Input(shape=(784,), name='digits')
x = layers.Dense(64, activation='relu', name='dense_1')(inputs)
x = layers.Dense(64, activation='relu', name='dense_2')(x)
outputs = layers.Dense(10, name='predictions')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
# Instantiate an optimizer.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Prepare the training dataset.
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
epochs = 3
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables autodifferentiation.
        with tf.GradientTape() as tape:
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)
        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)
        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        # Log every 200 batches.
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            print('Seen so far: %s samples' % ((step + 1) * 64))
Output -
2.2.0
Start of epoch 0
Training loss (for one batch) at step 0: 2.323657512664795
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.3156163692474365
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 2.2302279472351074
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 2.131979465484619
Seen so far: 38464 samples
Start of epoch 1
Training loss (for one batch) at step 0: 2.00234317779541
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.7992427349090576
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.8583933115005493
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.6005337238311768
Seen so far: 38464 samples
Start of epoch 2
Training loss (for one batch) at step 0: 1.6701987981796265
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.6237502098083496
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.3603084087371826
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.246948480606079
Seen so far: 38464 samples
You can find more about tf.GradientTape here. The example used here is taken from here.
Hope this answers your question. Happy Learning.

Odd problem with the Multivariate Input Multi-Step LSTM Time Series Forecasting Models

I have developed Multivariate Input Multi-Step LSTM Time Series Forecasting Models for my dataset according to the tutorial (https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/).
Yet I ran into a very odd problem: when I run the code with a small sample (50 samples for training, 10 for testing), the predictions are correct, but when I run the experiment with the full data (4000 samples for training, 1000 for testing), the predictions contain NaN values, which leads to errors.
Then, when I add scaling plus ReLU activation functions plus regularization, as in the following code, I can get predictions with the full data (4000 training samples, 1000 test samples), but the predictions are still not correct: I want to forecast 96 steps ahead, yet every predicted step is the same number.
Can you give a useful suggestion to deal with the forecast accuracy issues?
import time
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import csv
import numpy
from sklearn.preprocessing import MinMaxScaler
from numpy import save
from timeit import default_timer as timer
def scale(train, test):
# fit scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
train = train.astype(float)
test = test.astype(float)
scaler = scaler.fit(train)
# transform train
train = train.reshape(train.shape[0], train.shape[1])
train_scaled = scaler.transform(train)
# transform test
test = test.reshape(test.shape[0], test.shape[1])
test_scaled = scaler.transform(test)
return scaler, train_scaled, test_scaled
# split a univariate dataset into train/test sets
def split_dataset(data):
# split into standard weeks
train, test = data[0:387030, 10:26], data[387030:433881, 10:26]
# train, test = data[0:4850, 10:26], data[4850:5820, 10:26]
# train, test = data[0:387030], data[387029:433880]
# restructure into windows of weekly data
# numpy.savetxt("test.csv", data[387030:433881, :], delimiter=",")
# save('test.npy', data[387030:433881, :])
scaler, train_scaled, test_scaled = scale(train, test)
train_scaled = array(split(train_scaled, len(train_scaled) / 97))
test_scaled = array(split(test_scaled, len(test_scaled) / 97))
return scaler, train_scaled, test_scaled
# create a list of configs to try
def model_configs():
# define scope of configs
# n_input = [12]
n_nodes = [100, 200, 300]
n_epochs = [50, 100]
n_batch = [64]
# n_diff = [12]
# create configs
configs = list()
# for i in n_input:
for j in n_nodes:
for k in n_epochs:
for l in n_batch:
cfg = [j, k, l]
configs.append(cfg)
print('Total configs: %d' % len(configs))
return configs
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
scores = list()
# calculate an RMSE score for each day
for i in range(0, actual.shape[1], 97):
# for i in range():
# calculate mse
mse = mean_squared_error(actual[:, i, :], predicted[:, i, :])
# calculate rmse
rmse = sqrt(mse)
# store
scores.append(rmse)
# calculate overall RMSE
s = 0
for x in range(actual.shape[0]):
for y in range(actual.shape[1]):
for z in range(actual.shape[2]):
s += (actual[x, y, z] - predicted[x, y, z])**2
score = sqrt(s / (actual.shape[0] * actual.shape[1] * actual.shape[2]))
return score, scores
# convert history into inputs and outputs
def to_supervised(train, n_steps_in, n_steps_out=97, overlop=97):
# flatten data
sequences = train.reshape(
(train.shape[0] * train.shape[1], train.shape[2]))
X, y = list(), list()
for i in range(0, len(sequences), overlop):
end_ix = i + n_steps_in
out_end_ix = end_ix + n_steps_out
# check if we are beyond the dataset
if out_end_ix > len(sequences):
break
# gather input and output parts of the pattern
seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix:out_end_ix, :]
X.append(seq_x)
y.append(seq_y)
return array(X), array(y)
# train the model
def build_model(train, n_input, config):
# unpack config
n_nodes, n_epochs, n_batch = config
# prepare data
train_x, train_y = to_supervised(train, n_input)
# define parameters
verbose, epochs, batch_size = 0, n_epochs, n_batch
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
# reshape output into [samples, timesteps, features]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], n_features))
# define model
model = Sequential()
model.add(
LSTM(
n_nodes,
activation='relu',
input_shape=(
n_timesteps,
n_features), recurrent_dropout=0.6))
model.add(RepeatVector(n_outputs))
model.add(LSTM(n_nodes, activation='relu', return_sequences=True, recurrent_dropout=0.6))
model.add(TimeDistributed(Dense(n_nodes, activation='relu')))
model.add(TimeDistributed(Dense(n_features)))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(
train_x,
train_y,
epochs=epochs,
batch_size=batch_size,
verbose=verbose)
return model
# make a forecast
def forecast(model, history, n_input):
# flatten data
data = array(history)
data = data.reshape((data.shape[0] * data.shape[1], data.shape[2]))
# retrieve last observations for input data
input_x = data[-n_input:, :]
# reshape into [1, n_input, n]
input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))
# forecast the next week
yhat = model.predict(input_x, verbose=0)
# we only want the vector forecast
yhat = yhat[0]
return yhat
# evaluate a single model
def evaluate_model(train, test, n_input, cfg):
start = timer()
# fit model
model = build_model(train, n_input, cfg)
# print("--- %s seconds ---" % (time.time() - start_time))
# history is a list of weekly data
history = [x for x in train]
# walk-forward validation over each week
predictions = list()
for i in range(len(test)):
# predict the week
yhat_sequence = forecast(model, history, n_input)
# store the predictions
predictions.append(yhat_sequence)
# get real observation and add to history for predicting the next week
history.append(test[i, :])
# evaluate predictions days for each week
predictions = array(predictions)
# invert scaling
predictions = predictions.reshape(
(predictions.shape[0] *
predictions.shape[1],
predictions.shape[2]))
predictions = scaler.inverse_transform(predictions)
test = test.reshape((test.shape[0] * test.shape[1], test.shape[2]))
test = scaler.inverse_transform(test)
predictions = array(split(predictions, len(predictions) / 97))
test = array(split(test, len(test) / 97))
score, scores = evaluate_forecasts(test, predictions)
run_time = timer() - start
return cfg[0], cfg[1], cfg[2], score, scores, run_time
# load the new file
dataset = read_csv(
'data_preproccess_5.csv',
header=0,
index_col=0)
# split into train and test
scaler, train_scaled, test_scaled = split_dataset(dataset.values)
# evaluate model and get scores
n_input = 7 * 97
# model configs
cfg_list = model_configs()
scores = [
evaluate_model(
train_scaled,
test_scaled,
n_input,
cfg) for cfg in cfg_list]
If you have multistep output, you can simply reshape your predictions and the true values before computing the error.
My split datasets:
trainX, trainY, testX, testY
Get the prediction results:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
Reshape the predictions and real values:
trainY = trainY.reshape(-1, )
trainPredict = trainPredict.reshape(-1, )
testY = testY.reshape(-1, )
testPredict = testPredict.reshape(-1, )
Calculate the root mean squared error:
print('Train Root mean squared error: {}'.format(math.sqrt(mean_squared_error(trainY, trainPredict))))
print('Test Root mean squared error: {}'.format(math.sqrt(mean_squared_error(testY, testPredict))))

TensorFlow Estimator Template based save and restore of models

I took the simple neural network example from the TensorFlow GitHub repository and tried to split it into two parts. The first part is training plus test, and the second part separates out the test part, which requires a restore. The restore seems to work, but it cannot find the predict function.
Here is the first part:
from __future__ import print_function
from tensorflow.python.saved_model import builder as saved_model_builder
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import shutil
matplotlib.use('TkAgg')
# Parameters
learning_rate = 0.1
num_steps = 1000
batch_size = 128
display_step = 100
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
#init = tf.initialize_all_variables()
sess = tf.Session()
# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': mnist.train.images}, y=mnist.train.labels,
batch_size=batch_size, num_epochs=None, shuffle=True)
# Define the neural network
def neural_net(x_dict):
# TF Estimator input is a dict, in case of multiple inputs
x = x_dict['images']
# Hidden fully connected layer with 256 neurons
layer_1 = tf.layers.dense(x, n_hidden_1, name="layer_1")
# Hidden fully connected layer with 256 neurons
layer_2 = tf.layers.dense(layer_1, n_hidden_2, name="layer_2")
# Output fully connected layer with a neuron for each class
out_layer = tf.layers.dense(layer_2, num_classes, name="out_layer")
return out_layer
# Define the model function (following TF Estimator Template)
def model_fn(features, labels, mode):
# Build the neural network
logits = neural_net(features)
# Predictions
pred_classes = tf.argmax(logits, axis=1)
pred_probas = tf.nn.softmax(logits)
# If prediction mode, early return
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
# Evaluate the accuracy of the model
acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
# TF Estimators requires to return a EstimatorSpec, that specify
# the different ops for training, evaluating, ...
estim_specs = tf.estimator.EstimatorSpec(
mode=mode,
predictions=pred_classes,
loss=loss_op,
train_op=train_op,
eval_metric_ops={'accuracy': acc_op})
return estim_specs
# Build the Estimator
model = tf.estimator.Estimator(model_fn)
# Train the Model
model.train(input_fn, steps=num_steps)
# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': mnist.test.images}, y=mnist.test.labels,
batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
model.evaluate(input_fn)
#model.export_savedmodel(".", input_fn)
init = tf.global_variables_initializer()
sess.run(init)
tf.add_to_collection("nn_model", model)
# Add ops to save and restore all the variables.
#saver = tf.train.Saver()
#save_path = saver.save(sess, "model/model.ckpt")
try:
shutil.rmtree("model")
except:
pass
builder = saved_model_builder.SavedModelBuilder("model")
builder.add_meta_graph_and_variables(sess, ["nn"])
builder.save()
print("Model saved in file")
# Predict single images
n_images = 4
# Get images from test set
test_images = mnist.test.images[:n_images]
# Prepare the input data
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': test_images}, shuffle=False)
# Use the model to predict the images class
preds = list(model.predict(input_fn))
# Display
for i in range(n_images):
plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray')
plt.show()
print("Model prediction:", preds[i])
The above program works fine. It saves the model (not sure whether correctly, but I see all the directories being created), although it does give one warning:
WARNING:tensorflow:Error encountered when serializing nn_model.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'Estimator' object has no attribute 'name'
Here is the "apply" program that restores and tries to apply and fails at the predict() line:
import tensorflow as tf
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
sess=tf.Session()
#First let's load meta graph and restore weights
#saver = tf.train.import_meta_graph('model/model.ckpt.meta')
#saver.restore(sess,tf.train.latest_checkpoint('nn_model'))
tf.saved_model.loader.load(sess, ["nn"], "model")
model = tf.get_collection('nn_model')
# Predict single images
n_images = 4
# Get images from test set
test_images = mnist.test.images[:n_images]
# Prepare the input data
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': test_images}, shuffle=False)
# Use the model to predict the images class
preds = list(model.predict(input_fn))
# Display
for i in range(n_images):
plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray')
plt.show()
print("Model prediction:", preds[i])
The error it gives is:
Traceback (most recent call last):
  File "applynn.py", line 35, in <module>
    preds = list(model.predict(input_fn))
AttributeError: 'module' object has no attribute 'predict'
So what is missing here?
So this problem is now fixed. Here is what I had to do to fix this.
The first part is:
from __future__ import print_function
from tensorflow.python.saved_model import builder as saved_model_builder
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import shutil
matplotlib.use('TkAgg')
# Parameters
learning_rate = 0.1
num_steps = 1000
batch_size = 128
display_step = 100
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
#init = tf.initialize_all_variables()
sess = tf.Session()
# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': mnist.train.images}, y=mnist.train.labels,
batch_size=batch_size, num_epochs=None, shuffle=True)
# Define the neural network
def neural_net(x_dict):
# TF Estimator input is a dict, in case of multiple inputs
x = x_dict['images']
# Hidden fully connected layer with 256 neurons
layer_1 = tf.layers.dense(x, n_hidden_1, name="layer_1")
# Hidden fully connected layer with 256 neurons
layer_2 = tf.layers.dense(layer_1, n_hidden_2, name="layer_2")
# Output fully connected layer with a neuron for each class
out_layer = tf.layers.dense(layer_2, num_classes, name="out_layer")
return out_layer
# Define the model function (following TF Estimator Template)
def model_fn(features, labels, mode):
# Build the neural network
logits = neural_net(features)
# Predictions
pred_classes = tf.argmax(logits, axis=1)
pred_probas = tf.nn.softmax(logits)
# If prediction mode, early return
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
# Evaluate the accuracy of the model
acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
# TF Estimators requires to return a EstimatorSpec, that specify
# the different ops for training, evaluating, ...
estim_specs = tf.estimator.EstimatorSpec(
mode=mode,
predictions=pred_classes,
loss=loss_op,
train_op=train_op,
eval_metric_ops={'accuracy': acc_op})
return estim_specs
# Build the Estimator
estimator = tf.estimator.Estimator(model_fn, model_dir='estimator')
# Train the Model
estimator.train(input_fn, steps=num_steps)
# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': mnist.test.images}, y=mnist.test.labels,
batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
estimator.evaluate(input_fn)
#model.export_savedmodel(".", input_fn)
init = tf.global_variables_initializer()
sess.run(init)
tf.add_to_collection("nn_model", estimator)
# Add ops to save and restore all the variables.
#saver = tf.train.Saver()
#save_path = saver.save(sess, "model/model.ckpt")
try:
shutil.rmtree("model")
except:
pass
builder = saved_model_builder.SavedModelBuilder("model")
builder.add_meta_graph_and_variables(sess, ["nn"])
builder.save()
print("Model saved in file")
# Predict single images
n_images = 4
# Get images from test set
test_images = mnist.test.images[:n_images]
# Prepare the input data
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': test_images}, shuffle=False)
# Use the model to predict the images class
preds = list(estimator.predict(input_fn))
# Display
for i in range(n_images):
plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray')
plt.show()
print("Model prediction:", preds[i])
The second part is:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_classes = 10 # MNIST total classes (0-9 digits)
# Define the neural network
def neural_net(x_dict):
# TF Estimator input is a dict, in case of multiple inputs
x = x_dict['images']
# Hidden fully connected layer with 256 neurons
layer_1 = tf.layers.dense(x, n_hidden_1, name="layer_1")
# Hidden fully connected layer with 256 neurons
layer_2 = tf.layers.dense(layer_1, n_hidden_2, name="layer_2")
# Output fully connected layer with a neuron for each class
out_layer = tf.layers.dense(layer_2, num_classes, name="out_layer")
return out_layer
# Define the model function (following TF Estimator Template)
def model_fn(features, labels, mode):
# Build the neural network
logits = neural_net(features)
# Predictions
pred_classes = tf.argmax(logits, axis=1)
pred_probas = tf.nn.softmax(logits)
# If prediction mode, early return
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
# Evaluate the accuracy of the model
acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
# TF Estimators requires to return a EstimatorSpec, that specify
# the different ops for training, evaluating, ...
estim_specs = tf.estimator.EstimatorSpec(
mode=mode,
predictions=pred_classes,
loss=loss_op,
train_op=train_op,
eval_metric_ops={'accuracy': acc_op})
return estim_specs
sess=tf.Session()
estimator = tf.estimator.Estimator(model_fn, model_dir='estimator')
# Predict single images
n_images = 4
# Get images from test set
test_images = mnist.test.images[:n_images]
# Prepare the input data
input_fn = tf.estimator.inputs.numpy_input_fn(
x={'images': test_images}, shuffle=False)
# Use the model to predict the images class
preds = list(estimator.predict(input_fn))
# Display
for i in range(n_images):
plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray')
plt.show()
print("Model prediction:", preds[i])
Note that I have renamed the model variable to estimator, since it really is the estimator. Also, I am passing a model_dir so that the estimator is serialized separately from the other variables. I also had to make sure the second Python file explicitly has access to the two functions and any variables they depend on. A couple of other minor fixes were made in the code.
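As an aside, if you want a self-contained SavedModel instead of relying on the model_dir checkpoints, the Estimator API has its own export path. A rough sketch (TF 1.x; the 'images' key and num_input are assumptions that must match the feature dict used in model_fn):
feature_placeholders = {'images': tf.placeholder(tf.float32, shape=[None, num_input], name='images')}
serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_placeholders)
export_dir = estimator.export_savedmodel('exported_model', serving_input_fn)
# The export can later be loaded for inference with tf.contrib.predictor.from_saved_model(export_dir).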