I wrote simple sine-function predictors with LSTM in both Keras and TensorFlow, but found the Keras code much slower: it runs for about 5 minutes, while the TensorFlow code trains the model in just 20 seconds. Moreover, the Keras prediction is less precise than the TensorFlow one. Could anyone help me find the difference between the two models?
I adapted code found online and intended to train both models with the same hyperparameters, but the performance is not as expected. I searched through many materials online and found no explanation.
Keras Code:
import numpy as np
import os
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
import pickle
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
stime = time.time()
BATCH_SIZE = 20
TIME_STEPS = 10
LN = 410
DIFF = 2
OUTPUT_PATH = '/Users/xiachang/Documents/RNN/test_outputs'
SCALER_COL_IDX = 0
params = {
    "batch_size": BATCH_SIZE,  # 20<16<10, 25 was a bust
    "epochs": 500,
    "lr": 0.00010000,
    "time_steps": TIME_STEPS
}
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
HIDDEN_UNITS = 20
# data = np.array([[i * (DIFF)] for i in range(LN)])
#
# min_max_scaler = MinMaxScaler()
# data = min_max_scaler.fit_transform(data)
def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIME_STEPS):
        X.append([[e] for e in seq[i: i + TIME_STEPS]])
        y.append([seq[i + TIME_STEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
x_val, x_test = np.split(test_X, 2)
y_val, y_test = np.split(test_y, 2)
def print_time(text, stime):
    seconds = (time.time() - stime)
    print(text, seconds // 60, "minutes : ", np.round(seconds % 60), "seconds")
def create_model():
    lstm_model = Sequential()
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model
model = create_model()
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, min_delta=0.0001)
mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH,
"best_model.h5"), monitor='val_loss', verbose=1,
save_best_only=True, save_weights_only=False, mode='min', period=1)
# Not used here. But leaving it here as a reminder for future
r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ","_") + '.log'), append=True)
history = model.fit(train_X, train_y, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
shuffle=False, validation_data=(x_val, y_val), callbacks=[es, mcp, csv_logger])
print("saving model...")
pickle.dump(model, open("test_outputs/lstm_model", "wb"))
# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'train_vis_BS_'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
# load the saved best model from above
saved_model = load_model(os.path.join(OUTPUT_PATH, 'best_model.h5')) # , "lstm_best_7-3-19_12AM",
print(saved_model)
y_pred = saved_model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = y_test
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = y_pred
y_test_t_org = y_test_t
print(y_pred_org[0:15])
print(y_test_t_org[0:15])
# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Value')
plt.ylabel('Y')
plt.xlabel('X')
plt.legend(['Prediction', 'Real'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'pred_vs_real_BS'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
print_time("program completed ", stime)
Tensorflow code:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
NUM_EPOCH = 1000
HIDDEN_SIZE = 30
NUM_LAYERS = 2
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 20
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIMESTEPS):
        X.append([seq[i: i + TIMESTEPS]])
        y.append([seq[i + TIMESTEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
def lstm_model(X, y, is_training):
    cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]
    predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    if not is_training:
        return predictions, None, None
    loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op
def train(sess, train_X, train_Y):
    ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    X, y = ds.make_one_shot_iterator().get_next()
    losses = np.array([])
    with tf.variable_scope('model'):
        predictions, loss, train_op = lstm_model(X, y, True)
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([train_op, loss])
        losses = np.append(losses, l)
        if i % NUM_EPOCH == 0:
            print('train step: ' + str(i) + ', loss: ' + str(l))
    plt.figure()
    plt.plot(losses, label='loss')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/loss.png')
def run_eval(sess, test_X, test_y):
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
    ds = ds.batch(1)
    X, y = ds.make_one_shot_iterator().get_next()
    with tf.variable_scope('model', reuse=True):
        prediction, _, _ = lstm_model(X, [0, 0], False)
    predictions = []
    labels = []
    for i in range(int(TESTING_EXAMPLES / 2)):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print('Mean Square Error is: %f' % rmse)
    plt.figure()
    print(predictions[:15])
    print(labels[:15])
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_val')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/test.png')
test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
x_val, test_X = np.split(test_X, 2)
y_val, test_y = np.split(test_y, 2)
with tf.Session() as sess:
    train(sess, train_X, train_y)
    run_eval(sess, test_X, test_y)
You should maybe try CuDNNLSTM instead of LSTM; it is CUDA-accelerated ("Fast LSTM implementation with CuDNN").
See here: https://github.com/keras-team/keras/blob/master/keras/layers/cudnn_recurrent.py#L328
Also, your model structures are not the same: the first has 3 LSTM layers, the other has 2.
The TensorFlow data API is highly optimized; it prepares the dataset without wasting any resources.
Note that you can accelerate TensorFlow training even further by using parallelization in the dynamic_rnn cell; check out this.
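If a CUDA GPU is available, here is a minimal sketch of that swap in the create_model function above (an assumption about your setup, not a tested drop-in; CuDNNLSTM takes no activation arguments and only runs on GPU, and the create_model_cudnn name is just for illustration):
from keras.layers import CuDNNLSTM
def create_model_cudnn():
    # Same stacked architecture as create_model, but with the cuDNN-backed layer.
    lstm_model = Sequential()
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True, input_shape=(TIME_STEPS, 1)))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model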
Related
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score,\
accuracy_score, balanced_accuracy_score,classification_report,\
plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal
import tensorflow.keras.backend as K
from sklearn.utils import shuffle
import pickle
from tqdm import tqdm
import numpy as np
from scipy import stats
import pandas as pd
np.random.seed(1635848)
def get_data_XYZ_one_dimensional(n, a=-2, c=1/2, random_state=None, verbose=True):
    """
    Generates pseudo-random data distributed according to the distribution defined in section 2.1 of the document
    "Math/Confounders and data generation.pdf".
    :param n: Number of data points to generate.
    :param a: Mean of X.
    :param c: Shape parameter for Weibull distribution.
    :param random_state: Used to set the seed of numpy.random before generation of random numbers.
    :param verbose: If True will display a progress bar. If False it will not display a progress bar.
    :return: Pandas DataFrame with three columns (corresponding to X, Y and Z) and n rows (corresponding to the n
        generated pseudo-random samples).
    """
    np.random.seed(random_state)
    output = []
    iterator = tqdm(range(n)) if verbose else range(n)
    for _ in iterator:
        X = stats.norm.rvs(loc=-2, scale=1)
        Y = stats.bernoulli.rvs(p=1/(1+np.exp(-X)))
        if Y == 0:
            Z = stats.expon.rvs(scale=np.exp(-X))  # note: np.exp(-X) could be cached for more computational efficiency but would render the code less useful
        elif Y == 1:
            Z = stats.weibull_min.rvs(c=c, scale=np.exp(-X))
        else:
            assert False
        output.append((X, Y, Z))
    return pd.DataFrame(output, columns=["Personal information", "Treatment", "Time to event"])
data = get_data_XYZ_one_dimensional(n=100, random_state=0)
print(data)
# The Architecture of CGAN
class cGAN():
"""
Class containing 3 methods (and __init__): generator, discriminator and train.
Generator is trained using random noise and label as inputs. Discriminator is trained
using real/fake samples and labels as inputs.
"""
def __init__(self,latent_dim=100, out_shape=3):
self.latent_dim = latent_dim
self.out_shape = out_shape
self.num_classes = 2
# using Adam as our optimizer
optimizer = Adam(0.0002, 0.5)
# building the discriminator
self.discriminator = self.discriminator()
self.discriminator.compile(loss=['binary_crossentropy'],
optimizer=optimizer,
metrics=['accuracy'])
# building the generator
self.generator = self.generator()
noise = Input(shape=(self.latent_dim,))
label = Input(shape=(1,))
gen_samples = self.generator([noise, label])
# we don't train discriminator when training generator
self.discriminator.trainable = False
valid = self.discriminator([gen_samples, label])
# combining both models
self.combined = Model([noise, label], valid)
self.combined.compile(loss=['binary_crossentropy'],
optimizer=optimizer,
metrics=['accuracy'])
def generator(self):
init = RandomNormal(mean=0.0, stddev=0.02)
model = Sequential()
model.add(Dense(128, input_dim=self.latent_dim))
model.add(Dropout(0.2))
model.add(LeakyReLU(alpha=0.2))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(256))
model.add(Dropout(0.2))
model.add(LeakyReLU(alpha=0.2))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(512))
model.add(Dropout(0.2))
model.add(LeakyReLU(alpha=0.2))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(self.out_shape, activation='tanh'))
noise = Input(shape=(self.latent_dim,))
label = Input(shape=(1,), dtype='int32')
label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))
model_input = multiply([noise, label_embedding])
gen_sample = model(model_input)
model.summary()
return Model([noise, label], gen_sample, name="Generator")
def discriminator(self):
init = RandomNormal(mean=0.0, stddev=0.02)
model = Sequential()
model.add(Dense(512, input_dim=self.out_shape, kernel_initializer=init))
model.add(LeakyReLU(alpha=0.2))
model.add(Dense(256, kernel_initializer=init))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.4))
model.add(Dense(128, kernel_initializer=init))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.4))
model.add(Dense(1, activation='sigmoid'))
gen_sample = Input(shape=(self.out_shape,))
label = Input(shape=(1,), dtype='int32')
label_embedding = Flatten()(Embedding(self.num_classes, self.out_shape)(label))
model_input = multiply([gen_sample, label_embedding])
validity = model(model_input)
model.summary()
return Model(inputs=[gen_sample, label], outputs=validity, name="Discriminator")
def train(self, X_train, y_train, pos_index, neg_index, epochs, sampling=False, batch_size=32, sample_interval=100, plot=True):
# though not recommended, defining losses as global helps as in analysing our cgan out of the class
global G_losses
global D_losses
G_losses = []
D_losses = []
# Adversarial ground truths
valid = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))
for epoch in range(epochs):
# if sampling==True --> train discriminator with 8 sample from positive class and rest with negative class
if sampling:
idx1 = np.random.choice(pos_index, 3)
idx0 = np.random.choice(neg_index, batch_size-3)
idx = np.concatenate((idx1, idx0))
# if sampling!=True --> train discriminator using random instances in batches of 32
else:
idx = np.random.choice(len(y_train), batch_size)
samples, labels = X_train[idx], y_train[idx]
samples, labels = shuffle(samples, labels)
# Sample noise as generator input
noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
gen_samples = self.generator.predict([noise, labels])
# label smoothing
if epoch < epochs//1.5:
valid_smooth = (valid+0.1)-(np.random.random(valid.shape)*0.1)
fake_smooth = (fake-0.1)+(np.random.random(fake.shape)*0.1)
else:
valid_smooth = valid
fake_smooth = fake
# Train the discriminator
self.discriminator.trainable = True
d_loss_real = self.discriminator.train_on_batch([samples, labels], valid_smooth)
d_loss_fake = self.discriminator.train_on_batch([gen_samples, labels], fake_smooth)
d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
# Train Generator
self.discriminator.trainable = False
sampled_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
# Train the generator
g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)
if (epoch+1)%sample_interval==0:
print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f'
% (epoch, epochs, d_loss[0], g_loss[0]))
G_losses.append(g_loss[0])
D_losses.append(d_loss[0])
if plot:
if epoch+1==epochs:
plt.figure(figsize=(10,5))
plt.title("Generator and Discriminator Loss")
plt.plot(G_losses,label="G")
plt.plot(D_losses,label="D")
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.show()
data.Treatment.value_counts()
scaler = StandardScaler()
X = scaler.fit_transform(data.drop('Treatment', 1))
y = data['Treatment'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
lgb_1 = lgb.LGBMClassifier()
lgb_1.fit(X_train, y_train)
y_pred = lgb_1.predict(X_test)
# evaluation
print(classification_report(y_test, y_pred))
plot_confusion_matrix(lgb_1, X_test, y_test)
plt.show()
le = preprocessing.LabelEncoder()
for i in ['Personal information', 'Treatment', 'Time to event']:
data[i] = le.fit_transform(data[i].astype(str))
y_train = y_train.reshape(-1,1)
pos_index = np.where(y_train==1)[0]
neg_index = np.where(y_train==0)[0]
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)
Here the training gives an error: ValueError: Input 0 of layer "Discriminator" is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2). I understand I have to fix the shape by changing the input, but where and how do I do it?
Also, there are 3 columns in data, so how do I go about making this work?
I think the fix is out_shape=2 rather than 3, because the generated output has 2 features and you stated the number of classes to be 2 as well, unless there is something else I am missing.
def __init__(self, latent_dim=100, out_shape=2):
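For completeness, a hedged sketch of how the instantiation would then look (the cgan = ... line itself is not shown in the question, so this is an assumption about how the object is created):
cgan = cGAN(latent_dim=100, out_shape=2)  # out_shape matches the 2 feature columns left after dropping 'Treatment'
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)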
When I use a custom loss function with batch gradient descent, I get the error shown in the photos below after the first epoch.
The code runs fine with BinaryCrossentropy.
I am getting the error below:
optimizer.apply_gradients(zip(grads, model_2.trainable_weights))
No gradients provided for any variable: (['dense_22/kernel:0', 'dense_22/bias:0', 'dense_23/kernel:0', 'dense_23/bias:0', 'dense_24/kernel:0', 'dense_24/bias:0'],).
The code:
# importing necessary libraries and functions
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import InputLayer, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import Mean, CategoricalAccuracy
import matplotlib.pyplot as plt
import keras.backend as K
import pandas as pd
import tensorflow_datasets as tfds
from collections import deque
from sklearn.model_selection import train_test_split #train test split
from sklearn.model_selection import StratifiedKFold #Stratifying the data (for test train split)
from sklearn.preprocessing import MinMaxScaler #data normalization with sklearn
import matplotlib.pyplot as plt
import math
lambda_par = tf.Variable(0.5)
def fairnessLoss(y_true, y_pred):
    print("HI", y_true, y_pred)
    cse_min = cse_maj = tf.Variable(0.0)
    n_min = n_maj = tf.Variable(0.0)
    print(y_pred.shape[0])
    for i in range(y_pred.shape[0]):
        print(i)
        if y_true[i][0] == 1:
            cse_min.assign_add(tf.math.log(y_pred[i][0]))
            n_min.assign_add(1.0)
        else:
            cse_maj.assign_add(tf.math.log(1 - y_pred[i][0]))
            n_maj.assign_add(1.0)
    print("First step")
    tem1 = tf.divide(cse_min, n_min)
    tem2 = tf.divide(cse_maj, n_maj)
    fe = tf.Variable(tem1)
    fe.assign_add(-tem2)
    fe = tf.math.multiply(fe, fe)
    ans = tf.Variable(0.0)
    ans.assign_add(cse_min)
    ans.assign_add(cse_maj)
    ans.assign_add(tf.math.multiply(lambda_par, fe))
    return ans
model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation=tf.keras.activations.sigmoid),  # hidden layer 1, sigmoid activation
    tf.keras.layers.Dense(8, activation=tf.keras.activations.sigmoid),  # hidden layer 2
    tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid)   # output layer
])
batch_size=len(train_X)
train_yy = []
for i in range(len(train_y)):
    train_yy.append([train_y[i]])
train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_yy))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
# # Prepare the validation dataset.
# val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
# val_dataset = val_dataset.batch(batch_size)
train_acc_metric = keras.metrics.BinaryAccuracy()
val_acc_metric = keras.metrics.BinaryAccuracy()
epochs = 500
# Instantiate an optimizer to train the model.
optimizer = keras.optimizers.Adam()
# Instantiate a loss function.
loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
# storing variables to plot loss and accuracy
losses = []
accuracy = []
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    epoch_loss_avg = Mean()
    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape() as tape:
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = fairnessLoss(y_batch_train, logits)
        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)
        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        epoch_loss_avg.update_state(loss_value)
        train_acc_metric.update_state(y_batch_train, logits)
        losses.append(epoch_loss_avg.result())
        accuracy.append(train_acc_metric.result())
        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * batch_size))
    print(train_acc_metric.result())
    train_acc_metric.reset_states()
Photo of the error-1
Photo of the error-2
Losses and optimizers work in parallel in the training statistics; try switching the optimizer or varying its learning rate to see the true behaviour.
Sample: with GradientTape you apply values to tf.Variables; the loss function is the measurement computed from the logits you provide, while the optimizer is how you work toward that goal.
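As an editorial aside on the question above, a hedged sketch (an assumption about the cause, not a confirmed fix): "No gradients provided for any variable" typically appears when the loss is assembled from tf.Variable objects and assign_add calls, which disconnect it from the tape; keeping the loss as plain tensor operations lets tape.gradient reach the model weights. The fairness_loss name and the clipping constants below are illustrative only.
def fairness_loss(y_true, y_pred, lambda_par=0.5):
    # Tensor-only version of the loss idea from the question: per-class summed
    # log-likelihood terms plus a squared-difference fairness penalty.
    y_true = tf.cast(y_true, y_pred.dtype)
    minority = tf.squeeze(y_true, axis=-1) > 0.5
    log_p = tf.math.log(tf.clip_by_value(y_pred[:, 0], 1e-7, 1.0))
    log_q = tf.math.log(tf.clip_by_value(1.0 - y_pred[:, 0], 1e-7, 1.0))
    cse_min = tf.reduce_sum(tf.boolean_mask(log_p, minority))
    cse_maj = tf.reduce_sum(tf.boolean_mask(log_q, ~minority))
    n_min = tf.maximum(tf.reduce_sum(tf.cast(minority, y_pred.dtype)), 1.0)
    n_maj = tf.maximum(tf.reduce_sum(tf.cast(~minority, y_pred.dtype)), 1.0)
    fe = tf.square(cse_min / n_min - cse_maj / n_maj)
    return cse_min + cse_maj + lambda_par * fe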
Dataset: an image-categories problem, with images and labels for the categories.
Index Image Label
1 F:\datasets\downloads\Actors\train\Candidt Kibt\01.tif 0
2 F:\datasets\downloads\Actors\train\Candidt Kibt\02.tif 0
19 F:\datasets\downloads\Actors\train\Pikaploy\01.tif 1
Code: for testing Tape and Gradients only.
import os
from os.path import exists
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
num_iter = 1000
train_generator_batch_size = 1
batch_size = 1
WIDTH = 256
HEIGHT = 256
CHANNEL = 3
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
if not exists(checkpoint_dir) :
os.mkdir(checkpoint_dir)
print("Create directory: " + checkpoint_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Definition / Class
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def create_image_generator( ):
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 7.xlsx', index_col=None, header=[0], dtype=str)
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
validation_split=0.2,
)
train_image_ds = train_generator.flow_from_dataframe(
dataframe = variables,
directory=None,
x_col= 'Image',
y_col= 'Label',
weight_col=None,
target_size=( WIDTH, HEIGHT ),
color_mode='rgb',
classes=None,
class_mode='categorical', ####
batch_size=train_generator_batch_size,
shuffle=True,
seed=None,
save_to_dir=None,
save_prefix='',
save_format='png',
subset=None,
interpolation='nearest',
validate_filenames=True,
)
return train_image_ds
class gradient_tape_optimizer( ):
def __init__ ( self, model, num_iter, content_iter, batch_size ):
self.num_iter = num_iter
self.content_iter = content_iter
self.style_iter = content_iter
self.batch_size = batch_size
self.model = model
self.loss = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy' )
self.optimizer = tf.keras.optimizers.Nadam( learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Nadam' )
def _compute_mean_std( self, feats : tf.Tensor, eps=1e-8 ):
"""
feats: Features should be in shape N x H x W x C
"""
mean = tf.math.reduce_mean(feats, axis=[1,2], keepdims=True)
std = tf.math.reduce_std(feats, axis=[1,2], keepdims=True) + eps
return mean, std
def criterion( self, stylized_img : tf.Tensor, style_img : tf.Tensor, t : tf.Tensor ):
stylized_content_feats = self.model.encode(stylized_img)
stylized_feats = self.model.encode(stylized_img, return_all=True)
style_feats = self.model.encode(style_img, return_all=True)
content_loss = self.mse_loss(t, stylized_content_feats)
style_loss = 0
for f1, f2 in zip(stylized_feats, style_feats):
m1, s1 = self._compute_mean_std(f1)
m2, s2 = self._compute_mean_std(f2)
style_loss += self.mse_loss(m1, m2) + self.mse_loss(s1, s2)
return content_loss + self.style_weight * style_loss
def train( self ):
step = 0
while step < self.num_iter:
content_batch = self.content_iter.get_next()
if content_batch[0].shape[1] != self.batch_size:
content_batch = self.content_iter.get_next()
style_batch = self.style_iter.get_next()
if style_batch[0].shape[1] != self.batch_size:
style_batch = self.style_iter.get_next()
current_label = tf.constant( content_batch[1], shape=( 2, 1 ) ).numpy()
loss_value = tf.Variable( 10.0 )
with tf.GradientTape() as tape:
result = self.model( inputs=tf.constant( content_batch[0], shape=( 1, WIDTH, HEIGHT, CHANNEL ) ) )
result = tf.constant( result, shape=( 2, 1 ) )
predict_label = tf.Variable( tf.constant( self.model.trainable_weights[len(self.model.trainable_weights) - 1], shape=( 2, 1 ) ) )
loss_value = self.loss( result.numpy(), current_label )
loss_value = tf.Variable( tf.constant( loss_value, shape=( 1, ) ).numpy() )
tape.watch( loss_value )
gradients = tape.gradient( loss_value, loss_value )
self.optimizer.apply_gradients(zip(gradients, self.model.trainable_weights))
# log and save every 200 batches
if step % 200 == 0:
if result[tf.math.argmax(result).numpy()[0]][0] > 0 :
print(f'Training loss (for one batch) at step {step}: {self.loss} value {result[tf.math.argmax(result).numpy()[0]]}')
else :
print(f'Training loss (for one batch) at step {step}: {self.loss} value {result[abs( 1 - tf.math.argmax(result).numpy()[0]) ]}')
print(f'Seen so far: {(step+1)*self.batch_size} samples')
self.model.save_weights(checkpoint_path)
step += 1
print("Finished training...")
self.model.save_weights(checkpoint_path)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 7.xlsx', index_col=None, header=[0], dtype=str)
train_image_ds = tf.data.Dataset.from_generator(
create_image_generator,
output_types=None,
output_shapes=None,
args=None,
output_signature=(
tf.TensorSpec(shape=( 1, WIDTH, HEIGHT, CHANNEL ), dtype=tf.float32, name=None), tf.TensorSpec(shape=(1, 2), dtype=tf.float32, name=None),
),
name='train_image_ds'
)
train_image_ds = train_image_ds.batch( 1 )
iterator = iter( train_image_ds )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( WIDTH, HEIGHT, CHANNEL )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 127 * 127)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(2),
])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=0.0000001,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
gradient_tape_optimizer = gradient_tape_optimizer( model, num_iter, iterator, batch_size )
result = gradient_tape_optimizer.train()
input( '...' )
Result: the loss changes only a little; that is because a custom optimizer step is simply the algorithm applied as written.
2022-10-15 14:23:57.141863: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
Training loss (for one batch) at step 0: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.06285592]
Seen so far: 1 samples
Training loss (for one batch) at step 200: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.05492945]
Seen so far: 201 samples
Training loss (for one batch) at step 400: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.05577546]
Seen so far: 401 samples
Training loss (for one batch) at step 600: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.06180618]
Seen so far: 601 samples
Training loss (for one batch) at step 800: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.05990243]
Seen so far: 801 samples
Finished training...
...
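One editorial note on the sketch above, hedged because the intent of the original code is not fully clear: tape.gradient(loss_value, loss_value) differentiates the loss with respect to itself rather than the model weights, so the weights never move, which matches the nearly flat loss in the log. A conventional tape step would look roughly like this (batch_images and batch_labels stand for one batch from the iterator and are assumptions):
with tf.GradientTape() as tape:
    logits = model(batch_images, training=True)   # forward pass through the Sequential model above
    loss_value = lossfn(batch_labels, logits)     # the sparse categorical cross-entropy defined above
gradients = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(gradients, model.trainable_weights))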
I am working on skin disease classification and trying to build a CNN model. I have approximately 200 images. I split my data using train_test_split imported from sklearn.model_selection.
code snippet:
data = [] #this is where I will store all the data
for category in categories:
    path = os.path.join(data_dir, category)
    # print(path)
    class_num = categories.index(category)
    # print(class_num)
    for img in os.listdir(path):
        # print(img)
        try:
            img_array = cv2.imread(os.path.join(path, img))
            new_array = cv2.resize(img_array, img_size)
            data.append([new_array, class_num])
        except Exception as e:
            pass
X = []
Y = []
for features, label in data:
    X.append(features)
    Y.append(label)
X = np.array(X)
X = X.astype('float32')/255.0
X = X.reshape(-1,height, width,3)
Y = np.array(Y)
from keras.utils.np_utils import to_categorical
Y = to_categorical(Y, num_classes = 4)
from sklearn.model_selection import train_test_split
# train_ratio = 0.75
# validation_ratio = 0.15
# test_ratio = 0.10
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42, stratify=Y)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=1, stratify=y_train)
import pickle
pickle.dump(X_train, open('/content/drive/MyDrive/Data/X_train', 'wb'))
pickle.dump(y_train, open('/content/drive/MyDrive/Data/y_train', 'wb'))
pickle.dump(X_test, open('/content/drive/MyDrive/Data/X_test', 'wb'))
pickle.dump(y_test, open('/content/drive/MyDrive/Data/y_test', 'wb'))
pickle.dump(X_val, open('/content/drive/MyDrive/Data/X_val', 'wb'))
pickle.dump(y_val, open('/content/drive/MyDrive/Data/y_val', 'wb'))
The accuracy drastically drops when I use ImageDataGenerator for data augmentation.
code snippet:
Adam(learning_rate=0.00001, name='Adam')
model.compile(optimizer = 'Adam',loss = 'categorical_crossentropy',metrics = ['accuracy'])
epochs = 80
from tensorflow.keras import callbacks
import time
import keras
from keras.callbacks import EarlyStopping
es_callback = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=20)
datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=30,
shear_range=0.3,
zoom_range=0.3,
width_shift_range=0.4,
height_shift_range=0.4,
horizontal_flip=True,
fill_mode='nearest'
)
checkpoint = callbacks.ModelCheckpoint(
filepath='/content/drive/MyDrive/Model1/model.{epoch:02d}-{accuracy:.2f}-{val_accuracy:.2f}.h5',
monitor='val_accuracy',
verbose=1,
save_best_only=True,
mode='auto'
)
history5 = model.fit(datagen.flow(X_train,
y_train,
batch_size=32),
epochs = epochs,
validation_data = (X_val,y_val)
)
Without data augmentation the validation accuracy is 55%; with data augmentation the validation accuracy drops to 30%.
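Two things worth checking in the snippets above (editorial observations, hedged rather than a confirmed fix): X is already divided by 255 before the split, yet the generator rescales by 1/255 again, so the augmented training images end up on a different scale than the validation set passed to validation_data; and the Adam instance created with learning_rate=0.00001 is never passed to compile, which uses the string 'Adam' with the default rate. A minimal sketch of the corresponding changes:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# No second rescaling: X was already scaled to [0, 1] before the split.
datagen = ImageDataGenerator(
    rotation_range=30,
    shear_range=0.3,
    zoom_range=0.3,
    width_shift_range=0.4,
    height_shift_range=0.4,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Pass the configured optimizer object instead of the string 'Adam'.
model.compile(optimizer=Adam(learning_rate=0.00001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])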
How do I perform linear regression in TensorFlow 2.0? An example or a tutorial link would be appreciated; all the tutorials on YouTube use TensorFlow 1.
Example 1: regression using Tensorflow 2.0.0 :
import tensorflow as tf
# tensorflow 2.0.0
class Model:
    def __init__(self):
        self.W = tf.Variable(7.0)  # initial value for model parameter W
        self.b = tf.Variable(0.0)  # initial value for model bias b
    def model(self, x):
        return self.W * x + self.b
    def loss(predicted_label, target_label):
        return tf.reduce_mean(tf.square(predicted_label - target_label))
    def train(self, inputs, outputs, learning_rate):
        with tf.GradientTape() as t:
            current_loss = Model.loss(self.model(inputs), outputs)
        # backpropagation
        dW, db = t.gradient(current_loss, [self.W, self.b])
        self.W.assign_sub(learning_rate * dW)
        self.b.assign_sub(learning_rate * db)
        return current_loss
    def run(self):
        import matplotlib.pyplot as plt
        # Generate train data when true W=2.0 and b=3.0
        TRUE_W = 2.0
        TRUE_b = 3.0
        NUM_INSTANCES = 500  # number of train data points
        inputs = tf.random.normal(shape=[NUM_INSTANCES])
        noise = tf.random.normal(shape=[NUM_INSTANCES])
        outputs = inputs * TRUE_W + TRUE_b + noise
        print("Model before train (red dots):")
        plt.scatter(inputs, outputs, c='b')
        plt.scatter(inputs, self.model(inputs), c='r')
        plt.show()
        epochs = range(50)
        for epoch in epochs:
            current_loss = self.train(inputs, outputs, learning_rate=0.1)
            if epoch % 10 == 0:
                print('Epoch %2d: loss=%2.5f' %
                      (epoch, current_loss))
        print("Model after train (red dots):")
        plt.scatter(inputs, outputs, c='b')
        plt.scatter(inputs, self.model(inputs), c='r')
        plt.show()
ob = Model()
ob.run()
Example 2: regression using Tensorflow 2.0.0 and keras optimizer:
import tensorflow as tf
#Tensorflow 2.0.0
class Model:
    def __init__(self):
        self.W = tf.Variable(5.0)
        self.b = tf.Variable(0.0)
    def model(self):
        return self.W * self.inputs + self.b
    def loss(self):
        return tf.reduce_mean(tf.square(self.model() - self.outputs))
    def run(self):
        import matplotlib.pyplot as plt
        # Generate train data when true W=2.0 and b=3.0
        TRUE_W = 2.0
        TRUE_b = 3.0
        NUM_INSTANCES = 500  # number of train data points
        print("Model before train (red dots):")
        self.inputs = tf.random.normal(shape=[NUM_INSTANCES])
        noise = tf.random.normal(shape=[NUM_INSTANCES])
        self.outputs = self.inputs * TRUE_W + TRUE_b + noise
        plt.scatter(self.inputs, self.outputs, c='b')
        plt.scatter(self.inputs, self.model(), c='r')
        plt.show()
        opt = tf.keras.optimizers.Adam(learning_rate=0.1)
        epochs = range(50)
        for epoch in epochs:
            opt.minimize(self.loss, var_list=[self.W, self.b])
            current_loss = self.loss()
            if epoch % 10 == 0:
                print('Epoch %2d: loss=%2.5f' %
                      (epoch, current_loss))
        print("Model after train (red dots):")
        plt.scatter(self.inputs, self.outputs, c='b')
        plt.scatter(self.inputs, self.model(), c='r')
        plt.show()
ob = Model()
ob.run()
Hope this helps.
I have made an example according to this: https://www.geeksforgeeks.org/linear-regression-using-tensorflow/ just in TF2:
import numpy as np
import tensorflow as tf
#tf.enable_v2_behavior()
import matplotlib.pyplot as plt
np.random.seed(101)
tf.random.set_seed(101)
x = np.linspace(0, 50, 50)
y = np.linspace(0, 50, 50)
# Adding noise to the random linear data
x += np.random.uniform(-4, 4, 50)
y += np.random.uniform(-4, 4, 50)
n = len(x) # Number of data points
plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.title("Training Data")
plt.show()
x=tf.constant(x, dtype=tf.float32)
y=tf.constant(y, dtype=tf.float32)
W = tf.Variable(np.random.randn(), name = "W")
b = tf.Variable(np.random.randn(), name = "b")
learning_rate = 0.01
training_epochs = 1000
def y_pred(x):
    y_pred = tf.add(tf.multiply(x, W), b)
    return y_pred
# Mean Squared Error Cost Function
def cost():
    cost = tf.reduce_sum(tf.pow(y_pred(x) - y, 2)) / (2 * n)
    return cost
# Adam Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)
pred=y_pred(x) #run to initialize weight and bias
trainable_vars=[W,b]
for epoch in range(training_epochs):
    optimizer.minimize(cost, trainable_vars)
    pred = y_pred(x)
    if (epoch + 1) % 50 == 0:
        c = cost()
        print("Epoch", (epoch + 1), ": cost =", c.numpy(), "W =", W.numpy(), "b =", b.numpy())
plt.plot(x, y, 'ro', label ='Original data')
plt.plot(x, pred, label ='Fitted line')
plt.title('Linear Regression Result')
plt.legend()
plt.show()
Basically, there are no sessions and it is easier. :) The SGD optimizer was working poorly, so I used Adam.
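If you would rather stay entirely inside the Keras API, here is a hedged equivalent sketch for the same kind of data (not part of the original answer; a single Dense(1) layer is exactly y = W*x + b):
import numpy as np
import tensorflow as tf
x = (np.linspace(0, 50, 50) + np.random.uniform(-4, 4, 50)).reshape(-1, 1).astype(np.float32)
y = (np.linspace(0, 50, 50) + np.random.uniform(-4, 4, 50)).reshape(-1, 1).astype(np.float32)
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1), loss='mse')
model.fit(x, y, epochs=200, verbose=0)
W, b = model.layers[0].get_weights()
print("W =", W.ravel()[0], "b =", b.ravel()[0])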
I'm trying to work with LSTMs in TensorFlow, but I have got to the point where I can't make a simple IMDB sentiment model converge.
I took a Keras model and tried to duplicate the exact same model in TensorFlow; in Keras it trains and converges, but in TensorFlow it just gets stuck at some point (0.69 loss).
I tried to make them as equal as possible. The only difference I can tell is that in Keras the padding is before the sequence, while in TensorFlow I use 'post' padding due to TensorFlow's conventions.
Any idea what's wrong with my TensorFlow model?
from __future__ import print_function
import random
import numpy as np
from tensorflow.contrib.keras.python.keras.preprocessing import sequence
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.python.keras.layers import Embedding
from tensorflow.contrib.keras.python.keras.layers import LSTM
from tensorflow.contrib.keras.python.keras.layers import Conv1D, MaxPooling1D
from tensorflow.contrib.keras.python.keras.datasets import imdb
import tensorflow as tf
# Embedding
max_features = 30000
maxlen = 2494
embedding_size = 128
# Convolution
kernel_size = 5
filters = 64
pool_size = 4
# LSTM
lstm_output_size = 70
# Training
batch_size = 30
epochs = 2
class TrainData:
    def __init__(self, batch_sz=batch_size):
        (x_train, y_train), (_, _) = imdb.load_data(num_words=max_features)
        y_train = [[int(x == 1), int(x != 1)] for x in y_train]
        self._batch_size = batch_sz
        self._train_data = sequence.pad_sequences(x_train, padding='pre')
        self._train_labels = y_train
    def next_batch(self):
        if len(self._train_data) < self._batch_size:
            self.__init__()
        batch_x, batch_y = self._train_data[:self._batch_size], self._train_labels[:self._batch_size]
        self._train_data = self._train_data[self._batch_size:]
        self._train_labels = self._train_labels[self._batch_size:]
        return batch_x, batch_y
    def batch_generator(self):
        while True:
            if len(self._train_data) < self._batch_size:
                self.__init__()
            batch_x, batch_y = self._train_data[:self._batch_size], self._train_labels[:self._batch_size]
            self._train_data = self._train_data[self._batch_size:]
            self._train_labels = self._train_labels[self._batch_size:]
            yield batch_x, batch_y
    def get_num_batches(self):
        return int(len(self._train_data) / self._batch_size)
def length(sequence):
    used = tf.sign(tf.abs(sequence))
    length = tf.reduce_sum(used, reduction_indices=1)
    length = tf.cast(length, tf.int32)
    return length
def get_model(x, y):
    embedding = tf.get_variable("embedding", [max_features, embedding_size], dtype=tf.float32)
    embedded_x = tf.nn.embedding_lookup(embedding, x)
    print(x)
    print(embedded_x)
    print(length(x))
    cell_1 = tf.contrib.rnn.BasicLSTMCell(lstm_output_size)
    output_1, state_1 = tf.nn.dynamic_rnn(cell_1, embedded_x, dtype=tf.float32, scope="rnn_layer1",
                                          sequence_length=length(x))
    # Select last output.
    last_index = tf.shape(output_1)[1] - 1
    # reshaping to [seq_length, batch_size, num_units]
    output = tf.transpose(output_1, [1, 0, 2])
    last = tf.gather(output, last_index)
    # Softmax layer
    with tf.name_scope('fc_layer'):
        weight = tf.get_variable(name="weights", shape=[lstm_output_size, 2])
        bias = tf.get_variable(shape=[2], name="bias")
    logits = tf.matmul(last, weight) + bias
    loss = tf.losses.softmax_cross_entropy(y, logits=logits)
    optimizer = tf.train.AdamOptimizer()
    optimize_step = optimizer.minimize(loss=loss)
    return loss, optimize_step
def tf_model():
    x_holder = tf.placeholder(tf.int32, shape=[None, maxlen])
    y_holder = tf.placeholder(tf.int32, shape=[None, 2])
    loss, opt_step = get_model(x_holder, y_holder)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        for epoch in range(10):
            cost_epochs = []
            train_data = TrainData()
            cost_batch = 0
            for batch in range(train_data.get_num_batches()):
                x_train, y_train = train_data.next_batch()
                _, cost_batch = sess.run([opt_step, loss],
                                         feed_dict={x_holder: x_train,
                                                    y_holder: y_train})
                cost_epochs.append(cost_batch)
                step += 1
                # if step % 100 == 0:
            print("Epoch: " + str(epoch))
            print("\tcost: " + str(np.mean(cost_epochs)))
def keras_model():
    # print('Loading data...')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    y_test = [[int(x == 1), int(x != 1)] for x in y_test]
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen, padding='pre')
    model = Sequential()
    model.add(Embedding(max_features, embedding_size, input_length=maxlen))
    model.add(LSTM(lstm_output_size))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print('Train...')
    data = TrainData()
    model.fit_generator(data.batch_generator(), steps_per_epoch=data.get_num_batches(),
                        epochs=epochs,
                        validation_data=(x_test, y_test))
if __name__ == '__main__':
    # keras_model()
    tf_model()
EDIT
When I limit the sequence length to 100 both models converge, so I assume there is something different in the LSTM layer.
Check the initial values of your operations. In my case the Adadelta optimizer in Keras had an initial learning rate of 1.0, while in tf.keras it was 0.001, so on the MNIST dataset it converged much more slowly.
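A hedged illustration of pinning that value explicitly so both frameworks start from the same place (the numbers are the library defaults mentioned above, and model stands for whichever Keras/tf.keras model you are compiling):
from tensorflow import keras
# Standalone Keras historically defaulted Adadelta to lr=1.0, tf.keras to 0.001;
# setting it explicitly removes that hidden difference.
optimizer = keras.optimizers.Adadelta(learning_rate=1.0, rho=0.95, epsilon=1e-7)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])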