Performing linear regression using TensorFlow 2.0

How do I perform linear regression in TensorFlow 2.0? An example or a tutorial link would be appreciated. All the tutorials I can find on YouTube use TensorFlow 1.

Example 1: regression using TensorFlow 2.0.0:
import tensorflow as tf
# tensorflow 2.0.0

class Model:
    def __init__(self):
        self.W = tf.Variable(7.0)  # initial value for model parameter W
        self.b = tf.Variable(0.0)  # initial value for model bias b

    def model(self, x):
        return self.W * x + self.b

    @staticmethod
    def loss(predicted_label, target_label):
        return tf.reduce_mean(tf.square(predicted_label - target_label))

    def train(self, inputs, outputs, learning_rate):
        with tf.GradientTape() as t:
            current_loss = Model.loss(self.model(inputs), outputs)
        # backpropagation
        dW, db = t.gradient(current_loss, [self.W, self.b])
        self.W.assign_sub(learning_rate * dW)
        self.b.assign_sub(learning_rate * db)
        return current_loss

    def run(self):
        import matplotlib.pyplot as plt
        # Generate training data with true W=2.0 and b=3.0
        TRUE_W = 2.0
        TRUE_b = 3.0
        NUM_INSTANCES = 500  # number of training instances
        inputs = tf.random.normal(shape=[NUM_INSTANCES])
        noise = tf.random.normal(shape=[NUM_INSTANCES])
        outputs = inputs * TRUE_W + TRUE_b + noise

        print("Model before training (red dots):")
        plt.scatter(inputs, outputs, c='b')
        plt.scatter(inputs, self.model(inputs), c='r')
        plt.show()

        epochs = range(50)
        for epoch in epochs:
            current_loss = self.train(inputs, outputs, learning_rate=0.1)
            if epoch % 10 == 0:
                print('Epoch %2d: loss=%2.5f' % (epoch, current_loss))

        print("Model after training (red dots):")
        plt.scatter(inputs, outputs, c='b')
        plt.scatter(inputs, self.model(inputs), c='r')
        plt.show()

ob = Model()
ob.run()
Example 2: regression using TensorFlow 2.0.0 and a Keras optimizer:
import tensorflow as tf
# tensorflow 2.0.0

class Model:
    def __init__(self):
        self.W = tf.Variable(5.0)
        self.b = tf.Variable(0.0)

    def model(self):
        return self.W * self.inputs + self.b

    def loss(self):
        return tf.reduce_mean(tf.square(self.model() - self.outputs))

    def run(self):
        import matplotlib.pyplot as plt
        # Generate training data with true W=2.0 and b=3.0
        TRUE_W = 2.0
        TRUE_b = 3.0
        NUM_INSTANCES = 500  # number of training instances

        print("Model before training (red dots):")
        self.inputs = tf.random.normal(shape=[NUM_INSTANCES])
        noise = tf.random.normal(shape=[NUM_INSTANCES])
        self.outputs = self.inputs * TRUE_W + TRUE_b + noise
        plt.scatter(self.inputs, self.outputs, c='b')
        plt.scatter(self.inputs, self.model(), c='r')
        plt.show()

        opt = tf.keras.optimizers.Adam(learning_rate=0.1)
        epochs = range(50)
        for epoch in epochs:
            opt.minimize(self.loss, var_list=[self.W, self.b])
            current_loss = self.loss()
            if epoch % 10 == 0:
                print('Epoch %2d: loss=%2.5f' % (epoch, current_loss))

        print("Model after training (red dots):")
        plt.scatter(self.inputs, self.outputs, c='b')
        plt.scatter(self.inputs, self.model(), c='r')
        plt.show()

ob = Model()
ob.run()
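Since both examples generate data with TRUE_W = 2.0 and TRUE_b = 3.0, a quick sanity check (a minimal sketch, reusing the ob instance created just above) is to print the learned parameters once run() has finished:
# After run() the fitted parameters should have moved from their initial
# values toward the generating ones (the added noise has unit variance):
print("learned W:", ob.W.numpy())  # should be near TRUE_W = 2.0
print("learned b:", ob.b.numpy())  # should be near TRUE_b = 3.0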
Hope this helps.

I have made an example based on this tutorial: https://www.geeksforgeeks.org/linear-regression-using-tensorflow/ but in TF2:
import numpy as np
import tensorflow as tf
# tf.enable_v2_behavior()
import matplotlib.pyplot as plt

np.random.seed(101)
tf.random.set_seed(101)

x = np.linspace(0, 50, 50)
y = np.linspace(0, 50, 50)
# Adding noise to the random linear data
x += np.random.uniform(-4, 4, 50)
y += np.random.uniform(-4, 4, 50)
n = len(x)  # Number of data points

plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.title("Training Data")
plt.show()

x = tf.constant(x, dtype=tf.float32)
y = tf.constant(y, dtype=tf.float32)
W = tf.Variable(np.random.randn(), name="W")
b = tf.Variable(np.random.randn(), name="b")
learning_rate = 0.01
training_epochs = 1000

def y_pred(x):
    return tf.add(tf.multiply(x, W), b)

# Mean Squared Error Cost Function
def cost():
    return tf.reduce_sum(tf.pow(y_pred(x) - y, 2)) / (2 * n)

# Adam Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)
pred = y_pred(x)  # predictions from the initial W and b
trainable_vars = [W, b]

for epoch in range(training_epochs):
    optimizer.minimize(cost, trainable_vars)
    pred = y_pred(x)
    if (epoch + 1) % 50 == 0:
        c = cost()
        print("Epoch", (epoch + 1), ": cost =", c.numpy(), "W =", W.numpy(), "b =", b.numpy())

plt.plot(x, y, 'ro', label='Original data')
plt.plot(x, pred, label='Fitted line')
plt.title('Linear Regression Result')
plt.legend()
plt.show()
Basically, there are no sessions and it is easier. :) The SGD optimizer was working poorly here, so I used Adam.
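If you do want to compare against plain SGD anyway, the first thing to try is a much smaller step size; a minimal sketch reusing the cost, trainable_vars and training_epochs defined above (the 0.001 value is just a guess to experiment with, not a tuned setting):
# Hypothetical alternative to Adam: plain SGD with a smaller learning rate
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
for epoch in range(training_epochs):
    optimizer.minimize(cost, trainable_vars)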

Related

Why can't my chemical VAE learn anything with a toy dataset?

I'm trying to implement a mini version of the chemical VAE described in this paper: 10.1021/acscentsci.7b00572. The model trains successfully and the loss changes. However, the predicted properties of all samples are the same, close to the mean value, and the autoencoder cannot reconstruct the input data. In other words, the model does not learn anything from training. I have carefully checked my code but failed to find anything wrong. Can anyone help? Thank you.
Here is my code:
import numpy as np
import tensorflow as tf

# example smiles and properties
smiles = ['CCCCO', 'C1CCCCC1', 'C[C@@H](C(=O)O)N', 'C[C@H](C(=O)O)N', 'CC(=O)O'] * 200
y = [1, 2, 3, 4, 5] * 200

# smiles to one-hot
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

dicts = set(''.join(smiles))
num_words = len(dicts) + 1
max_lens = 15
tokenizer = Tokenizer(num_words=num_words, char_level=True)
tokenizer.fit_on_texts(smiles)
sequences = tokenizer.texts_to_sequences(smiles)
sequences = pad_sequences(sequences, maxlen=max_lens, padding='post', truncating='post')
x = to_categorical(sequences, num_classes=num_words)
# model
from tensorflow.keras import layers, Model

class VAEWithRegressor(Model):
    """Combines a variational autoencoder with a property regressor."""
    def __init__(self, latent_dim):
        super(VAEWithRegressor, self).__init__()
        # Define the encoder layers
        self.encoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=x[0].shape),
                layers.GRU(units=64, return_sequences=True),
                layers.BatchNormalization(),
                layers.GRU(units=32),
                layers.BatchNormalization(),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(latent_dim * 2),
            ]
        )
        # Define the decoder layers
        self.decoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(units=32),
                layers.BatchNormalization(),
                layers.RepeatVector(max_lens),
                layers.GRU(units=max_lens, return_sequences=True),
                layers.BatchNormalization(),
                layers.TimeDistributed(layers.Dense(units=num_words)),
                layers.Activation('softmax')
            ]
        )
        # Define the regressor layers
        self.regressor = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=32),
                layers.Dense(units=16),
                layers.Dense(units=1),
            ]
        )

    def encode(self, x):
        # Compute the mean and log variance of the latent variable
        h = self.encoder(x)
        mean, log_var = tf.split(h, num_or_size_splits=2, axis=1)
        return mean, log_var

    def reparameterize(self, mean, log_var):
        # Sample from the latent variable distribution
        eps = tf.random.normal(tf.shape(mean))
        std_dev = tf.exp(0.5 * log_var)
        z = mean + std_dev * eps
        return z

    def decode(self, z):
        # Reconstruct the input from the latent variable
        return self.decoder(z)

    def predict_properties(self, z):
        # Predict the properties of the input
        return self.regressor(z)

    def call(self, x):
        # Define the forward pass of the model
        mean, log_var = self.encode(x)
        z = self.reparameterize(mean, log_var)
        x_pred = self.decode(z)
        properties = self.predict_properties(z)
        return x_pred, mean, log_var, properties

    def vae_loss(self, x, x_pred, mean, log_var):
        recon_loss = tf.reduce_sum(tf.keras.losses.binary_crossentropy(x, x_pred), axis=1)
        kl_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=1)
        return tf.reduce_mean(recon_loss + kl_loss)

    def property_loss(self, y_true, y_pred):
        # Compute the mean squared error between the true and predicted properties
        return tf.reduce_mean(tf.keras.losses.mean_squared_error(y_true, y_pred))

    def train_step(self, x, y_true):
        with tf.GradientTape() as tape:
            x_pred, mean, log_var, y_pred = self.call(x)
            vae_loss_value = self.vae_loss(x, x_pred, mean, log_var)
            property_loss_value = self.property_loss(y_true, y_pred)
            total_loss = vae_loss_value + property_loss_value
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        gradients = tape.gradient(total_loss, self.trainable_variables)
        optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return vae_loss_value, property_loss_value

latent_dim = 8
num_epochs = 50
batch_size = 256
vae = VAEWithRegressor(latent_dim)
x_train = x
y_train = y
for epoch in range(num_epochs):
    epoch_vae_loss = 0
    epoch_property_loss = 0
    for i in range(0, len(x_train), batch_size):
        x_batch = x_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]
        vae_loss_value, property_loss_value = vae.train_step(x_batch, y_batch)
        epoch_vae_loss += vae_loss_value
        epoch_property_loss += property_loss_value
    epoch_vae_loss /= (len(x_train) / batch_size)
    epoch_property_loss /= (len(x_train) / batch_size)
    print('Epoch {}, VAE loss: {}, Property loss: {}'.format(epoch + 1, epoch_vae_loss, epoch_property_loss))

z_sample = vae.encoder.predict(x)[:, :latent_dim]
x_pred = np.array(vae.decoder.predict(z_sample))
y_pred = np.array(vae.predict_properties(z_sample))

Exploding LOSS in TensorFlow 2.0 Linear Regression Example using GradientTape

I'm trying to construct a little educational example for multivariate linear regression, but the LOSS is increasing until it explodes rather than getting smaller. Any idea why?
import tensorflow as tf
tf.__version__
import numpy as np

data = np.array(
    [
        [100, 35, 35, 12, 0.32],
        [101, 46, 35, 21, 0.34],
        [130, 56, 46, 3412, 12.42],
        [131, 58, 48, 3542, 13.43]
    ]
)
x = data[:, 1:-1]
y_target = data[:, -1]

def loss_function(y, pred):
    return tf.reduce_mean(tf.square(y - pred))

def train(b, w, x, y, lr=0.012):
    with tf.GradientTape() as t:
        current_loss = loss_function(y, linear_model(x))
    lr_weight, lr_bias = t.gradient(current_loss, [w, b])
    w.assign_sub(lr * lr_weight)
    b.assign_sub(lr * lr_bias)

epochs = 80
for epoch_count in range(epochs):
    real_loss = loss_function(y_target, linear_model(x))
    train(b, w, x, y_target, lr=0.12)
    print(f"Epoch count {epoch_count}: Loss value: {real_loss.numpy()}")
This even happens if I initialize the weights with the "correct" values (found out via a scikit-learn regressor)
w = tf.Variable([-1.76770250e-04,3.46688912e-01,2.43827475e-03],dtype=tf.float64)
b = tf.Variable(-11.837184241807234,dtype=tf.float64)
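For reference, here is a minimal sketch of how such baseline coefficients can be obtained with scikit-learn (using the same x and y_target arrays defined above; this is an assumption about how the poster got them, and the exact numbers depend on the solver):
from sklearn.linear_model import LinearRegression

reg = LinearRegression().fit(x, y_target)  # x, y_target as defined above
print(reg.coef_)       # reference weights comparable to w
print(reg.intercept_)  # reference bias comparable to b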
Here's how you might use a TF2 optimizer for a toy example (as per the comment). I know this is not the answer, but I didn't want to post it in the comments section, as that would mess up the indentation and all that.
tf_x = tf.Variable(tf.constant(2.0, dtype=tf.float32), name='x')
optimizer = tf.optimizers.SGD(learning_rate=0.1)

# Optimizing tf_x using gradient tape
x_series, y_series = [], []
for step in range(5):
    x_series.append(tf_x.numpy().item())
    with tf.GradientTape() as tape:
        tf_y = tf_x**2
    gradients = tape.gradient(tf_y, tf_x)
    optimizer.apply_gradients(zip([gradients], [tf_x]))
Based on @thushv89's input, I'm providing here an intermediate solution using a TF2 optimizer which works, although it does not answer my question 100%:
import tensorflow as tf
tf.__version__
import numpy as np

data = np.array(
    [
        [100, 35, 35, 12, 0.32],
        [101, 46, 35, 21, 0.34],
        [130, 56, 46, 3412, 12.42],
        [131, 58, 48, 3542, 13.43]
    ]
)
x = data[:, 1:-1]
y_target = data[:, -1]

w = tf.Variable([1, 1, 1], dtype=tf.float64)
b = tf.Variable(1, dtype=tf.float64)

def linear_model(x):
    return b + tf.tensordot(x, w, axes=1)

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.MeanSquaredLogarithmicError()

def train_step(x, y):
    with tf.GradientTape() as tape:
        predicted = linear_model(x)
        loss_value = loss_object(y, predicted)
        print(f"Loss Value:{loss_value}")
    grads = tape.gradient(loss_value, [b, w])
    optimizer.apply_gradients(zip(grads, [b, w]))

def train(epochs):
    for epoch in range(epochs):
        train_step(x, y_target)
        print('Epoch {} finished'.format(epoch))

train(epochs=1000)

A question about simple Keras and Tensorflow code performance

I wrote simple sine-function predictors using Keras and TensorFlow with LSTM, but found that the Keras code is much slower: it runs for about 5 minutes, while the TensorFlow code trains the model in just 20 seconds. Moreover, the Keras predictions are less precise than the TensorFlow ones. Could anyone help me find the relevant difference between the two models?
I adapted code I found online and intended to train both models with the same hyperparameters, but the performance is not as expected. I tried searching through many materials online but found no explanation.
Keras Code:
import numpy as np
import os
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
import pickle
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

stime = time.time()
BATCH_SIZE = 20
TIME_STEPS = 10
LN = 410
DIFF = 2
OUTPUT_PATH = '/Users/xiachang/Documents/RNN/test_outputs'
SCALER_COL_IDX = 0
params = {
    "batch_size": BATCH_SIZE,  # 20<16<10, 25 was a bust
    "epochs": 500,
    "lr": 0.00010000,
    "time_steps": TIME_STEPS
}
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
HIDDEN_UNITS = 20

# data = np.array([[i * (DIFF)] for i in range(LN)])
#
# min_max_scaler = MinMaxScaler()
# data = min_max_scaler.fit_transform(data)

def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIME_STEPS):
        X.append([[e] for e in seq[i: i + TIME_STEPS]])
        y.append([seq[i + TIME_STEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
x_val, x_test = np.split(test_X, 2)
y_val, y_test = np.split(test_y, 2)

def print_time(text, stime):
    seconds = (time.time() - stime)
    print(text, seconds // 60, "minutes : ", np.round(seconds % 60), "seconds")

def create_model():
    lstm_model = Sequential()
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model

model = create_model()

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, min_delta=0.0001)
mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH, "best_model.h5"), monitor='val_loss', verbose=1,
                      save_best_only=True, save_weights_only=False, mode='min', period=1)
# Not used here. But leaving it here as a reminder for future
r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
                              verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ", "_") + '.log'), append=True)

history = model.fit(train_X, train_y, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
                    shuffle=False, validation_data=(x_val, y_val), callbacks=[es, mcp, csv_logger])

print("saving model...")
pickle.dump(model, open("test_outputs/lstm_model", "wb"))

# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'train_vis_BS_' + str(BATCH_SIZE) + "_" + time.ctime() + '.png'))

# load the saved best model from above
saved_model = load_model(os.path.join(OUTPUT_PATH, 'best_model.h5'))  # , "lstm_best_7-3-19_12AM",
print(saved_model)

y_pred = saved_model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = y_test
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = y_pred
y_test_t_org = y_test_t
print(y_pred_org[0:15])
print(y_test_t_org[0:15])

# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Value')
plt.ylabel('Y')
plt.xlabel('X')
plt.legend(['Prediction', 'Real'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'pred_vs_real_BS' + str(BATCH_SIZE) + "_" + time.ctime() + '.png'))

print_time("program completed ", stime)
Tensorflow code:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

NUM_EPOCH = 1000
HIDDEN_SIZE = 30
NUM_LAYERS = 2
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 20
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01

def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIMESTEPS):
        X.append([seq[i: i + TIMESTEPS]])
        y.append([seq[i + TIMESTEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

def lstm_model(X, y, is_training):
    cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]
    predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    if not is_training:
        return predictions, None, None
    loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op

def train(sess, train_X, train_Y):
    ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    X, y = ds.make_one_shot_iterator().get_next()
    losses = np.array([])
    with tf.variable_scope('model'):
        predictions, loss, train_op = lstm_model(X, y, True)
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([train_op, loss])
        losses = np.append(losses, l)
        if i % NUM_EPOCH == 0:
            print('train step: ' + str(i) + ', loss: ' + str(l))
    plt.figure()
    plt.plot(losses, label='loss')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/loss.png')

def run_eval(sess, test_X, test_y):
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
    ds = ds.batch(1)
    X, y = ds.make_one_shot_iterator().get_next()
    with tf.variable_scope('model', reuse=True):
        prediction, _, _ = lstm_model(X, [0, 0], False)
    predictions = []
    labels = []
    for i in range(int(TESTING_EXAMPLES / 2)):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print('Mean Square Error is: %f' % rmse)
    plt.figure()
    print(predictions[:15])
    print(labels[:15])
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_val')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/test.png')

test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
x_val, test_X = np.split(test_X, 2)
y_val, test_y = np.split(test_y, 2)

with tf.Session() as sess:
    train(sess, train_X, train_y)
    run_eval(sess, test_X, test_y)
Maybe you should try using CuDNNLSTM instead of LSTM. It is CUDA-accelerated: a fast LSTM implementation backed by cuDNN.
See here: https://github.com/keras-team/keras/blob/master/keras/layers/cudnn_recurrent.py#L328
Also, your model structures are not the same: the first has 3 LSTM layers, the other has 2.
The TensorFlow data API is highly optimized; it prepares the dataset without wasting any resources.
Note that you can accelerate the TensorFlow training even further by using parallelization in the dynamic_rnn cell; check that out.
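A minimal sketch of what that swap might look like in the Keras create_model() above, assuming a CUDA-capable GPU and a Keras 2.x/TF 1.x-era setup where keras.layers.CuDNNLSTM still exists (later releases removed it in favour of the auto-selecting tf.keras LSTM):
from keras.layers import CuDNNLSTM  # requires a GPU build of TensorFlow/Keras 2.x

def create_model_cudnn():
    lstm_model = Sequential()
    # CuDNNLSTM keeps LSTM's default tanh/sigmoid activations but runs on cuDNN kernels
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True, input_shape=(TIME_STEPS, 1)))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model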

how to make my logistic regression faster

I have to do simple logistic regression (only in NumPy; I can't use PyTorch or TensorFlow).
Data: part of MNIST
Goal: I should reach an accuracy of about 86%.
Unfortunately I only get about 70%, and my loss function oscillates strangely.
Something must be wrong with the functions np_cross_entropy or np_cross_entropy_grad.
Of course I tried changing the learning rate, but without any satisfying results.
Could you help? (Below you have the code and charts.)
I CAN ONLY CHANGE the functions np_linear, np_softmax, np_cross_entropy,
np_cross_entropy_grad (and possibly the forward function in the NumpyLogisticRegression class).
1. Load part of MNIST
# Import MNIST dataset
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline

def load_dataset(dataset_name):
    data = np.load('data/{}/{}.npz'.format(dataset_name.upper(), dataset_name))
    return data['X_train'], data['y_train'], data['X_test'], data['y_test']

X_train, y_train, X_test, y_test = load_dataset('mini_mnist')

f, ax = plt.subplots(1, 10, sharex='col', sharey='row', figsize=(18, 16))
for a in ax:
    a.imshow(X_train[np.random.randint(X_train.shape[0])].reshape(28, 28), cmap='gray')
plt.show()

X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]

print("train data shape: {}, test data shape: {}".format(X_train.shape, X_test.shape))
2. Main class and functions
def np_linear(x, a):
    return np.dot(x, a.transpose())
'''
Calculate l(x;a) in BxK
:param x: Bx(D+1) input data
:param a: Kx(D+1) weight matrix
'''

def np_softmax(l):
    exps = np.exp(l - np.max(l))
    return exps / np.sum(exps)
'''
Calculate p(l) in BxK
:param l: BxK logits
'''

def np_cross_entropy(p, y):
    m = y.shape[0]
    log_likelihood = -np.log(p[range(m), y])
    loss = np.sum(log_likelihood) / m
    return loss
'''
Calculate L(p,y)
:param p: BxK predictions
:param y: B true labels
'''

def np_cross_entropy_grad(p, y, x):
    m = y.shape[0]
    grad = p
    grad[range(m), y] -= 1
    grad = grad / m
    grad = grad.transpose()
    return np.dot(grad, x)
'''
Calculate dL/da in Kx(D+1)
:param p: BxK predictions
:param y: B true labels
:param x: Bx(D+1) input data
'''

class NumpyLogisticRegression:
    def __init__(self, n_classes, n_epochs, input_size, learning_rate=0.1, batch_size=256):
        self.A = np.zeros((n_classes, input_size))
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.input_size = input_size
        self.n_classes = n_classes
        self.n_epochs = n_epochs

    def forward(self, x):
        return np_softmax(np_linear(x, self.A))

    def train(self, X, Y, X_test=None, y_test=None):
        loss, train_accuracy, test_accuracy = [], [], []
        for e in tqdm(range(self.n_epochs)):
            perm = np.random.permutation(len(X))
            X, Y = X[perm], Y[perm]
            for batch in range(len(X) // self.batch_size):
                x = X[batch * self.batch_size:(batch + 1) * self.batch_size]
                y = Y[batch * self.batch_size:(batch + 1) * self.batch_size]
                p = self.forward(x)
                l = np_cross_entropy(p, y)
                loss.append(l)
                train_accuracy.append(self.test(x, y))
                if X_test is not None and y_test is not None:
                    test_accuracy.append(self.test(X_test, y_test))
                grad_A = np_cross_entropy_grad(p, y, x)
                self.A -= grad_A * self.learning_rate
        return loss, train_accuracy, test_accuracy

    def test(self, X, Y):
        p = np.argmax(self.forward(X), axis=1)
        return np.mean(p == Y)
3. Test
clf = NumpyLogisticRegression(n_classes=10, n_epochs=10, input_size=785)
loss, train_accuracy, test_accuracy = clf.train(X_train, y_train, X_test, y_test)
4. Charts (without code, only results)
The problem was in the np_softmax function; it should look like this:
def np_softmax(l):
    exps = np.exp(l - np.max(l))
    return exps / np.sum(exps, axis=1).reshape(-1, 1)
Mine was prepared for a single vector argument; this is the proper version for matrix input.
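A quick way to see the difference, as a minimal sketch: with the corrected version every row of the output sums to 1, whereas the original normalized by the sum over the whole batch.
logits = np.random.randn(4, 10)  # a fake batch of 4 samples, 10 classes
probs = np_softmax(logits)       # corrected version above
print(np.sum(probs, axis=1))     # should print four values, each ~1.0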

TensorFlow Linear Regression gives 'NaN' result

I am currently running the TensorFlow model with Linear Regression. However, I don't understand why, even when I decrease the learning_rate from 0.01 to 0.001 and increase the training iterations from 1000 to 50000, I still obtain the 'nan' result for the cost function, as well as the two coefficients. Could anyone please help me detect the problem in the following code?
from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import random

rng = numpy.random

# Parameters
learning_rate = 0.001
training_epochs = 20000  # number of iterations
display_step = 400

# read csv file
datapath = [directory path]
Ha_Noi = pd.read_csv(datapath + "HaNoi_1month_LW_WeatherTest.csv")

# Add an additional column into the table
sLength = len(Ha_Noi['accept_rate'])
Ha_Noi['accept_rate_timeT'] = pd.Series(Ha_Noi['accept_rate'], index=Ha_Noi.index)

# Shift the entries in the accept_rate column upward
Ha_Noi.accept_rate = Ha_Noi.accept_rate.shift(-1)
Ha_Noi = Ha_Noi.dropna(subset=["longwait_percent4"])
Ha_Noi = Ha_Noi.dropna(subset=["accept_rate"])
Ha_Noi = Ha_Noi.dropna(subset=["longwait_percent2"])
df2 = pd.DataFrame(Ha_Noi)

# split the dataset into training and testing sets
train_set, test_set = train_test_split(Ha_Noi, test_size=0.2, random_state=random.randint(20, 200))
Xtrain = train_set['longwait_percent2'].reshape(-1, 1)
Ytrain = train_set['accept_rate'].reshape(-1, 1)
Xtrain2 = train_set['Weather Weight_Longwait_percent2'].reshape(-1, 1)
Xtest2 = test_set['Weather Weight_Longwait_percent2'].reshape(-1, 1)
# Xtest = test_set['longwait_percent2'].reshape(-1,1)
# Ytest = test_set['accept_rate'].reshape(-1,1)

# Training Data
train_X = Xtrain
train_Y = Ytrain
n_samples = train_X.shape[0]

# Testing Data
Xtest = numpy.asarray(test_set['longwait_percent2'])
Ytest = numpy.asarray(test_set['accept_rate'])

# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")

# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")

# Construct a linear model
pred = tf.add(tf.multiply(X, W), b)

# Root mean squared error
cost = tf.sqrt(tf.reduce_sum(tf.pow(pred - Y, 2)) / (n_samples))

# Gradient descent method
# Note, minimize() knows to modify W and b because Variable objects are trainable (trainable=True by default)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver()  # save all the initialized data

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})

        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:  # checkpoint every display_step epochs
            c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))

    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

    # Graphic display
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()

    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (Xtest.shape[0]),
        feed_dict={X: Xtest, Y: Ytest})  # the expression under the square root of the cost above
    print("Root Mean Square Error =", tf.sqrt(testing_cost))
    print("Absolute mean square loss difference:", abs(training_cost - testing_cost))
    plt.plot(Xtest, Ytest, 'bo', label='Testing data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
I don't have your data, so it's hard to tell whether the problem is caused by the data or by the training setup. You can make the learning rate and the number of training iterations much smaller, for example 0.00005 and 100, to see whether you still get NaN.
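In the code above that diagnostic change would just be, as a minimal sketch (these values are only for probing where the divergence starts, not a tuned configuration):
# Hypothetical diagnostic settings: tiny step size, few iterations
learning_rate = 0.00005
training_epochs = 100
display_step = 10  # print more often so the first NaN (if any) is visible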