how to make my logistic regression faster - numpy

I have to do simple logistic regression (only in numpy, I can't use PyTorch or TensorFlow).
Data: part of MNIST
Goal: I should get an accuracy of about 86%.
Unfortunately I only get about 70%, and my loss function oscillates strangely.
Something must be wrong with the functions np_cross_entropy or np_cross_entropy_grad.
Of course I tried to change the learning rate, but without any satisfying results.
Could you help? (Below are the code and charts.)
I CAN CHANGE ONLY the functions: np_linear, np_softmax, np_cross_entropy,
np_cross_entropy_grad (and possibly the forward function in the NumpyLogisticRegression class).
1. Load part of MNIST
# Import MNIST dataset
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline
def load_dataset(dataset_name):
    data = np.load('data/{}/{}.npz'.format(dataset_name.upper(), dataset_name))
    return data['X_train'], data['y_train'], data['X_test'], data['y_test']

X_train, y_train, X_test, y_test = load_dataset('mini_mnist')

f, ax = plt.subplots(1, 10, sharex='col', sharey='row', figsize=(18, 16))
for a in ax:
    a.imshow(X_train[np.random.randint(X_train.shape[0])].reshape(28, 28), cmap='gray')
plt.show()

X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]
print("train data shape: {}, test data shape: {}".format(X_train.shape, X_test.shape))
2. Main class and functions
def np_linear(x, a):
    '''
    Calculate l(x;a) in BxK
    :param x: Bx(D+1) input data
    :param a: Kx(D+1) weight matrix
    '''
    return np.dot(x, a.transpose())

def np_softmax(l):
    '''
    Calculate p(l) in BxK
    :param l: BxK logits
    '''
    exps = np.exp(l - np.max(l))
    return exps / np.sum(exps)

def np_cross_entropy(p, y):
    '''
    Calculate L(p,y)
    :param p: BxK predictions
    :param y: B true labels
    '''
    m = y.shape[0]
    log_likelihood = -np.log(p[range(m), y])
    loss = np.sum(log_likelihood) / m
    return loss

def np_cross_entropy_grad(p, y, x):
    '''
    Calculate dL/da in Kx(D+1)
    :param p: BxK predictions
    :param y: B true labels
    :param x: Bx(D+1) input data
    '''
    m = y.shape[0]
    grad = p
    grad[range(m), y] -= 1
    grad = grad / m
    grad = grad.transpose()
    return np.dot(grad, x)
class NumpyLogisticRegression:
    def __init__(self, n_classes, n_epochs, input_size, learning_rate=0.1, batch_size=256):
        self.A = np.zeros((n_classes, input_size))
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.input_size = input_size
        self.n_classes = n_classes
        self.n_epochs = n_epochs

    def forward(self, x):
        return np_softmax(np_linear(x, self.A))

    def train(self, X, Y, X_test=None, y_test=None):
        loss, train_accuracy, test_accuracy = [], [], []
        for e in tqdm(range(self.n_epochs)):
            perm = np.random.permutation(len(X))
            X, Y = X[perm], Y[perm]
            for batch in range(len(X) // self.batch_size):
                x = X[batch * self.batch_size:(batch + 1) * self.batch_size]
                y = Y[batch * self.batch_size:(batch + 1) * self.batch_size]
                p = self.forward(x)
                l = np_cross_entropy(p, y)
                loss.append(l)
                train_accuracy.append(self.test(x, y))
                if X_test is not None and y_test is not None:
                    test_accuracy.append(self.test(X_test, y_test))
                grad_A = np_cross_entropy_grad(p, y, x)
                self.A -= grad_A * self.learning_rate
        return loss, train_accuracy, test_accuracy

    def test(self, X, Y):
        p = np.argmax(self.forward(X), axis=1)
        return np.mean(p == Y)
3. Test
clf = NumpyLogisticRegression(n_classes=10, n_epochs=10, input_size=785)
loss, train_accuracy, test_accuracy = clf.train(X_train, y_train, X_test, y_test)
4. Charts (without code, only results)

The problem was in the np_softmax function; it should look like this:
def np_softmax(l):
    exps = np.exp(l - np.max(l))
    return exps / np.sum(exps, axis=1).reshape(-1, 1)
My version was prepared for a single vector argument; this is the proper version for matrix input.
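As a side note (my own sketch, not part of the original answer), the stabilizing max can also be taken per row with keepdims, which keeps the BxK shape throughout; a quick check that each row sums to 1:
import numpy as np

def np_softmax_rowwise(l):
    # variant suggestion: subtract the per-row max for numerical stability
    exps = np.exp(l - np.max(l, axis=1, keepdims=True))
    # normalize each row so the probabilities sum to 1
    return exps / np.sum(exps, axis=1, keepdims=True)

p = np_softmax_rowwise(np.random.randn(4, 10))
print(np.allclose(p.sum(axis=1), 1.0))  # expected: True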

Related

Why can't my chemical VAE learn anything with a toy dataset?

I'm trying to implement a mini version of the chemical VAE described in this paper: 10.1021/acscentsci.7b00572. The model can be trained successfully and the loss is changing. However, the predicted properties of all samples are the same, close to the mean value, and the autoencoder cannot reconstruct the input data. This means the model cannot learn anything from training. I have carefully checked my code but failed to find anything wrong. Can anyone help? Thank you.
Here is my code:
import numpy as np
import tensorflow as tf
# example smiles and properties
smiles = ['CCCCO', 'C1CCCCC1', 'C[C@@H](C(=O)O)N', 'C[C@H](C(=O)O)N', 'CC(=O)O'] * 200
y = [1,2,3,4,5] * 200
# smiles to one-hot
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
dicts = set(''.join(smiles))
num_words = len(dicts) + 1
max_lens = 15
tokenizer = Tokenizer(num_words=num_words, char_level=True)
tokenizer.fit_on_texts(smiles)
sequences = tokenizer.texts_to_sequences(smiles)
sequences = pad_sequences(sequences, maxlen = max_lens, padding='post', truncating='post')
x = to_categorical(sequences, num_classes=num_words)
# model
from tensorflow.keras import layers, Model
class VAEWithRegressor(Model):
    """Combines a variational autoencoder with a property regressor."""
    def __init__(self, latent_dim):
        super(VAEWithRegressor, self).__init__()
        # Define the encoder layers
        self.encoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=x[0].shape),
                layers.GRU(units=64, return_sequences=True),
                layers.BatchNormalization(),
                layers.GRU(units=32),
                layers.BatchNormalization(),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(latent_dim * 2),
            ]
        )
        # Define the decoder layers
        self.decoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(units=32),
                layers.BatchNormalization(),
                layers.RepeatVector(max_lens),
                layers.GRU(units=max_lens, return_sequences=True),
                layers.BatchNormalization(),
                layers.TimeDistributed(layers.Dense(units=num_words)),
                layers.Activation('softmax')
            ]
        )
        # Define the regressor layers
        self.regressor = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=32),
                layers.Dense(units=16),
                layers.Dense(units=1),
            ]
        )

    def encode(self, x):
        # Compute the mean and log variance of the latent variable
        h = self.encoder(x)
        mean, log_var = tf.split(h, num_or_size_splits=2, axis=1)
        return mean, log_var

    def reparameterize(self, mean, log_var):
        # Sample from the latent variable distribution
        eps = tf.random.normal(tf.shape(mean))
        std_dev = tf.exp(0.5 * log_var)
        z = mean + std_dev * eps
        return z

    def decode(self, z):
        # Reconstruct the input from the latent variable
        return self.decoder(z)

    def predict_properties(self, z):
        # Predict the properties of the input
        return self.regressor(z)

    def call(self, x):
        # Define the forward pass of the model
        mean, log_var = self.encode(x)
        z = self.reparameterize(mean, log_var)
        x_pred = self.decode(z)
        properties = self.predict_properties(z)
        return x_pred, mean, log_var, properties

    def vae_loss(self, x, x_pred, mean, log_var):
        recon_loss = tf.reduce_sum(tf.keras.losses.binary_crossentropy(x, x_pred), axis=1)
        kl_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=1)
        return tf.reduce_mean(recon_loss + kl_loss)

    def property_loss(self, y_true, y_pred):
        # Compute the mean squared error between the true and predicted properties
        return tf.reduce_mean(tf.keras.losses.mean_squared_error(y_true, y_pred))

    def train_step(self, x, y_true):
        with tf.GradientTape() as tape:
            x_pred, mean, log_var, y_pred = self.call(x)
            vae_loss_value = self.vae_loss(x, x_pred, mean, log_var)
            property_loss_value = self.property_loss(y_true, y_pred)
            total_loss = vae_loss_value + property_loss_value
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        gradients = tape.gradient(total_loss, self.trainable_variables)
        optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return vae_loss_value, property_loss_value
latent_dim = 8
num_epochs = 50
batch_size = 256
vae = VAEWithRegressor(latent_dim)
x_train = x
y_train = y
for epoch in range(num_epochs):
    epoch_vae_loss = 0
    epoch_property_loss = 0
    for i in range(0, len(x_train), batch_size):
        x_batch = x_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]
        vae_loss_value, property_loss_value = vae.train_step(x_batch, y_batch)
        epoch_vae_loss += vae_loss_value
        epoch_property_loss += property_loss_value
    epoch_vae_loss /= (len(x_train) / batch_size)
    epoch_property_loss /= (len(x_train) / batch_size)
    print('Epoch {}, VAE loss: {}, Property loss: {}'.format(epoch + 1, epoch_vae_loss, epoch_property_loss))
z_sample = vae.encoder.predict(x)[:,:latent_dim]
x_pred = np.array(vae.decoder.predict(z_sample))
y_pred = np.array(vae.predict_properties(z_sample))

computing gradients in parallel in Tensorflow

I need to compute gradient of a scalar valued function with vector inputs for various inputs. I am currently doing something like below. But no matter what value I set for parallel_iterations of the tf.while_loop, it is only computing gradients for one input at a time. What am I missing?
import tensorflow as tf
import time
@tf.function
def f(x):
    x_hat = tf.signal.rfft(x)
    x_recon = tf.signal.irfft(x_hat)
    lnp = tf.reduce_sum(x_recon)
    tf.print(lnp)
    return lnp

@tf.function
def ode_fn(x):
    return -x + 1.0

@tf.function
def integrate(x, nsteps, time_step):
    y = tf.TensorArray(dtype=tf.float32, size=nsteps)
    x_next = x
    for i in tf.range(nsteps):
        x_next = x_next + time_step * ode_fn(x_next)
        y = y.write(i, x_next)
    return y.stack()

@tf.function
def compute_grads(x):
    lnpgrad = tf.TensorArray(dtype=tf.float32, size=x.shape[0])

    def cond(i, lnpgrad):
        return tf.less(i, x.shape[0])

    def body(i, lnpgrad):
        x_i = x[i]
        with tf.GradientTape() as tape:
            tape.watch(x_i)
            lnp_i = tf.reduce_sum(integrate(x_i, 20000, 0.1))
            tf.print(lnp_i)
        lnpgrad = lnpgrad.write(i, tape.gradient(lnp_i, x_i))
        return i + 1, lnpgrad

    i = tf.constant(0, dtype=tf.int32)
    i, lnpgrad = tf.while_loop(cond,
                               body,
                               loop_vars=[i, lnpgrad],
                               parallel_iterations=5)
    lnpgrad = lnpgrad.stack()
    return lnpgrad

x = tf.random.normal(shape=[10, 5000],
                     mean=10.0,
                     stddev=1.0,
                     dtype=tf.float32)
start = time.time()
fx_grads = compute_grads(x)
end = time.time()
print(f"Elapsed {end - start} seconds")

performing linear regression using tensorflow 2.0

How to perform linear regression in TensorFlow 2.0? An example or a tutorial link would be appreciated. All the tutorials on YouTube use TensorFlow 1.
Example 1: regression using TensorFlow 2.0.0:
import tensorflow as tf
# tensorflow 2.0.0
class Model:
    def __init__(self):
        self.W = tf.Variable(7.0)  # initial value for model parameter W
        self.b = tf.Variable(0.0)  # initial value for model bias b

    def model(self, x):
        return self.W * x + self.b

    def loss(predicted_label, target_label):
        return tf.reduce_mean(tf.square(predicted_label - target_label))

    def train(self, inputs, outputs, learning_rate):
        with tf.GradientTape() as t:
            current_loss = Model.loss(self.model(inputs), outputs)
        # backpropagation
        dW, db = t.gradient(current_loss, [self.W, self.b])
        self.W.assign_sub(learning_rate * dW)
        self.b.assign_sub(learning_rate * db)
        return current_loss

    def run(self):
        import matplotlib.pyplot as plt
        # Generate train data when true W=2.0 and b=3.0
        TRUE_W = 2.0
        TRUE_b = 3.0
        NUM_INSTANCES = 500  # number of train data points
        inputs = tf.random.normal(shape=[NUM_INSTANCES])
        noise = tf.random.normal(shape=[NUM_INSTANCES])
        outputs = inputs * TRUE_W + TRUE_b + noise
        print("Model before train (red dots):")
        plt.scatter(inputs, outputs, c='b')
        plt.scatter(inputs, self.model(inputs), c='r')
        plt.show()
        epochs = range(50)
        for epoch in epochs:
            current_loss = self.train(inputs, outputs, learning_rate=0.1)
            if epoch % 10 == 0:
                print('Epoch %2d: loss=%2.5f' % (epoch, current_loss))
        print("Model after train (red dots):")
        plt.scatter(inputs, outputs, c='b')
        plt.scatter(inputs, self.model(inputs), c='r')
        plt.show()

ob = Model()
ob.run()
Example 2: regression using TensorFlow 2.0.0 and a Keras optimizer:
import tensorflow as tf
#Tensorflow 2.0.0
class Model:
    def __init__(self):
        self.W = tf.Variable(5.0)
        self.b = tf.Variable(0.0)

    def model(self):
        return self.W * self.inputs + self.b

    def loss(self):
        return tf.reduce_mean(tf.square(self.model() - self.outputs))

    def run(self):
        import matplotlib.pyplot as plt
        # Generate train data when true W=2.0 and b=3.0
        TRUE_W = 2.0
        TRUE_b = 3.0
        NUM_INSTANCES = 500  # number of train data points
        print("Model before train (red dots):")
        self.inputs = tf.random.normal(shape=[NUM_INSTANCES])
        noise = tf.random.normal(shape=[NUM_INSTANCES])
        self.outputs = self.inputs * TRUE_W + TRUE_b + noise
        plt.scatter(self.inputs, self.outputs, c='b')
        plt.scatter(self.inputs, self.model(), c='r')
        plt.show()
        opt = tf.keras.optimizers.Adam(learning_rate=0.1)
        epochs = range(50)
        for epoch in epochs:
            opt.minimize(self.loss, var_list=[self.W, self.b])
            current_loss = self.loss()
            if epoch % 10 == 0:
                print('Epoch %2d: loss=%2.5f' % (epoch, current_loss))
        print("Model after train (red dots):")
        plt.scatter(self.inputs, self.outputs, c='b')
        plt.scatter(self.inputs, self.model(), c='r')
        plt.show()

ob = Model()
ob.run()
Hope this helps.
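A further option (my own sketch, not from the answers above, assuming plain tf.keras is acceptable) is to treat linear regression as a single Dense unit trained with model.fit:
import tensorflow as tf

# Minimal sketch: linear regression as a one-unit Dense layer.
x = tf.reshape(tf.random.normal(shape=[500]), (-1, 1))
y = 2.0 * x + 3.0 + tf.random.normal(shape=[500, 1])

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
model.compile(optimizer=tf.keras.optimizers.Adam(0.1), loss='mse')
model.fit(x, y, epochs=50, verbose=0)

# kernel and bias should end up close to the true W=2.0 and b=3.0
print(model.layers[0].get_weights())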
I have made an example according to this: https://www.geeksforgeeks.org/linear-regression-using-tensorflow/ just in TF2:
import numpy as np
import tensorflow as tf
#tf.enable_v2_behavior()
import matplotlib.pyplot as plt
np.random.seed(101)
tf.random.set_seed(101)  # tf.set_random_seed was the TF1 name

x = np.linspace(0, 50, 50)
y = np.linspace(0, 50, 50)
# Adding noise to the random linear data
x += np.random.uniform(-4, 4, 50)
y += np.random.uniform(-4, 4, 50)
n = len(x)  # Number of data points

plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.title("Training Data")
plt.show()

x = tf.constant(x, dtype=tf.float32)
y = tf.constant(y, dtype=tf.float32)
W = tf.Variable(np.random.randn(), name="W")
b = tf.Variable(np.random.randn(), name="b")
learning_rate = 0.01
training_epochs = 1000

def y_pred(x):
    y_pred = tf.add(tf.multiply(x, W), b)
    return y_pred

# Mean Squared Error Cost Function
def cost():
    cost = tf.reduce_sum(tf.pow(y_pred(x) - y, 2)) / (2 * n)
    return cost

# Adam Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)
pred = y_pred(x)  # run to initialize weight and bias
trainable_vars = [W, b]
for epoch in range(training_epochs):
    optimizer.minimize(cost, trainable_vars)
    pred = y_pred(x)
    if (epoch + 1) % 50 == 0:
        c = cost()
        print("Epoch", (epoch + 1), ": cost =", c.numpy(), "W =", W.numpy(), "b =", b.numpy())
plt.plot(x, y, 'ro', label ='Original data')
plt.plot(x, pred, label ='Fitted line')
plt.title('Linear Regression Result')
plt.legend()
plt.show()
Basically, there are no sessions and it is easier. :) The SGD optimizer was working poorly, so I used Adam.

How to make lstm/rnn focus more on certain parts of time series while less on other parts using tensorflow?

I have a time series prediction problem where most of the observed values (95%) are 0s while the remaining values are non-zero. How can I make use of an RNN for this problem?
I want to predict surface flow from environmental data (air temperature, rainfall, humidity, etc.). We know surface flow is 0.0 for most of the year. However, I also don't want to simply ignore the 0s, as the 0s represent the period of the year when surface flow is 0.0. The image below shows a possible observed output and three inputs. The three inputs here are just random, but in reality they will be data like rainfall, humidity, etc., and these input data have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)

class generate_data(object):

    def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum'):
        self.data_len = data_len
        self.data = None
        self.in_series = in_series  # number of input series
        self.y_pred = y_pred  # number of final outputs from model
        self.seq_lengths = seq_lengths
        self.method = method

    def _f(self, x):
        y = 0
        result = []
        for _ in x:
            result.append(y)
            y += np.random.normal(scale=1)
        return np.array(result)

    def _runningMean(self, x, N):
        return np.convolve(x, np.ones((N,)) / N)[(N - 1):]

    def sine(self):
        DATA = np.zeros((self.data_len, self.in_series))
        xx = [None]
        data_0 = np.sin(np.linspace(0, 20, self.data_len * self.in_series))
        xx = data_0.reshape(self.data_len, self.in_series)
        DATA[:, 0:self.in_series] = xx
        y = self._get_y(DATA)
        return xx, y, DATA

    def _get_y(self, xx):
        if self.method == 'sum':
            yy = np.array([np.sum(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'self_mul':
            yy = np.array([np.prod(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean_mirror':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        return yy

    def normalize(self, xx1, yy1):
        yy = [None] * len(yy1)
        YMinMax = {}
        xx = MinMaxScaler(xx1)
        for i in range(self.y_pred):
            YMinMax['ymin_' + str(i)] = np.min(yy1[0])
            YMinMax['ymax_' + str(i)] = np.max(yy1[0])
            yy[i] = MinMaxScaler(yy1[0])
        setattr(self, 'YMinMax', YMinMax)
        return xx, yy

    def create_dataset(self, xx, yy, percent_of_zeros):
        '''creates a dataset consisting of windows for x and y data'''
        dataX = self._build_input_windows(xx, self.seq_lengths)
        if self.y_pred > 1:
            pass
        elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
            pass
        else:
            dataY = self._build_y_windows(yy[0], self.seq_lengths)
        indices = np.random.choice(np.arange(dataY.size), replace=False,
                                   size=int(dataY.size * percent_of_zeros))
        dataY[indices] = 0
        return dataX, dataY

    def _build_input_windows(self, time_series, seq_length):
        dataX = []
        for i in range(0, len(time_series) - seq_length):
            _x = time_series[i:i + seq_length, :]
            dataX.append(_x)
        return np.array(dataX)

    def _build_y_windows(self, iny, seq_length):
        dataY = []
        for i in range(0, len(iny) - seq_length):
            _y = iny[i + seq_length, ]  # Next close price
            dataY.append(_y)
        return np.array(dataY)

    def TrainTestSplit(self, dataX, dataY, train_frac):
        train_size = int(len(dataY) * train_frac)
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        trainY = trainY.reshape(len(trainY), 1)
        testY = testY.reshape(len(testY), 1)
        return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 1005 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()
# build neural network
with tf.variable_scope('scope0'):  # defining RNN
    # cell = tf.contrib.rnn.BasicLSTMCell(num_units=7, state_is_tuple=True, activation=tf.tanh)
    cell = tf.keras.layers.LSTMCell(units=128)
    outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
    # Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
    Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:, -1])
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=41)
    writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)
    # Training step
    for epoch in range(tot_epochs):
        total_batches = int(train_size / batch_size)  # total batches / no. of steps in an epoch
        # for batch in range(total_batches):
        _, step_loss = sess.run([train, loss], feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
        print('epoch: # {} loss: {}'.format(epoch, step_loss))
    # evaluating on test data
    test_predict = sess.run(Y_pred, feed_dict={x_placeholders: testX, Y: trainY, keep_prob: 0.5})
    # evaluating on training data
    train_predict = sess.run(Y_pred, feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
    rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))
# Plot predictions
fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k',label= 'predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
The problem is that while training, the model focuses on the majority of values, i.e. the 0s, and thus makes predictions equal to 0. How can I make the model focus on the non-zero values (positive surface flow) while at the same time also considering the 0s (when there is no surface flow)? I have read about attention mechanisms but have not understood how I can implement them in such scenarios.
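One hedged option, not from the original post and independent of attention, is simply to weight the squared error so the rare non-zero targets count more. A sketch against the TF1 graph above (nonzero_weight is a hypothetical value to tune; Y, Y_pred and learning_rate are reused from the code above):
# up-weight non-zero targets so the loss is not dominated by the many zeros
nonzero_weight = 10.0  # hypothetical weight for non-zero targets, to be tuned
sample_weights = tf.where(tf.equal(Y, 0.0),
                          tf.ones_like(Y),                    # weight 1 where the target is 0
                          tf.ones_like(Y) * nonzero_weight)   # larger weight where flow is non-zero
weighted_loss = tf.reduce_sum(sample_weights * tf.square(Y_pred - Y))
train = tf.train.AdamOptimizer(learning_rate).minimize(weighted_loss)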

Cost-sensitive loss function in Tensorflow

I'm doing research on cost-sensitive neural networks based on TensorFlow, but because of the static graph structure of TensorFlow I couldn't realize some NN structures myself.
My loss function (cost), cost matrix, and the computational process are described below, and my target is to compute the total cost and then optimize the NN:
Approximate computational process:
y_ is the last fully-connected output of a CNN and has shape (1024, 5)
y is a Tensor of shape (1024) that indicates the ground truth of x[i]
y_soft[i][j] indicates the probability of x[i] being class j
How can I realize this in TensorFlow?
cost_matrix:
[[0, 1, 100],
 [1, 0, 1],
 [1, 20, 0]]
label:
[1, 2]
y*:
[[0, 1, 0],
 [0, 0, 1]]
y (prediction):
[[0.2, 0.3, 0.5],
 [0.1, 0.2, 0.7]]
label, cost_matrix --> cost_embedding:
[[1, 0, 1],
 [1, 20, 0]]
Obviously the 0.3 in [0.2, 0.3, 0.5] refers to the probability of the correct label [0, 1, 0], so it should not contribute to the loss. The 0.7 in [0.1, 0.2, 0.7] is the same. In other words, the positions with value 1 in y* do not contribute to the loss.
So I have (1 - y*):
[[1, 0, 1],
 [1, 1, 0]]
The cross entropy is target * log(predict) + (1 - target) * log(1 - predict); the positions with value 0 in y* use the (1 - target) * log(1 - predict) term, so I use (1 - predict), written as (1 - y).
1 - y:
[[0.8, 0.7, 0.5],
 [0.9, 0.8, 0.3]]
(the 0.7 and 0.3 here correspond to the correct labels and are not used)
The custom loss is
[[1, 0, 1], [1, 20, 0]] * log([[0.8, 0.7, 0.5], [0.9, 0.8, 0.3]]) * [[1, 0, 1], [1, 1, 0]]
and you can see that the (1 - y*) factor can be dropped here, since cost_embedding already has zeros at the correct-label positions,
so the loss is -tf.reduce_mean(cost_embedding * log(1 - y)).
To make it numerically safe, it should be:
-tf.reduce_mean(cost_embedding * log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
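As a quick sanity check (my own addition, not part of the original answer), the worked example above can be reproduced in plain NumPy:
import numpy as np

cost_matrix = np.array([[0, 1, 100],
                        [1, 0, 1],
                        [1, 20, 0]], dtype=float)
labels = np.array([1, 2])            # ground-truth classes from the example
y = np.array([[0.2, 0.3, 0.5],
              [0.1, 0.2, 0.7]])      # predicted probabilities from the example

cost_embedding = cost_matrix[labels]  # rows picked by label: [[1, 0, 1], [1, 20, 0]]
loss = -np.mean(cost_embedding * np.log(np.clip(1 - y, 1e-10, 1.0)))
print(cost_embedding)
print(loss)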
the demo is below
import tensorflow as tf
import numpy as np
hidden_units = 50
num_class = 3
class Model():
    def __init__(self, name_scope, is_custom):
        self.name_scope = name_scope
        self.is_custom = is_custom
        self.input_x = tf.placeholder(tf.float32, [None, hidden_units])
        self.input_y = tf.placeholder(tf.int32, [None])
        self.instantiate_weights()
        self.logits = self.inference()
        self.predictions = tf.argmax(self.logits, axis=1)
        self.losses, self.train_op = self.optimizer()

    def instantiate_weights(self):
        with tf.variable_scope(self.name_scope + 'FC'):
            self.W = tf.get_variable('W', [hidden_units, num_class])
            self.b = tf.get_variable('b', [num_class])
            self.cost_matrix = tf.constant(
                np.array([[0, 1, 100], [1, 0, 100], [20, 5, 0]]),
                dtype=tf.float32
            )

    def inference(self):
        return tf.matmul(self.input_x, self.W) + self.b

    def optimizer(self):
        if not self.is_custom:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
        else:
            batch_cost_matrix = tf.nn.embedding_lookup(
                self.cost_matrix, self.input_y
            )
            loss = - tf.log(1 - tf.nn.softmax(self.logits)) * batch_cost_matrix
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return loss, train_op
import random
batch_size = 128
norm_model = Model('norm',False)
custom_model = Model('cost',True)
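# datasets, lables and dataset_size are assumed to be defined elsewhere (not shown in the post)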
split_point = int(0.9 * dataset_size)
train_set = datasets[:split_point]
test_set = datasets[split_point:]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        batch_index = random.sample(range(split_point), batch_size)
        train_batch = train_set[batch_index]
        train_labels = lables[batch_index]
        _, eval_predict, eval_loss = sess.run(
            [norm_model.train_op, norm_model.predictions, norm_model.losses],
            feed_dict={
                norm_model.input_x: train_batch,
                norm_model.input_y: train_labels
            })
        _, eval_predict1, eval_loss1 = sess.run(
            [custom_model.train_op, custom_model.predictions, custom_model.losses],
            feed_dict={
                custom_model.input_x: train_batch,
                custom_model.input_y: train_labels
            })
        # print('norm', eval_predict, '\ncustom', eval_predict1)
        print(np.sum(((eval_predict == train_labels) == True).astype(np.int)),
              np.sum(((eval_predict1 == train_labels) == True).astype(np.int)))
        if i % 10 == 0:
            print('norm_test', sess.run(norm_model.predictions,
                                        feed_dict={
                                            norm_model.input_x: test_set,
                                            norm_model.input_y: lables[split_point:]
                                        }))
            print('custom_test', sess.run(custom_model.predictions,
                                          feed_dict={
                                              custom_model.input_x: test_set,
                                              custom_model.input_y: lables[split_point:]
                                          }))