Why does the cost in my implementation of a deep neural network increase after a few iterations? - numpy

I am a beginner in machine learning and neural networks. Recently, after watching Andrew Ng's lectures on deep learning, I tried to implement a binary classifier using deep neural networks on my own.
The cost function is expected to decrease after each iteration.
In my program, however, it decreases slightly at the beginning and then increases rapidly. I tried changing the learning rate and the number of iterations, but to no avail. I am very confused.
Here is my code:
1. Neural network classifier class
class NeuralNetwork:
    def __init__(self, X, Y, dimensions, alpha=1.2, iter=3000):
        self.X = X
        self.Y = Y
        self.dimensions = dimensions  # Including input layer and output layer. Let example be dimensions=4
        self.alpha = alpha  # Learning rate
        self.iter = iter  # Number of iterations
        self.length = len(self.dimensions)-1
        self.params = {}  # To store parameters W and b for each layer
        self.cache = {}  # To store cache Z and A for each layer
        self.grads = {}  # To store dA, dZ, dW, db
        self.cost = 1  # Initial value does not matter

    def initialize(self):
        np.random.seed(3)
        # If dimensions is 4, then layer 0 and 3 are input and output layers
        # So we only need to initialize w1, w2 and w3
        # There is no need of w0 for input layer
        for l in range(1, len(self.dimensions)):
            self.params['W'+str(l)] = np.random.randn(self.dimensions[l], self.dimensions[l-1])*0.01
            self.params['b'+str(l)] = np.zeros((self.dimensions[l], 1))

    def forward_propagation(self):
        self.cache['A0'] = self.X
        # For last layer, ie, the output layer 3, we need to activate using sigmoid
        # For layer 1 and 2, we need to use relu
        for l in range(1, len(self.dimensions)-1):
            self.cache['Z'+str(l)] = np.dot(self.params['W'+str(l)], self.cache['A'+str(l-1)]) + self.params['b'+str(l)]
            self.cache['A'+str(l)] = relu(self.cache['Z'+str(l)])
        l = len(self.dimensions)-1
        self.cache['Z'+str(l)] = np.dot(self.params['W'+str(l)], self.cache['A'+str(l-1)]) + self.params['b'+str(l)]
        self.cache['A'+str(l)] = sigmoid(self.cache['Z'+str(l)])

    def compute_cost(self):
        m = self.Y.shape[1]
        A = self.cache['A'+str(len(self.dimensions)-1)]
        self.cost = -1/m*np.sum(np.multiply(self.Y, np.log(A)) + np.multiply(1-self.Y, np.log(1-A)))
        self.cost = np.squeeze(self.cost)

    def backward_propagation(self):
        A = self.cache['A' + str(len(self.dimensions) - 1)]
        m = self.X.shape[1]
        self.grads['dA'+str(len(self.dimensions)-1)] = -(np.divide(self.Y, A) - np.divide(1-self.Y, 1-A))
        # Sigmoid derivative for final layer
        l = len(self.dimensions)-1
        self.grads['dZ' + str(l)] = self.grads['dA' + str(l)] * sigmoid_prime(self.cache['Z' + str(l)])
        self.grads['dW' + str(l)] = 1 / m * np.dot(self.grads['dZ' + str(l)], self.cache['A' + str(l - 1)].T)
        self.grads['db' + str(l)] = 1 / m * np.sum(self.grads['dZ' + str(l)], axis=1, keepdims=True)
        self.grads['dA' + str(l - 1)] = np.dot(self.params['W' + str(l)].T, self.grads['dZ' + str(l)])
        # Relu derivative for previous layers
        for l in range(len(self.dimensions)-2, 0, -1):
            self.grads['dZ'+str(l)] = self.grads['dA'+str(l)] * relu_prime(self.cache['Z'+str(l)])
            self.grads['dW'+str(l)] = 1/m*np.dot(self.grads['dZ'+str(l)], self.cache['A'+str(l-1)].T)
            self.grads['db'+str(l)] = 1/m*np.sum(self.grads['dZ'+str(l)], axis=1, keepdims=True)
            self.grads['dA'+str(l-1)] = np.dot(self.params['W'+str(l)].T, self.grads['dZ'+str(l)])

    def update_parameters(self):
        for l in range(1, len(self.dimensions)):
            self.params['W'+str(l)] = self.params['W'+str(l)] - self.alpha*self.grads['dW'+str(l)]
            self.params['b'+str(l)] = self.params['b'+str(l)] - self.alpha*self.grads['db'+str(l)]

    def train(self):
        np.random.seed(1)
        self.initialize()
        for i in range(self.iter):
            # print(self.params)
            self.forward_propagation()
            self.compute_cost()
            self.backward_propagation()
            self.update_parameters()
            if i % 100 == 0:
                print('Cost after {} iterations is {}'.format(i, self.cost))
2. Testing code for odd or even number classifier
import numpy as np
from main import NeuralNetwork
X = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
Y = np.array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]])
clf = NeuralNetwork(X, Y, [1, 1, 1], alpha=0.003, iter=7000)
clf.train()
3. Helper Code
import math
import numpy as np

def sigmoid_scalar(x):
    return 1/(1+math.exp(-x))

def sigmoid_prime_scalar(x):
    return sigmoid_scalar(x)*(1-sigmoid_scalar(x))

def relu_scalar(x):
    if x > 0:
        return x
    else:
        return 0

def relu_prime_scalar(x):
    if x > 0:
        return 1
    else:
        return 0

sigmoid = np.vectorize(sigmoid_scalar)
sigmoid_prime = np.vectorize(sigmoid_prime_scalar)
relu = np.vectorize(relu_scalar)
relu_prime = np.vectorize(relu_prime_scalar)
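A side note that is not part of the original post: np.vectorize runs a Python-level loop under the hood, so fully vectorized versions of these helpers are usually much faster and can serve as drop-in replacements. A possible sketch:
import numpy as np

# Fully vectorized equivalents of the scalar helpers above (assumed drop-in replacements).
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_prime(x):
    s = sigmoid(x)
    return s * (1.0 - s)

def relu(x):
    return np.maximum(x, 0)

def relu_prime(x):
    return (x > 0).astype(float)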
Output (screenshot of the printed cost values not reproduced here)
I believe your cross-entropy derivative is wrong. Instead of this:
# WRONG!
self.grads['dA'+str(len(self.dimensions)-1)] = -(np.divide(self.Y, A) - np.divide(1-self.Y, A))
... do this:
# CORRECT
self.grads['dA'+str(len(self.dimensions)-1)] = np.divide(A - self.Y, (1 - A) * A)
See these lecture notes for the details. I think you meant formula (5) but forgot the 1-A; in any case, use formula (6).
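A further simplification worth noting (a standard identity, not something the answer above spells out): because the output activation is a sigmoid, sigmoid_prime(Z) equals A*(1-A), so multiplying the corrected dA by it collapses to A - Y. A minimal sketch of what the output-layer step inside backward_propagation could then look like, assuming the NeuralNetwork class from the question:
# Sketch only (assumes the NeuralNetwork class from the question).
# With dA = (A - Y) / (A * (1 - A)) and sigmoid_prime(Z) = A * (1 - A),
# the output-layer dZ reduces to A - Y, avoiding the division entirely.
l = len(self.dimensions) - 1
A = self.cache['A' + str(l)]
self.grads['dZ' + str(l)] = A - self.Y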

Related

Google Colab freezes my browser and pc when trying to reconnect to a notebook

I am training a machine learning model in Google Colab; to be more specific, I am training a GAN with PyTorch Lightning. The problem occurs when I get disconnected from my current runtime due to inactivity. When I try to reconnect, my browser (tried on Firefox and Chrome) first becomes laggy and then freezes; my PC starts to lag so badly that I cannot even close the browser, and it does not recover. I am forced to press the power button of my PC in order to restart it.
I have no clue why this happens.
I tried various batch sizes (including batch size 1), but it still happens. It can't be that my dataset is too big either, since I tried it on a dataset of 10 images for testing purposes.
I hope someone can help me.
Here is my code (to use it you will need a comet.ml account and must enter your comet.ml API key):
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from pytorch_lightning.callbacks import ModelCheckpoint
import pytorch_lightning as pl
from pytorch_lightning import loggers
import numpy as np
from numpy.random import choice
from PIL import Image
import os
from pathlib import Path
import shutil
from collections import OrderedDict
# custom weights initialization called on netG and netD
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

# randomly flip some labels
def noisy_labels(y, p_flip=0.05):  # flip labels with 5% probability
    # determine the number of labels to flip
    n_select = int(p_flip * y.shape[0])
    # choose labels to flip
    flip_ix = choice([i for i in range(y.shape[0])], size=n_select)
    # invert the labels in place
    y[flip_ix] = 1 - y[flip_ix]
    return y

class AddGaussianNoise(object):
    def __init__(self, mean=0.0, std=0.1):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        return tensor + torch.randn(tensor.size()) * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

def get_valid_labels(img):
    return (0.8 - 1.1) * torch.rand(img.shape[0], 1, 1, 1) + 1.1  # soft labels

def get_unvalid_labels(img):
    return noisy_labels((0.0 - 0.3) * torch.rand(img.shape[0], 1, 1, 1) + 0.3)  # soft labels
class Generator(nn.Module):
    def __init__(self, ngf, nc, latent_dim):
        super(Generator, self).__init__()
        self.ngf = ngf
        self.latent_dim = latent_dim
        self.nc = nc
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(latent_dim, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, input):
        return self.main(input)
class Discriminator(nn.Module):
    def __init__(self, ndf, nc):
        super(Discriminator, self).__init__()
        self.nc = nc
        self.ndf = ndf
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)
class DCGAN(pl.LightningModule):
    def __init__(self, hparams, logger, checkpoint_folder, experiment_name):
        super().__init__()
        self.hparams = hparams
        self.logger = logger  # only compatible with comet_logger at the moment
        self.checkpoint_folder = checkpoint_folder
        self.experiment_name = experiment_name
        # networks
        self.generator = Generator(ngf=hparams.ngf, nc=hparams.nc, latent_dim=hparams.latent_dim)
        self.discriminator = Discriminator(ndf=hparams.ndf, nc=hparams.nc)
        self.generator.apply(weights_init)
        self.discriminator.apply(weights_init)
        # cache for generated images
        self.generated_imgs = None
        self.last_imgs = None
        # For experience replay
        self.exp_replay_dis = torch.tensor([])
        # creating checkpoint folder
        dirpath = Path(self.checkpoint_folder)
        if not dirpath.exists():
            os.makedirs(dirpath, 0o755)

    def forward(self, z):
        return self.generator(z)

    def adversarial_loss(self, y_hat, y):
        return F.binary_cross_entropy(y_hat, y)

    def training_step(self, batch, batch_nb, optimizer_idx):
        # For adding Instance noise for more visit: https://www.inference.vc/instance-noise-a-trick-for-stabilising-gan-training/
        std_gaussian = max(0, self.hparams.level_of_noise - ((self.hparams.level_of_noise * 1.5) * (self.current_epoch / self.hparams.epochs)))
        AddGaussianNoiseInst = AddGaussianNoise(std=std_gaussian)  # the noise decays over time
        imgs, _ = batch
        imgs = AddGaussianNoiseInst(imgs)  # Adding instance noise to real images
        self.last_imgs = imgs
        # train generator
        if optimizer_idx == 0:
            # sample noise
            z = torch.randn(imgs.shape[0], self.hparams.latent_dim, 1, 1)
            # generate images
            self.generated_imgs = self(z)
            self.generated_imgs = AddGaussianNoiseInst(self.generated_imgs)  # Adding instance noise to fake images
            # Experience replay
            # for discriminator
            perm = torch.randperm(self.generated_imgs.size(0))  # Shuffeling
            r_idx = perm[:max(1, self.hparams.experience_save_per_batch)]  # Getting the index
            self.exp_replay_dis = torch.cat((self.exp_replay_dis, self.generated_imgs[r_idx]), 0).detach()  # Add our new example to the replay buffer
            # ground truth result (ie: all fake)
            g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs), get_valid_labels(self.generated_imgs))  # adversarial loss is binary cross-entropy
            tqdm_dict = {'g_loss': g_loss}
            log = {'g_loss': g_loss, "std_gaussian": std_gaussian}
            output = OrderedDict({
                'loss': g_loss,
                'progress_bar': tqdm_dict,
                'log': log
            })
            return output
        # train discriminator
        if optimizer_idx == 1:
            # Measure discriminator's ability to classify real from generated samples
            # how well can it label as real?
            real_loss = self.adversarial_loss(self.discriminator(imgs), get_valid_labels(imgs))
            # Experience replay
            if self.exp_replay_dis.size(0) >= self.hparams.experience_batch_size:
                fake_loss = self.adversarial_loss(self.discriminator(self.exp_replay_dis.detach()), get_unvalid_labels(self.exp_replay_dis))  # train on already seen images
                self.exp_replay_dis = torch.tensor([])  # Reset experience replay
                # discriminator loss is the average of these
                d_loss = (real_loss + fake_loss) / 2
                tqdm_dict = {'d_loss': d_loss}
                log = {'d_loss': d_loss, "d_exp_loss": fake_loss, "std_gaussian": std_gaussian}
                output = OrderedDict({
                    'loss': d_loss,
                    'progress_bar': tqdm_dict,
                    'log': log
                })
                return output
            else:
                fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs.detach()), get_unvalid_labels(self.generated_imgs))  # how well can it label as fake?
                # discriminator loss is the average of these
                d_loss = (real_loss + fake_loss) / 2
                tqdm_dict = {'d_loss': d_loss}
                log = {'d_loss': d_loss, "std_gaussian": std_gaussian}
                output = OrderedDict({
                    'loss': d_loss,
                    'progress_bar': tqdm_dict,
                    'log': log
                })
                return output

    def configure_optimizers(self):
        lr = self.hparams.lr
        b1 = self.hparams.b1
        b2 = self.hparams.b2
        opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))
        opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))
        return [opt_g, opt_d], []

    def train_dataloader(self):
        transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
                                        transforms.ToTensor(),
                                        transforms.Normalize([0.5], [0.5])])
        dataset = MNIST(os.getcwd(), train=True, download=True, transform=transform)
        return DataLoader(dataset, batch_size=self.hparams.batch_size)
        # transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
        #                                 transforms.ToTensor(),
        #                                 transforms.Normalize([0.5], [0.5])
        #                                 ])
        # train_dataset = torchvision.datasets.ImageFolder(
        #     root="./drive/My Drive/datasets/ghibli_dataset_small_overfit/",
        #     transform=transform
        # )
        # return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True, batch_size=self.hparams.batch_size)

    def on_epoch_end(self):
        z = torch.randn(4, self.hparams.latent_dim, 1, 1)
        # match gpu device (or keep as cpu)
        if self.on_gpu:
            z = z.cuda(self.last_imgs.device.index)
        # log sampled images
        sample_imgs = self.generator(z)
        sample_imgs = sample_imgs.view(-1, self.hparams.nc, self.hparams.image_size, self.hparams.image_size)
        grid = torchvision.utils.make_grid(sample_imgs, nrow=2)
        self.logger.experiment.log_image(grid.permute(1, 2, 0), f'generated_images_epoch{self.current_epoch}', step=self.current_epoch)
        # save model
        if self.current_epoch % self.hparams.save_model_every_epoch == 0:
            trainer.save_checkpoint(self.checkpoint_folder + "/" + self.experiment_name + "_epoch_" + str(self.current_epoch) + ".ckpt")
            comet_logger.experiment.log_asset_folder(self.checkpoint_folder, step=self.current_epoch)
            # Deleting the folder where we saved the model so that we dont upload a thing twice
            dirpath = Path(self.checkpoint_folder)
            if dirpath.exists() and dirpath.is_dir():
                shutil.rmtree(dirpath)
            # creating checkpoint folder
            access_rights = 0o755
            os.makedirs(dirpath, access_rights)
from argparse import Namespace

args = {
    'batch_size': 48,
    'lr': 0.0002,
    'b1': 0.5,
    'b2': 0.999,
    'latent_dim': 128,  # tested value which worked (in V4_1): 100
    'nc': 1,
    'ndf': 32,
    'ngf': 32,
    'epochs': 10,
    'save_model_every_epoch': 5,
    'image_size': 64,
    'num_workers': 2,
    'level_of_noise': 0.15,
    'experience_save_per_batch': 1,  # this value should be very low; tested value which works: 1
    'experience_batch_size': 50  # this value shouldnt be too high; tested value which works: 50
}
hparams = Namespace(**args)
# Parameters
experiment_name = "DCGAN_V4_2_MNIST"
dataset_name = "MNIST"
checkpoint_folder = "DCGAN/"
tags = ["DCGAN", "MNIST", "OVERFIT", "64x64"]
dirpath = Path(checkpoint_folder)
# init logger
comet_logger = loggers.CometLogger(
    api_key="",
    rest_api_key="",
    project_name="gan",
    experiment_name=experiment_name,
    # experiment_key="f23d00c0fe3448ee884bfbe3fc3923fd"  # used for resuming; trained id can be found in comet.ml
)
#defining net
net = DCGAN(hparams, comet_logger, checkpoint_folder, experiment_name)
#logging
comet_logger.experiment.set_model_graph(str(net))
comet_logger.experiment.add_tags(tags=tags)
comet_logger.experiment.log_dataset_info(dataset_name)
trainer = pl.Trainer(  # resume_from_checkpoint="GHIBLI_DCGAN_OVERFIT_64px_epoch_6000.ckpt",
    logger=comet_logger,
    max_epochs=args["epochs"]
)
trainer.fit(net)
comet_logger.experiment.end()
I fixed it by importing this:
from IPython.display import clear_output
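The answer above only shows the import. For context, a minimal sketch of how clear_output is typically used, on the assumption that the ever-growing cell output is what makes the browser tab unresponsive:
from IPython.display import clear_output

# Hypothetical usage: clear the notebook cell's accumulated output every
# so often so the page does not grow without bound during long training runs.
for step in range(10_000):
    ...  # training / logging work goes here
    if step % 100 == 0:
        clear_output(wait=True)
        print(f"step {step}")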

Deep neural-network with backpropagation implementation does not work - python

I want to implement a multilayer NN with backpropagation. I have been trying for days, but it simply does not work. It is extremely clear in my head how it is supposed to work; I have streamlined my code to be as simple as possible, but I still can't get it right. It's probably something stupid, but I cannot see it.
The implementation has an input layer of 784 (28x28), two (L) hidden layers of 300, and an output of 10 classes. I have a bias in every layer (except the last...).
The output activation is softmax and the hidden activation is ReLU.
I use mini-batches of 600 examples over a dataset of 60k examples, with 50 to 500 epochs.
Here is the core of my code:
Preparation:
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
L = 2
K = len(np.unique(train_labels))
lr = 0.001
nb_epochs = 50
node_per_hidden_layer = 300
nb_batches = 100
W = []
losses_test = []
X_train = np.reshape(train_images, (train_images.shape[0], train_images.shape[1]*train_images.shape[2]))
X_test = np.reshape(test_images, (test_images.shape[0], train_images.shape[1]*train_images.shape[2]))
Y_train = np.zeros((train_labels.shape[0], K))
Y_train[np.arange(Y_train.shape[0]), train_labels] = 1
Y_test = np.zeros((test_labels.shape[0], K))
Y_test[np.arange(Y_test.shape[0]), test_labels] = 1
W.append(np.random.normal(0, 0.01, (X_train.shape[1]+1, node_per_hidden_layer)))
for i in range(L-1):
    W.append(np.random.normal(0, 0.01, (node_per_hidden_layer+1, node_per_hidden_layer)))
W.append(np.random.normal(0, 0.01, (node_per_hidden_layer+1, K)))
Helper functions:
def softmax(z):
    exp = np.exp(z - z.max(1)[:,np.newaxis])
    return np.array(exp / exp.sum(1)[:,np.newaxis])

def softmax_derivative(z):
    sm = softmax(z)
    return sm * (1-sm)

def ReLU(z):
    return np.maximum(z, 0)

def ReLU_derivative(z):
    return (z >= 0).astype(int)

def get_loss(y, y_pred):
    return -np.sum(y * np.log(y_pred))
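As a quick sanity check on these helpers (a hypothetical snippet, not part of the original post): for a softmax output trained with this cross-entropy loss, the gradient of the loss with respect to the logits z simplifies to y_pred - y, which a finite-difference check confirms:
import numpy as np

# Hypothetical check: verify d(cross-entropy)/d(logits) == softmax(z) - y
# for a single example, using the softmax and get_loss helpers above.
np.random.seed(0)
z = np.random.randn(1, 5)           # one example, 5 classes
y = np.zeros((1, 5)); y[0, 2] = 1   # one-hot target

analytic = softmax(z) - y           # closed-form gradient w.r.t. the logits
numeric = np.zeros_like(z)
eps = 1e-6
for j in range(z.shape[1]):
    zp, zm = z.copy(), z.copy()
    zp[0, j] += eps
    zm[0, j] -= eps
    numeric[0, j] = (get_loss(y, softmax(zp)) - get_loss(y, softmax(zm))) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be ~1e-9 or smaller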
Fitting:
def fit():
    minibatch_size = len(X_train) // nb_batches
    for epoch in range(nb_epochs):
        permutaion = list(np.random.permutation(X_train.shape[0]))
        X_shuffle = X_train[permutaion]
        Y_shuffle = Y_train[permutaion]
        print("Epoch----------------", epoch)
        for batche in range(0, X_shuffle.shape[0], minibatch_size):
            Z = [None] * (L + 2)
            a = [None] * (L + 2)
            delta = [None] * (L + 2)
            X = X_train[batche:batche+minibatch_size]
            Y = Y_shuffle[batche:batche+minibatch_size]
            ### forward propagation
            a[0] = np.append(X, np.ones((minibatch_size, 1)), axis=1)
            for i in range(L):
                Z[i + 1] = a[i] @ W[i]
                a[i + 1] = np.append(ReLU(Z[i+1]), np.ones((minibatch_size, 1), dtype=int), axis=1)
            Z[-1] = a[L] @ W[L]
            a[-1] = softmax(Z[-1])
            ### back propagation
            delta[-1] = (Y - a[-1]) * softmax_derivative(Z[-1])
            for i in range(L, 0, -1):
                delta[i] = (delta[i+1] @ W[i].T)[:,:-1] * ReLU_derivative(Z[i])
            for i in range(len(W)):
                g = a[i].T @ delta[i+1] / minibatch_size
                W[i] = W[i] + lr * g
        get_loss_on_test()
Loss:
def get_loss_on_test():
    Z_test = [None] * (L + 2)
    a_test = [None] * (L + 2)
    a_test[0] = np.append(X_test, np.ones((len(X_test), 1)), axis=1)
    for i in range(L):
        Z_test[i + 1] = a_test[i] @ W[i]
        a_test[i + 1] = np.append(ReLU(Z_test[i+1]), np.ones((len(X_test), 1)), axis=1)
    Z_test[-1] = a_test[L] @ W[L]
    a_test[-1] = softmax(Z_test[-1])
    losses_test.append(get_loss(Y_test, a_test[-1]))
Main:
losses_test.clear()
fit()
plt.plot(losses_test)
plt.show()
If you want to see it in my notebook with an example of losses graph, here the link: https://github.com/beurnii/INF8225/blob/master/tp2/jpt.ipynb
If you want more details on my assignment, this is part 1b (page 2 for English):
https://github.com/beurnii/INF8225/blob/master/tp2/INF8225_TP2_2020.pdf

draw a line in tensorflow

I want to create a human pose skeleton estimation network. For this, I have a two-part network: the first part generates 16 heatmaps as output (one heatmap per joint, from which a key point can be extracted). Using these 16 key points, I wish to create a human skeleton and feed it to the second half of my network. My problem is: how do I draw lines between the key points to create the skeleton? I couldn't find a way to do it on a tensor object using TensorFlow or Keras.
I know I'm a bit late, but here is some code that I think does what you're after (in TF v2.3). Hopefully it will save someone time in the future!
It uses solely TensorFlow ops, so you can use it in data loaders etc. The real pain here is that TensorFlow doesn't allow eager assignment, so you can't just update tensors by index. This works around that by creating two sparse tensors, one for the mask (where to apply the line) and another for new_values (what value to apply along the line). The code for actually designing the line might not be applicable in your case (it is based on https://stackoverflow.com/a/47381058, but ported away from numpy).
import tensorflow as tf
def trapez(y, y0, w):
    return tf.clip_by_value(tf.minimum(y + 1 + w/2 - y0, -y + 1 + w/2 + y0), 0, 1)

def apply_output(img, yy, xx, val):
    stack = tf.stack([yy, xx], axis=1)
    stack = tf.cast(stack, tf.int64)
    values = tf.ones(stack.shape[0], tf.float32)
    mask = tf.sparse.SparseTensor(indices=stack, values=values, dense_shape=img.shape)
    mask = tf.sparse.reorder(mask)
    mask = tf.sparse.to_dense(mask)
    mask = tf.cast(mask, tf.float32)
    new_values = tf.sparse.SparseTensor(indices=stack, values=val, dense_shape=img.shape)
    new_values = tf.sparse.reorder(new_values)
    new_values = tf.sparse.to_dense(new_values)
    img = img * (1 - mask) + new_values * mask
    img = tf.cast(tf.expand_dims(img * 255, axis=-1), tf.uint8)
    return img

def weighted_line(img, r0, c0, r1, c1, w):
    output = img
    x = tf.range(c0, c1 + 1, dtype=tf.float32)
    slope = (r1-r0) / (c1-c0)
    w *= tf.sqrt(1 + tf.abs(slope)) / 2
    y = x * slope + (c1*r0-c0*r1) / (c1-c0)
    thickness = tf.math.ceil(w/2)
    yy = (tf.reshape(tf.math.floor(y), [-1, 1]) + tf.reshape(tf.range(-thickness-1, thickness+2), [1, -1]))
    xx = tf.repeat(x, yy.shape[1])
    values = tf.reshape(trapez(yy, tf.reshape(y, [-1, 1]), w), [-1])
    yy = tf.reshape(yy, [-1])
    limits_y = tf.math.logical_and(yy >= 0, yy < img.shape[0])
    limits_x = tf.math.logical_and(xx >= 0, xx < img.shape[1])
    limits = tf.math.logical_and(limits_y, limits_x)
    limits = tf.math.logical_and(limits, values > 0)
    yy = tf.cast(yy[limits], tf.float32)
    xx = tf.cast(xx[limits], tf.float32)
    return yy, xx, values[limits], apply_output(output, yy, xx, values[limits])
Just for a sanity check, you can call it with the following and display the result using OpenCV:
if __name__ == "__main__":
    IMG = tf.zeros((500, 500), tf.float32)
    yy, xx, vals, FINAL_IMG = weighted_line(IMG, 10, 20, 100, 200, 5)
    jpeg_string = tf.io.encode_jpeg(FINAL_IMG)
    tf.io.write_file("output.jpg", jpeg_string)
    import cv2
    img = cv2.imread("output.jpg")
    cv2.imshow("Output", img)
    cv2.waitKey(0)
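For reference, the mask-and-blend idea used in apply_output can be seen in isolation on a tiny tensor (a hypothetical toy example, not part of the original answer): dense versions of the sparse mask and value tensors are built from index/value pairs, and the "write" is done with arithmetic only.
import tensorflow as tf

# Toy illustration of the work-around for "no eager assignment":
# build a dense mask and a dense value tensor from sparse indices,
# then blend them into the image with arithmetic only.
img = tf.zeros((3, 3), tf.float32)
indices = tf.constant([[0, 0], [1, 1], [2, 2]], tf.int64)
vals = tf.constant([1.0, 0.5, 0.25])

mask = tf.sparse.to_dense(tf.sparse.SparseTensor(indices, tf.ones_like(vals), [3, 3]))
new_values = tf.sparse.to_dense(tf.sparse.SparseTensor(indices, vals, [3, 3]))
blended = img * (1 - mask) + new_values * mask
print(blended.numpy())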

Stratify batch in Tensorflow 2

I have minibatches that I get from an SQLite database, with data of integer and float type, x, and a binary label in 0 and 1, y. I am looking for something like X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.1, random_state=1, stratify=y) from scikit-learn, where a keyword stratifies the data (i.e. keeps the same proportion of class-0 and class-1 instances in each split).
In TensorFlow 2, stratification does not seem to be straightforwardly possible. My very complicated solution works for me, but takes a lot of time because of all the reshaping and transposing:
def stratify(x, y):
    # number of positive instances (the smaller class)
    pos = np.sum(y).item()  # how many positive bonds there are
    x = np.transpose(x)
    # number of features
    f = np.shape(x)[1]
    # filter only class 1
    y = tf.transpose(y)
    x_pos = tf.boolean_mask(x, y)
    y_pos = tf.boolean_mask(y, y)
    # filter only class 0
    x_neg = tf.boolean_mask(x, tf.bitwise.invert(y)-254)
    x_neg = tf.reshape(x_neg, [f,-1])
    y_neg = tf.boolean_mask(y, tf.bitwise.invert(y)-254)
    # just take randomly as many class-0 as there are class-1
    x_neg = tf.transpose(tf.random.shuffle(tf.transpose(x_neg)))
    x_neg = x_neg[:,0:pos]
    y_neg = y_neg[0:pos]
    # concat the class-1 and class-0 together, then shuffle, and concat back together
    x = tf.concat([x_pos,tf.transpose(x_neg)],0)
    y = tf.concat([y_pos, tf.transpose(y_neg)],0)
    xy = tf.concat([tf.transpose(x), tf.cast(np.reshape(y,[1, -1]), tf.float64)],0)
    xy = tf.transpose((tf.random.shuffle(tf.transpose(xy))))  # because there is no axis arg in shuffle
    x = xy[0:f,:]
    x = tf.transpose(x)
    y = xy[f,:]
    return x, y
I would be happy to get feedback or improvements on my function, or new, easier ideas.
Data splitting is best done on the raw data, before you transform it into tensors. If there is a strong requirement to do it only in TensorFlow, then I suggest you make use of the tf.data.Dataset class. I have added demo code with relevant comments explaining the steps.
import tensorflow as tf
import numpy as np
TEST_SIZE = 0.1
DATA_SIZE = 1000
# Create data
X_data = np.random.rand(DATA_SIZE, 28, 28, 1)
y_data = np.random.randint(0, 2, [DATA_SIZE])
samples1 = np.sum(y_data)
print('Percentage of 1 = ', samples1 / len(y_data))
# Create TensorFlow dataset
dataset = tf.data.Dataset.from_tensor_slices((X_data, y_data))
# Gather data with 0 and 1 labels separately
class0_dataset = dataset.filter(lambda x, y: y == 0)
class1_dataset = dataset.filter(lambda x, y: y == 1)
# Shuffle them
class0_dataset = class0_dataset.shuffle(DATA_SIZE)
class1_dataset = class1_dataset.shuffle(DATA_SIZE)
# Split them
class0_test_samples_len = int((DATA_SIZE - samples1) * TEST_SIZE)
class0_test = class0_dataset.take(class0_test_samples_len)
class0_train = class0_dataset.skip(class0_test_samples_len)
class1_test_samples_len = int(samples1 * TEST_SIZE)
class1_test = class1_dataset.take(class1_test_samples_len)
class1_train = class1_dataset.skip(class1_test_samples_len)
print('Train Class 0 = ', len(list(class0_train)), ' Class 1 = ', len(list(class1_train)))
print('Test Class 0 = ', len(list(class0_test)), ' Class 1 = ', len(list(class1_test)))
# Gather datasets
train_dataset = class0_train.concatenate(class1_train).shuffle(DATA_SIZE)
test_dataset = class0_test.concatenate(class1_test).shuffle(DATA_SIZE)
print('Train dataset size = ', len(list(train_dataset)))
print('Test dataset size = ', len(list(test_dataset)))
Sample output:
Percentage of 1 = 0.474
Train Class 0 = 474 Class 1 = 427
Test Class 0 = 52 Class 1 = 47
Train dataset size = 901
Test dataset size = 99
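If the goal is stratified batches rather than a single stratified split, an alternative not shown in the answer above is to interleave the two per-class datasets, which in TF 2.x can be done with tf.data.experimental.sample_from_datasets (reusing class0_dataset and class1_dataset from the demo code):
# Hypothetical follow-up: draw from each class with equal probability to get ~50/50 batches.
balanced = tf.data.experimental.sample_from_datasets(
    [class0_dataset.repeat(), class1_dataset.repeat()], weights=[0.5, 0.5])
balanced_batches = balanced.batch(32)  # each batch is roughly class-balanced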

How to extract cell state from a LSTM at each timestep in Keras?

Is there a way in Keras to retrieve the cell state (i.e., the c vector) of an LSTM layer at every timestep of a given input?
It seems the return_state argument returns the last cell state after the computation is done, but I also need the intermediate ones. Also, I don't want to pass these cell states to the next layer, I only want to be able to access them.
Preferably using TensorFlow as the backend.
Thanks
I was looking for a solution to this issue and, after reading the guidance for creating your own custom RNN cell in tf.keras (https://www.tensorflow.org/api_docs/python/tf/keras/layers/AbstractRNNCell), I believe the following is the most concise and easy-to-read way of doing this for TensorFlow 2:
import tensorflow as tf
from tensorflow.keras.layers import LSTMCell
class LSTMCellReturnCellState(LSTMCell):
    def call(self, inputs, states, training=None):
        real_inputs = inputs[:,:self.units]  # decouple [h, c]
        outputs, [h,c] = super().call(real_inputs, states, training=training)
        return tf.concat([h, c], axis=1), [h,c]
num_units = 512
test_input = tf.random.uniform([5,100,num_units])
rnn = tf.keras.layers.RNN(LSTMCellReturnCellState(num_units),
return_sequences=True, return_state=True)
whole_seq_output, final_memory_state, final_carry_state = rnn(test_input)
print(whole_seq_output.shape)
>>> (5,100,1024)
# Hidden state sequence
h_seq = whole_seq_output[:,:,:num_units] # (5,100,512)
# Cell state sequence
c_seq = whole_seq_output[:,:,num_units:] # (5,100,512)
As mentioned in one of the other answers, the advantage of this is that it can easily be wrapped in tf.keras.layers.RNN as a drop-in replacement for the normal LSTMCell.
Here is a Colab Notebook with the code running as expected for tensorflow==2.6.0
I know it's pretty late, but I hope this helps.
What you are asking is technically possible by modifying the LSTM cell's call method. I modified it so that it returns a 4-dimensional output instead of a 3-dimensional one when you pass return_sequences=True.
Code
# additional imports needed by the snippets below
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input, RNN, LSTMCell
from keras.layers.recurrent import _generate_dropout_mask
class Mod_LSTMCELL(LSTMCell):
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=4)
        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask
        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state
        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs
            x_i = K.dot(inputs_i, self.kernel_i)
            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)
            x_o = K.dot(inputs_o, self.kernel_o)
            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)
            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1
            i = self.recurrent_activation(x_i + K.dot(h_tm1_i,
                                                      self.recurrent_kernel_i))
            f = self.recurrent_activation(x_f + K.dot(h_tm1_f,
                                                      self.recurrent_kernel_f))
            c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1_c,
                                                            self.recurrent_kernel_c))
            o = self.recurrent_activation(x_o + K.dot(h_tm1_o,
                                                      self.recurrent_kernel_o))
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            z += K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                z = K.bias_add(z, self.bias)
            z0 = z[:, :self.units]
            z1 = z[:, self.units: 2 * self.units]
            z2 = z[:, 2 * self.units: 3 * self.units]
            z3 = z[:, 3 * self.units:]
            i = self.recurrent_activation(z0)
            f = self.recurrent_activation(z1)
            c = f * c_tm1 + i * self.activation(z2)
            o = self.recurrent_activation(z3)
        h = o * self.activation(c)
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return tf.expand_dims(tf.concat([h,c],axis=0),0), [h, c]
Sample code
# create a cell
test = Mod_LSTMCELL(100)
# Input timesteps=10, features=7
in1 = Input(shape=(10,7))
out1 = RNN(test, return_sequences=True)(in1)
M = Model(inputs=[in1],outputs=[out1])
M.compile(keras.optimizers.Adam(),loss='mse')
ans = M.predict(np.arange(7*10,dtype=np.float32).reshape(1, 10, 7))
print(ans.shape)
# state_h
print(ans[0,0,0,:])
# state_c
print(ans[0,0,1,:])
First, this is not possible to do with tf.keras.layers.LSTM; you have to use LSTMCell instead, or subclass LSTM. Second, there is no need to subclass LSTMCell to get the sequence of cell states: LSTMCell already returns a list of the hidden state (h) and cell state (c) every time you call it.
For those not familiar with LSTMCell, it takes in the current [h, c] tensors and the input at the current timestep (it cannot take in a sequence of timesteps), and returns the activations and the updated [h, c].
Here is an example showing how to use LSTMCell to process a sequence of timesteps and return the accumulated cell states.
import numpy as np
import tensorflow as tf

# example inputs
inputs = tf.convert_to_tensor(np.random.rand(3, 4), dtype='float32')  # 3 timesteps, 4 features
h_c = [tf.zeros((1,2)), tf.zeros((1,2))]  # must initialize hidden/cell state for lstm cell
h_c = tf.convert_to_tensor(h_c, dtype='float32')
lstm = tf.keras.layers.LSTMCell(2)

# example of how you accumulate cell state over repeated calls to LSTMCell
inputs = tf.unstack(inputs, axis=0)
c_states = []
for cur_inputs in inputs:
    out, h_c = lstm(tf.expand_dims(cur_inputs, axis=0), h_c)
    h, c = h_c
    c_states.append(c)
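If a single tensor is more convenient than the Python list, the accumulated cell states can be concatenated afterwards (a small addition to the example above):
c_seq = tf.concat(c_states, axis=0)  # shape (3, 2): one cell state per timestep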
You can access the states of any RNN by setting return_sequences = True in the initializer. You can find more information about this parameter here.
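For completeness, a minimal illustration of those flags on the built-in layer (not from the original answer): return_sequences returns the hidden state h for every timestep, while return_state additionally returns the final h and the final cell state c.
import tensorflow as tf

x = tf.random.uniform((5, 100, 16))  # (batch, timesteps, features)
lstm = tf.keras.layers.LSTM(32, return_sequences=True, return_state=True)
h_seq, final_h, final_c = lstm(x)
print(h_seq.shape, final_h.shape, final_c.shape)  # (5, 100, 32) (5, 32) (5, 32)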