I created the BatchNormalization layer in Keras, PyTorch and calculated the same operation using Numpy but I get three different results. Am I making some error here?
Things I assume below: layer.get_weights() in tf.keras for BN layer returns in order gamma, beta, running_mean, running_var. For the BN operation I am using the following operation: gamma * (x - running_mean) / sqrt(running_var + epsilon) + beta
Code snippet to reproduce the issue:
import torch
import tensorflow
from torch.nn import Module, BatchNorm1d, Conv1d
from torch.nn.functional import pad
import numpy as np
from tensorflow.keras.layers import Conv1D, BatchNormalization, Input
from tensorflow.keras.models import Model
torch.backends.cudnn.deterministic = True
np.random.seed(12345)
z = Input((1024, 8), dtype=np.float32)
inp = z
z = Conv1D(64, 16, padding='same', use_bias=False)(z)
z = BatchNormalization(epsilon=0.001)(z)
keras_model = Model(inp, z)
# in order: conv-layer weight, gamma, beta, running_mean, running_var
weights = [np.random.random((16, 8, 64)), np.random.random((64,)), np.random.random((64,)), np.random.random((64,)),
np.random.random((64,))]
weights = [np.array(x, dtype=np.float32) for x in weights]
keras_model.layers[1].set_weights([weights[0]])
keras_model.layers[2].set_weights(weights[1:])
keras_model_subpart = Model(keras_model.inputs, keras_model.layers[1].output)
class TorchModel(Module):
def __init__(self):
super(TorchModel, self).__init__()
self.l1 = Conv1d(8, 64, 16, bias=False)
self.l2 = BatchNorm1d(64, 0.001)
def forward(self, x):
x = pad(x, (7, 8))
x = self.l1(x)
y = x
x = self.l2(x)
return y, x
torch_model = TorchModel().to(torch.device('cpu'))
torch_model.l1.weight.data = torch.from_numpy(weights[0].T).float()
torch_model.l2.weight.data = torch.from_numpy(weights[1].T).float()
torch_model.l2.bias.data = torch.from_numpy(weights[2]).float()
torch_model.l2.running_mean = torch.from_numpy(weights[3]).float()
torch_model.l2.running_var = torch.from_numpy(weights[4]).float()
torch_model.eval()
input_value = np.array(np.random.random((1024, 8)), dtype=np.float32)
keras_results = [np.array(keras_model_subpart.predict(input_value[np.newaxis, :, :])),
np.array(keras_model.predict(input_value[np.newaxis, :, :]))]
with torch.no_grad():
torch_results = [x.detach().numpy() for x in torch_model(torch.from_numpy(input_value.T[np.newaxis, :, :]).float())]
keras_results = [np.squeeze(x) for x in keras_results]
torch_results = [np.squeeze(x) for x in torch_results]
numpy_results = weights[1] * (keras_results[0] - weights[3]) / np.sqrt(weights[4] + 0.001) + weights[2]
print(torch.__version__, tensorflow.__version__, np.__version__, sep=",")
print('\nRESULTS:')
print('\tLayer 1 difference:', np.mean(np.abs(keras_results[0] - torch_results[0].T).flatten()))
print('\tLayer 2 difference:', np.mean(np.abs(keras_results[1] - torch_results[1].T).flatten()))
print('\tLayer 2 keras - numpy:', np.mean(np.abs(keras_results[1] - numpy_results).flatten()))
print('\tLayer 2 torch - numpy:', np.mean(np.abs(torch_results[1] - numpy_results.T).flatten()))
The output I get (after all the initialization printing of tensorflow)
1.7.1+cu110,2.4.1,1.19.5
RESULTS:
Layer 1 difference: 0.0
Layer 2 difference: 6.8671216e-07
Layer 2 keras - numpy: 2.291581e-06
Layer 2 torch - numpy: 1.8929532e-06
Related
I have a classification model in Keras API of Tensorflow 2. The model has two losses in the example below, categorical cross entropy and KL-divergence. Categorical cross-entropy is being added while initializing the model and hence it can be printed while training. KL-divergence is being added separately using model.add_loss(). However, it does not display during training. Is there a way to print it while training? The sample code is shown below. Since it is on simulated data, loss value might come to be nan.
import tensorflow as tf
keras = tf.keras
import keras.backend as K
import numpy as np
def ohc(y):
y = np.random.randint(4, size=100)
b = np.zeros((y.size, y.max() + 1))
b[np.arange(y.size), y] = 1
return b
xtrain = np.random.rand(100,50)
y = np.random.randint(4, size=100)
y_one_hot = ohc(y)
def my_kld(y_true, y_pred):
y_pred2 = tf.keras.layers.Lambda(lambda x: x + 0.00001)(y_pred)
LR = y_true/y_pred2
logLR = K.log(LR)
kld = y_true*logLR
loss = K.mean(kld)
loss = tf.keras.layers.Lambda(lambda x: x * 0.2)(loss)
return loss
x_t = keras.layers.Input((50,))
y_t = keras.layers.Input((4,))
x1 = keras.layers.Dense(100, activation = 'relu')(x_t)
x2 = keras.layers.Dense(4, activation = 'softmax')(x1)
model = keras.models.Model([x_t,y_t], x2)
model.add_loss(my_kld(y_t, x2))
optim = keras.optimizers.Nadam(0.00006)
model.compile(loss=['categorical_crossentropy'], optimizer=optim, metrics=['accuracy'])
model.fit(x=[xtrain,y_one_hot], y = y_one_hot, epochs = 100)
The code that I have tried has been included in the question in an implementable way. Hwever, there does not seem to be any way to print the loss from model.add_loss().
I have been trying to design neural network which can fit this polynomial function :
y = 2x^2 + 4x^3 + 5
and i did this
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
import tensorflow as tf
from tensorflow import keras
def dataset(show=True):
X = np.arange(-25,25,0.1)
y = 2*X**2 + 4*X**3 + 5 + np.random.randn(500)*1000
if show :
plt.scatter(X,y)
plt.show()
return X,y
X,y = dataset()
X_scaled = X/max(X)
y_scaled = y/max(y)
poly = PolynomialFeatures(degree=4)
X_4 = poly.fit_transform(X_scaled.reshape(-1,1))
model = tf.keras.Sequential([keras.layers.Dense(units=1,input_shape=[5])])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer,loss='mean_squared_error')
tf_history = model.fit(X_4, y_scaled, epochs=200, verbose=True)
mse = tf_history.history['loss'][-1]
y_hat = model.predict(X_4)
The instruction said that use 1 input , 1 output and 1 hidden layer with 3 neurons.
How should i configure those ?
model = tf.keras.Sequential()
# hidden layer with 3 neurons
model.add(tf.keras.layers.Dense(3, activation='relu', input_shape=input_shape))
# output layer
model.add(tf.keras.layers.Dense(1, activation='relu'))
I'm trying to use a distribution from tensorflow_probability to define a custom loss function in Keras. More specifically, I'm trying to build a Mixture Density Network.
My model works on a toy dataset when batch_size = 1 (it learns to predict the correct mixture distribution for y using x). But it "fails" when batch_size > 1 (it predicts the same distribution for all y, ignoring x). This makes me think my problem has to do with batch_shape vs. sample_shape.
To reproduce:
import random
import keras
from keras import backend as K
from keras.layers import Dense, Activation, LSTM, Input, Concatenate, Reshape, concatenate, Flatten, Lambda
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.models import Sequential, Model
import tensorflow
import tensorflow_probability as tfp
tfd = tfp.distributions
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# generate toy dataset
random.seed(12902)
n_obs = 20000
x = np.random.uniform(size=(n_obs, 4))
df = pd.DataFrame(x, columns = ['x_{0}'.format(i) for i in np.arange(4)])
# 2 latent classes, with noisy assignment based on x_0, x_1, (x_2 and x_3 are noise)
df['latent_class'] = 0
df.loc[df.x_0 + df.x_1 + np.random.normal(scale=.05, size=n_obs) > 1, 'latent_class'] = 1
df.latent_class.value_counts()
# Latent class will determines which mixture distribution we draw from
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
components_distribution=tfd.Normal(
loc=[-1., 1], scale=[0.1, 0.5]))
d0_samples = d0.sample(sample_shape=(df.latent_class == 0).sum()).numpy()
d1 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(probs=[0.5, 0.5]),
components_distribution=tfd.Normal(
loc=[-2., 2], scale=[0.2, 0.6]))
d1_samples = d1.sample(sample_shape=(df.latent_class == 1).sum()).numpy()
df.loc[df.latent_class == 0, 'y'] = d0_samples
df.loc[df.latent_class == 1, 'y'] = d1_samples
fig, ax = plt.subplots()
bins = np.linspace(-4, 5, 9*4 + 1)
df.y[df.latent_class == 0].hist(ax=ax, bins=bins, label='Class 0', alpha=.4, density=True)
df.y[df.latent_class == 1].hist(ax=ax, bins=bins, label='Class 1', alpha=.4, density=True)
ax.legend();
# mixture density network
N_COMPONENTS = 2 # number of components in the mixture
input_feature_space = 4
flat_input = Input(shape=(input_feature_space,),
batch_shape=(None, input_feature_space),
name='inputs')
x = Dense(6, activation='relu',
kernel_initializer='glorot_uniform',
bias_initializer='ones')(flat_input)
x = Dense(6, activation='relu',
kernel_initializer='glorot_uniform',
bias_initializer='ones')(x)
# 3 params per component: weight, loc, scale
output = Dense(N_COMPONENTS*3,
kernel_initializer='glorot_uniform',
bias_initializer='ones')(x)
model = Model(inputs=[flat_input],
outputs=[output])
I suspect the problem is in the next 3 functions:
def get_mixture_coef(output, num_components):
"""
Extract mixture params from output
"""
out_pi = output[:, :num_components]
out_sigma = output[:, num_components:2*num_components]
out_mu = output[:, 2*num_components:]
# use softmax to normalize pi into prob distribution
max_pi = K.max(out_pi, axis=1, keepdims=True)
out_pi = out_pi - max_pi
out_pi = K.exp(out_pi)
normalize_pi = 1 / K.sum(out_pi, axis=1, keepdims=True)
out_pi = normalize_pi * out_pi
# use exp to ensure sigma is pos
out_sigma = K.exp(out_sigma)
return out_pi, out_sigma, out_mu
def get_lossfunc(out_pi, out_sigma, out_mu, y):
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(
probs=out_pi),
components_distribution=tfd.Normal(
loc=out_mu, scale=out_sigma,
),
)
# I suspect the problem is here
return -1 * d0.log_prob(y)
def mdn_loss(num_components):
def loss(y_true, y_pred):
out_pi, out_sigma, out_mu = get_mixture_coef(y_pred, num_components)
return get_lossfunc(out_pi, out_sigma, out_mu, y_true)
return loss
opt = Adam(lr=.001)
model.compile(
optimizer=opt,
loss = mdn_loss(N_COMPONENTS),
)
es = EarlyStopping(monitor='val_loss',
min_delta=1e-5,
patience=5,
verbose=1, mode='auto')
validation = .15
validate_idx = np.random.choice(df.index.values,
size=int(validation * df.shape[0]),
replace=False)
train_idx = [i for i in df.index.values if i not in validate_idx]
x_cols = ['x_0', 'x_1', 'x_2', 'x_3']
model.fit(x=df.loc[train_idx, x_cols].values,
y=df.loc[train_idx, 'y'].values[:, np.newaxis],
validation_data=(
df.loc[validate_idx, x_cols].values,
df.loc[validate_idx, 'y'].values[:, np.newaxis]),
# model works when batch_size = 1
# model fails when batch_size > 1
epochs=2, batch_size=1, verbose=1, callbacks=[es])
def sample(output, n_samples, num_components):
"""Sample from a mixture distribution parameterized by
model output."""
pi, sigma, mu = get_mixture_coef(output, num_components)
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(
probs=pi),
components_distribution=tfd.Normal(
loc=mu,
scale=sigma))
return d0.sample(sample_shape=n_samples).numpy()
yhat = model.predict(df.loc[train_idx, x_cols].values)
out_pi, out_sigma, out_mu = get_mixture_coef(yhat, 2)
latent_1_samples = sample(yhat[:1], n_samples=1000, num_components=2)
latent_1_samples = pd.DataFrame({'latent_1_samples': latent_1_samples.ravel()})
fig, ax = plt.subplots()
bins = np.linspace(-4, 5, 9*4 + 1)
latent_1_samples.latent_1_samples.hist(ax=ax, bins=bins, label='Class 1: yHat', alpha=.4, density=True)
df.y[df.latent_class == 0].hist(ax=ax, bins=bins, label='Class 0: True', density=True, histtype='step')
df.y[df.latent_class == 1].hist(ax=ax, bins=bins, label='Class 1: True', density=True, histtype='step')
ax.legend();
Thanks in advance!
Update
I found two ways to solve the problem, guided by this answer. Both solutions point to the fact that Keras is awkwardly broadcasting y to match y_pred:
def get_lossfunc(out_pi, out_sigma, out_mu, y):
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(
probs=out_pi),
components_distribution=tfd.Normal(
loc=out_mu, scale=out_sigma,
),
)
# this also works:
# return -1 * d0.log_prob(tensorflow.transpose(y))
return -1 * d0.log_prob(y[:, 0])
Specifying the workaround here (Answer Section) even though it is specified by Dan in the question, for the benefit of the Community.
The problem of predicting the same distribution for all y, ignoring x can be resolved in two ways.
Code for Solution 1 is mentioned below:
def get_lossfunc(out_pi, out_sigma, out_mu, y):
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(
probs=out_pi),
components_distribution=tfd.Normal(
loc=out_mu, scale=out_sigma,
),
)
return -1 * d0.log_prob(tensorflow.transpose(y))
Code for Solution 2 is mentioned below:
def get_lossfunc(out_pi, out_sigma, out_mu, y):
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(
probs=out_pi),
components_distribution=tfd.Normal(
loc=out_mu, scale=out_sigma,
),
)
return -1 * d0.log_prob(y[:, 0])
Hope this helps. Happy Learning!
I am trying to implement a custom loss function using Tensorflow as the negative loglikelihood of this expression (which is a compound Poisson-Gamma):
The first term (represented by the Dirac delta) refers to the case when z == 0, while the sum (which needs to be truncated at some point in the implementation as it goes to infinity) represents the product of the probability from a Gamma and a Poisson distribution.
This is the tentative implementation in Tensorflow:
import tensorflow as tf
import tensorflow_probability as tfp
from functools import partial
tf.enable_eager_execution()
import numpy as np
def pois_gamma_compound_loss(y_true, y_pred):
lambda_, alpha, beta = y_pred[:, 0], y_pred[:, 1], y_pred[:, 2]
poisson_distr = tfp.distributions.Poisson(rate=lambda_)
ijk_0 = (1.0, tf.zeros_like(y_true))
c = lambda i, p: i < 4
b = lambda i, p: (tf.add(i, 1),
p + tf.math.multiply(x = poisson_distr.prob(tf.zeros_like(y_true) + i),
y = tfp.distributions.Gamma(concentration=tf.math.multiply(x = alpha,
y = tf.zeros_like(y_true) + i),
rate=beta).prob(y_tru)))
ijk_final = tf.while_loop(c, b, ijk_0)
batch_lik = tf.add(ijk_final[1], tf.math.exp(tf.multiply(lambda_, -1.0)))
return -tf.reduce_mean(batch_lik)
inputs = Input(shape=(39,))
x = Dense(4, activation='relu', kernel_initializer='random_uniform')(inputs)
x = Dense(4, activation='relu', kernel_initializer='random_uniform')(inputs)
x = Dense(6, activation='relu', kernel_initializer='random_uniform')(inputs)
lambda_ = Dense(1, activation="softmax", name="lambda", kernel_initializer='random_uniform')(x)
alpha = Dense(1, activation="softmax", name="alpha", kernel_initializer='random_uniform')(x)
beta = Dense(1, activation="softmax", name="beta", kernel_initializer='random_uniform')(x)
output_params = Concatenate(name="pvec", axis=1)([lambda_, alpha, beta])
model = Model(inputs, output_params)
model.compile(loss=pois_gamma_compound_loss, optimizer='adam')
model.fit(X_train, y_train, epochs=60, batch_size=20)
I'm trying to make the following code piece at the end run.
However, i'm getting the following error when i try to fit my model:
"ValueError: setting an array element with a sequence."
I'm trying to use a RNN to predict the next 5 days of prices. So, in the function create_ts I'm trying to create two time series, one with the first X items and another with X+1, X+2, X+3, X+4, and X+5 - these five items being the next five days of prices i'd like to predict.
I suspect the problem is here somewhere:
def create_ts(ds, series, day_gap):
x, y = [], []
for i in range(len(ds) - series - 1):
item = ds[i:(i+series),0]
x.append(item)
next_item = ds[i+series:(i+series+day_gap), 0]
y.append(next_item)
#print(type(np.array(x)), type(np.array(y)))
return np.array(x), np.array(y).reshape(-1,1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4,input_shape = (series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer = 'adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)
Thanks in advance for any help!
Below is the full code piece:
from keras import backend as k
import os
from importlib import reload
def set_keras_backend(backend):
if k.backend() != backend:
os.environ['KERAS_BACKEND'] = backend
reload(k)
assert k.backend() == backend
set_keras_backend("cntk")
import numpy as np
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math
np.random.seed(7)
#load dataset
fileloc = "C:\\Stock Data\\CL1.csv"
stock_data = pd.read_csv(fileloc)
stock_data.head()
stock_data.dtypes
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data['Price'] = pd.to_numeric(stock_data['Price'], downcast = 'float')
stock_data.set_index('Date', inplace=True)
stock_close = stock_data['Price']
stock_close = stock_close.values.reshape(len(stock_close), 1)
plt.plot(stock_close)
#normalize data
scaler = MinMaxScaler(feature_range = (0,1))
stock_close = scaler.fit_transform(stock_close)
#split data into a train, test set
train_size = int(len(stock_close)*0.7)
test_size = len(stock_close) - train_size
stock_train, stock_test = stock_close[0:train_size, :], stock_close[train_size:len(stock_close), :]
#convert the data into a time series looking back over a period fo days
def create_ts(ds, series, day_gap):
x, y = [], []
for i in range(len(ds) - series - 1):
item = ds[i:(i+series),0]
x.append(item)
next_item = ds[i+series:(i+series+day_gap), 0]
y.append(next_item)
#print(type(np.array(x)), type(np.array(y)))
return np.array(x), np.array(y).reshape(-1,1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4,input_shape = (series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer = 'adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)