Why arbitrarily define a tf.constant (not using it anywhere) in a project which built by tf1 will cause network performance degradation? - tensorflow

Why does pinn's Burgers identification code (which can be understood as a TF1 neural network) define a tf.constant whether in the network class, in the main function or the outermost (not using it in any functions) will cause network performance degradation?
Before defining it, my network has better prediction accuracy and convergence time.
Here is the code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
from scipy.interpolate import griddata
from plotting import newfig, savefig
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.gridspec as gridspec
import time
class PhysicsInformedNN:
# Initialize the class
def __init__(self, X, u, layers, lb, ub):
self.lb = lb
self.ub = ub
self.x = X[:,0:1]
self.t = X[:,1:2]
self.u = u
self.layers = layers
# Initialize NNs
self.weights, self.biases = self.initialize_NN(layers)
# tf placeholders and graph
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
# Initialize parameters
self.lambda_1 = tf.Variable([0.0], dtype=tf.float32)
self.lambda_2 = tf.Variable([-6.0], dtype=tf.float32)
self.x_tf = tf.placeholder(tf.float32, shape=[None, self.x.shape[1]])
self.t_tf = tf.placeholder(tf.float32, shape=[None, self.t.shape[1]])
self.u_tf = tf.placeholder(tf.float32, shape=[None, self.u.shape[1]])
self.u_pred = self.net_u(self.x_tf, self.t_tf)
self.f_pred = self.net_f(self.x_tf, self.t_tf)
self.loss = tf.reduce_mean(tf.square(self.u_tf - self.u_pred)) + \
self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.loss,
method = 'L-BFGS-B',
options = {'maxiter': 50000,
'maxfun': 50000,
'maxcor': 50,
'maxls': 50,
'ftol' : 1.0 * np.finfo(float).eps})
self.optimizer_Adam = tf.train.AdamOptimizer()
self.train_op_Adam = self.optimizer_Adam.minimize(self.loss)
init = tf.global_variables_initializer()
def initialize_NN(self, layers):
weights = []
biases = []
num_layers = len(layers)
for l in range(0,num_layers-1):
W = self.xavier_init(size=[layers[l], layers[l+1]])
b = tf.Variable(tf.zeros([1,layers[l+1]], dtype=tf.float32), dtype=tf.float32)
return weights, biases
def xavier_init(self, size):
in_dim = size[0]
out_dim = size[1]
xavier_stddev = np.sqrt(2/(in_dim + out_dim))
return tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)
def neural_net(self, X, weights, biases):
num_layers = len(weights) + 1
H = 2.0*(X - self.lb)/(self.ub - self.lb) - 1.0
for l in range(0,num_layers-2):
W = weights[l]
b = biases[l]
H = tf.tanh(tf.add(tf.matmul(H, W), b))
W = weights[-1]
b = biases[-1]
Y = tf.add(tf.matmul(H, W), b)
return Y
def net_u(self, x, t):
u = self.neural_net(tf.concat([x,t],1), self.weights, self.biases)
return u
def net_f(self, x, t):
lambda_1 = self.lambda_1
lambda_2 = tf.exp(self.lambda_2)
u = self.net_u(x,t)
u_t = tf.gradients(u, t)[0]
u_x = tf.gradients(u, x)[0]
u_xx = tf.gradients(u_x, x)[0]
f = u_t + lambda_1*u*u_x - lambda_2*u_xx
return f
def callback(self, loss, lambda_1, lambda_2):
print('Loss: %e, l1: %.5f, l2: %.5f' % (loss, lambda_1, np.exp(lambda_2)))
def train(self, nIter):
tf_dict = {self.x_tf: self.x, self.t_tf: self.t, self.u_tf: self.u}
start_time = time.time()
for it in range(nIter):
self.sess.run(self.train_op_Adam, tf_dict)
# Print
if it % 10 == 0:
elapsed = time.time() - start_time
loss_value = self.sess.run(self.loss, tf_dict)
lambda_1_value = self.sess.run(self.lambda_1)#run的参数为返回值fetches
lambda_2_value = np.exp(self.sess.run(self.lambda_2))
print('It: %d, Loss: %.3e, Lambda_1: %.3f, Lambda_2: %.6f, Time: %.2f' %
(it, loss_value, lambda_1_value, lambda_2_value, elapsed))
start_time = time.time()
feed_dict = tf_dict,
fetches = [self.loss, self.lambda_1, self.lambda_2],
loss_callback = self.callback)
def predict(self, X_star):
tf_dict = {self.x_tf: X_star[:,0:1], self.t_tf: X_star[:,1:2]}
u_star = self.sess.run(self.u_pred, tf_dict)
f_star = self.sess.run(self.f_pred, tf_dict)
return u_star, f_star
if __name__ == "__main__":
nu = 0.01/np.pi
N_u = 2000
layers = [2, 20, 20, 20, 20, 20, 20, 20, 20, 1]
data = scipy.io.loadmat('../Data/burgers_shock.mat')
t = data['t'].flatten()[:,None]
x = data['x'].flatten()[:,None]
Exact = np.real(data['usol']).T
X, T = np.meshgrid(x,t)
X_star = np.hstack((X.flatten()[:,None], T.flatten()[:,None]))
u_star = Exact.flatten()[:,None]
# Doman bounds
lb = X_star.min(0)
ub = X_star.max(0)
######################## Noiseles Data ###############################
noise = 0.0
idx = np.random.choice(X_star.shape[0], N_u, replace=False)
X_u_train = X_star[idx,:]
u_train = u_star[idx,:]
model = PhysicsInformedNN(X_u_train, u_train, layers, lb, ub)
u_pred, f_pred = model.predict(X_star)
error_u = np.linalg.norm(u_star-u_pred,2)/np.linalg.norm(u_star,2)
#U_pred = griddata(X_star, u_pred.flatten(), (X, T), method='cubic')
lambda_1_value = model.sess.run(model.lambda_1)
lambda_2_value = model.sess.run(model.lambda_2)
lambda_2_value = np.exp(lambda_2_value)
error_lambda_1 = np.abs(lambda_1_value - 1.0)*100
error_lambda_2 = np.abs(lambda_2_value - nu)/nu * 100
print('Error u: %e' % (error_u))
print('Error l1: %.5f%%' % (error_lambda_1))
print('Error l2: %.5f%%' % (error_lambda_2))


Error in defining custom layers using subclasses in keras

I am getting an error in defining layers to my model. The input for self.layer[i] is not working when I'm implementing X = self.layers[i](A, A)inside the code. But the layer[i] which is made from GTLayer, works well for when I call it separetly, A = tf.random.uniform(shape=[2,2]) d = GTLayer(2,2,1) d.call(A,A)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class GTN(keras.Model): # layers.Layer keeps track of everything under the hood!
def __init__(self, num_edge, num_channels, w_in, w_out, num_class,num_layers,norm):
super(GTN, self).__init__()
self.num_layers = 3
self.num_edge = 6
self.num_channels = 3
self.w_in = 2#tf.random.uniform(shape=[2,2])
self.w_out = 2#tf.random.uniform(shape=[2,2])
self.num_class =2
layers = []
for i in tf.range(num_layers):
print (4)
if i == 0:
layers.append(GTLayer(num_edge, num_channels, first=True))
layers.append(GTLayer(num_edge, num_channels, first=False))
#print ((self.w_out*self.num_channels, ))
self.linear1 = tf.keras.layers.Dense( self.w_out, input_shape =(self.w_out*self.num_channels, ), activation= None)
self.linear2 = tf.keras.layers.Dense( self.num_class, input_shape=(self.w_out, ), activation= None)
def call(self, A, X, target_x, target):
#A = tf.expand_dims(A, 0)
Ws = []
for i in range(self.num_layers):
if i == 0:
print('layers[i](A):', self.layers[i](A))
H, W = self.layers[i](A) #self.layers = nn.ModuleList(layers)
#H = self.normalization(H)
print('A', A)
X = self.layers[i](A, A)
H, W = self.layers[i](A, H)
for i in range(self.num_channels):
if i==0:
X_ = tf.nn.relu(self.gcn_conv(X,H[i])).numpy()
X_tmp = tf.nn.relu(self.gcn_conv(X,H[i])).numpy()
X_ = tf.concat((X_,X_tmp), dim=1)
X_ = self.linear1(X_)
X_ = tf.nn.relu(X_).numpy()
y = self.linear2(X_[target_x])
loss = self.loss(y, target)
return loss, y, Ws
class GTLayer(keras.layers.Layer):
def __init__(self, in_channels, out_channels, first=True):
super(GTLayer, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.first = first
def call(self, A, H_ = None):
if self.first == True:
a = tf.random.uniform(shape=[2,2])
b = tf.random.uniform(shape=[2,2])
#H = torch.bmm(np.array(a),np.array(b))
H = tf.matmul( a, b)
a = tf.random.uniform(shape=[2])
H = tf.random.uniform(shape=[2])
return H,W
Input used for check:
d = GTN(2,3,4,2,2,2,1)
A = tf.random.uniform(shape=[2,2])
X = tf.random.uniform(shape=[2,2])
t_x = 5
t = 4
---> 47 X = self.layers[i](A, A)
TypeError: call() takes 2 positional arguments but 3 were given

scipy.optimize.minimize: l2 norm constraints in matrix rows

I am interested to apply l2 norm constraint in each row of the parameters matrix in scipy.optimize.minimize. What I have tried so far is
def l2_const(x):
x = x.reshape(r, c)
b = np.sqrt((x**2).sum(axis=1)) - 1
return np.broadcast_to(b[:, None], (r, c)).flatten()
x0 = np.random.random((r, c))
const = ({'type': 'eq', 'fun': l2_const},)
f_min = minimize(fun=cost, x0=x0, method='SLSQP', jac=gradient, constraints=const)
but the computed parameters f_min.x are all zeros. Does anyone know how to implement correctly this type of constraints?
EDIT 1: An example to apply this type of constraints can be found in my answer of my previous post.
EDIT 2: Below you can find a complete working example. The results are very low when the constrains are used. Any suggestions are welcome.
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
def __init__(self, reltol=1e-8, maxit=1000, opt_method=None, verbose=True, seed=0):
self.maxit = maxit
self.reltol = reltol
self.seed = seed
self.verbose = verbose
self.opt_method = opt_method
self.lbin = preprocessing.LabelBinarizer()
def w_2d(self, w, n_classes):
return np.reshape(w, (n_classes, -1))
def softmax(self, W, X):
a = np.exp(X # W.T)
o = a / np.sum(a, axis=1, keepdims=True)
return o
def squared_norm(self, x):
x = np.ravel(x, order='K')
return np.dot(x, x)
def cost(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
log_O = np.log(self.softmax(W, X))
c = -(T * log_O).sum()
return c / n_samples
def gradient(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
O = self.softmax(W, X)
grad = -(T - O).T.dot(X)
return grad.ravel() / n_samples
def l1_constraint(self, x, n_classes, n_features):
x = x.reshape(n_classes, -1)
b = x.sum(axis=1) - 1
return np.broadcast_to(b[:, None], (n_classes, n_features)).flatten()
def fit(self, X, y=None):
n_classes = len(np.unique(y))
n_samples, n_features = X.shape
if n_classes == 2:
T = np.zeros((n_samples, n_classes), dtype=np.float64)
for i, cls in enumerate(np.unique(y)):
T[y == cls, i] = 1
T = self.lbin.fit_transform(y)
W_0 = np.random.random((n_classes, n_features))
const = ({'type': 'eq', 'fun': self.l1_constraint, 'args': (n_classes, n_features,)},)
options = {'disp': self.verbose, 'maxiter': self.maxit}
f_min = minimize(fun=self.cost, x0=W_0,
args=(X, T, n_samples, n_classes),
self.coef_ = self.w_2d(f_min.x, n_classes)
self.W_ = self.coef_
return self
def predict_proba(self, X):
O = self.softmax(self.W_, X)
return O
def predict(self, X):
sigma = self.predict_proba(X)
y_pred = np.argmax(sigma, axis=1)
return y_pred
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from myLR import myLR
iris = datasets.load_iris()
X = iris.data[:, 0:2]
y = iris.target
par_dict2 = {'reltol': 1e-6,
'maxit': 20000,
'verbose': 20,
'seed': 0}
# Create different classifiers.
classifiers = {
'myLR': myLR(**par_dict2),
n_classifiers = len(classifiers)
plt.figure(figsize=(3 * 2, n_classifiers * 2))
plt.subplots_adjust(bottom=.2, top=.95)
xx = np.linspace(3, 9, 100)
yy = np.linspace(1, 5, 100).T
xx, yy = np.meshgrid(xx, yy)
Xfull = np.c_[xx.ravel(), yy.ravel()]
for index, (name, classifier) in enumerate(classifiers.items()):
classifier.fit(X, y)
coef_ = classifier.coef_
print(np.linalg.norm(coef_, axis=1))
y_pred = classifier.predict(X)
accuracy = accuracy_score(y, y_pred)
print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100))
# View probabilities:
probas = classifier.predict_proba(Xfull)
n_classes = np.unique(y_pred).size
for k in range(n_classes):
plt.subplot(n_classifiers, n_classes, index * n_classes + k + 1)
plt.title("Class %d" % k)
if k == 0:
imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),
extent=(3, 9, 1, 5), origin='lower')
idx = (y_pred == k)
if idx.any():
plt.scatter(X[idx, 0], X[idx, 1], marker='o', c='w', edgecolor='k')
ax = plt.axes([0.15, 0.04, 0.7, 0.05])
plt.colorbar(imshow_handle, cax=ax, orientation='horizontal')
EDIT 3: I replaced the constraints, with
def l1_constraint(self, x, n_classes, n_features):
x = x.reshape(n_classes, -1)
b = x.sum(axis=1) - 1
return b
It produces better results. However, the computed components x1 and x2 do not sum to 1? Is that fine?

TensorFlow training with large dataset takes too long

Yesterday, I have created a pretrained VGG19 with custom head and tried to train it with 60000 images. After more than 12 hours, the training of first epoch didn't complete.
The batch size has been set to 64 and the number of steps per epoch has been set to training_set_size/batch_size.
Below is the code of DataLoader:
def crop(image, margin):
return image[margin:-margin, margin:-margin]
def random_rotation(image, angle):
M = cv2.getRotationMatrix2D((0, 0),angle,1)
rows,cols, _ = image.shape
new_img = cv2.warpAffine(image, M, (cols, rows))
return new_img
def get_generator(in_gen, should_augment=True):
weights = None
if should_augment:
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(fill_mode='reflect',
brightness_range=[0.5, 1.5])
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(fill_mode='reflect',
brightness_range=[1, 1])
for items in in_gen:
in_x, in_y = items
g_x = image_gen.flow(255 * in_x, in_y, batch_size=in_x.shape[0])
x, y = next(g_x)
yield x / 255.0, y
class DataLoader:
def __init__(self, source_filename, dataset_path, image_size, batch_size, training_set_size=0.8, sample_size=None):
path_dataset = Path(dataset_path)
path_image_folders = path_dataset / 'images'
self.data = pd.read_pickle(source_filename)
if sample_size is not None:
self.data = self.data[:sample_size]
self.image_size = image_size
self.batch_size = batch_size
self.training_set_size = training_set_size
self.steps_per_epoch = int(self.data.shape[0] * training_set_size // batch_size)
if self.steps_per_epoch == 0: self.steps_per_epoch = 1
self.validation_steps = int(self.data.shape[0] * (1 - training_set_size)//batch_size)
if self.validation_steps == 0: self.validation_steps = 1
def draw_idx(self, i):
img_path = self.data.iloc[i].image
img = tf.keras.preprocessing.image.img_to_array(tf.keras.preprocessing.image.load_img(str(img_path)))
# print(img.shape)
height, width, _ = img.shape
fig = plt.figure(figsize=(15, 15), facecolor='w')
# original image
ax = fig.add_subplot(1, 1, 1)
ax.imshow(img / 255.0)
openness = self.data.iloc[i].Openness
conscientiousness = self.data.iloc[i].Conscientiousness
extraversion = self.data.iloc[i].Extraversion
agreeableness = self.data.iloc[i].Agreeableness
neuroticism = self.data.iloc[i].Neuroticism
f'O: {openness}, C: {conscientiousness}, E: {extraversion}, A: {agreeableness}, N: {neuroticism}')
def get_image(self, index, data, should_augment):
# Read image and appropiate landmarks
image = cv2.imread(data['image'].values[index])
h, w, _ = image.shape
o, c, e, a, n = data[['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Neuroticism']].values[
should_flip = random.randint(0, 1)
should_rotate = random.randint(0, 1)
should_crop = random.randint(0, 1)
if should_augment:
if should_flip == 1:
# print("Image {} flipped".format(data['path'].values[index]))
image = cv2.flip(image, 1)
if should_rotate == 1:
angle = random.randint(-5, 5)
image = random_rotation(image, angle)
if should_crop == 1:
margin = random.randint(1, 10)
image = crop(image, margin)
image = cv2.resize(image, (self.image_size, self.image_size))
return [image, o, c, e, a, n]
def generator(self, data, should_augment=True):
while True:
# Randomize the indices to make an array
indices_arr = np.random.permutation(data.count()[0])
for batch in range(0, len(indices_arr), self.batch_size):
# slice out the current batch according to batch-size
current_batch = indices_arr[batch:(batch + self.batch_size)]
# initializing the arrays, x_train and y_train
x_train = np.empty(
[0, self.image_size, self.image_size, IMAGE_CHANNEL], dtype=np.float32)
y_train = np.empty([0, 5], dtype=np.int32)
for i in current_batch:
# get an image and its corresponding color for an traffic light
[image, o, c, e, a, n] = self.get_image(i, data, should_augment)
# Appending them to existing batch
x_train = np.append(x_train, [image], axis=0)
y_train = np.append(y_train, [[o, c, e, a, n]], axis=0)
# replace nan values with zeros
y_train = np.nan_to_num(y_train)
yield (x_train, y_train)
def get_training_and_test_generators(self, should_augment_training=True, should_augment_test=True):
msk = np.random.rand(len(self.data)) < self.training_set_size
train = self.data[msk]
test = self.data[~msk]
train_gen = self.generator(train, should_augment_training)
test_gen = self.generator(test, should_augment_test)
return get_generator(train_gen, should_augment_training), get_generator(test_gen, should_augment_test)
def show_batch_images_sample(self, images, landmarks, n_rows=3, n_cols=3):
assert n_rows * n_cols <= self.batch_size, "Number of expected images to display is larger than batch!"
fig = plt.figure(figsize=(15, 15))
xs, ys = [], []
count = 1
for img, y in zip(images, landmarks):
ax = fig.add_subplot(n_rows, n_cols, count)
h, w, _ = img.shape
o, c, e, a, n = y
ax.title.set_text(f'{o}, {c}, {e}, {a}, {n}')
if count == n_rows * n_cols:
count += 1
class CallbackTensorboardImageOutput(Callback):
def __init__(self, model, generator, log_dir, feed_inputs_display=9):
# assert ((feed_inputs_display & (feed_inputs_display - 1)) == 0) and feed_inputs_display != 0
self.generator = generator
self.model = model
self.log_dir = log_dir
self.writer = tf.summary.create_file_writer(self.log_dir)
self.feed_inputs_display = feed_inputs_display
self.seen = 0
def plot_to_image(figure):
"""Converts the matplotlib plot specified by 'figure' to a PNG image and
returns it. The supplied figure is closed and inaccessible after this call."""
# Save the plot to a PNG in memory.
buf = io.BytesIO()
plt.savefig(buf, format='png')
# Closing the figure prevents it from being displayed directly inside
# the notebook.
# Convert PNG buffer to TF image
image = tf.image.decode_png(buf.getvalue(), channels=4)
# Add the batch dimension
image = tf.expand_dims(image, 0)
return image
def get_loss(gt, predictions):
return tf.losses.mse(gt, predictions)
def on_epoch_end(self, epoch, logs={}):
self.seen += 1
if self.seen % 1 == 0:
items = next(self.generator)
images_to_display = self.feed_inputs_display
images_per_cell_count = int(math.sqrt(images_to_display))
# in case of regular model training using generator, an array is passed
if not isinstance(items, dict):
frames_arr, ocean_scores = items
# Take just 1st sample from batch
batch_size = frames_arr.shape[0]
if images_to_display > batch_size:
images_to_display = batch_size
frames_arr = frames_arr[0:images_to_display]
ocean_scores = ocean_scores[0:images_to_display]
y_pred = self.model.predict(frames_arr)
# in case of adversarial training, a dictionary is passed
batch_size = items['feature'].shape[0]
if images_to_display > batch_size:
images_to_display = batch_size
# items['feature'] = items['feature'][0:images_to_display]
# landmarks = items['label'][0:images_to_display]
frames_arr = items['feature']
landmarks = items['label']
y_pred = self.model.predict(items)
figure = plt.figure(figsize=(15, 15))
for i in range(images_to_display):
image_current = frames_arr[i]
y_prediction_current = y_pred[i]
y_gt_current = ocean_scores[i]
lbl_prediction = 'plot/img/{}'.format(i)
ax = plt.subplot(images_per_cell_count, images_per_cell_count, i + 1, title=lbl_prediction)
with self.writer.as_default():
tf.summary.image("Training Data", CallbackTensorboardImageOutput.plot_to_image(figure), step=self.seen)
Below is the definition of the network architecture and the call of fit_generator function:
data_loader = dataloader.DataLoader('dataset.pkl', '/home/niko/data/PsychoFlickr', 224, 64)
train_gen, test_gen = data_loader.get_training_and_test_generators()
pre_trained_model = tf.keras.applications.VGG19(input_shape=(data_loader.image_size, data_loader.image_size, dataloader.IMAGE_CHANNEL), weights='imagenet', include_top=False)
x = pre_trained_model.output
x = tf.keras.layers.Flatten()(x)
# Add a fully connected layer with 256 hidden units and ReLU activation
x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
x = tf.keras.layers.Dense(5, name='regresion_output')(x)
x = tf.keras.layers.Activation('linear')(x)
model = tf.keras.Model(pre_trained_model.input, x)
log_dir = "logs/{}".format(model_name)
model_filename = "saved-models/{}.h5".format(model_name)
cb_tensorboard = TensorBoard(log_dir=log_dir)
callback_save_images = dataloader.CallbackTensorboardImageOutput(model, test_gen, log_dir)
checkpoint = ModelCheckpoint(model_filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
lr = 1e-3
opt = tf.optimizers.Adam(lr=lr)
model.compile(loss=loss_mse, optimizer=opt, metrics=[loss_mse])
history = model.fit_generator(
callbacks=[checkpoint, callback_save_images, cb_tensorboard]
When I tried to run the same procedure with small sample data (200 records), everything seemed to work fine. On the dataset of 60000 records, however, after more than 12 hours the training of 1st epoch hasn't completed.
The training is performed on NVIDIA RTX2080Ti.
I would be thankful if anyone suggested what has to be modified or in general configured in order to train the network on reasonable time.

WGAN-GP Large Oscillating Loss

I am trying to train a WaveGAN as described here: https://github.com/chrisdonahue/wavegan
In the paper, the WaveGAN is trained using WGAN-GP, so I have tried to implement it myself by adapting code from: https://github.com/LynnHo/DCGAN-LSGAN-WGAN-GP-DRAGAN-Tensorflow-2. However, after even only 2000 steps (~1 epoch), the loss values I am getting for the critic and the generator are large (< 1000) and oscillate between negative and positive. My audio is the same piano recordings that they used, just resampled at 16000Hz and converted to mono from stereo.
My loss graphs are:
I was hoping someone could please validate whether my implementation is correct and if so, what experiments can I run to diagnose this problem?
Note: TIMESTEPS indicates the number of samples I wish to generate for each generator pass. Currently this is set to 1 to replicate WaveGAN, and I wish to experiment with this in the future. For now, I don't think it is relevant to the issue.
My train.py script is:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
import numpy as np
import librosa
import random
import os
import sys
import time
import GANModels
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)
EPOCHS = 2000
Fs = 16000
class GAN:
def __init__(self, model_dims=MODEL_DIMS, num_samples=NUM_SAMPLES, timesteps=TIMESTEPS, gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT,
noise_len=NOISE_LEN, batch_size=BATCH_SIZE, sr=Fs):
self.model_dims = model_dims
self.num_samples = num_samples
self.timesteps = timesteps
self.noise_dims = (timesteps, noise_len)
self.batch_size = batch_size
self.G = GANModels.Generator(self.model_dims, self.timesteps, num_samples)
self.D = GANModels.Critic(self.model_dims, self.timesteps, num_samples)
self.G_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
self.D_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
self.gradient_penalty_weight = gradient_penalty_weight
self.sr = sr
def _d_loss_fn(self, r_logit, f_logit):
r_loss = - tf.reduce_mean(r_logit)
f_loss = tf.reduce_mean(f_logit)
return r_loss, f_loss
def _g_loss_fn(self, f_logit):
f_loss = - tf.reduce_mean(f_logit)
return f_loss
def _gradient_penalty(self, real, fake):
def _interpolate(a, b):
shape = [tf.shape(a)[0]] + [1] * (a.shape.ndims - 1)
alpha = tf.random.uniform(shape=shape, minval=0., maxval=1.)
inter = a + alpha * (b - a)
return inter
x = _interpolate(real, fake)
with tf.GradientTape() as t:
pred = self.D(x, training=True)
grad = t.gradient(pred, x)
norm = tf.norm(tf.reshape(grad, [tf.shape(grad)[0], -1]), axis=1)
gp = tf.reduce_mean((norm - 1.)**2)
return gp
def train_G(self):
with tf.GradientTape() as t:
z = tf.random.normal(shape=(self.batch_size,) + self.noise_dims)
x_fake = self.G(z, training=True)
x_fake_d_logit = self.D(x_fake, training=True)
G_loss = self._g_loss_fn(x_fake_d_logit)
G_grad = t.gradient(G_loss, self.G.trainable_variables)
self.G_optimizer.apply_gradients(zip(G_grad, self.G.trainable_variables))
return {'g_loss': G_loss}
def train_D(self, x_real):
with tf.GradientTape() as t:
z = tf.random.normal(shape=(x_real.shape[0],) + self.noise_dims) #Half fake and half real
x_fake = self.G(z, training=True)
x_real_d_logit = self.D(x_real, training=True)
x_fake_d_logit = self.D(x_fake, training=True)
x_real_d_loss, x_fake_d_loss = self._d_loss_fn(x_real_d_logit, x_fake_d_logit)
gp = self._gradient_penalty(x_real, x_fake)
D_loss = (x_real_d_loss + x_fake_d_loss) + gp * self.gradient_penalty_weight
D_grad = t.gradient(D_loss, self.D.trainable_variables)
self.D_optimizer.apply_gradients(zip(D_grad, self.D.trainable_variables))
return {'d_loss': x_real_d_loss + x_fake_d_loss, 'gp': gp}
def sample(self, epoch, num_samples=10):
z = tf.random.normal(shape=(num_samples,) + self.noise_dims)
result = self.G(z, training=False)
for i in range(num_samples):
audio = np.array(result[i, :, :])
librosa.output.write_wav(f"output/piano/{epoch}-{i}.wav", audio, sr=self.sr)
gan = GAN()
X_train = []
for file in os.listdir(r"D:\ML_Datasets\mancini_piano\piano\train"):
with open(r"D:\ML_Datasets\mancini_piano\piano\train" + fr"\{file}", "rb") as f:
samples, _ = librosa.load(f, Fs)
if len(samples) < TIMESTEPS*NUM_SAMPLES:
audio = np.array([np.array([sample]) for sample in samples])
padding = np.zeros(shape=(TIMESTEPS*NUM_SAMPLES - len(samples), 1), dtype='float32')
X_train.append(np.append(audio, padding, axis=0))
for i in range(len(samples) // (TIMESTEPS*NUM_SAMPLES)):
X_train.append(np.array([np.array([sample]) for sample in samples[:TIMESTEPS*NUM_SAMPLES]]))
samples = np.delete(samples, np.s_[:TIMESTEPS*NUM_SAMPLES])
print(f"X_train shape = {(len(X_train),) + X_train[0].shape}")
librosa.output.write_wav("output/piano/test.wav", X_train[0], sr=Fs)
train_summary_writer = tf.summary.create_file_writer("logs/train")
with train_summary_writer.as_default():
steps_per_epoch = len(X_train) // BATCH_SIZE
for e in range(EPOCHS):
for i in range(steps_per_epoch):
D_loss_sum = 0
for n in range(D_UPDATES_PER_G_UPDATE):
D_loss_dict = gan.train_D(np.array(random.sample(X_train, BATCH_SIZE)))
D_loss_sum += D_loss_dict['d_loss']
D_loss = D_loss_sum / D_UPDATES_PER_G_UPDATE
G_loss_dict = gan.train_G()
G_loss = G_loss_dict['g_loss']
tf.summary.scalar('d_loss', D_loss, step=(e*steps_per_epoch)+i)
tf.summary.scalar('g_loss', G_loss, step=(e*steps_per_epoch)+i)
print(f"step {(e*steps_per_epoch)+i}: d_loss = {D_loss} g_loss = {G_loss}")
if e % EPOCHS_PER_SAMPLE == 0:
My GANModels.py script is:
def Generator(d, a, num_samples, c=16):
# Prelim layers
input_layer = Input(shape=(100,))
dense_layer0 = Dense(256*d, input_shape=(100,))(input_layer)#
reshape_layer0 = Reshape((c, c*d))(dense_layer0)#
relu_layer0 = Activation('relu')(reshape_layer0)#
# WaveCNN layers
c //= 2
expanded_layer0 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer0)#relu_layer1
conv1d_t_layer0 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer0)
slice_layer0 = Lambda(lambda x: x[:, 0])(conv1d_t_layer0)
relu_layer2 = Activation('relu')(slice_layer0)
c //= 2
expanded_layer1 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer2)
conv1d_t_layer1 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer1)
slice_layer1 = Lambda(lambda x: x[:, 0])(conv1d_t_layer1)
relu_layer3 = Activation('relu')(slice_layer1)
c //= 2
expanded_layer2 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer3)
conv1d_t_layer2 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer2)
slice_layer2 = Lambda(lambda x: x[:, 0])(conv1d_t_layer2)
relu_layer4 = Activation('relu')(slice_layer2)
c //= 2
expanded_layer3 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer4)
conv1d_t_layer3 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer3)
slice_layer3 = Lambda(lambda x: x[:, 0])(conv1d_t_layer3)
relu_layer5 = Activation('relu')(slice_layer3)
expanded_layer4 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer5)
conv1d_t_layer4 = Conv2DTranspose(1, (1, 25), strides=(1, 4), padding='same')(expanded_layer4)#strides=(1,1)
slice_layer4 = Lambda(lambda x: x[:, 0])(conv1d_t_layer4)
tanh_layer0 = Activation('tanh')(slice_layer4)
model = Model(inputs=input_layer, outputs=tanh_layer0)
return model
def _apply_phaseshuffle(x, rad=2, pad_type='reflect'):
b, x_len, nch = x.get_shape().as_list()
phase = tf.random.uniform([], minval=-rad, maxval=rad + 1, dtype=tf.int32)
pad_l = tf.maximum(phase, 0)
pad_r = tf.maximum(-phase, 0)
phase_start = pad_r
x = tf.pad(x, [[0, 0], [pad_l, pad_r], [0, 0]], mode=pad_type)
x = x[:, phase_start:phase_start+x_len]
x.set_shape([b, x_len, nch])
return x
def Critic(d, a, num_samples, c=1):
input_layer = Input(shape=(a*num_samples, 1))#d*d
conv1d_layer0 = Conv1D(c*d, 25, strides=4, padding='same')(input_layer)#//2
LReLU_layer0 = LeakyReLU(alpha=0.2)(conv1d_layer0)
phaseshuffle_layer0 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer0)
c *= 2
conv1d_layer1 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer0)#d
LReLU_layer1 = LeakyReLU(alpha=0.2)(conv1d_layer1)
phaseshuffle_layer1 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer1)
c *= 2
conv1d_layer2 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer1)#2*d
LReLU_layer2 = LeakyReLU(alpha=0.2)(conv1d_layer2)
phaseshuffle_layer2 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer2)
c *= 2
conv1d_layer3 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer2)#4*d
LReLU_layer3 = LeakyReLU(alpha=0.2)(conv1d_layer3)
phaseshuffle_layer3 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer3)
c *= 2
conv1d_layer4 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer3)#8*d,strides=4
LReLU_layer4 = LeakyReLU(alpha=0.2)(conv1d_layer4)
phaseshuffle_layer4 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer4)
slice_layer0 = Lambda(lambda x: x[:, 0])(phaseshuffle_layer4)
dense_layer1 = Dense(1, input_shape=(256*d,))(slice_layer0)
model = Model(inputs=input_layer, outputs=dense_layer1)
return model

implementation a simple siamese network on Tensorflow

I want to implement a Siamese MLP network using mnist dataset.
I built my code based on Keras mnist_siamese_graph, but error value and accuracy are very huge compare to Keras version.
I cannot figure out where are problems.
This is my code:
import random
import numpy as np
import time
import tensorflow as tf
import input_data
mnist = input_data.read_data_sets("/tmp/data",one_hot=False)
import pdb
def create_pairs(x, digit_indices):
'''Positive and negative pair creation.
Alternates between positive and negative pairs.
pairs = []
labels = []
n = min([len(digit_indices[d]) for d in range(10)]) - 1
for d in range(10):
for i in range(n):
z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
pairs += [[x[z1], x[z2]]]
inc = random.randrange(1, 10)
dn = (d + inc) % 10
z1, z2 = digit_indices[d][i], digit_indices[dn][i]
pairs += [[x[z1], x[z2]]]
labels += [1, 0]
return np.array(pairs), np.array(labels)
def mlp(input_,input_dim,output_dim,name="mlp"):
with tf.variable_scope(name):
w = tf.get_variable('w',[input_dim,output_dim],tf.float32,tf.random_normal_initializer())
return tf.nn.relu(tf.matmul(input_,w))
def build_model_mlp(X_,_dropout):
model = mlpnet(X_,_dropout)
return model
def mlpnet(image,_dropout):
l1 = mlp(image,784,128,name='l1')
l1 = tf.nn.dropout(l1,_dropout)
l2 = mlp(l1,128,128,name='l2')
l2 = tf.nn.dropout(l2,_dropout)
l3 = mlp(l2,128,128,name='l3')
return l3
def contrastive_loss(y,d):
tmp= y *tf.square(d)
#tmp= tf.mul(y,tf.square(d))
tmp2 = (1-y) *tf.square(tf.maximum((1 - d),0))
return tf.reduce_sum(tmp +tmp2)/batch_size/2
def compute_accuracy(prediction,labels):
return labels[prediction.ravel() < 0.5].mean()
#return tf.reduce_mean(labels[prediction.ravel() < 0.5])
def next_batch(s,e,inputs,labels):
input1 = inputs[s:e,0]
input2 = inputs[s:e,1]
y= np.reshape(labels[s:e],(len(range(s,e)),1))
return input1,input2,y
# Initializing the variables
init = tf.initialize_all_variables()
# the data, shuffled and split between train and test sets
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.validation._images
y_test = mnist.validation._labels
batch_size =128
# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)
images_L = tf.placeholder(tf.float32,shape=([None,784]),name='L')
images_R = tf.placeholder(tf.float32,shape=([None,784]),name='R')
labels = tf.placeholder(tf.float32,shape=([None,1]),name='gt')
dropout_f = tf.placeholder("float")
with tf.variable_scope("siamese") as scope:
model1= build_model_mlp(images_L,dropout_f)
model2 = build_model_mlp(images_R,dropout_f)
distance = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(model1,model2),2),1,keep_dims=True))
loss = contrastive_loss(labels,distance)
#contrastice loss
t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if 'l' in var.name]
batch = tf.Variable(0)
optimizer = tf.train.RMSPropOptimizer(0.001,momentum=0.9,epsilon=1e-6).minimize(loss)
# Launch the graph
with tf.Session() as sess:
# Training cycle
for epoch in range(40):
print('epoch %d' % epoch)
avg_loss = 0.
avg_acc = 0.
total_batch = int(X_train.shape[0]/batch_size)
start_time = time.time()
# Loop over all batches
for i in range(total_batch):
s = i * batch_size
e = (i+1) *batch_size
# Fit training using batch data
input1,input2,y =next_batch(s,e,tr_pairs,tr_y)
_,loss_value,predict=sess.run([optimizer,loss,distance], feed_dict={images_L:input1,images_R:input2 ,labels:y,dropout_f:0.9})
tr_acc = compute_accuracy(predict,y)
avg_loss += loss_value
avg_acc +=tr_acc*100
#print('epoch %d loss %0.2f' %(epoch,avg_loss/total_batch))
duration = time.time() - start_time
print('epoch %d time: %f loss %0.2f acc %0.2f' %(epoch,duration,avg_loss/(total_batch),avg_acc/total_batch))
y = np.reshape(tr_y,(tr_y.shape[0],1))
tr_acc = compute_accuracy(predict,y)
print('Accuract training set %0.2f' % (100 * tr_acc))