Rewrite TensorFlow 1.x code to 2.x - tensorflow

I need to rewrite some TensorFlow 1.x code to the 2.x version. So I rewrote the commented code as follows (the different activations and initializers are my own modifications):
def model(X, nact):
    # h = conv(tf.cast(X, tf.float32), nf=32, rf=8, stride=1, init_scale=np.sqrt(2))
    h = tf.keras.layers.Conv2D(filters=32,
                               kernel_size=8,
                               activation='relu',
                               kernel_initializer=orthogonal(np.sqrt(2)))(X)
    # h2 = conv(h, nf=64, rf=4, stride=1, init_scale=np.sqrt(2))
    h2 = tf.keras.layers.Conv2D(filters=64,
                                kernel_size=4,
                                activation='relu',
                                kernel_initializer=orthogonal(np.sqrt(2)))(h)
    . . .
    # pi = fc(h4, nact, act=lambda x: x)
    pi = tf.keras.layers.Dense(units=nact,
                               activation='linear',
                               kernel_initializer=orthogonal(np.sqrt(2)))(h4)
    # vf = fc(h4, 1, act=lambda x: tf.tanh(x))
    vf = tf.keras.layers.Dense(units=1,
                               activation='tanh',
                               kernel_initializer=orthogonal(np.sqrt(2)))(h4)
    # filter out non-valid actions from pi
    valid = tf.reduce_max(tf.cast(X, tf.float32), axis=1)
    valid_flat = tf.reshape(valid, [-1, nact])
    pi_fil = pi + (valid_flat - tf.ones(tf.shape(valid_flat))) * 1e32
    return pi_fil, vf[:, 0]
A few functions further down I have the following:
def build_model(args):
    nh = args.max_clause
    nw = args.max_var
    nc = 2
    nact = nc * nw
    ob_shape = (None, nh, nw, nc * args.n_stack)
    X = tf.placeholder(tf.float32, ob_shape)
    Y = tf.placeholder(tf.float32, (None, nact))
    Z = tf.placeholder(tf.float32, None)
    p, v = model(X, nact)
    params = tf.trainable_variables()
    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=p))
        value_loss = tf.losses.mean_squared_error(labels=Z, predictions=v)
        lossL2 = tf.add_n([tf.nn.l2_loss(vv) for vv in params])
        loss = cross_entropy + value_loss + args.l2_coeff * lossL2
    return X, Y, Z, p, v, params, loss
def self_play(args, status_track):
    X, _, _, p, v, params, _ = build_model(args)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model_dir = status_track.get_model_dir()
        sess.run(load(params, os.path.join(args.save_dir, model_dir)))
        . . .
def super_train(args, status_track):
    X, Y, Z, _, _, params, loss = build_model(args)
    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model_dir = status_track.get_sl_starter()
        sess.run(load(params, os.path.join(args.save_dir, model_dir)))
        . . .
How can I rewrite these two functions in a TensorFlow 2.x, i.e., Keras-like style?

A TensorFlow 2.x-compatible code snippet:
def build_model(args):
    # Placeholders and Sessions only work in graph mode, so call
    # tf.compat.v1.disable_eager_execution() once at program start.
    nh = args.max_clause
    nw = args.max_var
    nc = 2
    nact = nc * nw
    ob_shape = (None, nh, nw, nc * args.n_stack)
    X = tf.compat.v1.placeholder(tf.float32, ob_shape)
    Y = tf.compat.v1.placeholder(tf.float32, (None, nact))
    Z = tf.compat.v1.placeholder(tf.float32, None)
    p, v = model(X, nact)
    params = tf.compat.v1.trainable_variables()
    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=p))
        value_loss = tf.compat.v1.losses.mean_squared_error(labels=Z, predictions=v)
        lossL2 = tf.add_n([tf.nn.l2_loss(vv) for vv in params])
        loss = cross_entropy + value_loss + args.l2_coeff * lossL2
    return X, Y, Z, p, v, params, loss
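If you want to go fully Keras-style rather than keep the compat.v1 shims, one option is to wrap the network in a tf.keras.Model and do the loss/optimizer work in a tf.GradientTape training step. This is a minimal sketch, not a drop-in replacement: it assumes the same args fields, reuses the model() function from the question, and swaps the load(params, ...) helper for Keras weight files. In TF 2.x the orthogonal initializer can be written as tf.keras.initializers.Orthogonal(gain=np.sqrt(2)).
import numpy as np
import tensorflow as tf

def build_keras_model(args):
    nh, nw, nc = args.max_clause, args.max_var, 2
    nact = nc * nw
    inputs = tf.keras.Input(shape=(nh, nw, nc * args.n_stack))
    pi, vf = model(inputs, nact)  # reuse the model() from the question
    return tf.keras.Model(inputs=inputs, outputs=[pi, vf])

def make_train_step(net, args):
    optimizer = tf.keras.optimizers.Adam(1e-3)

    @tf.function
    def train_step(X, Y, Z):
        with tf.GradientTape() as tape:
            p, v = net(X, training=True)
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=p))
            value_loss = tf.reduce_mean(tf.square(Z - v))
            lossL2 = tf.add_n([tf.nn.l2_loss(w) for w in net.trainable_variables])
            loss = cross_entropy + value_loss + args.l2_coeff * lossL2
        grads = tape.gradient(loss, net.trainable_variables)
        optimizer.apply_gradients(zip(grads, net.trainable_variables))
        return loss

    return train_step
Weights would then be saved and restored with net.save_weights(...) / net.load_weights(...) in self_play and super_train instead of sess.run(load(params, ...)).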

Related

Error in defining custom layers using subclasses in keras

I am getting an error when defining the layers of my model. The input for self.layers[i] is not working when I call X = self.layers[i](A, A) inside the code. But a layer made from GTLayer works fine when I call it separately: A = tf.random.uniform(shape=[2,2]); d = GTLayer(2,2,1); d.call(A,A)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class GTN(keras.Model):  # layers.Layer keeps track of everything under the hood!
    def __init__(self, num_edge, num_channels, w_in, w_out, num_class, num_layers, norm):
        super(GTN, self).__init__()
        self.num_layers = 3
        self.num_edge = 6
        self.num_channels = 3
        self.w_in = 2  # tf.random.uniform(shape=[2,2])
        self.w_out = 2  # tf.random.uniform(shape=[2,2])
        self.num_class = 2
        layers = []
        for i in tf.range(num_layers):
            print(4)
            if i == 0:
                layers.append(GTLayer(num_edge, num_channels, first=True))
            else:
                print(i)
                layers.append(GTLayer(num_edge, num_channels, first=False))
        # print((self.w_out*self.num_channels, ))
        self.linear1 = tf.keras.layers.Dense(self.w_out, input_shape=(self.w_out*self.num_channels, ), activation=None)
        self.linear2 = tf.keras.layers.Dense(self.num_class, input_shape=(self.w_out, ), activation=None)

    def call(self, A, X, target_x, target):
        # A = tf.expand_dims(A, 0)
        Ws = []
        print('hello')
        for i in range(self.num_layers):
            print('i:', i)
            if i == 0:
                print('layers:', layers)
                print('layers[i](A):', self.layers[i](A))
                H, W = self.layers[i](A)  # self.layers = nn.ModuleList(layers)
            else:
                # H = self.normalization(H)
                print(H)
                print(W)
                print('A', A)
                X = self.layers[i](A, A)
                print('H_dash', self.layers[i](A))
                H, W = self.layers[i](A, H)
            Ws.append(W)
        for i in range(self.num_channels):
            if i == 0:
                X_ = tf.nn.relu(self.gcn_conv(X, H[i])).numpy()
            else:
                X_tmp = tf.nn.relu(self.gcn_conv(X, H[i])).numpy()
                X_ = tf.concat((X_, X_tmp), dim=1)
        X_ = self.linear1(X_)
        X_ = tf.nn.relu(X_).numpy()
        y = self.linear2(X_[target_x])
        loss = self.loss(y, target)
        return loss, y, Ws

class GTLayer(keras.layers.Layer):
    def __init__(self, in_channels, out_channels, first=True):
        super(GTLayer, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first = first

    def call(self, A, H_=None):
        if self.first == True:
            a = tf.random.uniform(shape=[2,2])
            b = tf.random.uniform(shape=[2,2])
            print(a)
            print(b)
            # H = torch.bmm(np.array(a), np.array(b))
            H = tf.matmul(a, b)
        else:
            a = tf.random.uniform(shape=[2])
            H = tf.random.uniform(shape=[2])
        return H, W
Input used for check:
d = GTN(2,3,4,2,2,2,1)
A = tf.random.uniform(shape=[2,2])
X = tf.random.uniform(shape=[2,2])
t_x = 5
t = 4
d.call(A,X,t_x,t)
Error:
---> 47 X = self.layers[i](A, A)
TypeError: call() takes 2 positional arguments but 3 were given
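A likely cause, judging from the traceback: in a keras.Model, self.layers is a built-in property containing every tracked sub-layer, which here is only the two Dense layers (linear1 and linear2), because the GTLayer objects are stored in the local layers list (which also shadows the imported tensorflow.keras.layers module) and are never assigned to self. Dense.call accepts a single input, so self.layers[i](A, A) passes one positional argument too many, which matches the error. A minimal fix sketch (the attribute name gt_layers is my own choice) keeps the GT layers on the instance and indexes that list instead; note also that GTLayer.call never defines W in the non-first branch, so that will raise a NameError once the indexing is fixed:
# in GTN.__init__ (sketch): store GT layers under a dedicated attribute
self.gt_layers = []
for i in range(num_layers):  # a plain Python range is enough here
    self.gt_layers.append(GTLayer(num_edge, num_channels, first=(i == 0)))

# in GTN.call (sketch): index the dedicated list, not self.layers
H, W = self.gt_layers[0](A)
Ws = [W]
for i in range(1, self.num_layers):
    H, W = self.gt_layers[i](A, H)
    Ws.append(W)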

Is there a way for dynamic N-times replication of a tensor in Tensorflow custom layer (on TPU)?

I'm trying to solve what I thought would be quite a simple task: replicating a tensor in a custom layer on TPU.
My input is 2 tensors of shapes A=(BS, H, n, C) and B = (BS, n, W, C), where n in my case can be (1, 3, 5, 7), but should probably also work with other numbers.
My task is to repeat both tensors A & B to shape (BS, H, W, C) and then sum them for the output. It would be easy if H (or W) were always divisible by n, but they are not, so the number of repeats differs for each slice (BS, H, 1, C) of A. Thus the output is calculated using the following pseudocode:
for i in range(W):
    A1[BS, H, i, C] = A[BS, H, floor(n*i/W), C]
I tried implementing it in multiple ways:
class StripPoolingCombine(tf.keras.layers.Layer):
    def __init__(self, n=1):
        super(StripPoolingCombine, self).__init__()
        self.n = n

    def call(self, v, h, training=False):
        H, W = v.shape[1], h.shape[2]
        v_repeats = tf.unique_with_counts(tf.math.floor(tf.range(W) * self.n / W))[-1]
        h_repeats = tf.unique_with_counts(tf.math.floor(tf.range(H) * self.n / H))[-1]
        v = tf.repeat(v, repeats=v_repeats, axis=2)
        h = tf.repeat(h, repeats=h_repeats, axis=1)
        return Add()([v, h])
Or by replacing unique_with_counts with the following logic:
tf.math.bincount(tf.cast(tf.math.floor(tf.range(W) * self.n / W), dtype=tf.int32))
Using improvised formula:
f = tf.cast(tf.math.ceil(W / self.n), dtype=tf.int32)
s = tf.cast(tf.math.floor(W / self.n), dtype=tf.int32)
b = tf.cast(f!=s, dtype=tf.int32)
r = W - f - s * (self.n - 1)
x1 = s * tf.ones(self.n-1, dtype=tf.int32)
x2 = (1 - tf.range(r*2) % 2) * b
x2 = tf.pad(x2, paddings=[[0, self.n-r*2-1]])
x3 = tf.concat([[f], tf.add(x1, x2)], axis=0)
But as can be seen in Available TensorFlow Ops for TPU, it doesn't support dynamic tf.range, tf.unique_with_counts or tf.math.bincount, and my implementations all result in errors when building a model and calling model.fit() or model.predict(). Yet I still hope that TensorFlow provides some way to work with dynamic shapes that would suit my task and won't require me to rewrite a whole Ops module for such a trivial issue. Please help!
Full reproducible example (using Colab TPU):
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Add

try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print(f'Running on TPU: {tpu.master()}')
except ValueError:
    print('Could not connect to TPU')
    tpu = None

if tpu:
    try:
        print('Initializing TPU...')
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.TPUStrategy(tpu)
        print('TPU initialized!')
    except Exception:
        print('Failed to initialize TPU')

# class StripPoolingCombine(tf.keras.layers.Layer):
#     def __init__(self, n=1):
#         super(StripPoolingCombine, self).__init__()
#         self.n = n
#
#     def call(self, v, h, training=False):
#         H, W = v.shape[1], h.shape[2]
#         v_repeats = tf.unique_with_counts(tf.math.floor(tf.range(W) * self.n / W))[-1]
#         h_repeats = tf.unique_with_counts(tf.math.floor(tf.range(H) * self.n / H))[-1]
#         v = tf.repeat(v, repeats=v_repeats, axis=2)
#         h = tf.repeat(h, repeats=h_repeats, axis=1)
#         return Add()([v, h])

class StripPoolingCombine(tf.keras.layers.Layer):
    def __init__(self, n=1):
        super(StripPoolingCombine, self).__init__()
        self.n = n

    def call(self, v, h, training=False):
        H, W = tf.shape(v)[1], tf.shape(h)[2]
        f = tf.cast(tf.math.ceil(W / self.n), dtype=tf.int32)
        s = tf.cast(tf.math.floor(W / self.n), dtype=tf.int32)
        b = tf.cast(f != s, dtype=tf.int32)
        r = W - f - s * (self.n - 1)
        x1 = s * tf.ones(self.n-1, dtype=tf.int32)
        x2 = (1 - tf.range(r*2) % 2) * b
        x2 = tf.pad(x2, paddings=[[0, self.n-r*2-1]])
        x3 = tf.concat([[f], tf.add(x1, x2)], axis=0)
        v = tf.repeat(v, repeats=x3, axis=2)
        h = tf.repeat(h, repeats=x3, axis=1)
        output = tf.add(v, h)
        return output

def build_model(n=7):
    v = Input(shape=(256, n, 3))
    h = Input(shape=(n, 256, 3))
    outputs = StripPoolingCombine()(v, h)
    model = Model(inputs=[v, h], outputs=outputs)
    return model

tf.keras.backend.clear_session()
with strategy.scope():
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999)
    model = build_model()
    model.compile(optimizer=optimizer, loss='mean_squared_error')

rng_1 = tf.random.uniform([1, 256, 7, 3])
rng_2 = tf.random.uniform([1, 7, 256, 3])
model.predict([rng_1, rng_2])
Use tf.gather:
def call(self, v, h, training=False):
    def out(A, H, axis):
        r = tf.range(H)
        inds = tf.floor(self.n * r / H)
        inds = tf.cast(inds, tf.int32)
        return tf.gather(A, inds, axis=axis)

    H, W = tf.shape(v)[1], tf.shape(h)[2]
    v = out(v, W, 2)
    h = out(h, H, 1)
    output = tf.add(v, h)
    return output
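The idea, as I read the answer: instead of building a dynamic repeats vector, every output column i simply gathers input column floor(n*i/W), so only statically shaped tf.range and tf.gather are needed and the op set stays TPU-compatible. A quick sanity check of the index pattern (W=8 and n=3 are made-up numbers for illustration only):
import tensorflow as tf

W, n = 8, 3  # illustrative sizes only
r = tf.range(W)
inds = tf.cast(tf.floor(n * r / W), tf.int32)
print(inds.numpy())  # [0 0 0 1 1 1 2 2] -> output column i reads input column floor(n*i/W)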

WGAN-GP Large Oscillating Loss

I am trying to train a WaveGAN as described here: https://github.com/chrisdonahue/wavegan
In the paper, the WaveGAN is trained using WGAN-GP, so I have tried to implement it myself by adapting code from: https://github.com/LynnHo/DCGAN-LSGAN-WGAN-GP-DRAGAN-Tensorflow-2. However, even after only 2000 steps (~1 epoch), the loss values I am getting for the critic and the generator are large (>1000) and oscillate between negative and positive. My audio is the same piano recordings that they used, just resampled at 16000Hz and converted from stereo to mono.
My loss graphs are: [loss plots not reproduced here]
I was hoping someone could please validate whether my implementation is correct and, if it is, suggest what experiments I can run to diagnose this problem.
Note: TIMESTEPS indicates the number of samples I wish to generate for each generator pass. Currently this is set to 1 to replicate WaveGAN, and I wish to experiment with this in the future. For now, I don't think it is relevant to the issue.
My train.py script is:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
import numpy as np
import librosa
import random
import os
import sys
import time

import GANModels

gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

MODEL_DIMS = 64
NUM_SAMPLES = 16384
TIMESTEPS = 1
D_UPDATES_PER_G_UPDATE = 5
GRADIENT_PENALTY_WEIGHT = 10.0
NOISE_LEN = 100
EPOCHS = 2000
EPOCHS_PER_SAMPLE = 2
BATCH_SIZE = 8
Fs = 16000

class GAN:
    def __init__(self, model_dims=MODEL_DIMS, num_samples=NUM_SAMPLES, timesteps=TIMESTEPS, gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT,
                 noise_len=NOISE_LEN, batch_size=BATCH_SIZE, sr=Fs):
        self.model_dims = model_dims
        self.num_samples = num_samples
        self.timesteps = timesteps
        self.noise_dims = (timesteps, noise_len)
        self.batch_size = batch_size

        self.G = GANModels.Generator(self.model_dims, self.timesteps, num_samples)
        self.D = GANModels.Critic(self.model_dims, self.timesteps, num_samples)

        self.G_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
        self.D_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)

        print(self.G.summary())
        print(self.D.summary())

        self.gradient_penalty_weight = gradient_penalty_weight
        self.sr = sr

    def _d_loss_fn(self, r_logit, f_logit):
        r_loss = - tf.reduce_mean(r_logit)
        f_loss = tf.reduce_mean(f_logit)
        return r_loss, f_loss

    def _g_loss_fn(self, f_logit):
        f_loss = - tf.reduce_mean(f_logit)
        return f_loss

    def _gradient_penalty(self, real, fake):
        def _interpolate(a, b):
            shape = [tf.shape(a)[0]] + [1] * (a.shape.ndims - 1)
            alpha = tf.random.uniform(shape=shape, minval=0., maxval=1.)
            inter = a + alpha * (b - a)
            inter.set_shape(a.shape)
            return inter

        x = _interpolate(real, fake)
        with tf.GradientTape() as t:
            t.watch(x)
            pred = self.D(x, training=True)
        grad = t.gradient(pred, x)
        norm = tf.norm(tf.reshape(grad, [tf.shape(grad)[0], -1]), axis=1)
        gp = tf.reduce_mean((norm - 1.)**2)
        return gp

    @tf.function
    def train_G(self):
        with tf.GradientTape() as t:
            z = tf.random.normal(shape=(self.batch_size,) + self.noise_dims)
            x_fake = self.G(z, training=True)
            x_fake_d_logit = self.D(x_fake, training=True)
            G_loss = self._g_loss_fn(x_fake_d_logit)
        G_grad = t.gradient(G_loss, self.G.trainable_variables)
        self.G_optimizer.apply_gradients(zip(G_grad, self.G.trainable_variables))
        return {'g_loss': G_loss}

    @tf.function
    def train_D(self, x_real):
        with tf.GradientTape() as t:
            z = tf.random.normal(shape=(x_real.shape[0],) + self.noise_dims)  # Half fake and half real
            x_fake = self.G(z, training=True)
            x_real_d_logit = self.D(x_real, training=True)
            x_fake_d_logit = self.D(x_fake, training=True)
            x_real_d_loss, x_fake_d_loss = self._d_loss_fn(x_real_d_logit, x_fake_d_logit)
            gp = self._gradient_penalty(x_real, x_fake)
            D_loss = (x_real_d_loss + x_fake_d_loss) + gp * self.gradient_penalty_weight
        D_grad = t.gradient(D_loss, self.D.trainable_variables)
        self.D_optimizer.apply_gradients(zip(D_grad, self.D.trainable_variables))
        return {'d_loss': x_real_d_loss + x_fake_d_loss, 'gp': gp}

    def sample(self, epoch, num_samples=10):
        z = tf.random.normal(shape=(num_samples,) + self.noise_dims)
        result = self.G(z, training=False)
        for i in range(num_samples):
            audio = np.array(result[i, :, :])
            audio.flatten()
            librosa.output.write_wav(f"output/piano/{epoch}-{i}.wav", audio, sr=self.sr)

#############################################################################

gan = GAN()

X_train = []
for file in os.listdir(r"D:\ML_Datasets\mancini_piano\piano\train"):
    with open(r"D:\ML_Datasets\mancini_piano\piano\train" + fr"\{file}", "rb") as f:
        samples, _ = librosa.load(f, Fs)
        if len(samples) < TIMESTEPS*NUM_SAMPLES:
            audio = np.array([np.array([sample]) for sample in samples])
            padding = np.zeros(shape=(TIMESTEPS*NUM_SAMPLES - len(samples), 1), dtype='float32')
            X_train.append(np.append(audio, padding, axis=0))
        else:
            for i in range(len(samples) // (TIMESTEPS*NUM_SAMPLES)):
                X_train.append(np.array([np.array([sample]) for sample in samples[:TIMESTEPS*NUM_SAMPLES]]))
                samples = np.delete(samples, np.s_[:TIMESTEPS*NUM_SAMPLES])

print(f"X_train shape = {(len(X_train),) + X_train[0].shape}")

librosa.output.write_wav("output/piano/test.wav", X_train[0], sr=Fs)

train_summary_writer = tf.summary.create_file_writer("logs/train")

with train_summary_writer.as_default():
    steps_per_epoch = len(X_train) // BATCH_SIZE
    for e in range(EPOCHS):
        for i in range(steps_per_epoch):
            D_loss_sum = 0
            for n in range(D_UPDATES_PER_G_UPDATE):
                D_loss_dict = gan.train_D(np.array(random.sample(X_train, BATCH_SIZE)))
                D_loss_sum += D_loss_dict['d_loss']
            D_loss = D_loss_sum / D_UPDATES_PER_G_UPDATE

            G_loss_dict = gan.train_G()
            G_loss = G_loss_dict['g_loss']

            tf.summary.scalar('d_loss', D_loss, step=(e*steps_per_epoch)+i)
            tf.summary.scalar('g_loss', G_loss, step=(e*steps_per_epoch)+i)

            print(f"step {(e*steps_per_epoch)+i}: d_loss = {D_loss} g_loss = {G_loss}")

        if e % EPOCHS_PER_SAMPLE == 0:
            gan.sample(e)
My GANModels.py script is:
# imports needed by this module
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Reshape, Activation, Lambda,
                                     Conv1D, Conv2DTranspose, LeakyReLU)

def Generator(d, a, num_samples, c=16):
    # Prelim layers
    input_layer = Input(shape=(100,))
    dense_layer0 = Dense(256*d, input_shape=(100,))(input_layer)#
    reshape_layer0 = Reshape((c, c*d))(dense_layer0)#
    relu_layer0 = Activation('relu')(reshape_layer0)#

    # WaveCNN layers
    c //= 2
    expanded_layer0 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer0)#relu_layer1
    conv1d_t_layer0 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer0)
    slice_layer0 = Lambda(lambda x: x[:, 0])(conv1d_t_layer0)
    relu_layer2 = Activation('relu')(slice_layer0)

    c //= 2
    expanded_layer1 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer2)
    conv1d_t_layer1 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer1)
    slice_layer1 = Lambda(lambda x: x[:, 0])(conv1d_t_layer1)
    relu_layer3 = Activation('relu')(slice_layer1)

    c //= 2
    expanded_layer2 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer3)
    conv1d_t_layer2 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer2)
    slice_layer2 = Lambda(lambda x: x[:, 0])(conv1d_t_layer2)
    relu_layer4 = Activation('relu')(slice_layer2)

    c //= 2
    expanded_layer3 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer4)
    conv1d_t_layer3 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer3)
    slice_layer3 = Lambda(lambda x: x[:, 0])(conv1d_t_layer3)
    relu_layer5 = Activation('relu')(slice_layer3)

    expanded_layer4 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer5)
    conv1d_t_layer4 = Conv2DTranspose(1, (1, 25), strides=(1, 4), padding='same')(expanded_layer4)#strides=(1,1)
    slice_layer4 = Lambda(lambda x: x[:, 0])(conv1d_t_layer4)
    tanh_layer0 = Activation('tanh')(slice_layer4)

    model = Model(inputs=input_layer, outputs=tanh_layer0)
    return model

def _apply_phaseshuffle(x, rad=2, pad_type='reflect'):
    b, x_len, nch = x.get_shape().as_list()
    phase = tf.random.uniform([], minval=-rad, maxval=rad + 1, dtype=tf.int32)
    pad_l = tf.maximum(phase, 0)
    pad_r = tf.maximum(-phase, 0)
    phase_start = pad_r
    x = tf.pad(x, [[0, 0], [pad_l, pad_r], [0, 0]], mode=pad_type)
    x = x[:, phase_start:phase_start+x_len]
    x.set_shape([b, x_len, nch])
    return x

def Critic(d, a, num_samples, c=1):
    input_layer = Input(shape=(a*num_samples, 1))#d*d

    conv1d_layer0 = Conv1D(c*d, 25, strides=4, padding='same')(input_layer)#//2
    LReLU_layer0 = LeakyReLU(alpha=0.2)(conv1d_layer0)
    phaseshuffle_layer0 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer0)

    c *= 2
    conv1d_layer1 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer0)#d
    LReLU_layer1 = LeakyReLU(alpha=0.2)(conv1d_layer1)
    phaseshuffle_layer1 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer1)

    c *= 2
    conv1d_layer2 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer1)#2*d
    LReLU_layer2 = LeakyReLU(alpha=0.2)(conv1d_layer2)
    phaseshuffle_layer2 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer2)

    c *= 2
    conv1d_layer3 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer2)#4*d
    LReLU_layer3 = LeakyReLU(alpha=0.2)(conv1d_layer3)
    phaseshuffle_layer3 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer3)

    c *= 2
    conv1d_layer4 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer3)#8*d,strides=4
    LReLU_layer4 = LeakyReLU(alpha=0.2)(conv1d_layer4)
    phaseshuffle_layer4 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer4)

    slice_layer0 = Lambda(lambda x: x[:, 0])(phaseshuffle_layer4)
    dense_layer1 = Dense(1, input_shape=(256*d,))(slice_layer0)

    model = Model(inputs=input_layer, outputs=dense_layer1)
    return model
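Not a full answer to the oscillation question, but one diagnostic that is standard for WGAN-GP: the raw critic and generator losses are expected to swing between negative and positive, so their absolute size says little by itself. The quantity usually monitored is the negative critic loss, an estimate of the Wasserstein distance, which should trend downward as training progresses, together with the gradient-penalty term, which should stay small. A hedged logging sketch on top of the training loop above (gan, X_train, BATCH_SIZE, e, steps_per_epoch and i are the names from train.py):
# inside the existing step loop in train.py (sketch)
step = (e * steps_per_epoch) + i
D_loss_dict = gan.train_D(np.array(random.sample(X_train, BATCH_SIZE)))
# -(critic loss) = E[D(real)] - E[D(fake)] approximates the Wasserstein distance
tf.summary.scalar('wasserstein_estimate', -D_loss_dict['d_loss'], step=step)
tf.summary.scalar('gradient_penalty', D_loss_dict['gp'], step=step)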

tf.layers.batch_normalization freezes during sess.run() (1.5.0-dev20171031)

The graph-building phase passes without error, but the program freezes (no hard-drive reads, no memory change, ...) during sess.run() on the first mini-batch of the first epoch. If I remove this layer or replace it with tf.contrib.layers.layer_norm, the program runs without issues.
The tensor (x) I pass into tf.layers.batch_normalization has the shape [#batches, 200]. I use mostly default values, but turned off center and scale.
x_BN = tf.layers.batch_normalization(
    x,
    axis=-1,
    momentum=0.99,
    epsilon=1e-10, #0.001,
    center=False, #True,
    scale=False, #True,
    beta_initializer=tf.zeros_initializer(),
    gamma_initializer=tf.ones_initializer(),
    moving_mean_initializer=tf.zeros_initializer(),
    moving_variance_initializer=tf.ones_initializer(),
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
    training=Flg_training, #False,
    trainable=True,
    name=None,
    reuse=None,
    renorm=False,
    renorm_clipping=None,
    renorm_momentum=0.99,
    fused=False,
    virtual_batch_size=None,
    adjustment=None
)
The tensorflow version I'm using is tf-nightly-gpu (1.5.0-dev20171031 or 1.5.0-dev20171023). Has anyone encountered a similar problem?
Update
This happens when the input of tf.layers.batch_normalization comes from tf.nn.bidirectional_dynamic_rnn; please see the simplified code below, which reproduces the issue:
import tensorflow as tf
import numpy as np

starter_learning_rate = 0.001
decay_steps = 100
decay_rate = 0.96
num_RNN_layers = 3
LSTM_CELL_SIZE = 100
keep_prob = 0.95

with tf.name_scope('Inputs'):
    x = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    length = tf.placeholder(dtype=tf.int32, shape=[None])
    Flg_training = tf.placeholder(dtype=tf.bool, shape=[])
    x_1 = tf.expand_dims(x, -1)

with tf.name_scope('BiLSTM'):
    dropcells = []
    for iiLyr in list(range(num_RNN_layers)):
        cell_iiLyr = tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True)
        dropcells.append(tf.nn.rnn_cell.DropoutWrapper(cell=cell_iiLyr, output_keep_prob=keep_prob)) #,, input_keep_prob=self.keep_prob input_keep_prob=1.0, seed=None
    MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=dropcells, state_is_tuple=True)

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=MultiLyr_cell,
        cell_bw=MultiLyr_cell,
        dtype=tf.float32,
        sequence_length=length, #tf_b_lens
        inputs=x_1, #stacked_RefPts_desc, #tf_b_VCCs_AMs_BN1
        scope="BiLSTM"
    )

    #output_fw, output_bw = outputs
    states_fw, states_bw = states
    c_fw_lstLyr, h_fw_lstLyr = states_fw[-1]
    c_bw_lstLyr, h_bw_lstLyr = states_bw[-1]
    states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis=1, name='states_concat')

with tf.name_scope("cs_BN1"):
    x_BN = tf.layers.batch_normalization(
        states_concat1,
        axis=-1, # axis that should be normalized (typically the features axis, in this case the concatenated states or hidden vectors)
        momentum=0.99,
        epsilon=1e-10, #0.001,
        center=False, #True,
        scale=False, #True,
        beta_initializer=tf.zeros_initializer(),
        gamma_initializer=tf.ones_initializer(),
        moving_mean_initializer=tf.zeros_initializer(),
        moving_variance_initializer=tf.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        training=Flg_training, #False,
        trainable=True,
        name="test_BN", #None,
        reuse=None,
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        fused=False,
        virtual_batch_size=None,
        adjustment=None
    )

with tf.name_scope("Regression"):
    a = tf.get_variable("a", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
    b = tf.get_variable("b", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))

with tf.name_scope("Prediction"):
    y_pred = tf.multiply(x_BN, a) + b

with tf.name_scope('Loss'):
    losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
    mean_loss = tf.reduce_mean(losses)

with tf.name_scope('Training'):
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               decay_steps, decay_rate, staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(losses, global_step=global_step)

#x_mean = tf.reduce_mean(x_BN, axis=0)

sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
sess.run(tf.global_variables_initializer())

for ii in list(range(2000)):
    x_in = (np.random.rand(20, 200))
    y_in = x_in * 1.5 + 3.0
    length_in = np.full([20], 200, dtype=np.int32)
    _, mean_loss_val, a_val, b_val = sess.run([train_step, mean_loss, a, b], feed_dict={
        x: x_in,
        Flg_training: True,
        y: y_in,
        length: length_in
    })
    if (ii < 50):
        print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
    else:
        if (ii % 100 == 0):
            print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))

print("Normal End.")

Creating a highly customizable RNN in Tensorflow

I am trying to implement an RNN without using the RNN functions provided by tensorflow. Here is the code I tried that eventually gave me an error
import tensorflow as tf

tf.InteractiveSession()

x = tf.placeholder(tf.float32, shape=(5,5))
InitialState = tf.zeros((5,1))
h = InitialState

W1 = tf.Variable(tf.random_normal([5, 5], stddev=0.35),
                 name="W1")
W2 = tf.Variable(tf.random_normal([5, 5], stddev=0.35),
                 name="W2")

for k in range(5):
    h = tf.matmul(W1, h) + tf.matmul(W2, x[:, k:(k+1)])
    h = tf.sigmoid(h)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    a = sess.run([h], feed_dict={x: tf.ones((5,5))})  # note: feed_dict values should be numpy arrays, not tf tensors
How can I implement an RNN from scratch? Is there an example online?
import tensorflow as tf
import numpy as np

hidden_size = 2  # hidden layer of two neurons
input_size = 5

# Weight of x will be (hidden_layer_size x input_size)
Wx = tf.Variable(tf.random_normal([hidden_size, input_size], stddev=0.35),
                 name="Wx")
# Weight of y will be (input_size x hidden_layer_size)
Wy = tf.Variable(tf.random_normal([input_size, hidden_size], stddev=0.35),
                 name="Wy")
# Weight of h will be (hidden_size, hidden_size)
Wh = tf.Variable(tf.random_normal([hidden_size, hidden_size], stddev=0.35),
                 name="Wh")

h = tf.zeros((hidden_size, input_size))

x = tf.placeholder(dtype=tf.float32,
                   shape=(input_size, input_size))
y = tf.placeholder(dtype=tf.float32,
                   shape=(input_size, input_size))

feed_dict = {
    x: np.ones((5,5), dtype=np.float32),
    y: np.ones((5,5), dtype=np.float32)
}

# RNN step
for _ in range(input_size):
    h = tf.tanh(tf.matmul(Wh, h) + tf.matmul(Wx, x))
    o = tf.nn.softmax(h)

init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op, feed_dict=feed_dict)
h_new, y_hat = sess.run([h, o], feed_dict=feed_dict)

print(h_new)
print(y_hat)
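For completeness, and in line with the TF 1.x-to-2.x theme of this page, the same from-scratch RNN can be written without placeholders or sessions in TF 2.x: eager tensors and a plain Python loop are enough. A minimal sketch with the same shapes as above (the unused Wy projection is dropped):
import tensorflow as tf
import numpy as np

hidden_size = 2
input_size = 5

# weights as variables so they could later be trained with a GradientTape
Wx = tf.Variable(tf.random.normal([hidden_size, input_size], stddev=0.35), name="Wx")
Wh = tf.Variable(tf.random.normal([hidden_size, hidden_size], stddev=0.35), name="Wh")

x = tf.constant(np.ones((input_size, input_size), dtype=np.float32))
h = tf.zeros((hidden_size, input_size))

# unrolled RNN: one tanh update per step, softmax over the final hidden state
for _ in range(input_size):
    h = tf.tanh(tf.matmul(Wh, h) + tf.matmul(Wx, x))
o = tf.nn.softmax(h)

print(h.numpy())
print(o.numpy())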