batch normalization switch in tensorflow

Here is an autoencoder.
My problem is that I don't know how to set "mode" so that batch normalization behaves differently for "train" and "test".
Can I use a global variable or a placeholder to switch "mode"?
I would really appreciate an example.
#batch normalization
def Batch_norm_en(Wx_plus_b, i):
    if mode == 1:
        fc_mean_en, fc_var_en = tf.nn.moments(Wx_plus_b, axes=[0, 1])
    else:
        # NOTE: fc_mean_en / fc_var_en are never assigned in this branch
        fc_mean_en, fc_var_en = fc_mean_en, fc_var_en
    Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, fc_mean_en, fc_var_en, shift_en[i], scale_en[i], 10**(-3))
    return Wx_plus_b

def Batch_norm_de(Wx_plus_b, i):
    if mode == 1:
        fc_mean_de, fc_var_de = tf.nn.moments(Wx_plus_b, axes=[0, 1])
    else:
        # NOTE: fc_mean_de / fc_var_de are never assigned in this branch
        fc_mean_de, fc_var_de = fc_mean_de, fc_var_de
    Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, fc_mean_de, fc_var_de, shift_de[i], scale_de[i], 10**(-3))
    return Wx_plus_b
#encoder data
def encoder_model(x):
    res = x
    for i in range(0, len(en_n_neurons)-1):
        Wx_plus_b = tf.matmul(res, W_en[i]) + b_en[i]
        Wx_plus_b = Batch_norm_en(Wx_plus_b, i)
        res = tf.nn.sigmoid(Wx_plus_b)
    return res

#decoder data
def decoder_model(x):
    res = x
    for i in range(0, len(de_n_neurons)-1):
        Wx_plus_b = tf.matmul(res, W_de[i]) + b_de[i]
        Wx_plus_b = Batch_norm_de(Wx_plus_b, i)
        res = tf.nn.sigmoid(Wx_plus_b)
    return res
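One common way to handle this (an illustrative sketch, not part of my original code, assuming fully connected layers; batch_norm_switch, is_training, pop_mean and pop_var are made-up names) is to feed a boolean placeholder and select the statistics with tf.cond, keeping running estimates in non-trainable variables:

is_training = tf.placeholder(tf.bool, name='is_training')

def batch_norm_switch(Wx_plus_b, shift, scale, is_training, decay=0.99, eps=1e-3):
    size = Wx_plus_b.get_shape()[-1]
    pop_mean = tf.Variable(tf.zeros([size]), trainable=False)   # running mean
    pop_var = tf.Variable(tf.ones([size]), trainable=False)     # running variance
    batch_mean, batch_var = tf.nn.moments(Wx_plus_b, axes=[0])

    def train_branch():
        # update the running statistics, then normalize with the batch statistics
        update_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        update_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([update_mean, update_var]):
            return tf.nn.batch_normalization(Wx_plus_b, batch_mean, batch_var, shift, scale, eps)

    def test_branch():
        # normalize with the running statistics collected during training
        return tf.nn.batch_normalization(Wx_plus_b, pop_mean, pop_var, shift, scale, eps)

    return tf.cond(is_training, train_branch, test_branch)

# at run time, feed the flag instead of a global "mode":
# sess.run(train_op, feed_dict={..., is_training: True})
# sess.run(recon_loss, feed_dict={..., is_training: False})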

Related

how to solve vector-based variable/constraints in python pyomo

I am solving this problem and wrote the code below, but I get an error on the last line, `results = solver.solve(model)`.
If anyone knows how to handle vector-based dynamic optimization in Pyomo, please let me know.
model = ConcreteModel()
model.num_itter = Param(initialize = 0)
global p_
p_ = model.num_itter
# print(value(p_))
torq = np.zeros((14,6))
# global p_
def M_func(model, i):
    return np.array(Mqn( *value(model.q[i]) ))
def sai_func(model, i):
    return np.array(sain( *value(model.q[i]) ))
def sai_func_T(model, i):
    return np.array(sain( *value(model.q[i]) )).T
def c_func(model, i):
    return np.array(cqn(*np.append(value(model.q[0]), value(model.u[0])) ))
def dsai_dt_func(model, i):
    return np.array(saidn(*np.append(value(model.q[0]), value(model.u[0])) ))
def first_eq(model, i):
    return value(model.M[i]) @ value(model.q[i]) - \
           value(model.sai[i]).T @ value(model.lm[i]) + \
           torq @ value(model.tu[i])
def second_eq(model, i):
    return value(model.sai[i]) @ value(model.a[i])
def third_eq(model, i):
    return (-value(model.dsai_dt[i]) @ value(model.u[i])).reshape(10, 1)
def solu_f(model, i):
    global p_
    s_ = solu10[int(p_/2), :]
    # time.sleep(2)
    p_ = p_ + 1
    print('p', p_)
    return s_
def init_q(model, i):
    return solu10[i, :]
def init_u(model, i):
    return np.zeros((14,))
def init_tu(model, i):
    return np.zeros((6,))
def init_lm(model, i):
    return np.zeros((10,))
def init_a(model, i):
    return np.zeros((14,))
# Define the time horizon
t0 =0 ;tf =40
model.t = ContinuousSet(bounds=(t0,tf))
# measurements = {_: solu10[_,:] for _ in range(len(solu10))}
# Define the state variables, control inputs, and other variables
model.time_points = Set(initialize=range(200))
model.q = Var(model.t,initialize=init_q)
model.u = Var(model.t,initialize=init_u)
model.a = Var(model.t,initialize=init_a)
model.tu =Var(model.t,initialize=init_tu)
model.lm = Var(model.t,initialize=init_lm)
model.dqdt = DerivativeVar(model.q,wrt=model.t)
model.dudt = DerivativeVar(model.u,wrt=model.t)
# Define the parameters
model.M = Param(model.t,mutable=True, initialize=M_func,within=Any)
model.sai = Param(model.t,mutable=True, initialize=sai_func,within=Any)
model.c = Param(model.t,mutable=True, initialize=c_func,within=Any)
model.dsai_dt = Param(model.t,mutable=True, initialize=dsai_dt_func,within=Any)
model.first_meq = Param(model.t,mutable =True, initialize = first_eq,within = Any)
model.solu = Param(model.t,mutable = True,initialize = solu_f, within =Any)
model.second_meq = Param(model.t,mutable =True, initialize = second_eq,within = Any)
model.third_meq = Param(model.t,mutable =True, initialize = third_eq,within = Any)
# print(value(p_))
# Define the Constraint equations
def dyn_eq_1(model, i):
    return model.first_meq[i] == -model.c[i]
def dyn_eq_2(model, i):
    return model.second_meq[i] == model.third_meq[i]
def vel_constraint(model, i):
    return model.dqdt[i] == model.u[i]
def acc_constraint(model, i):
    return model.dudt[i] == model.a[i]
def path_constraint(model, i):
    return model.q[i] == model.solu[i]
model.dyn_eq_1s= Constraint(model.t, rule=dyn_eq_1)
model.dyn_eq_2s= Constraint(model.t, rule=dyn_eq_2)
model.vel_constraints = Constraint(model.t, rule= vel_constraint)
model.acc_constraints = Constraint(model.t, rule= acc_constraint)
model.path_constraints = Constraint(model.t, rule=path_constraint)
# Define the bounds
u_lb = -np.array([2,2,0.5,0.34,float('inf'),float('inf'),0.20,28,float('inf'),float('inf'),0.20,28,0.25,0.25])
u_ub = [2,2,0.5,0.34,None,None,0.20,28,None,None,0.20,28,0.25,0.25]
a_lb = -np.array([1.5,1.5,0.2,0.10,float('inf'),float('inf'),0.10,5,float('inf'),float('inf'),0.10,5,0.10,0.10])
a_ub = [1.5,1.5,0.2,0.10,None,None,0.10,5,None,None,0.10,5,0.10,0.10]
tu_lb = -np.array([4,0.5,4,0.5,4,4])
tu_ub = [4,4,4,4,4,4]
model.u.setlb(u_lb)
model.u.setub(u_ub)
model.a.setlb(a_lb)
model.a.setub(a_ub)
model.tu.setlb(tu_lb)
model.tu.setub(tu_ub)
def u_nes_fn(model, i):
    u_nes = np.array([])
    for _ in [6, 7, 10, 11, 12, 13]:
        u_nes = np.append(u_nes, value(model.u[i])[_])
    return u_nes.astype('float64')
model.u_nes = Param(model.t,initialize= u_nes_fn,within = Any)
# Define the objective function
def tu_u_(model, i):
    return np.linalg.norm(value( model.u_nes[i]*model.tu[i]*model.u_nes[i]*model.tu[i] ))
def objfun(model):
    return model.tu_u
model.obj = Objective(rule = objfun)
# Define the solver and solve the problem
solver = SolverFactory('ipopt')
results = solver.solve(model)
ValueError Traceback (most recent call last)
c:\Users\pyomo_opti.ipynb Cell 16 in <cell line: 11>()
9 # Define the solver and solve the problem
10 solver = SolverFactory('ipopt')
---> 11 results = solver.solve(model)
File c:\Users\AppData\Local\Programs\Python\Python310\lib\site-packages\pyomo\opt\base\solvers.py:570, in OptSolver.solve(self, *args, **kwds)
565 try:
566
...
File pyomo\repn\plugins\ampl\ampl_.pyx:410, in pyomo.repn.plugins.ampl.ampl_.ProblemWriter_nl.call()
File pyomo\repn\plugins\ampl\ampl_.pyx:1072, in pyomo.repn.plugins.ampl.ampl_.ProblemWriter_nl._print_model_NL()
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
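For what it's worth, the ValueError appears to come from the constraint rules: they compare numpy arrays (obtained with value(...)) instead of scalar Pyomo expressions, and the NL writer cannot evaluate the truth value of a whole array. A minimal sketch of the usual element-wise pattern (hypothetical names: model.dim is an assumed 14-element coordinate set, with q and u assumed to be indexed over model.t and model.dim) would be:

model.dim = RangeSet(0, 13)   # hypothetical coordinate index set (14 coordinates)

def vel_constraint_rule(model, t, k):
    # one scalar constraint per time point t and coordinate k,
    # built from Pyomo variables, not from value(...) / numpy arrays
    return model.dqdt[t, k] == model.u[t, k]

# model.vel_constraints = Constraint(model.t, model.dim, rule=vel_constraint_rule)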

AttributeError: 'list' object has no attribute 'cuda'

I am trying to develop a convolutional deep learning network for face recognition. When I run it, it says 'list' object has no attribute 'cuda' and I am not sure what went wrong; can anyone check? The code below trains the whole module, and below that is the code that loads the data.
if __name__ == '__main__':
    #set_trace()
    args = edict({
        'operation' : 'train',
        'feature_file' : None,
        'result_sample_path' : None,
        'gpu' : 'GPU',
        'path_image_query' : None,
        'query_label' : 'Query label',
        'dataset' : None,
        'specific_dataset_folder_name' : 'lfw',
        'img_extension' : 'jpg',
        'preprocessing_method' : 'sphereface',
        'model_name' : 'mobiface',
        'batch_size' : 3,
        'image_query':'/content/drive/My Drive/recfaces13/recfaces/datasets/LFW',
        'train':True})
    # selecting the size of the crop based on the network
    if args.model_name == 'mobilefacenet' or args.model_name == 'sphereface':
        crop_size = (96, 112)
    elif args.model_name == 'mobiface' or args.model_name == 'shufflefacenet':
        crop_size = (112, 112)
    elif args.model_name == 'openface':
        crop_size = (96, 96)
    elif args.model_name == 'facenet':
        crop_size = (160, 160)
    else:
        raise NotImplementedError("Model " + args.model_name + " not implemented")

    if args.dataset is not None:
        # process whole dataset
        assert args.specific_dataset_folder_name is not None, 'To process a dataset, ' \
            'the flag --specific_dataset_folder_name is required.'
        process_dataset(args.operation, args.model_name, args.batch_size,
                        args.dataset, args.specific_dataset_folder_name,
                        args.img_extension, args.preprocessing_method, crop_size,
                        args.result_sample_path, args.feature_file)
    #elif args.image_query is not None:
        # process unique image
        # dataset = ImageDataLoader(args.image_query, args.preprocessing_method,
        #                           crop_size, args.operation == 'extract_features')
        # dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)
        # features = None
    elif args.operation == 'train':
        ##########set_trace()
        net = load_net('mobilefacenet', 'gpu')
        net = net.cuda()
        model_name = args.model_name
        dataset = LFW(args.image_query, args.specific_dataset_folder_name, args.img_extension, args.preprocessing_method, crop_size, args.train)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)
        # data_counter_per_class = np.zeros((len(dataloader)))
        # for i in range(len(dataloader)):
        #     path = os.path.join('image_query', dataloader[i])
        #     files = get_files_from_folder(path)
        #     data_counter_per_class[i] = len(files)
        # test_counter = np.round(data_counter_per_class * (1 - train_ratio))
        #dataloader1=dataloader.split(',')
        #train,test=train_test_split(dataloader,test_size=0.2)
        #trainloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2, drop_last=False)
        # testloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=False, num_workers=2, drop_last=False) //create path//
        #create array of data path split that data path and
        features = None
        if args.feature_file is not None and os.path.isfile(args.feature_file):
            features = scipy.io.loadmat(args.feature_file)
        epoch = 2
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
        train_loss = list()
        #set_trace()
        for i, data in enumerate(dataloader):
            inps, labs = data
            inps, labs = inps.cuda(args['device']), labs.cuda(args['device'])
            inps.squeeze_(0)
            labs.squeeze_(0)
            inps = Variable(inps).cuda(args['device'])
            labs = Variable(labs).cuda(args['device'])
            optimizer.zero_grad()
            outs = net(inps)
            soft_outs = F.softmax(outs, dim=1)
            prds = soft_outs.data.max(1)[1]
            loss = criterion(outs, labs)
            loss.backward()
            optimizer.step()
            prds = prds.squeeze_(1).squeeze_(0).cpu().numpy()
            inps_np = inps.detach().squeeze(0).cpu().numpy()
            labs_np = labs.detach().squeeze(0).cpu().numpy()
            train_loss.append(loss.data.item())
            print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (epoch, i + 1, len(dataloader), np.asarray(train_loss).mean()))
Dataloader
class LFW(object):
    def __init__(self, root, specific_folder, img_extension, preprocessing_method=None, crop_size=(96, 112)):
        """
        Dataloader of the LFW dataset.

        root: path to the dataset to be used.
        specific_folder: specific folder inside the same dataset.
        img_extension: extension of the dataset images.
        preprocessing_method: string with the name of the preprocessing method.
        crop_size: retrieval network specific crop size.
        """
        self.preprocessing_method = preprocessing_method
        self.crop_size = crop_size
        self.imgl_list = []
        self.classes = []
        self.people = []
        self.model_align = None

        # read the file with the names and the number of images of each people in the dataset
        with open(os.path.join(root, 'people.txt')) as f:
            people = f.read().splitlines()[1:]

        # get only the people that have more than 20 images
        for p in people:
            p = p.split('\t')
            if len(p) > 1:
                if int(p[1]) >= 20:
                    for num_img in range(1, int(p[1]) + 1):
                        self.imgl_list.append(os.path.join(root, specific_folder, p[0], p[0] + '_' +
                                              '{:04}'.format(num_img) + '.' + img_extension))
                        self.classes.append(p[0])
                        self.people.append(p[0])

        le = preprocessing.LabelEncoder()
        self.classes = le.fit_transform(self.classes)
        print(len(self.imgl_list), len(self.classes), len(self.people))

    def __getitem__(self, index):
        imgl = imageio.imread(self.imgl_list[index])
        cl = self.classes[index]

        # if image is grayscale, transform into rgb by repeating the image 3 times
        if len(imgl.shape) == 2:
            imgl = np.stack([imgl] * 3, 2)

        imgl, bb = preprocess(imgl, self.preprocessing_method, crop_size=self.crop_size,
                              is_processing_dataset=True, return_only_largest_bb=True, execute_default=True)

        # append image with its reverse
        imglist = [imgl, imgl[:, ::-1, :]]

        # normalization
        for i in range(len(imglist)):
            imglist[i] = (imglist[i] - 127.5) / 128.0
            imglist[i] = imglist[i].transpose(2, 0, 1)
        imgs = [torch.from_numpy(i).float() for i in imglist]

        return imgs, cl, imgl, bb, self.imgl_list[index], self.people[index]

    def __len__(self):
        return len(self.imgl_list)
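The 'list' object has no attribute 'cuda' error most likely comes from what the dataset returns: __getitem__ yields imgs as a Python list (the image and its horizontal flip) plus several extra items, so what gets unpacked as inps in the training loop is a list of tensors, and a list cannot be moved to the GPU. A rough sketch of one way to adapt the loop (an assumption, picking a single view and the class label) would be:

for i, data in enumerate(dataloader):
    imgs, labs = data[0], data[1]            # imgs is a list of tensors, labs is a tensor
    inps = imgs[0]                           # take one view, or torch.cat(imgs, dim=0) to use both
    inps, labs = inps.cuda(), labs.cuda()    # tensors (not lists) can be moved to the GPU
    ...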

Triplet-Loss using pre-trained network

I am trying to use the Triplet-Loss technique to fine-tune an EfficientNet network for human Re-ID using Keras. Here is the code I am using:
This is the generator:
class SampleGen(object):
    def __init__(self, file_class_mapping):
        self.file_class_mapping = file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))

        for file, class_ in file_class_mapping.items():
            self.class_to_list_files[class_].append(file)

        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes = range(len(self.list_classes))
        self.class_weight = np.array([len(self.class_to_list_files[class_]) for class_ in self.list_classes])
        self.class_weight = self.class_weight / np.sum(self.class_weight)

    def get_sample(self):
        class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
        examples_class_idx = np.random.choice(range(len(self.class_to_list_files[self.list_classes[class_idx]])), 2)
        positive_example_1, positive_example_2 = \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[0]], \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[1]]

        negative_example = None
        while negative_example is None or self.file_class_mapping[negative_example] == \
                self.file_class_mapping[positive_example_1]:
            negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
            negative_example = self.list_all_files[negative_example_idx]
        return positive_example_1, negative_example, positive_example_2


def read_and_resize(filepath):
    im = Image.open((filepath)).convert('RGB')
    im = im.resize((image_size, image_size))
    return np.array(im, dtype="float32")


def augment(im_array):
    if np.random.uniform(0, 1) > 0.9:
        im_array = np.fliplr(im_array)
    return im_array


def gen(triplet_gen):
    while True:
        list_positive_examples_1 = []
        list_negative_examples = []
        list_positive_examples_2 = []

        for i in range(batch_size):
            positive_example_1, negative_example, positive_example_2 = triplet_gen.get_sample()
            path_pos1 = join(path_train, positive_example_1)
            path_neg = join(path_train, negative_example)
            path_pos2 = join(path_train, positive_example_2)

            positive_example_1_img = read_and_resize(path_pos1)
            negative_example_img = read_and_resize(path_neg)
            positive_example_2_img = read_and_resize(path_pos2)

            positive_example_1_img = augment(positive_example_1_img)
            negative_example_img = augment(negative_example_img)
            positive_example_2_img = augment(positive_example_2_img)

            list_positive_examples_1.append(positive_example_1_img)
            list_negative_examples.append(negative_example_img)
            list_positive_examples_2.append(positive_example_2_img)

        A = preprocess_input(np.array(list_positive_examples_1))
        B = preprocess_input(np.array(list_positive_examples_2))
        C = preprocess_input(np.array(list_negative_examples))
        label = None

        yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, label
This is how I create the model:
def get_model():
    base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = Dropout(0.6)(x)
    x = Dense(embedding_dim)(x)
    x = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(x)

    embedding_model = Model(base_model.input, x, name="embedding")

    input_shape = (image_size, image_size, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')

    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    inputs = [anchor_input, positive_input, negative_input]
    outputs = [anchor_embedding, positive_embedding, negative_embedding]

    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))

    return embedding_model, triplet_model
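(The triplet_loss passed to add_loss is not shown in the post; for reference, a common margin-based Keras formulation, which is only a guess at what it looks like, is:)

def triplet_loss(inputs, margin=1.0):
    # inputs = [anchor, positive, negative] embeddings (already L2-normalized)
    anchor, positive, negative = inputs
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    return K.maximum(pos_dist - neg_dist + margin, 0.0)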
And this is how I'm trying to run the training:
if __name__ == '__main__':
    data = pd.read_csv(path_csv)
    train, test = train_test_split(data, train_size=0.7, random_state=1337)
    file_id_mapping_train = {k: v for k, v in zip(train.Image.values, train.Id.values)}
    file_id_mapping_test = {k: v for k, v in zip(test.Image.values, test.Id.values)}

    gen_tr = gen(SampleGen(file_id_mapping_train))
    gen_te = gen(SampleGen(file_id_mapping_test))

    embedding_model, triplet_model = get_model()

    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)

    for layer in embedding_model.layers[379:]:
        layer.trainable = True
    for layer in embedding_model.layers[:379]:
        layer.trainable = False

    triplet_model.compile(loss=None, optimizer=Adam(0.0001))

    history = triplet_model.fit(x=gen_tr,
                                validation_data=gen_te,
                                epochs=10,
                                verbose=1,
                                steps_per_epoch=200,
                                validation_steps=20,
                                callbacks=create_callbacks())
The csv contains two columns (Image, Id) and I am generating triplets on the fly using a generator. Layer 379 is the last layer of the network, so I leave only that one trainable. I let it run for some epochs and it does not seem to converge; the loss stays around 2.30, and by epoch 20 or so it is even higher than what I started with. You can see what I mean in the attached training curve. Is there anything wrong with the way I am thinking about the problem?
Thank you!
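One thing worth checking (an assumption based on the code above, not something stated in the post): with include_top=False, EfficientNetB3 outputs a 4D feature map, so the Dense embedding is applied at every spatial position. The usual pattern is to pool before the embedding, for example:

x = base_model.output
x = GlobalAveragePooling2D()(x)   # collapse the spatial dimensions before the embedding
x = Dropout(0.6)(x)
x = Dense(embedding_dim)(x)
x = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(x)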

AttributeError: 'numpy.float32' object has no attribute 'to_cpu'

Good day,
I'm developing a deep learning model for wireless signal detection. Below is a snippet of the function that computes the model accuracy and bit error rate (BER):
import chainer
from chainer.datasets import TupleDataset
import numpy as np
from chainer import cuda
from chainer import function


def get_idp_acc(model, dataset_tuple, comp_ratio, profile=None, batchsize=128, gpu=-1):
    chainer.config.train = True
    xp = np if gpu < 0 else cuda.cupy
    x, indices, x_zf, HtH, Hty = dataset_tuple._datasets[0], dataset_tuple._datasets[1], dataset_tuple._datasets[2], dataset_tuple._datasets[3], dataset_tuple._datasets[4]
    accs = 0
    BERs = 0
    model.train = False

    for j in range(0, len(x), batchsize):
        x_batch = xp.array(x[j:j + batchsize])
        indices_batch = xp.array(indices[j:j + batchsize])
        x_zf_batch = xp.array(x_zf[j:j + batchsize])
        HtH_batch = xp.array(HtH[j:j + batchsize])
        Hty_batch = xp.array(Hty[j:j + batchsize])

        if profile == None:
            acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio=comp_ratio,
                             ret_param='acc')
        else:
            acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio=comp_ratio,
                             ret_param='acc', profile=profile)

        acc_data.to_cpu()
        acc = acc_data.data
        BER = 1.0 - acc
        accs += acc * len(x_batch)
        BERs += BER * len(x_batch)

    return (accs / len(x)) * 100.
When the code is run, I get the following error despite having imported all the required chainer modules. I would really appreciate help on this, as I have been stuck for nearly two months without making any headway in my project.
Traceback (most recent call last):
File "/Users/mac/Documents/idp_detnet/examples/run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 39, in run
acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 107, in sweep_idp
batchsize=args.batchsize, profile=profile))
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 83, in get_idp_acc
acc_data.to_cpu()
AttributeError: 'numpy.float32' object has no attribute 'to_cpu'
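What the traceback suggests (my reading, not confirmed): the model returns 'acc' as a plain numpy.float32, which has no to_cpu method; only chainer.Variable objects do. A small guard in get_idp_acc, assuming acc_data may be either a Variable or a host-side scalar, would be:

if isinstance(acc_data, chainer.Variable):
    acc_data.to_cpu()
    acc = float(acc_data.data)
else:
    acc = float(acc_data)   # already a plain numpy scalar on the host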
Below is additional information, providing the code for the model definition:
K = 10
num_layers = 3*K

def lin_soft_sign(x, t):
    '''Linear soft sign activation function from the original paper Eq. (11)'''
    y = -1 + F.relu(x + t)/ F.absolute(t) - F.relu(- t)/ F.absolute(t)
    return y

def accuracy(x, y):
    '''Computes the fraction of elements for which x and y are equal'''
    return np.mean(np.equal(x, y)).astype(np.float32)

class MLP(chainer.Chain):
    def __init__(self, K, coeff_generator, profiles=None, z_dims=8*K, v_dims=2*K):
        super(MLP, self).__init__()
        if profiles == None:
            profiles = [(0, 10)]
        self.coeff_generator = coeff_generator
        self.z_dims = z_dims
        self.v_dims = v_dims
        self.K = K
        self.profiles = profiles
        self.profile = 0
        with self.init_scope():
            self.p0_l1 = IncompleteLinear(None, self.z_dims)
            self.p1_l1 = IncompleteLinear(None, self.z_dims)
            self.p2_l1 = IncompleteLinear(None, self.z_dims)
            self.p0_lv = IncompleteLinear(None, self.v_dims)
            self.p1_lv = IncompleteLinear(None, self.v_dims)
            self.p2_lv = IncompleteLinear(None, self.v_dims)
            self.p0_l3 = IncompleteLinear(None, self.K)
            self.p1_l3 = IncompleteLinear(None, self.K)
            self.p2_l3 = IncompleteLinear(None, self.K)

    def __call__(self, x, indices, x_zf, HtH, Hty, ret_param='loss', profile=None, comp_ratio=None):
        if profile == None:
            profile = self.profile

        # Form Zero-forcing detection
        err_rel = F.sum((x - x_zf)**2, axis=1)
        params = layer_profile(self.coeff_generator,
                               *self.profiles[profile], self.z_dims,
                               self.v_dims, comp_ratio)

        def detnet_layer(x_d, x_logit, v, z_dims, v_dims):
            HtH_x = np.matmul(HtH, np.expand_dims(x_d.data, axis=2).astype(np.float32))
            HtH_x = F.squeeze(HtH_x, axis=-1)
            #x_concat = np.concatenate([Hty, x, HtH_x, v], axis=1)
            x_concat = F.concat([Hty, x_d, HtH_x, v], axis=1)
            if profile == 0:
                z = F.relu(self.p0_l1(x_concat))
                v += self.p0_lv(z, *params)
                x_logit += self.p0_l3(z, *params)
                x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
            elif profile == 1:
                z = F.relu(self.p1_l1(x_concat))
                v += self.p1_lv(z, *params)
                x_logit += self.p1_l3(z, *params)
                x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
            elif profile == 2:
                z = F.relu(self.p2_l1(x_concat))
                v += self.p2_lv(z, *params)
                x_logit += self.p2_l3(z, *params)
                x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
            return x, x_logit, v

        x_k = np.zeros((Hty.shape[0], self.K), dtype=np.float32)
        x_k_logit = np.zeros((Hty.shape[0], self.K), dtype=np.float32)
        v = np.zeros((Hty.shape[0], self.v_dims), dtype=np.float32)

        loss = 0
        mod = sg.Modulator('BPSK', K)
        for k in range(1, num_layers + 1):
            x_k, x_k_logit, v = detnet_layer(x_k, x_k_logit, v, self.z_dims, self.v_dims)
            err = F.sum((x - x_k)**2, 1)
            loss += (np.log(k)).astype(np.float32) * F.mean(err/err_rel)

        report = {'loss': loss, 'acc': accuracy(mod.demodulate(x_k.data), indices)}
        reporter.report(report, self)
        return report[ret_param]

    def report_params(self):
        return ['validation/main/acc']

    def param_names(self):
        if len(self.profiles) > 1:
            return 'IDPDETNET_{}_{}_{}_p{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__, len(self.profiles))
        return 'IDPDETNET_{}_{}_{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__)
import os
import sys
sys.path.insert(0, os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..')))
import numpy as np
import visualize as vz
import idp.coeffs_generator as cg
from net import MLP
import util
K = 10
N = 4
v_dims = 2*K
z_dims = 8*K
SNR_dB_tmin = -4
SNR_dB_tmax = 24
SNR_dB_test = np.linspace(SNR_dB_tmin, SNR_dB_tmax, 8)
num_snr_test = len(SNR_dB_test)
def run(args):
    train, test = util.get_dataset(args.modeltype)
    names = ['all-one (standard)', 'linear']
    colors = [vz.colors.all_one_lg, vz.colors.linear_lg]
    models = [
        MLP.MLP(K, cg.uniform, z_dims=8*K, v_dims=2*K),
        MLP.MLP(K, cg.linear, z_dims=8*K, v_dims=2*K)
    ]
    comp_ratios = np.linspace(0.1, 1.0, 20)
    acc_dict = {}
    BER_dict = {}
    ratios_dict = {}
    for i in range(num_snr_test):
        for name, model in zip(names, models):
            util.load_or_train_model(model, train, test, args)
            acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
            ratios_dict[name] = [100. * cr for cr in comp_ratios]

    filename = "IDPDETNET1_{}".format(args.modeltype)
    vz.plot(ratios_dict, acc_dict, names, filename, colors=colors,
            folder=args.figure_path, ext=args.ext,
            title='IDPDETNET (BPSK)',
            xlabel='IDP (%)',
            ylabel='Test Accuracy (%)', ylim=(0, 100))

    filename = "IDPDETNET2_{}".format(args.modeltype)
    vz.plot(ratios_dict, BER_dict, names, filename, colors=colors,
            folder=args.figure_path, ext=args.ext,
            title='IDPDETNET (BPSK)',
            xlabel='IDP (%)',
            ylabel='BER (bits/sec)')

    filename = "IDPDETNET3_{}".format(args.modeltype)
    vz.plot(num_snr_test, BER_dict, names, filename, colors=colors,
            folder=args.figure_path, ext=args.ext,
            title='IDPDETNET (BPSK)',
            xlabel='SNR (dB)',
            ylabel='BER (bits/sec)')

if __name__ == '__main__':
    args = util.default_parser('IDPDETNET Example').parse_args()
    run(args)
Hi Seiya Tokui. Thank you for your kind input. Here is the model definition based on the above code:
model = MLP.MLP(K, cg.uniform, z_dims = 8*K, v_dims = 2*K)
OR
model = MLP.MLP(K, cg.linear, z_dims = 8*K, v_dims = 2*K)
Hi @BloodyD. Thanks for your brilliant contributions. The model started training, but later returned the following error:
1 nan nan 0.50108 5.85448
Traceback (most recent call last):
File "run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 38, in run
util.load_or_train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 204, in load_or_train_model
train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 184, in train_model
return eval(fp.read().replace('\n', ''))
File "<string>", line 1, in <module>
NameError: name 'NaN' is not defined
The error occurs in the last line of this snippet code below:
name = model.param_names()
save_model(model, os.path.join(args.model_path, name))
chainer.config.train = False
with open(os.path.join(args.out, 'log'), 'r') as fp:
    return eval(fp.read().replace('\n', ''))
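The NameError looks like a follow-on problem: the training log contains nan values (the loss became nan in the first report line), and eval does not know the name NaN. A sketch of a more robust way to read the log, assuming it is Chainer's JSON LogReport output, is:

import json

with open(os.path.join(args.out, 'log'), 'r') as fp:
    # json.loads accepts NaN/Infinity tokens, unlike eval on the raw text
    return json.loads(fp.read())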

Tensorflow : train on mini batch, fast then slow

I am a beginner in tensorflow and I am trying to train a model using mini-batches. To do that I created a generator and iterate over it. The problem I encounter is that at the beginning of an epoch training seems fast (many batches per second), then it slows down (about one batch per second). I am wondering where I am going wrong in my code, but I cannot find the problem.
def prepare_data(filename):
    '''load file which gives path and label for the data'''
    f = open(filename, 'r')
    data = [line.split() for line in f]
    feat = []
    label = []
    for l in data:
        feat.append(l[0])
        label.append(l[1])
    n_samples = len(feat)
    shuf = list(range(n_samples))
    random.shuffle(shuf)
    count = Counter(label)
    print(count)
    feature = [feat[i] for i in shuf]
    label = np.array(label, dtype=np.int)
    return feature, label[shuf]

def get_specgrams(paths, nsamples=16000):
    '''
    Given list of paths, return specgrams.
    '''
    # read the wav files
    wavs = [wavfile.read(x)[1] for x in paths]
    # zero pad the shorter samples and cut off the long ones.
    data = []
    for wav in wavs:
        if wav.size < 16000:
            d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
        else:
            d = wav[0:nsamples]
        data.append(d)
    # get the specgram
    #specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    #specgram = [s.reshape(129, 124, -1) for s in specgram]
    return np.asarray(data)

def get_specgram(path, nsamples=16000):
    '''
    Given path, return specgrams.
    '''
    # read the wav file
    wav = wavfile.read(path)[1]
    # zero pad the shorter samples and cut off the long ones.
    if wav.size < 16000:
        d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
    else:
        d = wav[0:nsamples]
    # get the specgram
    #specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    #specgram = [s.reshape(129, 124, -1) for s in specgram]
    return d

# multi-class classification binary labels
def one_hot_encode(labels, n_unique_labels=31):
    n_labels = len(labels)
    #print('number of unique labels:', n_unique_labels)
    one_hot_encode = np.zeros((n_labels, n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return np.array(one_hot_encode, dtype=np.int)
#create_path_file('train/audio/')
def model(tr_features, tr_labels, ts_features, ts_labels):
    # remove gpu device error
    config = tf.ConfigProto(allow_soft_placement=True)

    # parameters
    BATCH_SIZE = 4
    number_loop = math.ceil(len(tr_features)/BATCH_SIZE)
    training_epochs = 10
    n_dim = 16000
    n_classes = 31 #len(np.unique(ts_labels))
    n_hidden_units_one = 280
    n_hidden_units_two = 300
    sd = 1 / np.sqrt(n_dim)
    learning_rate = 0.1

    # get test data
    ts_features, ts_labels = get_data(ts_features, ts_labels)

    # Model
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, n_classes])

    W_1 = tf.Variable(tf.random_normal([n_dim, n_hidden_units_one], mean=0, stddev=sd))
    b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean=0, stddev=sd))
    h_1 = tf.nn.tanh(tf.matmul(X, W_1) + b_1)

    W_2 = tf.Variable(tf.random_normal([n_hidden_units_one, n_hidden_units_two], mean=0, stddev=sd))
    b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean=0, stddev=sd))
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)

    W = tf.Variable(tf.random_normal([n_hidden_units_two, n_classes], mean=0, stddev=sd))
    b = tf.Variable(tf.random_normal([n_classes], mean=0, stddev=sd))
    y_ = tf.nn.softmax(tf.matmul(h_2, W) + b)

    init = tf.initialize_all_variables()

    # function and optimizers
    cost_function = -tf.reduce_sum(Y * tf.log(y_))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # train loop
    cost_history = np.empty(shape=[1], dtype=float)
    y_true, y_pred = None, None
    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            print(' ## Epoch n°', epoch+1)
            batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
            acc_total = 0.0
            for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
                _, cost = sess.run([optimizer, cost_function], feed_dict={X: train_features_batch, Y: train_labels_batch})
                cost_history = np.append(cost_history, cost)
                correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                acc = accuracy.eval(feed_dict={X: train_features_batch, Y: train_labels_batch})
                acc_total = (acc_total*cpt + acc)/(cpt+1)
                print('Train accuracy : ', acc_total, '[', str(cpt+1), '/', str(number_loop), ']', flush=True, end='\r')
            clear_output()
            print('Train accuracy : ', acc_total)
        y_pred = sess.run(tf.argmax(y_, 1), feed_dict={X: ts_features})
        y_true = sess.run(tf.argmax(ts_labels, 1))
        print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}), 3))

    fig = plt.figure(figsize=(10, 8))
    plt.plot(cost_history)
    plt.axis([0, training_epochs, 0, np.max(cost_history)])
    plt.show()

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average='micro')
    print("F-Score:", round(f, 3))

def batch_generator(batch_size, feat_path, labels):
    n_sample = len(feat_path)
    ite = math.ceil(n_sample/batch_size)
    for i in range(0, ite):
        if i == ite-1:
            label = one_hot_encode(labels[-batch_size:])
            feat = get_specgrams(feat_path[-batch_size:])
            yield (feat, label)
        else:
            label = one_hot_encode(labels[i*batch_size:i*batch_size+batch_size])
            feat = get_specgrams(feat_path[i*batch_size:i*batch_size+batch_size])
            yield (feat, label)

def get_data(feat_path, labels):
    feat = get_specgrams(feat_path)
    label = one_hot_encode(labels)
    return feat, label
def __main__():
    print('## Load data and shuffle')
    feat_path, labels = prepare_data('data_labelised2.txt')
    idx = int(len(labels)*0.8)
    print("## Create Model")
    model(feat_path[0:idx], labels[0:idx], feat_path[idx+1:], labels[idx+1:])

with tf.device('/gpu:0'):
    __main__()
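A likely cause of the progressive slowdown (my guess from the code above, not something stated in the question): new graph nodes (tf.equal, tf.argmax, tf.cast) are created on every iteration inside the training loop, so the TensorFlow graph keeps growing and each sess.run call gets slower. The usual fix is to build those ops once, before the session, and only run them inside the loop, roughly like this:

# build the evaluation ops once, outside the loops (names taken from the model() above)
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session(config=config) as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        for cpt, (x_batch, y_batch) in enumerate(batch_generator(BATCH_SIZE, tr_features, tr_labels)):
            # only run existing ops here; do not create new ones inside the loop
            _, cost, acc = sess.run([optimizer, cost_function, accuracy],
                                    feed_dict={X: x_batch, Y: y_batch})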