Scipy.optimize.fmin_cg giving TypeError: unsupported operand type(s) for -: 'tuple' and 'tuple' - numpy

I am building a oneVsAll classifier in python and using scipy.optimize.fmin_cg to get optimum value of theta vector. This is my classifier function.
def oneVsAll(X, y, K, reg_parameter):
X = np.hstack((np.ones((X.shape[0],1)), X))
theta = initialiseTheta((K,X.shape[1]))
for i in range(K):
print("In for loop")
digit_class = i if i else 10
theta[i] = opt.fmin_cg(f = lrCostFunction, fprime=None, x0=theta[i], args = (X, (y==digit_class).flatten() , reg_parameter), maxiter=50)
return theta
This is my cost function
def lrCostFunction(theta, X, y, reg_parameter):
m = y.shape[0] #number of training examples
J =0
grad = np.zeros(theta.shape)
Z =, theta)
print("Z shape", Z.shape)
hx = sigmoid(Z)
print("hx - y shape", (hx-y).shape)
print("X shape",(X[:,0].T).shape )
print("dot product shape",[:,0].T, (hx -y)))
J = -(1/m)*(np.sum(np.multiply(y, np.log(hx)) + np.multiply((1-y), np.log(1-hx)))) + (reg_parameter/(2*m))*(np.sum(np.power(theta[1:], 2)))
grad_unregularized = (1/m)*([:,1:].T, (hx -y)))
grad[0] = (1/m)*([:,0].T, (hx -y)))
grad[1:] = grad_unregularized + (reg_parameter/m)*(theta[1:])
return (J, grad)


Error when trying to implement mAP as metrics in yolov1 training using tensorflow

I am trying to implement mAP as the main metric for yolov1 training. It ran fine for several epochs and was able to give the mAP value along with its loss for each batch. but after several epochs, it would crash, and I can't figure out what was wrong.
This is the error code that I got:
InvalidArgumentError: in user code:
C:\Users\DeepLab\AppData\Local\Temp/ipykernel_11432/ mean_average_precision *
if iou > best_iou:
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\autograph\operators\ if_stmt
_tf_if_stmt(cond, body, orelse, get_state, set_state, symbol_names, nouts)
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\autograph\operators\ _tf_if_stmt
cond = _verify_tf_condition(cond, 'if statement')
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\autograph\operators\ _verify_tf_condition
cond = array_ops.reshape(cond, ())
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\util\ wrapper
return target(*args, **kwargs)
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\ops\ reshape
result = gen_array_ops.reshape(tensor, shape, name)
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\ops\ reshape
return reshape_eager_fallback(
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\ops\ reshape_eager_fallback
_result = _execute.execute(b"Reshape", 1, inputs=_inputs_flat, attrs=_attrs,
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\eager\ quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
InvalidArgumentError: Input to reshape is a tensor with 0 values, but the requested shape has 1 [Op:Reshape]
For calculating mAP, I use these functions:
intersection_over_union used to return iou in tensor type
convert_cellboxes used to return the label value measured from the shape of the image
cellboxes_to_boxes used to return the list of lists containing 6 values (class_idx, confident, x, y, w, h)
non_max_suppression used to return the filtered version of cellboxes_to_boxes output
get_bboxes used to return a list containing 7 value, img_idx, class_idx, confident, x, y, w, h). It will be used as an input to calculate mAP.
mean_average_precisions is used to calculate mAP.
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
if box_format == "midpoint":
box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2 ## ==> x - w / 2 for each grid in each image
box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2 ## ==> y - h / 2 for each grid in each image
box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2 ## ==> x + w / 2 for each grid in each image
box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2 ## ==> y + h / 2 for each grid in each image
box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
if box_format == "corners":
box1_x1 = boxes_preds[..., 0:1]
box1_y1 = boxes_preds[..., 1:2]
box1_x2 = boxes_preds[..., 2:3]
box1_y2 = boxes_preds[..., 3:4] # (N, 1)
box2_x1 = boxes_labels[..., 0:1]
box2_y1 = boxes_labels[..., 1:2]
box2_x2 = boxes_labels[..., 2:3]
box2_y2 = boxes_labels[..., 3:4]
x1 = K.max((box1_x1, box2_x1))
y1 = K.max((box1_y1, box2_y1))
x2 = K.min((box1_x2, box2_x2))
y2 = K.min((box1_y2, box2_y2))
intersection = K.clip((x2-x1), min_value=0, max_value=abs(x2-x1)) * K.clip((y2-y1), min_value=0, max_value=abs(y2-y1))
#intersection = 2
box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
return intersection / (box1_area + box2_area - intersection + 1e-6)
def convert_cellboxes(predictions, S=7): #array (n, 7, 7, 30) (n, 7 x 7, 30)
#batch_size = predictions.shape[0]
n = batch_size
predictions = K.reshape(predictions, (n, 7, 7, 30))
n = len(X_val)%batch_size
predictions = K.reshape(predictions, (n, 7, 7, 30))
bboxes1 = predictions[..., 21:25]
bboxes2 = predictions[..., 26:30]
scores = tf.concat(
(tf.expand_dims(predictions[..., 20], 0), tf.expand_dims(predictions[..., 25], 0)), axis=0 #(1, 7, 7, 2)
) ## (n, 7, 7, 2)
best_box = tf.expand_dims(K.argmax(scores, 0), -1)
best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2 ##(7, 7, 4)
cell_indices = tf.expand_dims(tf.tile(tf.range(start=0, limit=7, delta=1), (7,)), -1) # (49, 1) (1, 7, 7, 1)
cell_indices = tf.repeat(tf.reshape(cell_indices, (1, 7, 7, 1)), n, 0) ## reshape from (49, 1) to (n, 7, 7, 1)
best_boxes = tf.cast(best_boxes, tf.float32)
cell_indices = tf.cast(cell_indices, tf.float32)
x = 1 / S * (best_boxes[..., :1] + cell_indices)
y = 1 / S * (best_boxes[..., 1:2] + K.permute_dimensions(cell_indices, (0, 2, 1, 3)))
w_h = 1 / S * best_boxes[..., 2:4]
converted_bboxes = tf.concat((x, y, w_h), axis=-1) # dimensi terakhir = 4
predicted_class = tf.expand_dims(K.argmax(predictions[..., :20], -1), -1) #n, 7, 7, 1
best_confidence = tf.expand_dims(K.max((predictions[..., 20], predictions[..., 25]), 0), -1)
predicted_class = tf.cast(predicted_class, tf.float32)
best_confidence = tf.cast(best_confidence, tf.float32)
converted_preds = tf.concat(
(predicted_class, best_confidence, converted_bboxes), -1 # n, 7, 7, 6
return converted_preds
def cellboxes_to_boxes(out, S=7):
n = batch_size
converted_pred = K.reshape(convert_cellboxes(out), (n, S * S, -1)) # (n, 49, 6)
n = len(X_val)%batch_size
converted_pred = K.reshape(convert_cellboxes(out), (n, S * S, -1)) # (n, 49, 6)
converted_pred = converted_pred.numpy() # mode graph
all_bboxes = []
for ex_idx in range(out.shape[0]):
bboxes = []
for bbox_idx in range(S * S):
bboxes.append([x for x in converted_pred[ex_idx, bbox_idx, :]])
return all_bboxes
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="midpoint"):
#bboxes = bboxes[0]
#for i, box in enumerate(bboxes):
# bboxes[i][4:6] = box[4:6] * 7
assert type(bboxes) == list
bboxes = [box for box in bboxes if box[1] > threshold]
bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
bboxes_after_nms = []
while bboxes:
chosen_box = bboxes.pop(0)
bboxes = [
box # (6)
for box in bboxes
if box[0] != chosen_box[0]
or intersection_over_union(
< iou_threshold
return bboxes_after_nms
def get_bboxes(gt_labels, pred_labels, iou_threshold, threshold, box_format="midpoint"):
images_pred_boxes = list with each element in this format (image_idx, class_prediction, prob_score, x, y, w, h)
images_gt_boxes = list with each element in this format (image_idx, class, prob_score, x, y, w, h)
images_pred_boxes = []
images_gt_boxes = []
#pred_labels = model.predict(images) # data training, validation, testing
image_idx = 0
gt_boxes = cellboxes_to_boxes(gt_labels)
pred_boxes = cellboxes_to_boxes(pred_labels)
for i in range(len(gt_labels)):
pred_box_nms = non_max_suppression(pred_boxes[i], iou_threshold, threshold, box_format="midpoint")
for nms_box in pred_box_nms:
images_pred_boxes.append([image_idx] + nms_box)
for box in gt_boxes[i]:
if box[1] > threshold:
images_gt_boxes.append([image_idx] + box)
image_idx += 1
return images_pred_boxes, images_gt_boxes
def mean_average_precision(
y_true, y_pred, iou_threshold=0.5, box_format="midpoint", num_classes=20
pred_boxes, true_boxes = get_bboxes(y_true, y_pred, iou_threshold=0.6, threshold=0.3, box_format="midpoint")
# list storing all AP for respective classes
average_precisions = []
# used for numerical stability later on
epsilon = 1e-6
for c in range(num_classes):
detections = []
ground_truths = []
# Go through all predictions and targets,
# and only add the ones that belong to the
# current class c
for detection in pred_boxes:
if detection[1] == c:
for true_box in true_boxes:
if true_box[1] == c:
# find the amount of bboxes for each training example
# Counter here finds how many ground truth bboxes we get
# for each training example, so let's say img 0 has 3,
# img 1 has 5 then we will obtain a dictionary with:
# amount_bboxes = {0:3, 1:5, ..., 20: 10}
amount_bboxes = Counter([gt[0] for gt in ground_truths])
# We then go through each key, val in this dictionary
# and convert to the following (w.r.t same example):
# amount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
for key, val in amount_bboxes.items():
amount_bboxes[key] = np.zeros(val)
# sort by box probabilities which is index 2
detections.sort(key=lambda x: x[2], reverse=True)
TP = np.zeros((len(detections)))
FP = np.zeros((len(detections)))
total_true_bboxes = len(ground_truths)
# If none exists for this class then we can safely skip
if total_true_bboxes == 0:
for detection_idx, detection in enumerate(detections):
# Only take out the ground_truths that have the same
# training idx as detection
ground_truth_img = [
bbox for bbox in ground_truths if bbox[0] == detection[0]
num_gts = len(ground_truth_img) #
best_iou = 0
best_gt_idx = 0
iou = 0
for idx, gt in enumerate(ground_truth_img):
iou = intersection_over_union(
if iou > best_iou:
best_iou = iou
best_gt_idx = idx
if best_iou > iou_threshold:
# only detect ground truth detection once
if amount_bboxes[detection[0]][best_gt_idx] == 0:
# true positive and add this bounding box to seen
TP[detection_idx] = 1
amount_bboxes[detection[0]][best_gt_idx] = 1
FP[detection_idx] = 1
# if IOU is lower then the detection is a false positive
FP[detection_idx] = 1
TP = tf.constant(TP)
FP = tf.constant(FP)
TP_cumsum = tf.cumsum(TP, axis=0)
FP_cumsum = tf.cumsum(FP, axis=0)
recalls = TP_cumsum / (total_true_bboxes + epsilon)
precisions = tf.math.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
precisions = tf.concat((tf.cast(tf.constant([1]), precisions.dtype), precisions), axis=0)
recalls = tf.concat((tf.cast(tf.constant([0]), recalls.dtype), recalls), axis=0)
# torch.trapz for numerical integration
average_precisions.append(tfp.math.trapz(precisions, recalls))
return sum(average_precisions) / len(average_precisions)
for training, I used a standard with pascalvoc2007 as its dataset and a batch size of 4.

custom Keras Layer

I want to make this deep learning network with Keras. This network is proposed for compressing video recently.
One layer of this model is ConvLSTM.
ConvLSTM is good for compressing sequences of images.
I know Keras has the ConvLSTM2D layer but I want to use this class:
import tensorflow as tf
class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
"""A LSTM cell with convolutions instead of multiplications.
Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None):
super(ConvLSTMCell, self).__init__(_reuse=reuse)
self._kernel = kernel
self._filters = filters
self._forget_bias = forget_bias
self._activation = activation
self._normalize = normalize
self._peephole = peephole
if data_format == 'channels_last':
self._size = tf.TensorShape(shape + [self._filters])
self._feature_axis = self._size.ndims
self._data_format = None
elif data_format == 'channels_first':
self._size = tf.TensorShape([self._filters] + shape)
self._feature_axis = 0
self._data_format = 'NC'
raise ValueError('Unknown data_format')
def state_size(self):
return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)
def output_size(self):
return self._size
def call(self, x, state):
c, h = state
x = tf.concat([x, h], axis=self._feature_axis)
n = x.shape[-1].value
m = 4 * self._filters if self._filters > 1 else 4
W = tf.get_variable('kernel', self._kernel + [n, m])
y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
if not self._normalize:
y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
if self._peephole:
i += tf.get_variable('W_ci', c.shape[1:]) * c
f += tf.get_variable('W_cf', c.shape[1:]) * c
if self._normalize:
j = tf.contrib.layers.layer_norm(j)
i = tf.contrib.layers.layer_norm(i)
f = tf.contrib.layers.layer_norm(f)
f = tf.sigmoid(f + self._forget_bias)
i = tf.sigmoid(i)
c = c * f + i * self._activation(j)
if self._peephole:
o += tf.get_variable('W_co', c.shape[1:]) * c
if self._normalize:
o = tf.contrib.layers.layer_norm(o)
c = tf.contrib.layers.layer_norm(c)
o = tf.sigmoid(o)
h = o * self._activation(c)
state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
return h, state
Now I don't know how to change this class to a custom Keras Layer. Anyone can help me?

TypeError when trying to make a loop creating artificial neural networks

I am working on an artifical neural network which I have created via subclassing.
The subclassing looks like this:
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import scipy.stats as si
import sympy as sy
from sympy.stats import Normal, cdf
from sympy import init_printing
class DGMNet(tf.keras.Model):
def __init__(self, n_layers, n_nodes, dimensions=1):
- n_layers: number of layers
- n_nodes: number of nodes in (inner) layers
- dimensions: number of spacial dimensions
self.n_layers = n_layers
self.initial_layer = DenseLayer(dimensions + 1, n_nodes, activation="relu")
self.lstmlikelist = []
for _ in range(self.n_layers):
self.lstmlikelist.append(LSTMLikeLayer(dimensions + 1, n_nodes, activation="relu"))
self.final_layer = DenseLayer(n_nodes, 1, activation=None)
def call(self, t, x):
X = tf.concat([t,x], 1)
S =
for i in range(self.n_layers):
S = self.lstmlikelist[i].call({'S': S, 'X': X})
result =
return result
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
super(DenseLayer, self).__init__()
self.n_inputs = n_inputs
self.n_outputs = n_outputs
self.W = self.add_weight(shape=(self.n_inputs, self.n_outputs),
self.b = self.add_weight(shape=(1, self.n_outputs),
self.activation = _get_function(activation)
def call(self, inputs):
S = tf.add(tf.matmul(inputs, self.W), self.b)
S = self.activation(S)
return S
class LSTMLikeLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
super(LSTMLikeLayer, self).__init__()
self.n_outputs = n_outputs
self.n_inputs = n_inputs
self.Uz = self.add_variable("Uz", shape=[self.n_inputs, self.n_outputs])
self.Ug = self.add_variable("Ug", shape=[self.n_inputs, self.n_outputs])
self.Ur = self.add_variable("Ur", shape=[self.n_inputs, self.n_outputs])
self.Uh = self.add_variable("Uh", shape=[self.n_inputs, self.n_outputs])
self.Wz = self.add_variable("Wz", shape=[self.n_outputs, self.n_outputs])
self.Wg = self.add_variable("Wg", shape=[self.n_outputs, self.n_outputs])
self.Wr = self.add_variable("Wr", shape=[self.n_outputs, self.n_outputs])
self.Wh = self.add_variable("Wh", shape=[self.n_outputs, self.n_outputs]) = self.add_variable("bz", shape=[1, self.n_outputs]) = self.add_variable("bg", shape=[1, self.n_outputs]) = self.add_variable("br", shape=[1, self.n_outputs]) = self.add_variable("bh", shape=[1, self.n_outputs])
self.activation = _get_function(activation)
def call(self, inputs):
S = inputs['S']
X = inputs['X']
Z = self.activation(tf.add(tf.add(tf.matmul(X, self.Uz), tf.matmul(S, self.Wz)),
G = self.activation(tf.add(tf.add(tf.matmul(X, self.Ug), tf.matmul(S, self.Wg)),
R = self.activation(tf.add(tf.add(tf.matmul(X, self.Ur), tf.matmul(S, self.Wr)),
H = self.activation(tf.add(tf.add(tf.matmul(X, self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)),
Snew = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z, S))
return Snew
def _get_function(name):
f = None
if name == "tanh":
f = tf.nn.tanh
elif name == "sigmoid":
f = tf.nn.sigmoid
elif name == "relu":
f = tf.nn.relu
elif not name:
f = tf.identity
assert f is not None
return f
# Sampling
def sampler(N1, N2, N3):
# Sampler #1: PDE domain
t1 = np.random.uniform(low=T0,
s1 = np.random.uniform(low=S1,
# Sampler #2: boundary condition
t2 = np.zeros(shape=(1, 1))
s2 = np.zeros(shape=(1, 1))
# Sampler #3: initial/terminal condition
t3 = T * np.ones((N3,1)) #Terminal condition
s3 = np.random.uniform(low=S1,
return (t1, s1, t2, s2, t3, s3)
# Loss function
def loss(model, t1, x1, t2, x2, t3, x3):
# Loss term #1: PDE
V = model(t1, x1)
V_t = tf.gradients(V, t1)[0]
V_x = tf.gradients(V, x1)[0]
V_xx = tf.gradients(V_x, x1)[0]
f = V_t + r*x1*V_x + 0.5*sigma**2*x1**2*V_xx - r*V
L1 = tf.reduce_mean(tf.square(f))
# Loss term #2: boundary condition
#L2 = tf.reduce_mean(tf.square(V))
# Loss term #3: initial/terminal condition
L3 = tf.reduce_mean(tf.square(model(t3, x3) - tf.math.maximum(x3-K,0)))
return (L1, L3)
# B-S's analytical known solution
def analytical_solution(t, x):
#C = SN(d1) - Xe- rt N(d2)
#S: spot price
#K: strike price
#T: time to maturity
#r: interest rate
#sigma: volatility of underlying asset
d1 = (np.log(x / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
d2 = (np.log(x / K) + (r - 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
call = (x * si.norm.cdf(d1, 0.0, 1.0) - K * np.exp(-r * T) * si.norm.cdf(d2, 0.0, 1.0))
return call
# Set random seeds
# Strike price
K = 0.5
# PDE parameters
r = 0.05 # Interest rate
sigma = 0.25 # Volatility
# Time limits
T0 = 0.0 + 1e-10 # Initial time
T = 1.0 # Terminal time
# Space limits
S1 = 0.0 + 1e-10 # Low boundary
S2 = 1.0 # High boundary
# Number of samples
NS_1 = 1000
NS_2 = 0
NS_3 = 100
t1, s1, t2, s2, t3, s3 = sampler(NS_1, NS_2, NS_3)
Now what I want to do is to iterate over different parameters and create a new ann for each iteration.
My plan was to do it in this way:
t1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
volatility_list = [0.08]#[0.08, 0.16, 0.18, 0.2, 0.28]
stages_list = [10]#, 50, 100]
layers_list = [3]#, 5, 7]
npl_list = [3]#, 6, 9, 12, 15]
for sigma in volatility_list:
for st in stages_list:
for lay in layers_list:
for npl in npl_list:
# Neural Network definition
num_layers = lay
nodes_per_layer = npl
ann = DGMNet(num_layers, nodes_per_layer)
L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
loss_t = L1_t + L3_t
# Optimizer parameters
global_step = tf.Variable(1, trainable=False)
starter_learning_rate = 0.001
learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, global_step,
100000, 0.96, staircase=True)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_t)
# Training parameters
steps_per_sample = st
sampling_stages = 100#2000
# Plot tensors
tplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="tplot_t") # We name to recover it later
xplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="xplot_t")
vplot_t = tf.identity(ann(tplot_t, xplot_t), name="vplot_t") # Trick for naming the trained model
# Training data holders
sampling_stages_list = []
elapsed_time_list = []
loss_list = []
L1_list = []
L3_list = []
# Train network!!
init_op = tf.compat.v1.global_variables_initializer()
sess = tf.compat.v1.Session()
for i in range(sampling_stages):
t1, x1, t2, x2, t3, x3 = sampler(NS_1, NS_2, NS_3)
start_time = time.clock()
for _ in range(steps_per_sample):
loss, L1, L3, _ =[loss_t, L1_t, L3_t, optimizer],
feed_dict = {t1_t:t1, x1_t:x1, t2_t:t2, x2_t:x2, t3_t:t3, x3_t:x3})
end_time = time.clock()
elapsed_time = end_time - start_time
text = "Stage: {:04d}, Loss: {:e}, L1: {:e}, L3: {:e}, {:f} seconds".format(i, loss, L1, L3, elapsed_time)
#goodness of fit
time_0 = 0
listofzeros = [time_0] * 100
prices_for_goodness = np.linspace(S1,S2, 100)
goodness_list = []
solution_goodness = analytical_solution(listofzeros, prices_for_goodness)
ttt = time_0*np.ones_like(prices_for_goodness.reshape(-1,1))
nn_goodness, =[vplot_t],
feed_dict={tplot_t:ttt, xplot_t:prices_for_goodness.reshape(-1,1)})
deviation_list = np.abs(solution_goodness - nn_goodness)/(T-T0)
Unfortunately as soon as it ends the first iteration I get a TypeError that 'numpy.float32' object is not callable
Error Traceback:
TypeError Traceback (most recent call last)
<ipython-input-14-bb14643d0c42> in <module>()
---> 12 L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
13 loss_t = L1_t + L3_t
TypeError: 'numpy.float32' object is not callable
I guess that the problem is with the creation of the placeholders, however I am not sure how to solve it. Maybe one of you can help me
Thanks in advance!
Did you create a variable called 'loss'? It seems that the loss function is redefined by a variable with the same name, so then python tries to call that variable as a function.

Deep neural-network with backpropagation implementation does not work - python

I want to implement a multilayer NN with backpropagation. I have been trying for days, but it simply does not work. It is extremely clear in my head how it is supposed to work, I have streamline my code to be as simple as possible but I can't do it. It's probably something stupid, but I cannot see it.
The implementation I have done is with an input layer of 784 (28x28), two (L) hidden layers of 300 and an output of 10 classes. I have a bias in every layer (except last...)
The output activation is softmax and the hidden activation is ReLU.
I use mini batches of 600 examples over a dataset of 60k examples with 50 to 500 epoches.
Here the core of my code:
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
L = 2
K = len(np.unique(train_labels))
lr = 0.001
nb_epochs = 50
node_per_hidden_layer = 300
nb_batches = 100
W = []
losses_test = []
X_train = np.reshape(train_images, (train_images.shape[0], train_images.shape[1]*train_images.shape[2]))
X_test = np.reshape(test_images, (test_images.shape[0], train_images.shape[1]*train_images.shape[2]))
Y_train = np.zeros((train_labels.shape[0], K))
Y_train[np.arange(Y_train.shape[0]), train_labels] = 1
Y_test = np.zeros((test_labels.shape[0], K))
Y_test[np.arange(Y_test.shape[0]), test_labels] = 1
W.append(np.random.normal(0, 0.01, (X_train.shape[1]+1, node_per_hidden_layer)))
for i in range(L-1):
W.append(np.random.normal(0, 0.01, (node_per_hidden_layer+1, node_per_hidden_layer)))
W.append(np.random.normal(0, 0.01, (node_per_hidden_layer+1, K)))
Helper function:
def softmax(z):
exp = np.exp(z - z.max(1)[:,np.newaxis])
return np.array(exp / exp.sum(1)[:,np.newaxis])
def softmax_derivative(z):
sm = softmax(z)
return sm * (1-sm)
def ReLU(z):
return np.maximum(z, 0)
def ReLU_derivative(z):
return (z >= 0).astype(int)
def get_loss(y, y_pred):
return -np.sum(y * np.log(y_pred))
def fit():
minibatch_size = len(X_train) // nb_batches
for epoch in range(nb_epochs):
permutaion = list(np.random.permutation(X_train.shape[0]))
X_shuffle = X_train[permutaion]
Y_shuffle = Y_train[permutaion]
print("Epoch----------------", epoch)
for batche in range(0, X_shuffle.shape[0], minibatch_size):
Z = [None] * (L + 2)
a = [None] * (L + 2)
delta = [None] * (L + 2)
X = X_train[batche:batche+minibatch_size]
Y = Y_shuffle[batche:batche+minibatch_size]
### forward propagation
a[0] = np.append(X, np.ones((minibatch_size, 1)), axis=1)
for i in range(L):
Z[i + 1] = a[i] # W[i]
a[i + 1] = np.append(ReLU(Z[i+1]), np.ones((minibatch_size, 1), dtype=int), axis=1)
Z[-1] = a[L] # W[L]
a[-1] = softmax(Z[-1])
### back propagation
delta[-1] = (Y - a[-1]) * softmax_derivative(Z[-1])
for i in range(L, 0, -1):
delta[i] = (delta[i+1] # W[i].T)[:,:-1] * ReLU_derivative(Z[i])
for i in range(len(W)):
g = a[i].T # delta[i+1] / minibatch_size
W[i] = W[i] + lr * g
def get_loss_on_test():
Z_test = [None] * (L + 2)
a_test = [None] * (L + 2)
a_test[0] = np.append(X_test, np.ones((len(X_test), 1)), axis=1)
for i in range(L):
Z_test[i + 1] = a_test[i] # W[i]
a_test[i + 1] = np.append(ReLU(Z_test[i+1]), np.ones((len(X_test), 1)), axis=1)
Z_test[-1] = a_test[L] # W[L]
a_test[-1] = softmax(Z_test[-1])
losses_test.append(get_loss(Y_test, a_test[-1]))
If you want to see it in my notebook with an example of losses graph, here the link:
If you want more details on my assignment, this is part 1b (page 2 for english):

Creating a Neural Network layer has sum of N tf.exp function in Tensorflow

I want to create a function f which will take input x and gives output y as: y = f(x)
f(x) = c1 exp( (x-m1) /l1 ) + c2 exp( (x-m2) /l2 ) + ......
here, x is input tensor. cn, mn and ln (n=1 to N) are Tensorflow scalar variables (N*3 variables).
Solution 1) Explicit broadcasting then reduction:
tiling = [1] * len(x.get_shape().as_list()) + [n]
res = tf.reduce_sum(
c * tf.exp((tf.tile(tf.expand_dims(x, axis=-1), tiling) - m) / l),
Solution 2) Looping over n:
condition = lambda i, y: i < n
operation = lambda i, y: [i+1, y + c[i] * tf.exp((x - m[i]) / l[i])]
_, res = tf.while_loop(condition, operation,
loop_vars=[0, tf.zeros_like(x)],
shape_invariants=[tf.TensorShape([]), x.get_shape()])
import tensorflow as tf
import numpy as np
# Inputs for example:
x_val = np.random.rand(3, 2)
n = 3
c_val, m_val, l_val = np.random.rand(3, n)
x = tf.constant(x_val)
c, m, l = tf.constant(c_val), tf.constant(m_val), tf.constant(l_val)
# Getting numpy result for comparison:
res = np.sum([c_val[i] * np.exp((x_val - m_val[i]) / l_val[i]) for i in range(n)], axis=0)
# [[ 2.55195594 0.42834575]
# [ 0.29125215 0.29025419]
# [ 0.74048059 1.63411303]]
# Solution 1:
tiling = [1] * len(x.get_shape().as_list()) + [n]
res_broad = tf.reduce_sum(c * tf.exp((tf.tile(tf.expand_dims(x, axis=-1), tiling) - m) / l),
# Solution 2:
condition = lambda i, y: i < n
operation = lambda i, y: [i+1, y + c[i] * tf.exp((x - m[i]) / l[i])]
_, res_loop = tf.while_loop(condition, operation,
loop_vars=[0, tf.zeros_like(x)],
shape_invariants=[tf.TensorShape([]), x.get_shape()])
with tf.Session() as sess:
# [[2.55195594 0.42834575]
# [0.29125215 0.29025419]
# [0.74048059 1.63411303]]
# [[2.55195594 0.42834575]
# [0.29125215 0.29025419]
# [0.74048059 1.63411303]]