incompatible array types are mixed in the forward input (LinearFunction) in machine learning - pandas

I have trained a deep Q-Learning model using Chanier:
class Q_Network (chainer.Chain):
def __init__(self, input_size, hidden_size, output_size):
super (Q_Network, self).__init__ (
fc1=L.Linear (input_size, hidden_size),
fc2=L.Linear (hidden_size, hidden_size),
fc3=L.Linear (hidden_size, output_size)
)
def __call__(self, x):
h = F.relu (self.fc1 (x))
h = F.relu (self.fc2 (h))
y = self.fc3 (h)
return y
def reset(self):
self.zerograds ()
def train_dqn(env):
Q = Q_Network (input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = copy.deepcopy (Q)
optimizer = chainer.optimizers.Adam ()
optimizer.setup (Q)
epoch_num = 50
step_max = len (env.data) - 1
memory_size = 200
batch_size = 20
# epsilon = 1.0
epsilon = 0.9
epsilon_decrease = 1e-3
epsilon_min = 0.1
start_reduce_epsilon = 200
train_freq = 10
update_q_freq = 20
# gamma = 0.97
gamma = 0.9
show_log_freq = 5
memory = []
total_step = 0
total_rewards = []
total_losses = []
start = time.time ()
for epoch in range (epoch_num):
pobs = env.reset ()
step = 0
done = False
total_reward = 0
total_loss = 0
while not done and step < step_max:
# select act
pact = np.random.randint (3)
if np.random.rand () > epsilon:
pact = Q (np.array (pobs, dtype=np.float32).reshape (1, -1))
pact = np.argmax (pact.data)
# act
obs, reward, done = env.step (pact)
# add memory
memory.append ((pobs, pact, reward, obs, done))
if len (memory) > memory_size:
memory.pop (0)
# train or update q
if len (memory) == memory_size:
if total_step % train_freq == 0:
shuffled_memory = np.random.permutation (memory)
memory_idx = range (len (shuffled_memory))
for i in memory_idx[::batch_size]:
batch = np.array (shuffled_memory[i:i + batch_size])
b_pobs = np.array (batch[:, 0].tolist (), dtype=np.float32).reshape (batch_size, -1)
b_pact = np.array (batch[:, 1].tolist (), dtype=np.int32)
b_reward = np.array (batch[:, 2].tolist (), dtype=np.int32)
b_obs = np.array (batch[:, 3].tolist (), dtype=np.float32).reshape (batch_size, -1)
b_done = np.array (batch[:, 4].tolist (), dtype=np.bool)
q = Q (b_pobs)
maxq = np.max (Q_ast (b_obs).data, axis=1)
target = copy.deepcopy (q.data)
for j in range (batch_size):
target[j, b_pact[j]] = b_reward[j] + gamma * maxq[j] * (not b_done[j])
Q.reset ()
loss = F.mean_squared_error (q, target)
total_loss += loss.data
loss.backward ()
optimizer.update ()
if total_step % update_q_freq == 0:
Q_ast = copy.deepcopy (Q)
# epsilon
if epsilon > epsilon_min and total_step > start_reduce_epsilon:
epsilon -= epsilon_decrease
# next step
total_reward += reward
pobs = obs
step += 1
total_step += 1
total_rewards.append (total_reward)
total_losses.append (total_loss)
if (epoch + 1) % show_log_freq == 0:
log_reward = sum (total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
log_loss = sum (total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
elapsed_time = time.time () - start
print ('\t'.join (map (str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
start = time.time ()
return Q, total_losses, total_rewards
if __name__ == "__main__":
Q, total_losses, total_rewards = train_dqn (Environment1 (train))
serializers.save_npz(r'C:\Users\willi\Desktop\dqn\dqn.model', Q)
After saved the model,I call the model again and feed data in it to let it predict:
load model:
model = Q_Network (input_size=91, hidden_size=100, output_size=3)
serializers.load_npz(r'C:\Users\willi\Desktop\dqn\dqn.model', model)
feed one row dataļ¼š
data = pd.read_csv (r'C:\Users\willi\Downloads\spyv.csv')
the data is looks like:
open high low close volume datetime
0 236.250 239.01 236.22 238.205 2327395 30600
1 238.205 240.47 238.00 239.920 1506096 30660
2 239.955 240.30 238.85 239.700 1357531 30720
3 239.690 243.33 239.66 241.650 1265604 30780
4 241.570 242.13 240.20 240.490 896000 30840
Now predict:
x = data.iloc[1].to_numpy()
y = model(x)
But the error says:
IndexError: tuple index out of range
The full error is:
IndexError Traceback (most recent call last)
<ipython-input-7-b745008aa965> in <module>
64
65 x = data.iloc[1].to_numpy()
---> 66 y = Q(x)
67
68
~\ddqn.ipynb in __call__(self, x)
~\Anaconda3\lib\site-packages\chainer\link.py in __call__(self, *args, **kwargs)
285 # forward is implemented in the child classes
286 forward = self.forward # type: ignore
--> 287 out = forward(*args, **kwargs)
288
289 # Call forward_postprocess hook
~\Anaconda3\lib\site-packages\chainer\links\connection\linear.py in forward(self, x, n_batch_axes)
181 in_size = utils.size_of_shape(x.shape[n_batch_axes:])
182 self._initialize_params(in_size)
--> 183 return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
~\Anaconda3\lib\site-packages\chainer\functions\connection\linear.py in linear(x, W, b, n_batch_axes)
306 args = x, W, b
307
--> 308 y, = LinearFunction().apply(args)
309 if n_batch_axes > 1:
310 y = y.reshape(batch_shape + (-1,))
~\Anaconda3\lib\site-packages\chainer\function_node.py in apply(self, inputs)
305
306 if configuration.config.type_check:
--> 307 self._check_data_type_forward(in_data)
308
309 self.check_layout_forward(input_vars)
~\Anaconda3\lib\site-packages\chainer\function_node.py in _check_data_type_forward(self, in_data)
444 try:
445 with type_check.light_mode:
--> 446 self.check_type_forward(in_type)
447 return
448 except type_check.InvalidType:
~\Anaconda3\lib\site-packages\chainer\functions\connection\linear.py in check_type_forward(self, in_types)
27 x_type.ndim == 2,
28 w_type.ndim == 2,
---> 29 x_type.shape[1] == w_type.shape[1],
30 )
31 if type_check.eval(n_in) == 3:
IndexError: tuple index out of range

TypeError: incompatible array types are mixed in the forward input (LinearFunction).
Actual: <class 'pandas.core.frame.DataFrame'>, <class 'numpy.ndarray'>, <class 'numpy.ndarray'>
The error says that your input is a pandas.core.frame.DataFrame while your model parameters are numpy.ndarray.
You need to convert your pandas dataframe data to numpy using .to_numpy().
Probably you will face other issues with the format of the data and you will need to manipulate it to match your training examples.
x = data.iloc[1].to_numpy()
y = model(x)

Related

ValueError: The two structures don't have the same sequence length. Input structure has length 1, while shallow structure has length 2

What is the solution to the following error in tensorflow.
ValueError: The two structures don't have the same sequence length.
Input structure has length 1, while shallow structure has length 2.
I tried tensorflow versions: 2.9.1 and 2.4.0.
The toy example is given to reproduce the error.
import tensorflow as tf
d1 = tf.data.Dataset.range(10)
d1 = d1.map(lambda x:tf.cast([x], tf.float32))
def func1(x):
y1 = 2.0 * x
y2 = -3.0 * x
return tuple([y1, y2])
d2 = d1.map(lambda x: tf.py_function(func1, [x], [tf.float32, tf.float32]))
d3 = d2.padded_batch(3, padded_shapes=(None,))
for x, y in d2.as_numpy_iterator():
pass
The full error is:
ValueError Traceback (most recent call last)
~/Documents/pythonProject/tfProjects/asr/transformer/dataset.py in <module>
256 return tuple([y1, y2])
257 d2 = d1.map(lambda x: tf.py_function(func1, [x], [tf.float32, tf.float32]))
---> 258 d3 = d2.padded_batch(3, padded_shapes=(None,))
259 for x, y in d2.as_numpy_iterator():
260 pass
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/ops/dataset_ops.py in padded_batch(self, batch_size, padded_shapes, padding_values, drop_remainder, name)
1887 padding_values,
1888 drop_remainder,
-> 1889 name=name)
1890
1891 def map(self,
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, batch_size, padded_shapes, padding_values, drop_remainder, name)
5171
5172 input_shapes = get_legacy_output_shapes(input_dataset)
-> 5173 flat_padded_shapes = nest.flatten_up_to(input_shapes, padded_shapes)
5174
5175 flat_padded_shapes_as_tensors = []
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/util/nest.py in flatten_up_to(shallow_tree, input_tree)
377 `input_tree`.
378 """
--> 379 assert_shallow_structure(shallow_tree, input_tree)
380 return list(_yield_flat_up_to(shallow_tree, input_tree))
381
~/miniconda3/envs/jtf2/lib/python3.7/site-packages/tensorflow/python/data/util/nest.py in assert_shallow_structure(shallow_tree, input_tree, check_types)
290 if len(input_tree) != len(shallow_tree):
291 raise ValueError(
--> 292 "The two structures don't have the same sequence length. Input "
293 f"structure has length {len(input_tree)}, while shallow structure "
294 f"has length {len(shallow_tree)}.")
ValueError: The two structures don't have the same sequence length. Input structure has length 1, while shallow structure has length 2.
The following modification in padded_shapes argument will resolve the error.
import tensorflow as tf
d1 = tf.data.Dataset.range(10)
d1 = d1.map(lambda x:tf.cast([x], tf.float32))
def func1(x):
y1 = 2.0 * x
y2 = -3.0 * x
return tuple([y1, y2])
d2 = d1.map(lambda x: tf.py_function(func1, [x], [tf.float32, tf.float32]))
d3 = d2.padded_batch(3, padded_shapes=([None],[None]))
for x, y in d2.as_numpy_iterator():
pass

How to fix type error: Caught TypeError in DataLoader worker process 1

I got a TypeError while training my model:
enter image description here
here is my data preprocessing code:
class CriteoDatasetOtherSplit(torch.utils.data.Dataset):
"""
Criteo Display Advertising Challenge Dataset
Data prepration:
* Remove the infrequent features (appearing in less than threshold instances) and treat them as a single feature
* Discretize numerical values by log2 transformation which is proposed by the winner of Criteo Competition
:param dataset_path: criteo train.txt path.
:param cache_path: lmdb cache path.
:param rebuild_cache: If True, lmdb cache is refreshed.
:param min_threshold: infrequent feature threshold.
Reference:
https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset
https://www.csie.ntu.edu.tw/~r01922136/kaggle-2014-criteo.pdf
"""
def __init__(self, dataset_path=None, cache_path='./criteo', rebuild_cache=False, min_threshold=8):
self.NUM_FEATS = 39
self.NUM_INT_FEATS = 13
self.min_threshold = min_threshold
if rebuild_cache or not Path(cache_path).exists():
shutil.rmtree(cache_path, ignore_errors=True)
if dataset_path is None:
raise ValueError('create cache: failed: dataset_path is None')
self.__build_cache(dataset_path, cache_path)
self.env = lmdb.open(cache_path, create=False, lock=False, readonly=True)
with self.env.begin(write=False) as txn:
self.length = txn.stat()['entries'] - 1
self.field_dims = np.frombuffer(txn.get(b'field_dims'), dtype=np.uint32)
self.other_dims = np.frombuffer(txn.get(b'other_dims'), dtype=np.uint32)
def __getitem__(self, index):
with self.env.begin(write=False) as txn:
np_array = np.frombuffer(
txn.get(struct.pack('>I', index)), dtype=np.uint32).astype(dtype=np.long)
return np_array[1:], np_array[0]
def __len__(self):
return self.length
def __build_cache(self, path, cache_path):
feat_mapper, other_feat_mapper, defaults = self.__get_feat_mapper(path)
with lmdb.open(cache_path, map_size=int(1e11)) as env:
field_dims = np.zeros(self.NUM_FEATS, dtype=np.uint32)
other_dims = np.zeros(self.NUM_FEATS, dtype=np.uint32)
for i, fm in other_feat_mapper.items():
other_dims[i - 1] = len(fm)
for i, fm in feat_mapper.items():
field_dims[i - 1] = len(fm) + other_dims[i - 1]
with env.begin(write=True) as txn:
txn.put(b'field_dims', field_dims.tobytes())
txn.put(b'other_dims', other_dims.tobytes())
for buffer in self.__yield_buffer(path, feat_mapper, other_feat_mapper, defaults):
with env.begin(write=True) as txn:
for key, value in buffer:
txn.put(key, value)
def __get_feat_mapper(self, path):
feat_cnts = defaultdict(lambda: defaultdict(int))
with open(path) as f:
pbar = tqdm(f, mininterval=1, smoothing=0.1)
pbar.set_description('Create criteo dataset cache: counting features')
for line in pbar:
values = line.rstrip('\n').split('\t')
if len(values) != self.NUM_FEATS + 1:
continue
for i in range(1, self.NUM_INT_FEATS + 1):
feat_cnts[i][convert_numeric_feature(values[i])] += 1
for i in range(self.NUM_INT_FEATS + 1, self.NUM_FEATS + 1):
feat_cnts[i][values[i]] += 1
feat_mapper = {i: {feat for feat, c in cnt.items() if c >= self.min_threshold} for i, cnt in feat_cnts.items()}
other_feat_mapper = {i: {feat for feat, c in cnt.items() if c < self.min_threshold} for i, cnt in feat_cnts.items()}
feat_mapper = {i: {feat: idx for idx, feat in enumerate(cnt)} for i, cnt in feat_mapper.items()}
other_feat_mapper = {i: {feat: idx for idx, feat in enumerate(cnt)} for i, cnt in other_feat_mapper.items()}
defaults = {i: len(cnt) for i, cnt in feat_mapper.items()}
return feat_mapper, other_feat_mapper, defaults
def __yield_buffer(self, path, feat_mapper, other_feat_mapper, defaults, buffer_size=int(1e5)):
item_idx = 0
buffer = list()
with open(path) as f:
pbar = tqdm(f, mininterval=1, smoothing=0.1)
pbar.set_description('Create criteo dataset cache: setup lmdb')
for line in pbar:
values = line.rstrip('\n').split('\t')
if len(values) != self.NUM_FEATS + 1:
continue
np_array = np.zeros(self.NUM_FEATS + 1, dtype=np.uint32)
np_array[0] = int(values[0])
for i in range(1, self.NUM_INT_FEATS + 1):
other_feat_mapper[i].setdefault(convert_numeric_feature(values[i]), 0)
np_array[i] = feat_mapper[i].get(convert_numeric_feature(values[i]),
other_feat_mapper[i][convert_numeric_feature(values[i])]+defaults[i])
for i in range(self.NUM_INT_FEATS + 1, self.NUM_FEATS + 1):
other_feat_mapper[i].setdefault(values[i], 0)
np_array[i] = feat_mapper[i].get(values[i], other_feat_mapper[i][values[i]]+defaults[i])
buffer.append((struct.pack('>I', item_idx), np_array.tobytes()))
item_idx += 1
if item_idx % buffer_size == 0:
yield buffer
buffer.clear()
yield buffer
#lru_cache(maxsize=None)
def convert_numeric_feature(val: str):
if val == '':
return 'NULL'
v = int(val)
if v > 2:
return str(int(math.log(v) ** 2))
else:
return str(v - 2)

Error when trying to implement mAP as metrics in yolov1 training using tensorflow

I am trying to implement mAP as the main metric for yolov1 training. It ran fine for several epochs and was able to give the mAP value along with its loss for each batch. but after several epochs, it would crash, and I can't figure out what was wrong.
This is the error code that I got:
InvalidArgumentError: in user code:
C:\Users\DeepLab\AppData\Local\Temp/ipykernel_11432/1408655327.py:105 mean_average_precision *
if iou > best_iou:
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py:1172 if_stmt
_tf_if_stmt(cond, body, orelse, get_state, set_state, symbol_names, nouts)
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py:1180 _tf_if_stmt
cond = _verify_tf_condition(cond, 'if statement')
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py:139 _verify_tf_condition
cond = array_ops.reshape(cond, ())
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\ops\array_ops.py:196 reshape
result = gen_array_ops.reshape(tensor, shape, name)
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\ops\gen_array_ops.py:8397 reshape
return reshape_eager_fallback(
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\ops\gen_array_ops.py:8422 reshape_eager_fallback
_result = _execute.execute(b"Reshape", 1, inputs=_inputs_flat, attrs=_attrs,
C:\Users\DeepLab\anaconda3\envs\GPU\lib\site-packages\tensorflow\python\eager\execute.py:59 quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
InvalidArgumentError: Input to reshape is a tensor with 0 values, but the requested shape has 1 [Op:Reshape]
For calculating mAP, I use these functions:
intersection_over_union used to return iou in tensor type
convert_cellboxes used to return the label value measured from the shape of the image
cellboxes_to_boxes used to return the list of lists containing 6 values (class_idx, confident, x, y, w, h)
non_max_suppression used to return the filtered version of cellboxes_to_boxes output
get_bboxes used to return a list containing 7 value, img_idx, class_idx, confident, x, y, w, h). It will be used as an input to calculate mAP.
mean_average_precisions is used to calculate mAP.
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
if box_format == "midpoint":
box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2 ## ==> x - w / 2 for each grid in each image
box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2 ## ==> y - h / 2 for each grid in each image
box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2 ## ==> x + w / 2 for each grid in each image
box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2 ## ==> y + h / 2 for each grid in each image
box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
if box_format == "corners":
box1_x1 = boxes_preds[..., 0:1]
box1_y1 = boxes_preds[..., 1:2]
box1_x2 = boxes_preds[..., 2:3]
box1_y2 = boxes_preds[..., 3:4] # (N, 1)
box2_x1 = boxes_labels[..., 0:1]
box2_y1 = boxes_labels[..., 1:2]
box2_x2 = boxes_labels[..., 2:3]
box2_y2 = boxes_labels[..., 3:4]
x1 = K.max((box1_x1, box2_x1))
y1 = K.max((box1_y1, box2_y1))
x2 = K.min((box1_x2, box2_x2))
y2 = K.min((box1_y2, box2_y2))
intersection = K.clip((x2-x1), min_value=0, max_value=abs(x2-x1)) * K.clip((y2-y1), min_value=0, max_value=abs(y2-y1))
#intersection = 2
box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
return intersection / (box1_area + box2_area - intersection + 1e-6)
def convert_cellboxes(predictions, S=7): #array (n, 7, 7, 30) (n, 7 x 7, 30)
#batch_size = predictions.shape[0]
try:
n = batch_size
predictions = K.reshape(predictions, (n, 7, 7, 30))
except:
n = len(X_val)%batch_size
predictions = K.reshape(predictions, (n, 7, 7, 30))
bboxes1 = predictions[..., 21:25]
bboxes2 = predictions[..., 26:30]
scores = tf.concat(
(tf.expand_dims(predictions[..., 20], 0), tf.expand_dims(predictions[..., 25], 0)), axis=0 #(1, 7, 7, 2)
) ## (n, 7, 7, 2)
best_box = tf.expand_dims(K.argmax(scores, 0), -1)
#print(best_box)
best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2 ##(7, 7, 4)
cell_indices = tf.expand_dims(tf.tile(tf.range(start=0, limit=7, delta=1), (7,)), -1) # (49, 1) (1, 7, 7, 1)
cell_indices = tf.repeat(tf.reshape(cell_indices, (1, 7, 7, 1)), n, 0) ## reshape from (49, 1) to (n, 7, 7, 1)
best_boxes = tf.cast(best_boxes, tf.float32)
cell_indices = tf.cast(cell_indices, tf.float32)
x = 1 / S * (best_boxes[..., :1] + cell_indices)
y = 1 / S * (best_boxes[..., 1:2] + K.permute_dimensions(cell_indices, (0, 2, 1, 3)))
w_h = 1 / S * best_boxes[..., 2:4]
converted_bboxes = tf.concat((x, y, w_h), axis=-1) # dimensi terakhir = 4
predicted_class = tf.expand_dims(K.argmax(predictions[..., :20], -1), -1) #n, 7, 7, 1
best_confidence = tf.expand_dims(K.max((predictions[..., 20], predictions[..., 25]), 0), -1)
predicted_class = tf.cast(predicted_class, tf.float32)
best_confidence = tf.cast(best_confidence, tf.float32)
#print(predicted_class.shape)
#print(best_confidence.shape)
#print(converted_bboxes.shape)
converted_preds = tf.concat(
(predicted_class, best_confidence, converted_bboxes), -1 # n, 7, 7, 6
)
#print(converted_preds.shape)
return converted_preds
def cellboxes_to_boxes(out, S=7):
try:
n = batch_size
converted_pred = K.reshape(convert_cellboxes(out), (n, S * S, -1)) # (n, 49, 6)
except:
n = len(X_val)%batch_size
converted_pred = K.reshape(convert_cellboxes(out), (n, S * S, -1)) # (n, 49, 6)
#print(converted_pred.shape)
converted_pred = converted_pred.numpy() # mode graph
all_bboxes = []
for ex_idx in range(out.shape[0]):
bboxes = []
for bbox_idx in range(S * S):
bboxes.append([x for x in converted_pred[ex_idx, bbox_idx, :]])
all_bboxes.append(bboxes)
return all_bboxes
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="midpoint"):
#bboxes = bboxes[0]
#print(bboxes[:2])
#for i, box in enumerate(bboxes):
# bboxes[i][4:6] = box[4:6] * 7
#print(bboxes[:2])
assert type(bboxes) == list
bboxes = [box for box in bboxes if box[1] > threshold]
bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
bboxes_after_nms = []
while bboxes:
chosen_box = bboxes.pop(0)
bboxes = [
box # (6)
for box in bboxes
if box[0] != chosen_box[0]
or intersection_over_union(
tf.constant(box[2:]),
tf.constant(chosen_box[2:]),
box_format=box_format,
)
< iou_threshold
]
bboxes_after_nms.append(chosen_box)
return bboxes_after_nms
def get_bboxes(gt_labels, pred_labels, iou_threshold, threshold, box_format="midpoint"):
"""
return:
images_pred_boxes = list with each element in this format (image_idx, class_prediction, prob_score, x, y, w, h)
images_gt_boxes = list with each element in this format (image_idx, class, prob_score, x, y, w, h)
"""
images_pred_boxes = []
images_gt_boxes = []
#pred_labels = model.predict(images) # data training, validation, testing
image_idx = 0
gt_boxes = cellboxes_to_boxes(gt_labels)
pred_boxes = cellboxes_to_boxes(pred_labels)
for i in range(len(gt_labels)):
pred_box_nms = non_max_suppression(pred_boxes[i], iou_threshold, threshold, box_format="midpoint")
for nms_box in pred_box_nms:
images_pred_boxes.append([image_idx] + nms_box)
for box in gt_boxes[i]:
if box[1] > threshold:
images_gt_boxes.append([image_idx] + box)
image_idx += 1
#print(images_pred_boxes[:10])
#print(images_gt_boxes[:10])
return images_pred_boxes, images_gt_boxes
def mean_average_precision(
y_true, y_pred, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
pred_boxes, true_boxes = get_bboxes(y_true, y_pred, iou_threshold=0.6, threshold=0.3, box_format="midpoint")
# list storing all AP for respective classes
average_precisions = []
# used for numerical stability later on
epsilon = 1e-6
for c in range(num_classes):
detections = []
ground_truths = []
# Go through all predictions and targets,
# and only add the ones that belong to the
# current class c
for detection in pred_boxes:
if detection[1] == c:
detections.append(detection)
for true_box in true_boxes:
if true_box[1] == c:
ground_truths.append(true_box)
# find the amount of bboxes for each training example
# Counter here finds how many ground truth bboxes we get
# for each training example, so let's say img 0 has 3,
# img 1 has 5 then we will obtain a dictionary with:
# amount_bboxes = {0:3, 1:5, ..., 20: 10}
amount_bboxes = Counter([gt[0] for gt in ground_truths])
# We then go through each key, val in this dictionary
# and convert to the following (w.r.t same example):
# amount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
for key, val in amount_bboxes.items():
amount_bboxes[key] = np.zeros(val)
# sort by box probabilities which is index 2
detections.sort(key=lambda x: x[2], reverse=True)
TP = np.zeros((len(detections)))
FP = np.zeros((len(detections)))
total_true_bboxes = len(ground_truths)
# If none exists for this class then we can safely skip
if total_true_bboxes == 0:
continue
for detection_idx, detection in enumerate(detections):
# Only take out the ground_truths that have the same
# training idx as detection
ground_truth_img = [
bbox for bbox in ground_truths if bbox[0] == detection[0]
]
num_gts = len(ground_truth_img) #
best_iou = 0
best_gt_idx = 0
iou = 0
for idx, gt in enumerate(ground_truth_img):
iou = intersection_over_union(
tf.constant(detection[3:]),
tf.constant(gt[3:]),
box_format=box_format,
)
if iou > best_iou:
best_iou = iou
best_gt_idx = idx
if best_iou > iou_threshold:
# only detect ground truth detection once
if amount_bboxes[detection[0]][best_gt_idx] == 0:
# true positive and add this bounding box to seen
TP[detection_idx] = 1
amount_bboxes[detection[0]][best_gt_idx] = 1
else:
FP[detection_idx] = 1
# if IOU is lower then the detection is a false positive
else:
FP[detection_idx] = 1
TP = tf.constant(TP)
FP = tf.constant(FP)
#print(TP)
#print(FP)
TP_cumsum = tf.cumsum(TP, axis=0)
FP_cumsum = tf.cumsum(FP, axis=0)
recalls = TP_cumsum / (total_true_bboxes + epsilon)
precisions = tf.math.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
precisions = tf.concat((tf.cast(tf.constant([1]), precisions.dtype), precisions), axis=0)
recalls = tf.concat((tf.cast(tf.constant([0]), recalls.dtype), recalls), axis=0)
# torch.trapz for numerical integration
average_precisions.append(tfp.math.trapz(precisions, recalls))
return sum(average_precisions) / len(average_precisions)
for training, I used a standard model.fit with pascalvoc2007 as its dataset and a batch size of 4.

How to stop the iteration when the Jacobian reached to an arbitrary (small) value in Newton-CG method?

How to put a stopping condition on jacobian (or gradient) for Newton-CG methode?
I want the algorithme to stop when the jacobian reaches to 1e-2, is it possible to do with Newton-CG ??
input:
scipy.optimize.minimize(f, [5.0,1.0,2.0,5.0], args=Data, method='Newton-CG',jac=Jacf)
output:
jac: array([7.64265411e-08, 1.74985718e-08, 4.12408407e-07, 5.02972841e-08])
message: 'Optimization terminated successfully.'
nfev: 12
nhev: 0
nit: 11
njev: 68
status: 0
success: True
x: array([0.22545395, 0.3480084 , 1.06811724, 1.64873479])
in BFGS method, which is symilar to Newton-CG, there is a gtol option, it allows to stop the iteration when the gradient reaches to some value. But in Newton-CG theres no that type of option.
Does anyone know how to stop the iteration when the jacobien reaches to 1e-2.
Here are some details to reproduce my code:
def convert_line2matrix(a):
n = len(a)
if (np.sqrt(n) % 1 == 0) :
d = int(np.sqrt(n))
Mat = np.zeros((d,d))
for i in range(d):
for j in range(d):
Mat[i,j] = a[j+d*i]
else:
raise ValueError(f"{a} cant be converted into a (n x n) matrix. The array has {len(a)} elements, \n\t thus impossible to build a square matrix with {len(a)} elements.")
return Mat
def convert_matrix2line(Matrix):
result = []
dim = len(Matrix)
for i in range(dim):
for j in range(dim):
result.append(Matrix[i,j])
return np.array(result)
my_data = np.array([[0.21530249, 0.32450331, 0 ],
[0.1930605 , 0.31788079, 0 ],
[0.17793594, 0.31788079, 0 ],
[0.16459075, 0.31125828, 1 ],
[0.24822064, 0.31125828, 0 ],
[0.28647687, 0.32450331, 0 ],
[0.32829181, 0.31788079, 0 ],
[0.38879004, 0.32450331, 0 ],
[0.42882562, 0.32450331, 0 ],
[0.47419929, 0.32450331, 0 ],
[0.5044484 , 0.32450331, 0 ],
[0.1797153 , 0.31125828, 0 ],
[0.16548043, 0.31125828, 1 ],
[0.17793594, 0.29801325, 1 ],
[0.1930605 , 0.31788079, 0 ]])
Data = pd.DataFrame(my_data, columns=['X_1','X_2', 'Allum'])
def logLB(params,Data):
B = convert_line2matrix(params)
X = np.array(Data.iloc[:,:len(B)])
Y = np.array(Data.iloc[:,len(B)])
result = 0
n = len(Data)
BB = np.transpose(B) # B
for i in range(n):
if(1-np.exp(-X[i].T # BB # X[i]) > 0):
result += Y[i]*(-np.transpose(X[i]) # BB # X[i]) + (1 - Y[i])*np.log(1-np.exp(-X[i].T # BB # X[i]))
return result
def f(params, Data):
return -logLB(params, Data)
def dlogLB(params, Data):
B = convert_line2matrix(params)
X = np.array(Data.iloc[:,:len(B)])
Y = np.array(Data.iloc[:,len(B)])
BB = B.T # B
N = len(Data)
M = len(B)
Jacobian = np.zeros(np.shape(B))
for n in range(len(B)):
for m in range(len(B)):
result = 0
for c in range(N):
som = 0
for i in range(M):
som += X[c,m]*B[n,i]*X[c,i]
if (1 - np.exp(-X[c].T # BB # X[c]) > 0):
result += -2*Y[c]*som + (1-Y[c])*np.exp(-X[c].T # BB # X[c])*(2*som)/(1 - np.exp(-X[c].T # BB # X[c]))
Jacobian[n,m] = result
return convert_matrix2line(Jacobian)
def Jacf(params, Data):
return -dlogLB(params, Data)
I assume that you want to stop the optimizer as soon as the euclidian norm of the gradient reaches a specific value, which is exactly the meaning of the BFGS method's gtol option. Otherwise, it doesn't make any sense mathematically, since the evaluated gradient is a vector and thus can't be compared to a scalar value.
The Newton-CG method doesn't provide a similar option. However, you could use a simple callback that is called after each iteration and terminates the algorithm when the callback returns True. Unfortunately, you can only terminate the optimizer by a callback with the trust-constr method. For all other methods, the callback's return value is ignored, so it's very limited.
A possible hacky and ugly way to terminate the optimizer by the callback anyway would be raising an exception:
import numpy as np
from scipy.optimize import minimize
class Callback:
def __init__(self, eps, args, jac):
self.eps = eps
self.args = args
self.jac = jac
self.x = None
self.gtol = None
def __call__(self, xk):
self.x = xk
self.gtol = np.linalg.norm(self.jac(xk, *self.args))
if self.gtol <= self.eps:
raise Exception("Gradient norm is below threshold")
Here, xk is the current iterate, eps your desired tolerance, args a tuple containing your optional objective und gradient arguments and jac the gradient. Then, you can use it like this:
from scipy.optimize import minimize
cb = Callback(1.0e-1, (Data,), Jacf)
try:
res = minimize(f, [5.0,1.0,2.0,5.0], args=Data, method='Newton-CG',
jac=Jacf, callback=cb)
except:
x = cb.x
gtol = cb.gtol
print(f"gtol = {gtol:E}, x = {x}")
which yields
gtol = 5.515263E-02, x = [14.43322108 -5.18163542 0.22582261 -0.04859385]

TypeError when trying to make a loop creating artificial neural networks

I am working on an artifical neural network which I have created via subclassing.
The subclassing looks like this:
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import scipy.stats as si
import sympy as sy
from sympy.stats import Normal, cdf
from sympy import init_printing
class DGMNet(tf.keras.Model):
def __init__(self, n_layers, n_nodes, dimensions=1):
"""
Parameters:
- n_layers: number of layers
- n_nodes: number of nodes in (inner) layers
- dimensions: number of spacial dimensions
"""
super().__init__()
self.n_layers = n_layers
self.initial_layer = DenseLayer(dimensions + 1, n_nodes, activation="relu")
self.lstmlikelist = []
for _ in range(self.n_layers):
self.lstmlikelist.append(LSTMLikeLayer(dimensions + 1, n_nodes, activation="relu"))
self.final_layer = DenseLayer(n_nodes, 1, activation=None)
def call(self, t, x):
X = tf.concat([t,x], 1)
S = self.initial_layer.call(X)
for i in range(self.n_layers):
S = self.lstmlikelist[i].call({'S': S, 'X': X})
result = self.final_layer.call(S)
return result
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(DenseLayer, self).__init__()
self.n_inputs = n_inputs
self.n_outputs = n_outputs
self.W = self.add_weight(shape=(self.n_inputs, self.n_outputs),
initializer='random_normal',
trainable=True)
self.b = self.add_weight(shape=(1, self.n_outputs),
initializer='random_normal',
trainable=True)
self.activation = _get_function(activation)
def call(self, inputs):
S = tf.add(tf.matmul(inputs, self.W), self.b)
S = self.activation(S)
return S
class LSTMLikeLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(LSTMLikeLayer, self).__init__()
self.n_outputs = n_outputs
self.n_inputs = n_inputs
self.Uz = self.add_variable("Uz", shape=[self.n_inputs, self.n_outputs])
self.Ug = self.add_variable("Ug", shape=[self.n_inputs, self.n_outputs])
self.Ur = self.add_variable("Ur", shape=[self.n_inputs, self.n_outputs])
self.Uh = self.add_variable("Uh", shape=[self.n_inputs, self.n_outputs])
self.Wz = self.add_variable("Wz", shape=[self.n_outputs, self.n_outputs])
self.Wg = self.add_variable("Wg", shape=[self.n_outputs, self.n_outputs])
self.Wr = self.add_variable("Wr", shape=[self.n_outputs, self.n_outputs])
self.Wh = self.add_variable("Wh", shape=[self.n_outputs, self.n_outputs])
self.bz = self.add_variable("bz", shape=[1, self.n_outputs])
self.bg = self.add_variable("bg", shape=[1, self.n_outputs])
self.br = self.add_variable("br", shape=[1, self.n_outputs])
self.bh = self.add_variable("bh", shape=[1, self.n_outputs])
self.activation = _get_function(activation)
def call(self, inputs):
S = inputs['S']
X = inputs['X']
Z = self.activation(tf.add(tf.add(tf.matmul(X, self.Uz), tf.matmul(S, self.Wz)), self.bz))
G = self.activation(tf.add(tf.add(tf.matmul(X, self.Ug), tf.matmul(S, self.Wg)), self.bg))
R = self.activation(tf.add(tf.add(tf.matmul(X, self.Ur), tf.matmul(S, self.Wr)), self.br))
H = self.activation(tf.add(tf.add(tf.matmul(X, self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh))
Snew = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z, S))
return Snew
def _get_function(name):
f = None
if name == "tanh":
f = tf.nn.tanh
elif name == "sigmoid":
f = tf.nn.sigmoid
elif name == "relu":
f = tf.nn.relu
elif not name:
f = tf.identity
assert f is not None
return f
# Sampling
def sampler(N1, N2, N3):
np.random.seed(42)
# Sampler #1: PDE domain
t1 = np.random.uniform(low=T0,
high=T,
size=[N1,1])
s1 = np.random.uniform(low=S1,
high=S2,
size=[N1,1])
# Sampler #2: boundary condition
t2 = np.zeros(shape=(1, 1))
s2 = np.zeros(shape=(1, 1))
# Sampler #3: initial/terminal condition
t3 = T * np.ones((N3,1)) #Terminal condition
s3 = np.random.uniform(low=S1,
high=S2,
size=[N3,1])
return (t1, s1, t2, s2, t3, s3)
# Loss function
def loss(model, t1, x1, t2, x2, t3, x3):
# Loss term #1: PDE
V = model(t1, x1)
V_t = tf.gradients(V, t1)[0]
V_x = tf.gradients(V, x1)[0]
V_xx = tf.gradients(V_x, x1)[0]
f = V_t + r*x1*V_x + 0.5*sigma**2*x1**2*V_xx - r*V
L1 = tf.reduce_mean(tf.square(f))
# Loss term #2: boundary condition
#L2 = tf.reduce_mean(tf.square(V))
# Loss term #3: initial/terminal condition
L3 = tf.reduce_mean(tf.square(model(t3, x3) - tf.math.maximum(x3-K,0)))
return (L1, L3)
# B-S's analytical known solution
def analytical_solution(t, x):
#C = SN(d1) - Xe- rt N(d2)
#S: spot price
#K: strike price
#T: time to maturity
#r: interest rate
#sigma: volatility of underlying asset
d1 = (np.log(x / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
d2 = (np.log(x / K) + (r - 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
call = (x * si.norm.cdf(d1, 0.0, 1.0) - K * np.exp(-r * T) * si.norm.cdf(d2, 0.0, 1.0))
return call
# Set random seeds
np.random.seed(42)
tf.random.set_seed(42)
# Strike price
K = 0.5
# PDE parameters
r = 0.05 # Interest rate
sigma = 0.25 # Volatility
# Time limits
T0 = 0.0 + 1e-10 # Initial time
T = 1.0 # Terminal time
# Space limits
S1 = 0.0 + 1e-10 # Low boundary
S2 = 1.0 # High boundary
# Number of samples
NS_1 = 1000
NS_2 = 0
NS_3 = 100
t1, s1, t2, s2, t3, s3 = sampler(NS_1, NS_2, NS_3)
Now what I want to do is to iterate over different parameters and create a new ann for each iteration.
My plan was to do it in this way:
tf.compat.v1.disable_eager_execution()
t1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
volatility_list = [0.08]#[0.08, 0.16, 0.18, 0.2, 0.28]
stages_list = [10]#, 50, 100]
layers_list = [3]#, 5, 7]
npl_list = [3]#, 6, 9, 12, 15]
for sigma in volatility_list:
for st in stages_list:
for lay in layers_list:
for npl in npl_list:
# Neural Network definition
num_layers = lay
nodes_per_layer = npl
ann = DGMNet(num_layers, nodes_per_layer)
L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
loss_t = L1_t + L3_t
# Optimizer parameters
global_step = tf.Variable(1, trainable=False)
starter_learning_rate = 0.001
learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, global_step,
100000, 0.96, staircase=True)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_t)
# Training parameters
steps_per_sample = st
sampling_stages = 100#2000
# Plot tensors
tplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="tplot_t") # We name to recover it later
xplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="xplot_t")
vplot_t = tf.identity(ann(tplot_t, xplot_t), name="vplot_t") # Trick for naming the trained model
# Training data holders
sampling_stages_list = []
elapsed_time_list = []
loss_list = []
L1_list = []
L3_list = []
# Train network!!
init_op = tf.compat.v1.global_variables_initializer()
sess = tf.compat.v1.Session()
sess.run(init_op)
for i in range(sampling_stages):
t1, x1, t2, x2, t3, x3 = sampler(NS_1, NS_2, NS_3)
start_time = time.clock()
for _ in range(steps_per_sample):
loss, L1, L3, _ = sess.run([loss_t, L1_t, L3_t, optimizer],
feed_dict = {t1_t:t1, x1_t:x1, t2_t:t2, x2_t:x2, t3_t:t3, x3_t:x3})
end_time = time.clock()
elapsed_time = end_time - start_time
sampling_stages_list.append(i)
elapsed_time_list.append(elapsed_time)
loss_list.append(loss)
L1_list.append(L1)
L3_list.append(L3)
text = "Stage: {:04d}, Loss: {:e}, L1: {:e}, L3: {:e}, {:f} seconds".format(i, loss, L1, L3, elapsed_time)
print(text)
#goodness of fit
time_0 = 0
listofzeros = [time_0] * 100
prices_for_goodness = np.linspace(S1,S2, 100)
goodness_list = []
solution_goodness = analytical_solution(listofzeros, prices_for_goodness)
ttt = time_0*np.ones_like(prices_for_goodness.reshape(-1,1))
nn_goodness, = sess.run([vplot_t],
feed_dict={tplot_t:ttt, xplot_t:prices_for_goodness.reshape(-1,1)})
deviation_list = np.abs(solution_goodness - nn_goodness)/(T-T0)
print("{0:.2f}%".format(np.average(deviation_list)*100))
Unfortunately as soon as it ends the first iteration I get a TypeError that 'numpy.float32' object is not callable
Error Traceback:
TypeError Traceback (most recent call last)
<ipython-input-14-bb14643d0c42> in <module>()
10
11
---> 12 L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
13 loss_t = L1_t + L3_t
14
TypeError: 'numpy.float32' object is not callable
I guess that the problem is with the creation of the placeholders, however I am not sure how to solve it. Maybe one of you can help me
Thanks in advance!
Chris
Did you create a variable called 'loss'? It seems that the loss function is redefined by a variable with the same name, so then python tries to call that variable as a function.