Process finished with exit code 137 (interrupted by signal 9: SIGKILL) - pandas
I have added a large-scale key values pairs in python file which is about 20000 and i got below error after running a code.
Trace
(236167, 3)
Process finished with exit code 137 (interrupted by signal 9: SIGKILL)
File.py
import pandas as pd
import cupy.cuda as np
import itertools
from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc
import matplotlib.pyplot as plt
from tqdm import tqdm
np.Device(0).use()
# --------------------------
# Data set
# Ref: https://github.com/serengil/deepface/tree/master/tests/dataset
idendities = {
"AnneBancroft": [
"13859_AnneBancroft_25_f.jpg",
"13872_AnneBancroft_73_f.jpg",
"13864_AnneBancroft_31_f.jpg",
"13870_AnneBancroft_61_f.jpg",
"13844_AnneBancroft_67_f.jpg",
"13863_AnneBancroft_22_f.jpg",
"13869_AnneBancroft_72_f.jpg",
"13843_AnneBancroft_60_f.jpg",
"13868_AnneBancroft_71_f.jpg",
"13860_AnneBancroft_66_f.jpg",
"13853_AnneBancroft_49_f.jpg",
"13842_AnneBancroft_51_f.jpg",
"13874_AnneBancroft_73_f.jpg",
"13846_AnneBancroft_44_f.jpg",
"13871_AnneBancroft_35_f.jpg",
"13857_AnneBancroft_24_f.jpg",
"13850_AnneBancroft_53_f.jpg",
"13865_AnneBancroft_41_f.jpg",
"13862_AnneBancroft_46_f.jpg",
"13852_AnneBancroft_69_f.jpg",
"13866_AnneBancroft_68_f.jpg",
"13873_AnneBancroft_25_f.jpg",
"13861_AnneBancroft_23_f.jpg",
"13848_AnneBancroft_52_f.jpg",
"13847_AnneBancroft_33_f.jpg",
"13851_AnneBancroft_28_f.jpg",
"13856_AnneBancroft_25_f.jpg",
"13845_AnneBancroft_31_f.jpg",
"13867_AnneBancroft_70_f.jpg",
"13854_AnneBancroft_70_f.jpg",
"13849_AnneBancroft_61_f.jpg",
"13855_AnneBancroft_28_f.jpg",
"13858_AnneBancroft_22_f.jpg"
],
"RoseMarie": [
"9907_RoseMarie_82_f.jpg",
"9893_RoseMarie_35_f.jpg",
"9911_RoseMarie_88_f.jpg",
"9906_RoseMarie_80_f.jpg",
"9895_RoseMarie_40_f.jpg",
"9901_RoseMarie_57_f.jpg",
"9903_RoseMarie_77_f.jpg",
"9892_RoseMarie_30_f.jpg",
"9909_RoseMarie_85_f.jpg",
"9900_RoseMarie_52_f.jpg",
"9897_RoseMarie_44_f.jpg",
"9904_RoseMarie_78_f.jpg",
"9905_RoseMarie_79_f.jpg",
"9898_RoseMarie_46_f.jpg",
"9908_RoseMarie_83_f.jpg",
"9902_RoseMarie_70_f.jpg",
"9896_RoseMarie_42_f.jpg",
"9899_RoseMarie_50_f.jpg",
"9910_RoseMarie_87_f.jpg",
"9894_RoseMarie_37_f.jpg"
],
"BobDylan": [
"1665_BobDylan_35_m.jpg",
"1651_BobDylan_23_m.jpg",
"1663_BobDylan_33_m.jpg",
"1682_BobDylan_64_m.jpg",
"1678_BobDylan_56_m.jpg",
"1684_BobDylan_68_m.jpg",
"1686_BobDylan_72_m.jpg",
"1645_BobDylan_16_m.jpg",
"1664_BobDylan_34_m.jpg",
"1680_BobDylan_61_m.jpg",
"1674_BobDylan_47_m.jpg",
"1656_BobDylan_26_m.jpg",
"1658_BobDylan_28_m.jpg",
"1667_BobDylan_40_m.jpg",
"1673_BobDylan_46_m.jpg",
"1668_BobDylan_41_m.jpg",
"1657_BobDylan_27_m.jpg",
"1685_BobDylan_71_m.jpg",
"1647_BobDylan_19_m.jpg",
"1660_BobDylan_30_m.jpg",
"1679_BobDylan_57_m.jpg",
"1672_BobDylan_45_m.jpg",
"1666_BobDylan_37_m.jpg",
"1650_BobDylan_22_m.jpg",
"1683_BobDylan_66_m.jpg",
"1652_BobDylan_23_m.jpg",
"1654_BobDylan_24_m.jpg",
"1687_BobDylan_74_m.jpg",
"1649_BobDylan_21_m.jpg",
"1677_BobDylan_54_m.jpg",
"1659_BobDylan_29_m.jpg",
"1675_BobDylan_48_m.jpg",
"1662_BobDylan_32_m.jpg",
"1671_BobDylan_44_m.jpg",
"1669_BobDylan_42_m.jpg",
"1653_BobDylan_24_m.jpg",
"1648_BobDylan_20_m.jpg",
"1681_BobDylan_62_m.jpg",
"1661_BobDylan_31_m.jpg",
"1670_BobDylan_43_m.jpg",
"1655_BobDylan_25_m.jpg",
"1676_BobDylan_49_m.jpg",
"1646_BobDylan_18_m.jpg"
],
"LorneGreene": [
"8137_LorneGreene_25_m.jpg",
"8145_LorneGreene_48_m.jpg",
"8140_LorneGreene_38_m.jpg",
"8138_LorneGreene_28_m.jpg",
"8139_LorneGreene_33_m.jpg",
"8149_LorneGreene_52_m.jpg",
"8154_LorneGreene_58_m.jpg",
"8142_LorneGreene_44_m.jpg",
"8162_LorneGreene_68_m.jpg",
"8155_LorneGreene_61_m.jpg",
"8164_LorneGreene_71_m.jpg",
"8147_LorneGreene_50_m.jpg",
"8151_LorneGreene_54_m.jpg",
"8163_LorneGreene_70_m.jpg",
"8150_LorneGreene_53_m.jpg",
"8156_LorneGreene_62_m.jpg",
"8160_LorneGreene_66_m.jpg",
"8146_LorneGreene_49_m.jpg",
"8144_LorneGreene_46_m.jpg",
"8158_LorneGreene_64_m.jpg",
"8152_LorneGreene_55_m.jpg",
"8159_LorneGreene_65_m.jpg",
"8161_LorneGreene_67_m.jpg",
"8157_LorneGreene_63_m.jpg",
"8141_LorneGreene_43_m.jpg",
"8143_LorneGreene_45_m.jpg",
"8136_LorneGreene_18_m.jpg",
"8153_LorneGreene_57_m.jpg",
"8148_LorneGreene_51_m.jpg"
],
"LaurenBacall": [
"11540_LaurenBacall_26_f.jpg",
"11539_LaurenBacall_25_f.jpg",
"11547_LaurenBacall_45_f.jpg",
"11549_LaurenBacall_72_f.jpg",
"11534_LaurenBacall_20_f.jpg",
"11559_LaurenBacall_31_f.jpg",
"11545_LaurenBacall_35_f.jpg",
"11546_LaurenBacall_40_f.jpg",
"11563_LaurenBacall_64_f.jpg",
"11555_LaurenBacall_82_f.jpg",
"11541_LaurenBacall_31_f.jpg",
"11564_LaurenBacall_27_f.jpg",
"11561_LaurenBacall_57_f.jpg",
"11552_LaurenBacall_75_f.jpg",
"11556_LaurenBacall_83_f.jpg",
"11543_LaurenBacall_31_f.jpg",
"11533_LaurenBacall_19_f.jpg",
"11557_LaurenBacall_85_f.jpg",
"11544_LaurenBacall_34_f.jpg",
"11535_LaurenBacall_21_f.jpg",
"11565_LaurenBacall_26_f.jpg",
"11558_LaurenBacall_42_f.jpg",
"11531_LaurenBacall_28_f.jpg",
"11536_LaurenBacall_22_f.jpg",
"11562_LaurenBacall_46_f.jpg",
"11554_LaurenBacall_81_f.jpg",
"11542_LaurenBacall_31_f.jpg",
"11537_LaurenBacall_22_f.jpg",
"11560_LaurenBacall_56_f.jpg",
"11548_LaurenBacall_65_f.jpg",
"11550_LaurenBacall_73_f.jpg",
"11530_LaurenBacall_17_f.jpg",
"11532_LaurenBacall_18_f.jpg",
"11566_LaurenBacall_20_f.jpg",
"11551_LaurenBacall_77_f.jpg",
"11538_LaurenBacall_23_f.jpg",
"11553_LaurenBacall_80_f.jpg"
],
"SerenaWilliams": [
"16468_SerenaWilliams_32_f.jpg",
"16486_SerenaWilliams_32_f.jpg",
"16479_SerenaWilliams_25_f.jpg",
"16474_SerenaWilliams_18_f.jpg",
"16472_SerenaWilliams_21_f.jpg",
"16008_SerenaWilliams_36_f.jpg",
"16484_SerenaWilliams_31_f.jpg",
"16469_SerenaWilliams_31_f.jpg",
"16478_SerenaWilliams_24_f.jpg",
"16485_SerenaWilliams_32_f.jpg",
"16480_SerenaWilliams_26_f.jpg",
"16481_SerenaWilliams_27_f.jpg",
"16487_SerenaWilliams_33_f.jpg",
"16477_SerenaWilliams_23_f.jpg",
"16010_SerenaWilliams_34_f.jpg",
"16483_SerenaWilliams_30_f.jpg",
"16471_SerenaWilliams_29_f.jpg",
"16009_SerenaWilliams_35_f.jpg",
"16476_SerenaWilliams_20_f.jpg",
"16475_SerenaWilliams_19_f.jpg",
"16482_SerenaWilliams_28_f.jpg",
"16007_SerenaWilliams_36_f.jpg",
"16470_SerenaWilliams_35_f.jpg",
"16473_SerenaWilliams_24_f.jpg"
],
"JohnVernon": [
"6459_JohnVernon_49_m.jpg",
"6447_JohnVernon_33_m.jpg",
"6446_JohnVernon_32_m.jpg",
"6448_JohnVernon_34_m.jpg",
"6454_JohnVernon_40_m.jpg",
"6452_JohnVernon_38_m.jpg",
"6471_JohnVernon_71_m.jpg",
"6468_JohnVernon_60_m.jpg",
"6469_JohnVernon_63_m.jpg",
"6458_JohnVernon_47_m.jpg",
"6463_JohnVernon_53_m.jpg",
"6444_JohnVernon_30_m.jpg",
"6457_JohnVernon_46_m.jpg",
"6456_JohnVernon_42_m.jpg",
"6462_JohnVernon_52_m.jpg",
"6464_JohnVernon_54_m.jpg",
"6451_JohnVernon_37_m.jpg",
"6449_JohnVernon_35_m.jpg",
"6470_JohnVernon_67_m.jpg",
"6445_JohnVernon_31_m.jpg",
"6461_JohnVernon_51_m.jpg",
"6450_JohnVernon_36_m.jpg",
"6460_JohnVernon_50_m.jpg",
"6455_JohnVernon_41_m.jpg",
"6466_JohnVernon_57_m.jpg",
"6465_JohnVernon_56_m.jpg",
"6453_JohnVernon_39_m.jpg",
"6467_JohnVernon_58_m.jpg"
],
"JamesStewart": [
"8647_JamesStewart_45_m.jpg",
"8657_JamesStewart_29_m.jpg",
"8644_JamesStewart_32_m.jpg",
"8639_JamesStewart_28_m.jpg",
"8645_JamesStewart_38_m.jpg",
"8642_JamesStewart_31_m.jpg",
"8643_JamesStewart_32_m.jpg",
"8652_JamesStewart_69_m.jpg",
"8655_JamesStewart_32_m.jpg",
"8638_JamesStewart_26_m.jpg",
"8658_JamesStewart_41_m.jpg",
"8646_JamesStewart_40_m.jpg",
"8641_JamesStewart_31_m.jpg",
"8650_JamesStewart_65_m.jpg",
"8656_JamesStewart_32_m.jpg",
"8651_JamesStewart_68_m.jpg",
"8654_JamesStewart_34_m.jpg",
"8637_JamesStewart_86_m.jpg",
"8640_JamesStewart_30_m.jpg",
"8649_JamesStewart_52_m.jpg",
"8653_JamesStewart_41_m.jpg",
"8648_JamesStewart_51_m.jpg"
],
"JoanLeslie": [
"10177_JoanLeslie_35_f.jpg",
"10181_JoanLeslie_50_f.jpg",
"10182_JoanLeslie_59_f.jpg",
"10167_JoanLeslie_21_f.jpg",
"10184_JoanLeslie_77_f.jpg",
"10175_JoanLeslie_32_f.jpg",
"10170_JoanLeslie_25_f.jpg",
"10166_JoanLeslie_19_f.jpg",
"10188_JoanLeslie_83_f.jpg",
"10168_JoanLeslie_22_f.jpg",
"10174_JoanLeslie_30_f.jpg",
"10173_JoanLeslie_29_f.jpg",
"10165_JoanLeslie_17_f.jpg",
"10190_JoanLeslie_87_f.jpg",
"10171_JoanLeslie_26_f.jpg",
"10183_JoanLeslie_74_f.jpg",
"10163_JoanLeslie_13_f.jpg",
"10189_JoanLeslie_84_f.jpg",
"10172_JoanLeslie_28_f.jpg",
"10185_JoanLeslie_78_f.jpg",
"10187_JoanLeslie_81_f.jpg",
"10169_JoanLeslie_23_f.jpg",
"10164_JoanLeslie_16_f.jpg",
"10179_JoanLeslie_38_f.jpg",
"10180_JoanLeslie_45_f.jpg",
"10178_JoanLeslie_36_f.jpg",
"10176_JoanLeslie_33_f.jpg",
"10186_JoanLeslie_80_f.jpg"
],
"MelindaDillion": [
"12321_MelindaDillion_57_f.jpg",
"12310_MelindaDillion_41_f.jpg",
"12307_MelindaDillion_38_f.jpg",
"12304_MelindaDillion_30_f.jpg",
"12323_MelindaDillion_63_f.jpg",
"12314_MelindaDillion_45_f.jpg",
"12324_MelindaDillion_64_f.jpg",
"12327_MelindaDillion_70_f.jpg",
"12312_MelindaDillion_43_f.jpg",
"12306_MelindaDillion_37_f.jpg",
"12316_MelindaDillion_47_f.jpg",
"12319_MelindaDillion_54_f.jpg",
"12305_MelindaDillion_34_f.jpg",
"12325_MelindaDillion_66_f.jpg",
"12309_MelindaDillion_40_f.jpg",
"12313_MelindaDillion_44_f.jpg",
"12311_MelindaDillion_42_f.jpg",
"12326_MelindaDillion_68_f.jpg",
"12303_MelindaDillion_29_f.jpg",
"12320_MelindaDillion_55_f.jpg",
"12317_MelindaDillion_48_f.jpg",
"12315_MelindaDillion_46_f.jpg",
"12322_MelindaDillion_59_f.jpg",
"12308_MelindaDillion_39_f.jpg",
"12328_MelindaDillion_73_f.jpg",
"12318_MelindaDillion_50_f.jpg"
],
"StephenHawking": [
"1020_StephenHawking_65_m.jpg",
"1004_StephenHawking_43_m.jpg",
"1017_StephenHawking_65_m.jpg",
"1014_StephenHawking_67_m.jpg",
"1006_StephenHawking_36_m.jpg",
"1000_StephenHawking_1_m.jpg",
"1018_StephenHawking_66_m.jpg",
"1005_StephenHawking_23_m.jpg",
"1007_StephenHawking_43_m.jpg",
"1012_StephenHawking_67_m.jpg",
"1024_StephenHawking_54_m.jpg",
"1002_StephenHawking_15_m.jpg",
"1019_StephenHawking_53_m.jpg",
"1022_StephenHawking_48_m.jpg",
"1003_StephenHawking_21_m.jpg",
"1010_StephenHawking_62_m.jpg",
"1009_StephenHawking_46_m.jpg",
"1008_StephenHawking_43_m.jpg",
"1016_StephenHawking_53_m.jpg",
"1001_StephenHawking_3_m.jpg",
"1011_StephenHawking_64_m.jpg",
"1015_StephenHawking_40_m.jpg",
"1021_StephenHawking_64_m.jpg",
"1013_StephenHawking_67_m.jpg",
"1023_StephenHawking_45_m.jpg"
]
}
# --------------------------
# Positives
positives = []
for key, values in idendities.items():
# print(key)
for i in range(0, len(values) - 1):
for j in range(i + 1, len(values)):
# print(values[i], " and ", values[j])
positive = []
positive.append(values[i])
positive.append(values[j])
positives.append(positive)
positives = pd.DataFrame(positives, columns=["file_x", "file_y"])
positives["decision"] = "Yes"
print(positives.shape)
# --------------------------
# Negatives
samples_list = list(idendities.values())
negatives = []
for i in range(0, len(idendities) - 1):
for j in range(i + 1, len(idendities)):
# print(samples_list[i], " vs ",samples_list[j])
cross_product = itertools.product(samples_list[i], samples_list[j])
cross_product = list(cross_product)
# print(cross_product)
for cross_sample in cross_product:
# print(cross_sample[0], " vs ", cross_sample[1])
negative = []
negative.append(cross_sample[0])
negative.append(cross_sample[1])
negatives.append(negative)
negatives = pd.DataFrame(negatives, columns=["file_x", "file_y"])
negatives["decision"] = "No"
negatives = negatives.sample(positives.shape[0])
print(negatives.shape)
# --------------------------
# Merge positive and negative ones
df = pd.concat([positives, negatives]).reset_index(drop=True)
print(df.decision.value_counts())
df.file_x = "deepface/tests/dataset/" + df.file_x
df.file_y = "deepface/tests/dataset/" + df.file_y
# --------------------------
# DeepFace
from deepface import DeepFace
from deepface.basemodels import VGGFace, OpenFace, Facenet, FbDeepFace
pretrained_models = {}
pretrained_models["VGG-Face"] = VGGFace.loadModel()
print("VGG-Face loaded")
pretrained_models["Facenet"] = Facenet.loadModel()
print("Facenet loaded")
pretrained_models["OpenFace"] = OpenFace.loadModel()
print("OpenFace loaded")
pretrained_models["DeepFace"] = FbDeepFace.loadModel()
print("FbDeepFace loaded")
instances = df[["file_x", "file_y"]].values.tolist()
models = ['VGG-Face']
metrics = ['cosine']
if True:
for model in models:
for metric in metrics:
resp_obj = DeepFace.verify(instances
, model_name=model
, model=pretrained_models[model]
, distance_metric=metric)
distances = []
for i in range(0, len(instances)):
distance = round(resp_obj["pair_%s" % (i + 1)]["distance"], 4)
distances.append(distance)
df['%s_%s' % (model, metric)] = distances
df.to_csv("face-recognition-pivot.csv", index=False)
else:
df = pd.read_csv("face-recognition-pivot.csv")
df_raw = df.copy()
# --------------------------
# Distribution
fig = plt.figure(figsize=(15, 15))
figure_idx = 1
for model in models:
for metric in metrics:
feature = '%s_%s' % (model, metric)
ax1 = fig.add_subplot(4, 2, figure_idx)
df[df.decision == "Yes"][feature].plot(kind='kde', title=feature, label='Yes', legend=True)
df[df.decision == "No"][feature].plot(kind='kde', title=feature, label='No', legend=True)
figure_idx = figure_idx + 1
# plt.show()
# --------------------------
# Pre-processing for modelling
columns = []
for model in models:
for metric in metrics:
feature = '%s_%s' % (model, metric)
columns.append(feature)
columns.append("decision")
df = df[columns]
df.loc[df[df.decision == 'Yes'].index, 'decision'] = 1
df.loc[df[df.decision == 'No'].index, 'decision'] = 0
print(df.head())
# --------------------------
# Train test split
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(df, test_size=0.30, random_state=17)
target_name = "decision"
y_train = df_train[target_name].values
x_train = df_train.drop(columns=[target_name]).values
y_test = df_test[target_name].values
x_test = df_test.drop(columns=[target_name]).values
# --------------------------
# LightGBM
import lightgbm as lgb
features = df.drop(columns=[target_name]).columns.tolist()
lgb_train = lgb.Dataset(x_train, y_train, feature_name=features)
lgb_test = lgb.Dataset(x_test, y_test, feature_name=features)
params = {
'task': 'train'
, 'boosting_type': 'gbdt'
, 'objective': 'multiclass'
, 'num_class': 2
, 'metric': 'multi_logloss'
}
gbm = lgb.train(params, lgb_train, num_boost_round=250, early_stopping_rounds=15, valid_sets=lgb_test)
gbm.save_model("face-recognition-ensemble-model.txt")
# --------------------------
# Evaluation
predictions = gbm.predict(x_test)
predictions_classes = []
for i in predictions:
prediction_class = np.argmax(i)
predictions_classes.append(prediction_class)
cm = confusion_matrix(y_test, predictions_classes)
print(cm)
tn, fp, fn, tp = cm.ravel()
recall = tp / (tp + fn)
precision = tp / (tp + fp)
accuracy = (tp + tn) / (tn + fp + fn + tp)
f1 = 2 * (precision * recall) / (precision + recall)
print("Precision: ", 100 * precision, "%")
print("Recall: ", 100 * recall, "%")
print("F1 score ", 100 * f1, "%")
print("Accuracy: ", 100 * accuracy, "%")
# --------------------------
# Interpretability
ax = lgb.plot_importance(gbm, max_num_features=20)
# plt.show()
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
plt.rcParams["figure.figsize"] = [20, 20]
for i in range(0, gbm.num_trees()):
ax = lgb.plot_tree(gbm, tree_index=i)
# plt.show()
if i == 2:
break
# --------------------------
# ROC Curve
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
y_pred_proba = predictions[::, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
auc = roc_auc_score(y_test, y_pred_proba)
plt.figure(figsize=(7, 3))
lw = 2
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
fig.savefig('/home/khawar/deepface/tests/VGG-FACE_Cosine_ROC.png', dpi=fig.dpi)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('VGG Face')
plt.plot(fpr, tpr, label="ROC with Cosine auc=" + str(auc))
fig.savefig('/home/khawar/deepface/tests/VGG-FACE_Cosine_ROC_T_F.png', dpi=fig.dpi)
plt.legend(loc=4)
fig.savefig('/home/khawar/deepface/tests/VGG-FACE_Cosine.png', dpi=fig.dpi)
plt.show()
# --------------------------
Related
PPO: NaN Policy return in Tensorflow Keras
I am trying to implement the PPO algorithm with clipped loss in addition to KL penalties and run training on Mujuco Gym environments. After ~ 15000 gradient steps, policy collapses into returning NaN. These are the policy training info before the policy collapses: A: tf.Tensor(-0.10426917, shape=(), dtype=float32) LOG_A: tf.Tensor(37.021107, shape=(), dtype=float32) LOSS: tf.Tensor(0.16812761, shape=(), dtype=float32) GRAD: tf.Tensor( [[-3.4624012e-04 -1.2807851e-04 -1.9778654e-01 ... -2.7586846e+00 -1.2552655e-01 -1.7212760e-03] [ 4.6312678e-05 -2.2251482e-04 5.5088173e-03 ... 9.5249921e-02 2.2186586e-03 2.0080474e-04] [ 2.0314787e-05 -1.6381161e-04 7.1509695e-03 ... 1.1740552e-01 3.4010289e-03 1.2105847e-04] ... [ 1.7827883e-04 -1.1712313e-05 5.8873045e-01 ... 9.2354174e+00 2.9186043e-01 -2.2818900e-03] [-9.0385452e-05 3.0951984e-03 -3.6487404e-02 ... -2.6829168e-01 -3.9602429e-02 2.0654879e-03] [ 2.2925157e-04 4.6892464e-03 5.9946489e-01 ... 9.3497839e+00 3.0514282e-01 -1.3834883e-03]], shape=(11, 256), dtype=float32) A: tf.Tensor(nan, shape=(), dtype=float32) LOG_A: tf.Tensor(nan, shape=(), dtype=float32) Note: The gradient info captures only the gradients of the first layer, as I have found capturing all gradient info to be messy and seemingly redundant. What I have tried: Tuning hyperparameters: I have tried multiple sets of hyperparameters including the one documented in the original paper. The same error occurs(the hyperparams setup provided in the example below are chosen for higher sampling efficiency for faster debugging). Gradient clipping: Gradient norm has been clipped to be unitary, and as shown above, it does not appear to have the exploding gradient issue. Guaranteed numerical stability of tanh squashing of policy log probability: A small epsilon was used to clip the sum of squares so that action log probability does not return inf after tanh squashing. Unitized code example: import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers import gym import scipy.signal import time from tensorflow.keras import Model import matplotlib.pyplot as plt import random import tensorflow_probability as tfp tf.keras.backend.set_floatx('float32') EPSILON = 1e-10 ################## GLOBAL SETUP P1 ################## problem = "Hopper-v2" env = gym.make(problem) eval_env = gym.make(problem) num_states = env.observation_space.shape[0] print("Size of State Space -> {}".format(num_states), flush=True) num_actions = env.action_space.shape[0] print("Size of Action Space -> {}".format(num_actions), flush=True) upper_bound = env.action_space.high[0] lower_bound = env.action_space.low[0] print("Max Value of Action -> {}".format(upper_bound), flush=True) print("Min Value of Action -> {}".format(lower_bound), flush=True) minibatch_size = 256 ##########*****####################*****########## #################### Auxiliaries #################### def discounted_cumulative_sums(x, discount): # Discounted cumulative sums of vectors for computing rewards-to-go and advantage estimates return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] ##########*****####################*****########## #################### Replay Buffer #################### class Buffer: def __init__(self, observation_dimensions, action_dimensions, size, gamma=0.99, lam=0.95): self.observation_buffer = np.zeros( (size, observation_dimensions), dtype=np.float32 ) self.action_buffer = np.zeros((size, action_dimensions), dtype=np.int32) self.advantage_buffer = np.zeros(size, dtype=np.float32) self.reward_buffer = np.zeros(size, dtype=np.float32) self.return_buffer = np.zeros(size, dtype=np.float32) self.value_buffer = np.zeros(size, dtype=np.float32) self.logprobability_buffer = np.zeros(size, dtype=np.float32) self.gamma, self.lam = gamma, lam self.pointer, self.trajectory_start_index = 0, 0 def store(self, observation, action, reward, value, logprobability): self.observation_buffer[self.pointer] = observation self.action_buffer[self.pointer] = action self.reward_buffer[self.pointer] = reward self.value_buffer[self.pointer] = value self.logprobability_buffer[self.pointer] = logprobability self.pointer += 1 def finish_trajectory(self, last_value=0): path_slice = slice(self.trajectory_start_index, self.pointer) rewards = np.append(self.reward_buffer[path_slice], last_value) values = np.append(self.value_buffer[path_slice], last_value) deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1] self.advantage_buffer[path_slice] = discounted_cumulative_sums( deltas, self.gamma * self.lam ) self.return_buffer[path_slice] = discounted_cumulative_sums( rewards, self.gamma )[:-1] self.trajectory_start_index = self.pointer def get(self): # Get all data of the buffer and normalize the advantages rindex = np.random.choice(self.pointer, minibatch_size) advantage_mean, advantage_std = ( np.mean(self.advantage_buffer[rindex]), np.std(self.advantage_buffer[rindex]), ) return ( self.observation_buffer[rindex], self.action_buffer[rindex], (self.advantage_buffer[rindex] - advantage_mean) / advantage_std, self.return_buffer[rindex], self.logprobability_buffer[rindex], ) def clear(self): self.pointer, self.trajectory_start_index = 0, 0 ##########*****####################*****########## #################### Models #################### class Actor(Model): def __init__(self): super().__init__() self.action_dim = num_actions self.dense1_layer = layers.Dense(256, activation="relu") self.dense2_layer = layers.Dense(256, activation="relu") self.mean_layer = layers.Dense(self.action_dim) self.stdev_layer = layers.Dense(self.action_dim) def call(self, state, eval_mode=False): a1 = self.dense1_layer(state) a2 = self.dense2_layer(a1) mu = self.mean_layer(a2) log_sigma = self.stdev_layer(a2) sigma = tf.exp(log_sigma) covar_m = tf.linalg.diag(sigma**2) dist = tfp.distributions.MultivariateNormalTriL(loc=mu, scale_tril=tf.linalg.cholesky(covar_m)) if eval_mode: action_ = mu else: action_ = dist.sample() action = tf.tanh(action_) log_pi_ = dist.log_prob(action_) log_pi = log_pi_ - tf.reduce_sum(tf.math.log(tf.clip_by_value(1 - action**2, EPSILON, 1.0)), axis=1) return action*upper_bound, log_pi def get_critic(): state_input = layers.Input(shape=(num_states)) state_out = layers.Dense(256, activation="relu")(state_input) out = layers.Dense(256, activation="relu")(state_out) outputs = layers.Dense(1, dtype='float32')(out) model = tf.keras.Model(state_input, outputs) return model ##########*****####################*****########## #################### GLOBAL SETUP P2 #################### # Hyperparameters of the PPO algorithm horizon = 2048 iterations = 2000 gamma = 0.99 clip_ratio = 0.2 epochs = 500 lam = 0.97 target_kl = 0.01 beta = 1.0 render = False actor_model = Actor() critic_model = get_critic() lr = 0.0003 policy_optimizer = tf.keras.optimizers.Adam(learning_rate=lr, # ) clipnorm=1.0) value_optimizer = tf.keras.optimizers.Adam(learning_rate=lr, # ) clipnorm=1.0) buffer = Buffer(num_states, num_actions, horizon) ##########*****####################*****########## #################### Training #################### observation, episode_return, episode_length = env.reset(), 0, 0 tf_observation = tf.expand_dims(observation, 0) def train_policy( observation_buffer, action_buffer, logprobability_buffer, advantage_buffer ): global beta with tf.GradientTape() as tape: # Record operations for automatic differentiation. action, log_a = actor_model(observation_buffer) # print("A: ", tf.reduce_mean(action)) # print("LOG_A: ", tf.reduce_mean(log_a)) ratio = tf.exp( log_a - logprobability_buffer ) # print("R: ", tf.reduce_mean(ratio), flush=True) cd_ratio = tf.clip_by_value(ratio, (1 - clip_ratio), (1 + clip_ratio)) min_advantage = cd_ratio * advantage_buffer _kl = -beta*tf.math.reduce_max(logprobability_buffer - log_a) policy_loss = -tf.reduce_mean(tf.minimum(ratio * advantage_buffer, min_advantage) + _kl) # print("LOSS: ", policy_loss) policy_grads = tape.gradient(policy_loss, actor_model.trainable_variables) policy_optimizer.apply_gradients(zip(policy_grads, actor_model.trainable_variables)) # print("GRAD: ", policy_grads[0], flush=True) action_opt, log_a_opt = actor_model(observation_buffer) kl = tf.reduce_mean( logprobability_buffer - log_a_opt ) if kl < target_kl/1.5: beta = beta/2 if kl > target_kl*1.5: beta = beta*2 return kl def train_value_function(observation_buffer, return_buffer): with tf.GradientTape() as tape: # Record operations for automatic differentiation. value_loss = tf.reduce_mean((return_buffer - critic_model(observation_buffer)) ** 2) value_grads = tape.gradient(value_loss, critic_model.trainable_variables) value_optimizer.apply_gradients(zip(value_grads, critic_model.trainable_variables)) for ite in range(iterations): for t in range(horizon): if render: env.render() action, log_pi_a = actor_model(tf_observation) action = action[0] observation_new, reward, done, _ = env.step(action) episode_return += reward episode_length += 1 value_t = critic_model(tf_observation) buffer.store(observation, action, reward, value_t, log_pi_a) observation = observation_new tf_observation = tf.expand_dims(observation, 0) terminal = done if terminal or (t == horizon - 1): last_value = 0 if done else critic_model(tf_observation) buffer.finish_trajectory(last_value) observation, episode_return, episode_length = env.reset(), 0, 0 tf_observation = tf.expand_dims(observation, 0) for _ in range(epochs): ( observation_buffer, action_buffer, advantage_buffer, return_buffer, logprobability_buffer, ) = buffer.get() kl = train_policy( observation_buffer, action_buffer, logprobability_buffer, advantage_buffer ) train_value_function(observation_buffer, return_buffer) buffer.clear() ##########*****####################*****########## Note: The code base is constructed by a combination of a modified version of the official keras PPO tutorial(https://keras.io/examples/rl/ppo_cartpole/) and Modules(Mainly the policy network) that have been tested in other implementations. I refrained from using tf_function declaration as I am very new to tensorflow, thus not understanding its impact, and I have read from various github issues that sometimes such declaration causes numerical instability due to caching. However, it could be a source of my issues. Any help is appreciated, and apologies if something is missing or unclear.
TypeError when trying to make a loop creating artificial neural networks
I am working on an artifical neural network which I have created via subclassing. The subclassing looks like this: import time import numpy as np import matplotlib.pyplot as plt import tensorflow as tf import scipy.stats as si import sympy as sy from sympy.stats import Normal, cdf from sympy import init_printing class DGMNet(tf.keras.Model): def __init__(self, n_layers, n_nodes, dimensions=1): """ Parameters: - n_layers: number of layers - n_nodes: number of nodes in (inner) layers - dimensions: number of spacial dimensions """ super().__init__() self.n_layers = n_layers self.initial_layer = DenseLayer(dimensions + 1, n_nodes, activation="relu") self.lstmlikelist = [] for _ in range(self.n_layers): self.lstmlikelist.append(LSTMLikeLayer(dimensions + 1, n_nodes, activation="relu")) self.final_layer = DenseLayer(n_nodes, 1, activation=None) def call(self, t, x): X = tf.concat([t,x], 1) S = self.initial_layer.call(X) for i in range(self.n_layers): S = self.lstmlikelist[i].call({'S': S, 'X': X}) result = self.final_layer.call(S) return result class DenseLayer(tf.keras.layers.Layer): def __init__(self, n_inputs, n_outputs, activation): """ Parameters: - n_inputs: number of inputs - n_outputs: number of outputs - activation: activation function """ super(DenseLayer, self).__init__() self.n_inputs = n_inputs self.n_outputs = n_outputs self.W = self.add_weight(shape=(self.n_inputs, self.n_outputs), initializer='random_normal', trainable=True) self.b = self.add_weight(shape=(1, self.n_outputs), initializer='random_normal', trainable=True) self.activation = _get_function(activation) def call(self, inputs): S = tf.add(tf.matmul(inputs, self.W), self.b) S = self.activation(S) return S class LSTMLikeLayer(tf.keras.layers.Layer): def __init__(self, n_inputs, n_outputs, activation): """ Parameters: - n_inputs: number of inputs - n_outputs: number of outputs - activation: activation function """ super(LSTMLikeLayer, self).__init__() self.n_outputs = n_outputs self.n_inputs = n_inputs self.Uz = self.add_variable("Uz", shape=[self.n_inputs, self.n_outputs]) self.Ug = self.add_variable("Ug", shape=[self.n_inputs, self.n_outputs]) self.Ur = self.add_variable("Ur", shape=[self.n_inputs, self.n_outputs]) self.Uh = self.add_variable("Uh", shape=[self.n_inputs, self.n_outputs]) self.Wz = self.add_variable("Wz", shape=[self.n_outputs, self.n_outputs]) self.Wg = self.add_variable("Wg", shape=[self.n_outputs, self.n_outputs]) self.Wr = self.add_variable("Wr", shape=[self.n_outputs, self.n_outputs]) self.Wh = self.add_variable("Wh", shape=[self.n_outputs, self.n_outputs]) self.bz = self.add_variable("bz", shape=[1, self.n_outputs]) self.bg = self.add_variable("bg", shape=[1, self.n_outputs]) self.br = self.add_variable("br", shape=[1, self.n_outputs]) self.bh = self.add_variable("bh", shape=[1, self.n_outputs]) self.activation = _get_function(activation) def call(self, inputs): S = inputs['S'] X = inputs['X'] Z = self.activation(tf.add(tf.add(tf.matmul(X, self.Uz), tf.matmul(S, self.Wz)), self.bz)) G = self.activation(tf.add(tf.add(tf.matmul(X, self.Ug), tf.matmul(S, self.Wg)), self.bg)) R = self.activation(tf.add(tf.add(tf.matmul(X, self.Ur), tf.matmul(S, self.Wr)), self.br)) H = self.activation(tf.add(tf.add(tf.matmul(X, self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh)) Snew = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z, S)) return Snew def _get_function(name): f = None if name == "tanh": f = tf.nn.tanh elif name == "sigmoid": f = tf.nn.sigmoid elif name == "relu": f = tf.nn.relu elif not name: f = tf.identity assert f is not None return f # Sampling def sampler(N1, N2, N3): np.random.seed(42) # Sampler #1: PDE domain t1 = np.random.uniform(low=T0, high=T, size=[N1,1]) s1 = np.random.uniform(low=S1, high=S2, size=[N1,1]) # Sampler #2: boundary condition t2 = np.zeros(shape=(1, 1)) s2 = np.zeros(shape=(1, 1)) # Sampler #3: initial/terminal condition t3 = T * np.ones((N3,1)) #Terminal condition s3 = np.random.uniform(low=S1, high=S2, size=[N3,1]) return (t1, s1, t2, s2, t3, s3) # Loss function def loss(model, t1, x1, t2, x2, t3, x3): # Loss term #1: PDE V = model(t1, x1) V_t = tf.gradients(V, t1)[0] V_x = tf.gradients(V, x1)[0] V_xx = tf.gradients(V_x, x1)[0] f = V_t + r*x1*V_x + 0.5*sigma**2*x1**2*V_xx - r*V L1 = tf.reduce_mean(tf.square(f)) # Loss term #2: boundary condition #L2 = tf.reduce_mean(tf.square(V)) # Loss term #3: initial/terminal condition L3 = tf.reduce_mean(tf.square(model(t3, x3) - tf.math.maximum(x3-K,0))) return (L1, L3) # B-S's analytical known solution def analytical_solution(t, x): #C = SN(d1) - Xe- rt N(d2) #S: spot price #K: strike price #T: time to maturity #r: interest rate #sigma: volatility of underlying asset d1 = (np.log(x / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T)) d2 = (np.log(x / K) + (r - 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T)) call = (x * si.norm.cdf(d1, 0.0, 1.0) - K * np.exp(-r * T) * si.norm.cdf(d2, 0.0, 1.0)) return call # Set random seeds np.random.seed(42) tf.random.set_seed(42) # Strike price K = 0.5 # PDE parameters r = 0.05 # Interest rate sigma = 0.25 # Volatility # Time limits T0 = 0.0 + 1e-10 # Initial time T = 1.0 # Terminal time # Space limits S1 = 0.0 + 1e-10 # Low boundary S2 = 1.0 # High boundary # Number of samples NS_1 = 1000 NS_2 = 0 NS_3 = 100 t1, s1, t2, s2, t3, s3 = sampler(NS_1, NS_2, NS_3) Now what I want to do is to iterate over different parameters and create a new ann for each iteration. My plan was to do it in this way: tf.compat.v1.disable_eager_execution() t1_t = tf.compat.v1.placeholder(tf.float32, [None,1]) x1_t = tf.compat.v1.placeholder(tf.float32, [None,1]) t2_t = tf.compat.v1.placeholder(tf.float32, [None,1]) x2_t = tf.compat.v1.placeholder(tf.float32, [None,1]) t3_t = tf.compat.v1.placeholder(tf.float32, [None,1]) x3_t = tf.compat.v1.placeholder(tf.float32, [None,1]) volatility_list = [0.08]#[0.08, 0.16, 0.18, 0.2, 0.28] stages_list = [10]#, 50, 100] layers_list = [3]#, 5, 7] npl_list = [3]#, 6, 9, 12, 15] for sigma in volatility_list: for st in stages_list: for lay in layers_list: for npl in npl_list: # Neural Network definition num_layers = lay nodes_per_layer = npl ann = DGMNet(num_layers, nodes_per_layer) L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t) loss_t = L1_t + L3_t # Optimizer parameters global_step = tf.Variable(1, trainable=False) starter_learning_rate = 0.001 learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, global_step, 100000, 0.96, staircase=True) optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_t) # Training parameters steps_per_sample = st sampling_stages = 100#2000 # Plot tensors tplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="tplot_t") # We name to recover it later xplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="xplot_t") vplot_t = tf.identity(ann(tplot_t, xplot_t), name="vplot_t") # Trick for naming the trained model # Training data holders sampling_stages_list = [] elapsed_time_list = [] loss_list = [] L1_list = [] L3_list = [] # Train network!! init_op = tf.compat.v1.global_variables_initializer() sess = tf.compat.v1.Session() sess.run(init_op) for i in range(sampling_stages): t1, x1, t2, x2, t3, x3 = sampler(NS_1, NS_2, NS_3) start_time = time.clock() for _ in range(steps_per_sample): loss, L1, L3, _ = sess.run([loss_t, L1_t, L3_t, optimizer], feed_dict = {t1_t:t1, x1_t:x1, t2_t:t2, x2_t:x2, t3_t:t3, x3_t:x3}) end_time = time.clock() elapsed_time = end_time - start_time sampling_stages_list.append(i) elapsed_time_list.append(elapsed_time) loss_list.append(loss) L1_list.append(L1) L3_list.append(L3) text = "Stage: {:04d}, Loss: {:e}, L1: {:e}, L3: {:e}, {:f} seconds".format(i, loss, L1, L3, elapsed_time) print(text) #goodness of fit time_0 = 0 listofzeros = [time_0] * 100 prices_for_goodness = np.linspace(S1,S2, 100) goodness_list = [] solution_goodness = analytical_solution(listofzeros, prices_for_goodness) ttt = time_0*np.ones_like(prices_for_goodness.reshape(-1,1)) nn_goodness, = sess.run([vplot_t], feed_dict={tplot_t:ttt, xplot_t:prices_for_goodness.reshape(-1,1)}) deviation_list = np.abs(solution_goodness - nn_goodness)/(T-T0) print("{0:.2f}%".format(np.average(deviation_list)*100)) Unfortunately as soon as it ends the first iteration I get a TypeError that 'numpy.float32' object is not callable Error Traceback: TypeError Traceback (most recent call last) <ipython-input-14-bb14643d0c42> in <module>() 10 11 ---> 12 L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t) 13 loss_t = L1_t + L3_t 14 TypeError: 'numpy.float32' object is not callable I guess that the problem is with the creation of the placeholders, however I am not sure how to solve it. Maybe one of you can help me Thanks in advance! Chris
Did you create a variable called 'loss'? It seems that the loss function is redefined by a variable with the same name, so then python tries to call that variable as a function.
Google Colab freezes my browser and pc when trying to reconnect to a notebook
I am training a Machine learning model in google colab, to be more specific I am training a GAN with PyTorch-lightning. The problem occurs is when I get disconnected from my current runtime due to inactivity. When I try to reconnect my Browser(tried on firefox and chrome) becomes first laggy and than freezes, my pc starts to lag so that I am not able to close my browser and it doesn't go away. I am forced to press the power button of my PC in order to restart the PC. I have no clue why this happens. I tried various batch sizes(also the size 1) but it still happens. It can't be that my dataset is too big either(since i tried it on a dataset with 10images for testing puposes). I hope someone can help me. Here is my code (For using the code you will need comet.nl and enter the comet.ml api key): import torch import torch.nn as nn import torch.nn.functional as F import torchvision import torchvision.transforms as transforms from torch.utils.data import DataLoader from torchvision.datasets import MNIST from pytorch_lightning.callbacks import ModelCheckpoint import pytorch_lightning as pl from pytorch_lightning import loggers import numpy as np from numpy.random import choice from PIL import Image import os from pathlib import Path import shutil from collections import OrderedDict # custom weights initialization called on netG and netD def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: nn.init.normal_(m.weight.data, 0.0, 0.02) elif classname.find('BatchNorm') != -1: nn.init.normal_(m.weight.data, 1.0, 0.02) nn.init.constant_(m.bias.data, 0) # randomly flip some labels def noisy_labels(y, p_flip=0.05): # # flip labels with 5% probability # determine the number of labels to flip n_select = int(p_flip * y.shape[0]) # choose labels to flip flip_ix = choice([i for i in range(y.shape[0])], size=n_select) # invert the labels in place y[flip_ix] = 1 - y[flip_ix] return y class AddGaussianNoise(object): def __init__(self, mean=0.0, std=0.1): self.std = std self.mean = mean def __call__(self, tensor): return tensor + torch.randn(tensor.size()) * self.std + self.mean def __repr__(self): return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std) def get_valid_labels(img): return (0.8 - 1.1) * torch.rand(img.shape[0], 1, 1, 1) + 1.1 # soft labels def get_unvalid_labels(img): return noisy_labels((0.0 - 0.3) * torch.rand(img.shape[0], 1, 1, 1) + 0.3) # soft labels class Generator(nn.Module): def __init__(self, ngf, nc, latent_dim): super(Generator, self).__init__() self.ngf = ngf self.latent_dim = latent_dim self.nc = nc self.main = nn.Sequential( # input is Z, going into a convolution nn.ConvTranspose2d(latent_dim, ngf * 8, 4, 1, 0, bias=False), nn.BatchNorm2d(ngf * 8), nn.LeakyReLU(0.2, inplace=True), # state size. (ngf*8) x 4 x 4 nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 4), nn.LeakyReLU(0.2, inplace=True), # state size. (ngf*4) x 8 x 8 nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 2), nn.LeakyReLU(0.2, inplace=True), # state size. (ngf*2) x 16 x 16 nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf), nn.LeakyReLU(0.2, inplace=True), # state size. (ngf) x 32 x 32 nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), nn.Tanh() # state size. (nc) x 64 x 64 ) def forward(self, input): return self.main(input) class Discriminator(nn.Module): def __init__(self, ndf, nc): super(Discriminator, self).__init__() self.nc = nc self.ndf = ndf self.main = nn.Sequential( # input is (nc) x 64 x 64 nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf) x 32 x 32 nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 2), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf*2) x 16 x 16 nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 4), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf*4) x 8 x 8 nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 8), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf*8) x 4 x 4 nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), nn.Sigmoid() ) def forward(self, input): return self.main(input) class DCGAN(pl.LightningModule): def __init__(self, hparams, logger, checkpoint_folder, experiment_name): super().__init__() self.hparams = hparams self.logger = logger # only compatible with comet_logger at the moment self.checkpoint_folder = checkpoint_folder self.experiment_name = experiment_name # networks self.generator = Generator(ngf=hparams.ngf, nc=hparams.nc, latent_dim=hparams.latent_dim) self.discriminator = Discriminator(ndf=hparams.ndf, nc=hparams.nc) self.generator.apply(weights_init) self.discriminator.apply(weights_init) # cache for generated images self.generated_imgs = None self.last_imgs = None # For experience replay self.exp_replay_dis = torch.tensor([]) # creating checkpoint folder dirpath = Path(self.checkpoint_folder) if not dirpath.exists(): os.makedirs(dirpath, 0o755) def forward(self, z): return self.generator(z) def adversarial_loss(self, y_hat, y): return F.binary_cross_entropy(y_hat, y) def training_step(self, batch, batch_nb, optimizer_idx): # For adding Instance noise for more visit: https://www.inference.vc/instance-noise-a-trick-for-stabilising-gan-training/ std_gaussian = max(0, self.hparams.level_of_noise - ((self.hparams.level_of_noise * 1.5) * (self.current_epoch / self.hparams.epochs))) AddGaussianNoiseInst = AddGaussianNoise(std=std_gaussian) # the noise decays over time imgs, _ = batch imgs = AddGaussianNoiseInst(imgs) # Adding instance noise to real images self.last_imgs = imgs # train generator if optimizer_idx == 0: # sample noise z = torch.randn(imgs.shape[0], self.hparams.latent_dim, 1, 1) # generate images self.generated_imgs = self(z) self.generated_imgs = AddGaussianNoiseInst(self.generated_imgs) # Adding instance noise to fake images # Experience replay # for discriminator perm = torch.randperm(self.generated_imgs.size(0)) # Shuffeling r_idx = perm[:max(1, self.hparams.experience_save_per_batch)] # Getting the index self.exp_replay_dis = torch.cat((self.exp_replay_dis, self.generated_imgs[r_idx]), 0).detach() # Add our new example to the replay buffer # ground truth result (ie: all fake) g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs), get_valid_labels(self.generated_imgs)) # adversarial loss is binary cross-entropy tqdm_dict = {'g_loss': g_loss} log = {'g_loss': g_loss, "std_gaussian": std_gaussian} output = OrderedDict({ 'loss': g_loss, 'progress_bar': tqdm_dict, 'log': log }) return output # train discriminator if optimizer_idx == 1: # Measure discriminator's ability to classify real from generated samples # how well can it label as real? real_loss = self.adversarial_loss(self.discriminator(imgs), get_valid_labels(imgs)) # Experience replay if self.exp_replay_dis.size(0) >= self.hparams.experience_batch_size: fake_loss = self.adversarial_loss(self.discriminator(self.exp_replay_dis.detach()), get_unvalid_labels(self.exp_replay_dis)) # train on already seen images self.exp_replay_dis = torch.tensor([]) # Reset experience replay # discriminator loss is the average of these d_loss = (real_loss + fake_loss) / 2 tqdm_dict = {'d_loss': d_loss} log = {'d_loss': d_loss, "d_exp_loss": fake_loss, "std_gaussian": std_gaussian} output = OrderedDict({ 'loss': d_loss, 'progress_bar': tqdm_dict, 'log': log }) return output else: fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs.detach()), get_unvalid_labels(self.generated_imgs)) # how well can it label as fake? # discriminator loss is the average of these d_loss = (real_loss + fake_loss) / 2 tqdm_dict = {'d_loss': d_loss} log = {'d_loss': d_loss, "std_gaussian": std_gaussian} output = OrderedDict({ 'loss': d_loss, 'progress_bar': tqdm_dict, 'log': log }) return output def configure_optimizers(self): lr = self.hparams.lr b1 = self.hparams.b1 b2 = self.hparams.b2 opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2)) opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2)) return [opt_g, opt_d], [] def train_dataloader(self): transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]) dataset = MNIST(os.getcwd(), train=True, download=True, transform=transform) return DataLoader(dataset, batch_size=self.hparams.batch_size) # transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)), # transforms.ToTensor(), # transforms.Normalize([0.5], [0.5]) # ]) # train_dataset = torchvision.datasets.ImageFolder( # root="./drive/My Drive/datasets/ghibli_dataset_small_overfit/", # transform=transform # ) # return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True, batch_size=self.hparams.batch_size) def on_epoch_end(self): z = torch.randn(4, self.hparams.latent_dim, 1, 1) # match gpu device (or keep as cpu) if self.on_gpu: z = z.cuda(self.last_imgs.device.index) # log sampled images sample_imgs = self.generator(z) sample_imgs = sample_imgs.view(-1, self.hparams.nc, self.hparams.image_size, self.hparams.image_size) grid = torchvision.utils.make_grid(sample_imgs, nrow=2) self.logger.experiment.log_image(grid.permute(1, 2, 0), f'generated_images_epoch{self.current_epoch}', step=self.current_epoch) # save model if self.current_epoch % self.hparams.save_model_every_epoch == 0: trainer.save_checkpoint(self.checkpoint_folder + "/" + self.experiment_name + "_epoch_" + str(self.current_epoch) + ".ckpt") comet_logger.experiment.log_asset_folder(self.checkpoint_folder, step=self.current_epoch) # Deleting the folder where we saved the model so that we dont upload a thing twice dirpath = Path(self.checkpoint_folder) if dirpath.exists() and dirpath.is_dir(): shutil.rmtree(dirpath) # creating checkpoint folder access_rights = 0o755 os.makedirs(dirpath, access_rights) from argparse import Namespace args = { 'batch_size': 48, 'lr': 0.0002, 'b1': 0.5, 'b2': 0.999, 'latent_dim': 128, # tested value which worked(in V4_1): 100 'nc': 1, 'ndf': 32, 'ngf': 32, 'epochs': 10, 'save_model_every_epoch': 5, 'image_size': 64, 'num_workers': 2, 'level_of_noise': 0.15, 'experience_save_per_batch': 1, # this value should be very low; tested value which works: 1 'experience_batch_size': 50 # this value shouldnt be too high; tested value which works: 50 } hparams = Namespace(**args) # Parameters experiment_name = "DCGAN_V4_2_MNIST" dataset_name = "MNIST" checkpoint_folder = "DCGAN/" tags = ["DCGAN", "MNIST", "OVERFIT", "64x64"] dirpath = Path(checkpoint_folder) # init logger comet_logger = loggers.CometLogger( api_key="", rest_api_key="", project_name="gan", experiment_name=experiment_name, #experiment_key="f23d00c0fe3448ee884bfbe3fc3923fd" # used for resuming trained id can be found in comet.ml ) #defining net net = DCGAN(hparams, comet_logger, checkpoint_folder, experiment_name) #logging comet_logger.experiment.set_model_graph(str(net)) comet_logger.experiment.add_tags(tags=tags) comet_logger.experiment.log_dataset_info(dataset_name) trainer = pl.Trainer(#resume_from_checkpoint="GHIBLI_DCGAN_OVERFIT_64px_epoch_6000.ckpt", logger=comet_logger, max_epochs=args["epochs"] ) trainer.fit(net) comet_logger.experiment.end()
I fixed it with importing this: from IPython.display import clear_output
Deep neural-network with backpropagation implementation does not work - python
I want to implement a multilayer NN with backpropagation. I have been trying for days, but it simply does not work. It is extremely clear in my head how it is supposed to work, I have streamline my code to be as simple as possible but I can't do it. It's probably something stupid, but I cannot see it. The implementation I have done is with an input layer of 784 (28x28), two (L) hidden layers of 300 and an output of 10 classes. I have a bias in every layer (except last...) The output activation is softmax and the hidden activation is ReLU. I use mini batches of 600 examples over a dataset of 60k examples with 50 to 500 epoches. Here the core of my code: Preparation: from tensorflow import keras import numpy as np import matplotlib.pyplot as plt fashion_mnist = keras.datasets.fashion_mnist (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() L = 2 K = len(np.unique(train_labels)) lr = 0.001 nb_epochs = 50 node_per_hidden_layer = 300 nb_batches = 100 W = [] losses_test = [] X_train = np.reshape(train_images, (train_images.shape[0], train_images.shape[1]*train_images.shape[2])) X_test = np.reshape(test_images, (test_images.shape[0], train_images.shape[1]*train_images.shape[2])) Y_train = np.zeros((train_labels.shape[0], K)) Y_train[np.arange(Y_train.shape[0]), train_labels] = 1 Y_test = np.zeros((test_labels.shape[0], K)) Y_test[np.arange(Y_test.shape[0]), test_labels] = 1 W.append(np.random.normal(0, 0.01, (X_train.shape[1]+1, node_per_hidden_layer))) for i in range(L-1): W.append(np.random.normal(0, 0.01, (node_per_hidden_layer+1, node_per_hidden_layer))) W.append(np.random.normal(0, 0.01, (node_per_hidden_layer+1, K))) Helper function: def softmax(z): exp = np.exp(z - z.max(1)[:,np.newaxis]) return np.array(exp / exp.sum(1)[:,np.newaxis]) def softmax_derivative(z): sm = softmax(z) return sm * (1-sm) def ReLU(z): return np.maximum(z, 0) def ReLU_derivative(z): return (z >= 0).astype(int) def get_loss(y, y_pred): return -np.sum(y * np.log(y_pred)) fitting def fit(): minibatch_size = len(X_train) // nb_batches for epoch in range(nb_epochs): permutaion = list(np.random.permutation(X_train.shape[0])) X_shuffle = X_train[permutaion] Y_shuffle = Y_train[permutaion] print("Epoch----------------", epoch) for batche in range(0, X_shuffle.shape[0], minibatch_size): Z = [None] * (L + 2) a = [None] * (L + 2) delta = [None] * (L + 2) X = X_train[batche:batche+minibatch_size] Y = Y_shuffle[batche:batche+minibatch_size] ### forward propagation a[0] = np.append(X, np.ones((minibatch_size, 1)), axis=1) for i in range(L): Z[i + 1] = a[i] # W[i] a[i + 1] = np.append(ReLU(Z[i+1]), np.ones((minibatch_size, 1), dtype=int), axis=1) Z[-1] = a[L] # W[L] a[-1] = softmax(Z[-1]) ### back propagation delta[-1] = (Y - a[-1]) * softmax_derivative(Z[-1]) for i in range(L, 0, -1): delta[i] = (delta[i+1] # W[i].T)[:,:-1] * ReLU_derivative(Z[i]) for i in range(len(W)): g = a[i].T # delta[i+1] / minibatch_size W[i] = W[i] + lr * g get_loss_on_test() loss def get_loss_on_test(): Z_test = [None] * (L + 2) a_test = [None] * (L + 2) a_test[0] = np.append(X_test, np.ones((len(X_test), 1)), axis=1) for i in range(L): Z_test[i + 1] = a_test[i] # W[i] a_test[i + 1] = np.append(ReLU(Z_test[i+1]), np.ones((len(X_test), 1)), axis=1) Z_test[-1] = a_test[L] # W[L] a_test[-1] = softmax(Z_test[-1]) losses_test.append(get_loss(Y_test, a_test[-1])) main losses_test.clear() fit() plt.plot(losses_test) plt.show() If you want to see it in my notebook with an example of losses graph, here the link: https://github.com/beurnii/INF8225/blob/master/tp2/jpt.ipynb If you want more details on my assignment, this is part 1b (page 2 for english): https://github.com/beurnii/INF8225/blob/master/tp2/INF8225_TP2_2020.pdf
How can I define a custom kernel function for sklearn.svm.SVC?
I am trying to make a stock prediction system in Python using scikit-learn. Here is my code: import numpy as np import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.metrics import accuracy_score import matplotlib.pyplot as plt from sklearn import svm,preprocessing from sklearn.metrics import precision_recall_fscore_support import pandas as pd import time ##import statistics def my_kernel(X, Y): """ We create a custom kernel: (2 0) k(X, Y) = X ( ) Y.T (0 1) """ M = np.array([[2, 0], [0, 1.0]]) return np.dot(np.dot(X, M), Y.T) FEATURES = ['DE Ratio', 'Trailing P/E', 'Price/Sales', 'Price/Book', 'Profit Margin', 'Operating Margin', 'Return on Assets', 'Return on Equity', 'Revenue Per Share', 'Market Cap', 'Enterprise Value', 'Forward P/E', 'PEG Ratio', 'Enterprise Value/Revenue', 'Enterprise Value/EBITDA', 'Revenue', 'Gross Profit', 'EBITDA', 'Net Income Avl to Common ', 'Diluted EPS', 'Earnings Growth', 'Revenue Growth', 'Total Cash', 'Total Cash Per Share', 'Total Debt', 'Current Ratio', 'Book Value Per Share', 'Cash Flow', 'Beta', 'Held by Insiders', 'Held by Institutions', 'Shares Short (as of', 'Short Ratio', 'Short % of Float', 'Shares Short (prior '] def Build_Data_Set(): data_df = pd.DataFrame.from_csv("key_stats.csv") data_df = data_df.reindex(np.random.permutation(data_df.index)) ##print data_df X = np.array(data_df[FEATURES].values) y = (data_df["Status"] .replace("underperform",0) .replace("outperform",1) .values.tolist()) X = preprocessing.scale(X) X = StandardScaler().fit_transform(X) Z0 = np.array(data_df["stock_p_hancge"]) Z1 = np.array(data_df["sp500_p_change"]) return X,y,Z0,Z1 def mykernel(X, Y,gamma=None): X, Y = check_pairwise_arrays(X, Y) if gamma is None: gamma = 1.0 / X.shape[1] K = euclidean_distances(X, Y, squared=True) k *= -gamma np.exp(K, K) # exponentiate K in-place return safe_sparse_dot(X, Y.T, dense_output=True) + k size = 2094 invest_amount = 10000 total_invests = 0 if_market = 0 if_strat = 0 X, y , Z0,Z1= Build_Data_Set() print(len(X)) test_size = len(X) - size -1 start = time.clock() clf = svm.SVC(kernel="mykernel") clf.fit(X[:size],y[:size]) y_pred = clf.predict(X[size+1:]) y_true = y[size+1:] time_taken = time.clock()-start print time_taken,"Seconds" for x in range(1, test_size+1): if y_pred[-x] == 1: invest_return = invest_amount + (invest_amount * (Z0[-x]/100)) market_return = invest_amount + (invest_amount * (Z1[-x]/100)) total_invests += 1 if_market += market_return if_strat += invest_return print accuracy_score(y_true, y_pred) print precision_recall_fscore_support(y_true, y_pred, average='macro') print "Total Trades:", total_invests print "Ending with Strategy:",if_strat print "Ending with Market:",if_market compared = ((if_strat - if_market) / if_market) * 100.0 do_nothing = total_invests * invest_amount avg_market = ((if_market - do_nothing) / do_nothing) * 100.0 avg_strat = ((if_strat - do_nothing) / do_nothing) * 100.0 print "Compared to market, we earn",str(compared)+"% more" print "Average investment return:", str(avg_strat)+"%" print "Average market return:", str(avg_market)+"%" The predefined kernels are working but for my custom kernel I am getting an error: ValueError: 'mykernel' is not in list According to the official documentation it seems like the code above should work.
You need to pass the kernel function itself as the kernel= parameter rather than just the function name, i.e.: clf = svm.SVC(kernel=mykernel) rather than clf = svm.SVC(kernel="mykernel")