Multiplying a specific cell value based on if statement in a Pandas dataframe - pandas

So I am trying to associate a specific value ('Property Damage') with every row in my dataset but I am having some trouble with this. Specifically, I want to multiply the value in the 'MD' column for each row by a number (0.02, 0.15, etc.) if it meets the conditions specified in the for loop (e.g. if i >= 0.8062, print etc.). I have included my code below:
df['RAND'] = np.random.uniform(0, 1, size=df.index.size)
dfRAND = list(df['RAND'])
def sim_1():
for i in dfRAND:
result = []
if i >= 0.8062:
df['Property Damage'] = df['MD'].apply(lambda x: x * 0.02)
print(list(val for x, val in enumerate(df['Count']) if
x == dfRAND.index(i)), 'LF0', i,':', df['Property Damage'])
elif 0.01 <= i < 0.89062:
df['Property Damage'] = list(df['MD'].apply(lambda x: x * 0.15))
print(list(val for x, val in enumerate(df['Count']) if
x == dfRAND.index(i)),'LF1', i, ':', df['Property Damage'])
elif 0.05 <= i < 0.01:
df['Property Damage'] = list(df['MD'].apply(lambda x: x * 0.20))
print(list(val for x, val in enumerate(df['Count']) if
x == dfRAND.index(i)),'LF2', i,':', df['Property Damage'])
elif 0.025 <= i < 0.05:
df['Property Damage'] = list(df['MD'].apply(lambda x: x * 0.50))
print(list(val for x, val in enumerate(df['Count']) if
x == dfRAND.index(i)),'LF3', i,':', df['Property Damage'])
elif 0.0125 <= i < 0.025:
df['Property Damage'] = list(df['MD'].apply(lambda x: x * 1))
print(list(val for x, val in enumerate(df['Count']) if
x == dfRAND.index(i)),'LF4', i,':', df['Property Damage'])
elif 0.0063 <= i < 0.0125:
df['Property Damage'] = list(df['MD'].apply(lambda x: x * 1))
print(list(val for x, val in enumerate(df['Count']) if
x == dfRAND.index(i)),'LF5', i,':', df['Property Damage'])
The problem I am having at the moment is that the code prints all the 'Property Damage' values for each row. I want it to give me the 'Property Damage' value for a specific row based on whichever condition is met in the for loop.
Any help is appreciated. Thanks in advance.

Are you looking for something like this?
my_bins = {pd.Series.max(df['RAND'])-1: 1,
.01: .15,
.0125: 1,
.025: .5,
.05: .2,
pd.Series.max(df['RAND'])+1 : .02}
df['rand_multiplier'] = pd.cut(df['RAND'], bins = sorted(my_bins.keys()), labels = list(range(len(my_bins) - 1))).apply(lambda x: my_bins[sorted(my_bins.keys())[x]])
df.apply(lambda row: row['MD'] * row['rand_multiplier'], axis = 1)
I'm in a bit of a hurry so it's not the prettiest thing. Basically I created bins based on the criteria you had and created a "multiplier" column which associates each entry in df['RAND'] with a multiplying factor. Then we can iterate over df and apply the multiplying factor to your 'MD' row.
Of course, I can't show the produced results without the 'MD' data.

Related

SMOTE adds many rows with 0 values to dataframe

Please help me, i cannot understand why X_synthetic_df returns hundreds of rows with 0 values. All of the rows have normal values fine until row 1745. From that row, all the other row values contain nothing but zeros
def nearest_neighbors(nominal_columns, numeric_columns, df, row, k):
def distance(row1, row2):
distance = 0
for col in nominal_columns:
if row1[col] != row2[col]:
distance += 1
for col in numeric_columns:
distance += (row1[col] - row2[col])**2
return distance**0.5
distances = []
for i in range(len(df)):
r = df.iloc[i]
if r.equals(row):
continue
d = distance(row, r)
if(d!=0):
distances.append((d, i))
distances.sort()
nearest = [i for d, i in distances[:k]]
return nearest
def smotenc(X, y, nominal_cols, numeric_cols, k=5, seed=None):
minority_class = y[y==1]
majority_class = y[y==0]
minority_samples = X[y == 1]
minority_target = y[y == 1]
n_synthetic_samples = len(majority_class)-len(minority_class)
synthetic_samples = np.zeros((n_synthetic_samples, X.shape[1]))
if seed is not None:
np.random.seed(seed)
for i in range(len(minority_samples)):
nn = nearest_neighbors(nominal_cols, numeric_cols, minority_samples, minority_samples.iloc[i], k=k)
for j in range(min(k, n_synthetic_samples - i*k)):
nn_idx = int(np.random.choice(a=nn))
diff = minority_samples.iloc[(nn_idx)] - minority_samples.iloc[i]
print(diff)
if (diff == 0).all():
continue
synthetic_sample = minority_samples.iloc[i] + np.random.rand() * diff
synthetic_samples[(i*k)+j, :] = synthetic_sample
X_resampled = pd.concat([X[y == 1], pd.DataFrame(synthetic_samples,columns=X.columns)], axis=0)
y_resampled = np.concatenate((y[y == 1], [1] * n_synthetic_samples))
return X_resampled, y_resampled
minority_features = df_nominal.columns.get_indexer(df_nominal.columns)
synthetic = smotenc(check_x.head(3000),check_y.head(3000),nominal_cols,numeric_cols,seed = None)
X_synthetic_df = synthetic[0]
X_synthetic_df = pd.DataFrame(X_synthetic_df,columns = X.columns)
I was a dataframe with n synthetic samples, where n is the difference between the majority samples and minority class samples

Z3 ArithRef type: is there a way to show value once model evaluated?

Using Z3Py, once a model has been checked for an optimization problem, is there a way to convert ArithRef expressions into values?
Such as
y = If(x > 5, 0, 0.5 * x)
Once values have been found for x, can I get the evaluated value for y, without having to calculate again based on the given values for x?
Many thanks.
You need to evaluate, but it can be done by the model for you automatically:
from z3 import *
x = Real('x')
y = If(x > 5, 0, 0.5 * x)
s = Solver()
r = s.check()
if r == sat:
m = s.model();
print("x =", m.eval(x, model_completion=True))
print("y =", m.eval(y, model_completion=True))
else:
print("Solver said:", r)
This prints:
x = 0
y = 0
Note that we used the parameter model_completion=True since there are no constraints to force x (and consequently y) to any value in this model. If you have sufficient constraints added, you wouldn't need that parameter. (Of course, having it does not hurt.)

apply my created function to a data frame

I have the following function to calculate the black scholes model,
where I paste the necessary data from S0, K , K , T , r
and market_price in the function manually.
I would like to apply this same function to a pandas data frame
There I have the values ​​needed to perform the calculation
Data frame example
data = {'Name':['BOVAE115', 'BOVAE119', 'BBDCE251', 'BBDCE246'],
'Valor':[110.050003, 110.050003, 19.500000, 19.500000],
'Strike Value':[15.00, 19.00, 24.67, 25.19],
'Temp':[0.119048, 0.119048, 0.119048, 0.119048],
'Taxa':[11.65, 11.65, 11.65, 11.65],
'Market Price':[0.391968, 0.391968, 0.391968, 0.391968],
'Order':['c','c','c','c']
}
# Create DataFrame
df = pd.DataFrame(data)
df
How do I apply the created function to this list of values
Function
See the code
S0 = df['Valor']
K = df['Strike Value']
T = df['Temp']
r = df['Taxa']
market_price = df['Price']
flag = df['Order']
from py_vollib.black_scholes import black_scholes as bs
from py_vollib.black_scholes.greeks.analytical import vega
def implied_vol(S0, K, T, r, market_price, flag='c', tol=0.00001):
#"""Calculating the implied volatility of an European option
# S0: stock price
# K: strike price
# T: time to maturity
# r: risk-free rate
# market_price: option price in market
#"""
max_iter = 500 #max no. of iterations
vol_old = 0.3 #initial guess
for k in range(max_iter):
bs_price = bs(flag, S0, K, T, r, vol_old)
Cprime = vega(flag, S0, K, T, r, vol_old)*100
C = bs_price - market_price
vol_new = vol_old - C/Cprime
new_bs_price = bs(flag, S0, K, T, r, vol_new)
if (abs(vol_old-vol_new) < tol or abs(new_bs_price-market_price) < tol):
break
vol_old = vol_new
implied_vol = vol_new
return implied_vol
S0 = 14.73
K = 15.04
T = 20/252
r = 0.1165
market_price = 0.41
print(implied_vol(S0, K, T, r, market_price)*100)
I wanted to return the implied vol value in a data frame column
How can I apply this function in my dataframe

My tensorflow code does not work good -bank analysis

I'm studding tensorflow and I wrote some code but it does not work good.
the data was downloaded from uci:http://archive.ics.uci.edu/ml/datasets/Bank+Marketing
I want to find the client who will subscribe a term deposit,but the result matched is 0.
I use 3 layers neural network and sigmod for output.
My code is like this,please help me.
hidden_layer1 = 200
hidden_layer2 = 200
x = tf.placeholder(tf.float32,[None,16])
y = tf.placeholder(tf.float32,[None,1])
Weights_L1 = tf.Variable(tf.random_normal([16,hidden_layer1]))
biases_L1 = tf.Variable(tf.random_normal([1,hidden_layer1]))
Wx_plus_b_L1 = tf.matmul(x,Weights_L1) + biases_L1
L1=tf.nn.relu(Wx_plus_b_L1)
Weights_L2 = tf.Variable( tf.random_normal([hidden_layer1,1]))
biases_L2 = tf.Variable( tf.random_normal([1,1]))
Wx_plus_b_L2 = tf.matmul(L1,Weights_L2) + biases_L2
pred = tf.nn.sigmoid(Wx_plus_b_L2)
loss = tf.reduce_mean(tf.square(y-pred))
learning_rate=0.05
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
pred_correct = tf.equal(y,pred)
accuracy = tf.reduce_mean(tf.cast(pred_correct,tf.float32))
batch_num = 0
with tf.Session() as ss:
ss.run(tf.global_variables_initializer())
for step in range(500):
ss.run(train_step,feed_dict={x:bank_train_x,y:bank_train_y})
if step%100==0:
batch_num = batch_num +1
acc1 = ss.run(accuracy,feed_dict={x:bank_train_x,y:bank_train_y})
print("train acc"+ str(step) + ", " + str(acc1) +" , batch_num:" + str(batch_num))
#print(ss.run(learning_rate,feed_dict={global_:step}))
p = ss.run(pred,feed_dict={x:bank_train_x,y:bank_train_y})
acc2 = ss.run(accuracy,feed_dict={x:bank_test_x,y:bank_test_y})
print("test acc" + str(acc2))
def calc(pred,y):
l = y.shape[0]
a = 0
b=0
c=0
d=0
for i in range(l):
if (p[i] >0.5 and y[i] == 0):
a = a +1
elif (p[i] >0.5 and y[i] == 1):
b = b+1
elif (p[i] <0.5 and y[i] == 0):
c = c+1
elif (p[i] <0.5 and y[i] == 1):
d = d +1
print(a,b,c,d)
calc(p,bank_train_y)
#the result is 169 0 34959 4629

Multiple if-else conditions in tensorflow

I have a float tensor with shape (1) whose value lies between 0.0 and 1.0.
I want to 'bin' the range in this tensor, as in:
if 0.0 < x < 0.2:
return tf.Constant([0])
if 0.2 < x < 0.4:
return tf.Constant([1])
if 0.4 < x < 0.6:
return tf.Constant([2])
if 0.6 < x:
return tf.Constant([3])
No idea how to do it!
You have not explained what will happen in the border points (0.2, 0.4, ...) and have not shown what do you want to output for x > 0.6, so my assumptions are:
closed open interval; a < x <= b
the same bin procedure continues till 1 with a step 0.2
For such a simple case you do not need if else condition (also it will be slow). You can achieve it with math and casting:
import tensorflow as tf
x = tf.constant(0.25)
res = tf.cast(5 * x, tf.int32)
with tf.Session() as sess:
print sess.run(res)
try tg.logical_and
the following example might help
b = tf.constant([5,2,-3,1])
c1 = tf.greater(b,0) # b>0
c2 = tf.less(b,5) # b<5
c_f = tf.logical_and(c1, c2) # 0 < b < 5
sess=tf.Session()
sess.run(c_f)