How to develop a function - pandas

I am completely new to Python and am still learning it. I have written the following code, but I couldn't turn it into a function. Can somebody help me, please?
import pandas as pd
import numpy as np
f = open('1.csv', 'r')
df = pd.read_csv(f, usecols=[0], sep="\t", index_col=False)
Primary_List = df.values.tolist()
x = 0
y = len(Primary_List)
for i in range(x, y):
x = i
MyMatrix = Primary_List[x:x + 10]
print(MyMatrix)

You could create a function where you pass in the filename, then you could use this code to read and print many csv files.
def createMatrix(filename, window_size=10):
    """Return every sliding window over the first column of a tab-separated file.

    The first column of ``filename`` is read with pandas and converted to a
    list of one-element rows. One window is produced per starting row, so the
    windows near the end of the data are shorter than ``window_size``.

    Args:
        filename: Path of the tab-separated file to read.
        window_size: Maximum number of consecutive rows in each window.

    Returns:
        A list of windows, in starting-row order. Each window is a list of
        rows and each row is a one-element list holding the column value.
        The list is empty when the file contains no data rows.
    """
    # The context manager closes the handle even if read_csv raises.
    with open(filename, 'r') as f:
        df = pd.read_csv(f, usecols=[0], sep="\t", index_col=False)
    primary_list = df.values.tolist()
    # Collect every window instead of overwriting a single variable, so the
    # caller receives all of them rather than only one.
    return [
        primary_list[start:start + window_size]
        for start in range(len(primary_list))
    ]
print(createMatrix('1.csv'))
print(createMatrix('2.csv'))
print(createMatrix('3.csv'))

Related

python function as cvxpy parameter for dynamic optimization (optimal control)

import numpy as np
def af(a, b):
    """Return the 2x2 numpy array ``[[a, b], [b**2, b]]``."""
    top_row = [a, b]
    bottom_row = [b ** 2, b]
    return np.array([top_row, bottom_row])
np.random.seed(1)
n = 2
m = 2
T = 50
alpha = 0.2
beta = 3
# A = np.eye(n) - alpha * np.random.rand(n, n)
B = np.random.randn(n, m)
x_0 = beta * np.random.randn(n)
import cvxpy as cp
x = cp.Variable((n, T + 1))
u = cp.Variable((m, T))
A = cp.Parameter((2,2))
cost = 0
constr = []
for t in range(T):
    # Stage cost: squared norm of the next state plus squared norm of the input.
    cost += cp.sum_squares(x[:, t + 1]) + cp.sum_squares(u[:, t])
    # Rebuild A from the current state column via af(); this rebinds the name A
    # that previously referred to the cp.Parameter created above.
    A = af(*x[:, t])
    # Dynamics constraint x[:, t+1] == A x[:, t] + B u[:, t], written with the
    # `@` matrix-multiplication operator, plus an infinity-norm bound of 1 on
    # the input at step t.
    constr += [x[:, t + 1] == A @ x[:, t] + B @ u[:, t], cp.norm(u[:, t], "inf") <= 1]
# sums problem objectives and concatenates constraints.
constr += [x[:, T] == 0, x[:, 0] == x_0]
problem = cp.Problem(cp.Minimize(cost), constr)
problem.solve()
I want to use a Python function (a lambdify function) as a cvxpy parameter. I tried the method above; please let me know whether cvxpy supports a Python function as a parameter. Thank you.

Plot multiple graphs without using a for loop

So, my question may not be exactly what is in the title.
I have a function
y = a*x + b
And I want to plot y with different values of b.
I know that I can do the following:
import numpy as np
import matplotlib.pyplot as plt
a = 2
x = np.array([0,1,2,3,4])
b = 0
for i in range(10):
y = a*x + b
b = b+1
plt.plot(x,y)
And that returns exactly what I want.
But is there some way that I can do this by using
b = np.array([0,1,2,3,4,5,6,7,8,9])? So, then my code could look something like:
import numpy as np
import matplotlib.pyplot as plt
a = 2
x = np.array([0,1,2,3,4])
b = np.array([0,1,2,3,4,5,6,7,8,9])
y = a*x + b
plt.plot(x,y)
Yes, you can use matrix operations to create a 2D matrix with the result of the operation y = a*x + b.
a = 2
x = np.array([0,1,2,3,4])
b = np.array([0,1,2,3,4,5,6,7,8,9])
y = a*x[:,None]+b
plt.plot(x, y)
EDIT: I'm showing the solution provided by @Quang Hoang, which is much simpler than mine.
original code was:
y = np.tile(a*x, (b.size,1)) + b[:,np.newaxis]
plt.plot(x, y.T)

Data-Visualization Python

Plot 4 different line plots for the 4 companies in dataframe open_prices. Year would be on X-axis, stock price on Y axis, you will need (2,2) plot. Set figure size to 10, 8 and share X-axis for better visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nsepy import get_history
import datetime as dt
%matplotlib inline
start = dt.datetime(2015, 1, 1)
end = dt.datetime.today()
infy = get_history(symbol='INFY', start = start, end = end)
infy.index = pd.to_datetime(infy.index)
hdfc = get_history(symbol='HDFC', start = start, end = end)
hdfc.index = pd.to_datetime(hdfc.index)
reliance = get_history(symbol='RELIANCE', start = start, end = end)
reliance.index = pd.to_datetime(reliance.index)
wipro = get_history(symbol='WIPRO', start = start, end = end)
wipro.index = pd.to_datetime(wipro.index)
open_prices = pd.concat([infy['Open'], hdfc['Open'],reliance['Open'],
wipro['Open']], axis = 1)
open_prices.columns = ['Infy', 'Hdfc', 'Reliance', 'Wipro']
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
axes[0, 0].plot(open_prices.index.year,open_prices.INFY)
axes[0, 1].plot(open_prices.index.year,open_prices.HDB)
axes[1, 0].plot(open_prices.index.year,open_prices.TTM)
axes[1, 1].plot(open_prices.index.year,open_prices.WIT)
A blank graph comes up. Please help!
The code below works fine. I have changed the following things:
a) `axes` should be `ax`; b) the DataFrame column names were incorrect; c) anyone who wants to try this example also needs to install the lxml library.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nsepy import get_history
import datetime as dt
start = dt.datetime(2015, 1, 1)
end = dt.datetime.today()
infy = get_history(symbol='INFY', start = start, end = end)
infy.index = pd.to_datetime(infy.index)
hdfc = get_history(symbol='HDFC', start = start, end = end)
hdfc.index = pd.to_datetime(hdfc.index)
reliance = get_history(symbol='RELIANCE', start = start, end = end)
reliance.index = pd.to_datetime(reliance.index)
wipro = get_history(symbol='WIPRO', start = start, end = end)
wipro.index = pd.to_datetime(wipro.index)
open_prices = pd.concat([infy['Open'], hdfc['Open'],reliance['Open'],
wipro['Open']], axis = 1)
open_prices.columns = ['Infy', 'Hdfc', 'Reliance', 'Wipro']
print(open_prices.columns)
# 2x2 grid of subplots. The figure size (10, 8) and the shared X axis are the
# settings the exercise asks for.
f, ax = plt.subplots(2, 2, figsize=(10, 8), sharex=True)
# One line plot per company: the year of each index entry on the X axis and
# the opening price on the Y axis.
ax[0, 0].plot(open_prices.index.year, open_prices.Infy)
ax[1, 0].plot(open_prices.index.year, open_prices.Hdfc)
ax[0, 1].plot(open_prices.index.year, open_prices.Reliance)
ax[1, 1].plot(open_prices.index.year, open_prices.Wipro)
plt.show()

How to format data in Pandas to allow for the correct representation using Seaborn?

I am trying to generate the heatmap below. I have generated a figure, but I need help formatting the x-axis on the bottom to have the names show up in the appropriate order. Any help is appreciated. Thank You!
Proposed Heatmap
Generated Heatmap
df = pd.read_table('/srv/data/shared/virus_data.txt', header=None)
df.set_index(0, inplace=True)
df_bgs = df.loc[:, df.isna().any(axis=0)].iloc[3:]
df.dropna(axis=1, inplace=True)
df.sort_values(['Treatment', 'Time'], axis=1, inplace=True)
dfn = df.iloc[3:].astype('float')
dfn.index.name = 'Gene'
col_names = df.loc['Time'] + ' ' + df.loc['Treatment'] + 'HR ' + \
df.loc['Replicate']
dfn.columns = col_names
df.columns = col_names
dfn = dfn.sub(df_bgs.mean(axis=1), axis=0)
dfn[dfn<0] = 0
dfn = dfn.div(dfn.loc['HPRT1'], axis=1)
dfn.drop('HPRT1', axis=0, inplace=True)
dfn = np.log2(dfn+0.01)
treatment2select = ['M', 'M+SNV', 'M+ANDV']
df_ec = dfn.loc[:, df.loc['Treatment'].str.contains('EC')]
df_m = dfn.loc[:, df.loc['Treatment'].isin(treatment2select)]
def row_z_score(df):
    """Standardize each row of ``df``.

    Every row has its own mean subtracted and is then divided by its own
    standard deviation, both computed across the columns (``axis=1``).
    """
    row_means = df.mean(axis=1)
    row_stds = df.std(axis=1)
    centered = df.sub(row_means, axis=0)
    return centered.div(row_stds, axis=0)
df_ec = row_z_score(df_ec)
df_m = row_z_score(df_m)
uni_treatment = df.loc['Treatment'].unique()
treatment2color = dict(zip(uni_treatment,sns.color_palette(palette="YlGn",
n_colors=len(uni_treatment))))
col_colors = df.loc['Treatment'].map(treatment2color)
g = sns.clustermap(df_m, col_cluster=True,
col_colors = col_colors,
cmap='RdBu_r', method='ward')
g.ax_col_dendrogram.set_visible(False)

Printing the equation of the best fit line

I have created the best fit lines for the dataset using the following code:
fig, ax = plt.subplots()
for dd,KK in DATASET.groupby('Z'):
fit = polyfit(x,y,3)
fit_fn = poly1d(fit)
ax.plot(KK['x'],KK['y'],'o',KK['x'], fit_fn(KK['x']),'k',linewidth=4)
ax.set_xlabel('x')
ax.set_ylabel('y')
The graph displays the best fit line for each group of Z. I want to print the equation of the best fit line on top of the line. Please suggest what I can do here.
You need to write a function that converts an array of polynomial coefficients to a LaTeX string. Here is an example:
import pylab as pl
import numpy as np
x = np.random.randn(100)
y = 1 + 2 * x + 3 * x * x + np.random.randn(100) * 2
poly = pl.polyfit(x, y, 2)
def poly2latex(poly, variable="x", width=2):
    """Format polynomial coefficients as a ``$...$`` LaTeX math string.

    Args:
        poly: Sequence of coefficients ordered from the highest power down to
            the constant term (it is traversed in reverse, so the last entry
            is the power-0 coefficient).
        variable: Name used for the polynomial variable.
        width: Number of digits printed after the decimal point.

    Returns:
        The terms in ascending power order wrapped in dollar signs, e.g.
        ``"$1.00+2.00 x+3.00 x^2$"``. An empty ``poly`` yields ``"$$"``.
    """
    pieces = []
    for power, coeff in enumerate(reversed(poly)):
        # Format the magnitude only; the sign is emitted separately so that a
        # negative coefficient reads "-2.00 x" instead of "+-2.00 x".
        magnitude = "{0:0.{width}f}".format(abs(coeff), width=width)
        if power == 0:
            term = magnitude
        elif power == 1:
            term = "{} {}".format(magnitude, variable)
        else:
            # LaTeX superscripts a single token, so multi-digit exponents
            # need braces to render as one exponent.
            exponent = str(power) if power < 10 else "{" + str(power) + "}"
            term = "{} {}^{}".format(magnitude, variable, exponent)
        if coeff < 0:
            sign = "-"
        elif pieces:
            sign = "+"
        else:
            # No leading "+" in front of the first term.
            sign = ""
        pieces.append(sign + term)
    return "${}$".format("".join(pieces))
pl.plot(x, y, "o", alpha=0.4)
x2 = np.linspace(-2, 2, 100)
y2 = np.polyval(poly, x2)
pl.plot(x2, y2, lw=2, color="r")
pl.text(x2[5], y2[5], poly2latex(poly), fontsize=16)
Here is the output:
Here's a one liner.
If fit is the poly1d object, then while plotting the fitted line just use the label argument as below:
label='y=${}$'.format(''.join(['{}x^{}'.format(('{:.2f}'.format(j) if j<0 else '+{:.2f}'.format(j)),(len(fit.coef)-i-1)) for i,j in enumerate(fit.coef)]))