How to color a plot lines based on amplitude - matplotlib

So I want to change the color of the blue vlines(matplotlib) in the above plot.
First, I want to give the negative (< 0) values a different color and plot their absolute value, so that only the amplitude is visible while they stay distinguishable by color from the positive ones. Positive values can remain unchanged.
minimum reproducible code as below:
import numpy as np
import random
import matplotlib.pyplot as plt
def _window(lo, hi):
    """Return the positions of the peaks whose m/z lies strictly between lo and hi."""
    return np.flatnonzero((peakmzs > lo) & (peakmzs < hi))


def _uniform(lo, hi, count):
    """Return `count` values drawn one by one with random.uniform(lo, hi)."""
    return [random.uniform(lo, hi) for _ in range(count)]


# 2080 random m/z positions between 506 and 2000, sorted ascending.
peakmzs = np.sort(np.array([random.uniform(506, 2000) for _ in range(2080)]))

# Spectrum intensities: zero everywhere except inside a few m/z windows.
spec = np.zeros_like(peakmzs)
for lo, hi, top in ((1500, 1540, 0.002), (700, 820, 0.05)):
    inside = _window(lo, hi)
    spec[inside] = _uniform(0, top, inside.size)
spec[300:302] = 0.07
inside = _window(600, 650)
spec[inside] = _uniform(0, 0.03, inside.size)

# One vertical line per peak, drawn from its intensity up to the global maximum.
plt.vlines(peakmzs, ymin=spec, ymax=spec.max())
plt.show()

# Signed values for the same peaks: random inside two windows, zero elsewhere.
shp_values = np.zeros_like(peakmzs)
for lo, hi, lowest, n_zeroed in ((1500, 1540, -0.003, 10), (700, 820, -0.004, 70)):
    inside = _window(lo, hi)
    # NOTE(review): these integers are drawn from the same numeric range as the
    # m/z window but are used as positions into shp_values -- confirm intended.
    zeroed = np.random.randint(lo, hi, n_zeroed)
    shp_values[inside] = _uniform(lowest, 0.002, inside.size)
    shp_values[zeroed] = 0
plt.plot(shp_values)

Based on the suggestion from #JohanC,
# Colour every line by the sign of its value: cyan below zero, pink otherwise.
demo_shp = np.array(shapvalues[0][19])
is_negative = demo_shp < 0
colors = np.where(is_negative, 'cyan', 'pink')
# Each line runs from 0 to the (signed) value at that m/z position.
plt.vlines(x=peakmzs, ymin=[0], ymax=demo_shp, colors=colors)
plt.show()

Related

centre the peak at x=0

Right now the rectangle signal is centre on x = 4, how can I make it centre on x = 0
def rect(n, T):
    """Return a rectangular pulse: zeros, then ``T`` ones, then zeros.

    ``int((n - T) / 2)`` zeros are placed on each side of the ones, so the
    result holds ``T + 2 * int((n - T) / 2)`` samples; that is one fewer
    than ``n`` whenever ``n - T`` is odd.

    Args:
        n: Nominal total number of samples.
        T: Number of samples set to one.

    Returns:
        A 1-D float array with the ones in the middle.
    """
    # The same padding is used on both sides, which keeps the pulse symmetric.
    pad = np.zeros(int((n - T) / 2))
    return np.concatenate((pad, np.ones(T), pad))
# Build the pulse and plot it; only the pulse values are passed to plt.step,
# no explicit x positions.
x =rect(11,6)
plt.step(x, 'r')
plt.show()
This is so far that I wrote. Appreciate anyone can give the Idea
One way to center the rectangle at x=0 is to pass explicit x values to plt.step. You can build them with numpy's arange, centering them around 0 by using the length of the array returned by the rect function:
# Renamed to y because the pulse is now passed as the y values of plt.step
y = rect(11, 6)
# Unit-spaced x values, one per sample; the 0.5 offset makes them symmetric about 0
x = np.arange(-len(y)/2 + 0.5, len(y)/2 + 0.5)
And then plot it using plt.step and setting where to mid (more info in the plt.step docs):
# Pass both x and y; with where='mid' the steps occur half-way between the x positions.
plt.step(x, y, where='mid', color='r')
Hope this helps. Here is the full code:
import numpy as np
import matplotlib.pyplot as plt
def rect(n, T):
    """Return a rectangular pulse: zeros, then ``T`` ones, then zeros.

    ``int((n - T) / 2)`` zeros are placed on each side of the ones, so the
    result holds ``T + 2 * int((n - T) / 2)`` samples; that is one fewer
    than ``n`` whenever ``n - T`` is odd.

    Args:
        n: Nominal total number of samples.
        T: Number of samples set to one.

    Returns:
        A 1-D float array with the ones in the middle.
    """
    # The same padding is used on both sides, which keeps the pulse symmetric.
    pad = np.zeros(int((n - T) / 2))
    return np.concatenate((pad, np.ones(T), pad))
# The pulse values are used as y.
y = rect(11, 6)
# Unit-spaced x values, one per sample; the 0.5 offset makes them symmetric about 0
x = np.arange(-len(y)/2 + 0.5, len(y)/2 + 0.5)
# Pass both x and y; with where='mid' the steps occur half-way between the x positions.
plt.step(x, y, where='mid', color='r')
plt.show()

How to add new columns to pandas data frame using .loc

I am doing very simple daily stock calculations in a data frame (e.g. SMA, VWAP, RSI, etc.). After I upgraded to Anaconda 3.0, my code stopped working and gives the following error. I don't have much coding experience and need some help.
KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['RSI', 'ZONE'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"
Followed is the code.
import yfinance as yf
import pandas as pd
# Adds VWAP and stochastic K columns to the price frame `data`, then tries to
# select/output the columns Date..ZONE and returns the resulting frame.
# NOTE(review): the indentation of this listing was lost; presumably every line
# down to `return df` belongs to the function body.
def convert_to_dataframe_daily(data):
# NOTE(review): the window*/ema_time values below are assigned but not used in
# the visible body.
window = 10
window20 = 20
window50 = 50
window100 = 100
window200 = 200
ema_time = 8
#data = yf.download("googl", period="30d", interval="5m")
#data = yf.download('TSLA', period='30d', interval='5m')
pd.set_option('display.max_columns', None)
#calculation for VWAP
volumeC = data['Volume']
priceC = data['Close']
# Cumulative (volume * close) divided by cumulative volume, forward-filled.
df = data.assign(VWAP=((volumeC * priceC).cumsum() / volumeC.cumsum()).ffill())
#Convert the timezone to Chicago central
#df.index = pd.DatetimeIndex(df.index.tz_convert('US/Central')) # aware--> aware
#reset the dataframe index and separate time
df.reset_index(inplace=True)
#df.index.intersection
#df2 = df[df.index.isin(dts)]
#df['Date'] = pd.to_datetime(df['Datetime']).dt.date
#df['Time'] = pd.to_datetime(df['Datetime']).dt.time
# calculate stochastic
# Rolling 5-row low and high used as the stochastic range.
df['low5']= df['Low'].rolling(5).min()
df['high5']= df['High'].rolling(5).max()
#k = 100 * (c - l) / (h - l)
# Computed without the factor 100, so the zone thresholds below are 0.1 steps.
df['K'] = (df['Close']-df['low5'])/(df['high5']-df['low5'])
#s.reindex([1, 2, 3])
# NOTE(review): `columns` is not used afterwards in the visible body.
columns = df.columns.values.tolist()
#df[columns[index]]
#df = pd.DataFrame(np.random.randn(8, 4),index=dates, columns=['A', 'B', 'C', 'D'])
# NOTE(review): 'RSI' and 'ZONE' have not been created at this point; these are
# the labels the quoted KeyError reports as missing.
df = df.loc[:, ('Date','Open','High','Low', 'Close','Volume','VWAP','K','RSI', 'ZONE')]
#df = df.reindex(['Date','Open','High','Low', 'Close','Volume','VWAP','K','RSI', 'ZONE'])
# NOTE(review): calculate_rsi is not defined in the visible listing.
df['RSI'] = calculate_rsi(df)
# Boolean masks splitting K into ten bands of width 0.1.
filter_Z1 = df['K'] <=0.1
filter_Z2 = (df['K'] > 0.1) & (df['K'] <= 0.2)
filter_Z3 = (df['K'] > 0.2) & (df['K'] <= 0.3)
filter_Z4 = (df['K'] > 0.3) & (df['K'] <= 0.4)
filter_Z5 = (df['K'] > 0.4) & (df['K'] <= 0.5)
filter_Z6 = (df['K'] > 0.5) & (df['K'] <= 0.6)
filter_Z7 = (df['K'] > 0.6) & (df['K'] <= 0.7)
filter_Z8 = (df['K'] > 0.7) & (df['K'] <= 0.8)
filter_Z9 = (df['K'] > 0.8) & (df['K'] <= 0.9)
filter_Z10 = (df['K'] > 0.9) & (df['K'] <= 1)
#plug in stochastic zones
# Series.where keeps entries where the condition holds and writes the label
# elsewhere, hence the negated masks.
# NOTE(review): negation is written with unary minus; `~mask` is the usual
# boolean inversion -- confirm `-` is accepted by the installed pandas/NumPy.
df['ZONE'].where(-filter_Z1, 'Z1', inplace=True)
df['ZONE'].where(-filter_Z2, 'Z2', inplace=True)
df['ZONE'].where(-filter_Z3, 'Z3', inplace=True)
df['ZONE'].where(-filter_Z4, 'Z4', inplace=True)
df['ZONE'].where(-filter_Z5, 'Z5', inplace=True)
df['ZONE'].where(-filter_Z6, 'Z6', inplace=True)
df['ZONE'].where(-filter_Z7, 'Z7', inplace=True)
# NOTE(review): filter_Z8 is labelled 'Z9' here; 'Z8' was probably intended.
df['ZONE'].where(-filter_Z8, 'Z9', inplace=True)
df['ZONE'].where(-filter_Z9, 'Z9', inplace=True)
df['ZONE'].where(-filter_Z10, 'Z10', inplace=True)
# NOTE(review): the labels are passed as a tuple key here, not as a list of
# column names (df[[...]]).
df = df['Date','Open','High','Low', 'Close','Volume','VWAP','K','RSI', 'ZONE']
return df
# Driver: download 500 days of daily 'ba' data and print the converted frame.
data = yf.download('ba', period='500d', interval='1d')
df = convert_to_dataframe_daily(data)
print(df)
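A few lines need to be changed. The columns 'RSI' and 'ZONE' do not exist yet at the point where they are selected, which is what raises the KeyError.
Instead of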
df = df.loc[:, ('Date','Open','High','Low', 'Close','Volume','VWAP','K','RSI', 'ZONE')]
use
df = df[['Date','Open','High','Low', 'Close','Volume','VWAP','K']]
before
df['ZONE'].where(-filter_Z1, 'Z1', inplace=True)
...
put a line
df['ZONE'] = 0
The line before return df should be changed to
df = df[['Date','Open','High','Low', 'Close','Volume','VWAP','K','RSI', 'ZONE']]

How to set 'y > 0' formula in set_xlim of matplotlib?

I want to set x range according to y value in plotting graph such as y > 0 but I'm not sure how to set this one. Could you let me know how to set it?
# The file name must be a string; the columns are then read from that same frame.
df = pd.read_csv('file.csv')
x = np.array(df['A'])
y = np.array(df['B'])
z = np.array(df['C'])
# Mask the x values whose y lies outside [0, 100]; min()/max() of a masked
# array only consider the unmasked entries.
x_for_ax1 = np.ma.masked_where((y < 0) | (y > 100), x)
fig, (ax2, ax1) = plt.subplots(ncols=1, nrows=2)
ax1.set_ylim([-10, 40])
ax2.set_ylim([-5, 5])
# ax1 and ax2 get the same x range.
x_range = [x_for_ax1.min(), x_for_ax1.max()]
ax1.set_xlim(x_range)
ax2.set_xlim(x_range)
If you want to restrict the x-limits to the region where y lies within a given range, you can mask the x values whose y is out of range and take the minimum and maximum of the resulting masked array.
In the example below, at the left both subplots get the x-limits where either y or z are in range. At the right, each subplot only gets the x-range where its corresponding y is in range.
For demonstration purposes, the example creates a data frame from some dummy data.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Dummy data: one x column and two dependent columns.
a = np.linspace(-1, 4, 500)
df = pd.DataFrame({'A': a, 'B': np.sin(a) * 100, 'C': np.cos(a) * 150})
x = np.array(df['A'])
y = np.array(df['B'])
z = np.array(df['C'])

# Visible value ranges for the two dependent columns.
ymin, ymax = 1, 100
zmin, zmax = 1, 150

# Masks marking the samples that fall outside those ranges.
y_outside = (y < ymin) | (y > ymax)
z_outside = (z < zmin) | (z > zmax)

# x is masked only where BOTH y and z are out of range (left column),
# or where the panel's own quantity is out of range (right column).
x_for_ax1 = np.ma.masked_where(y_outside & z_outside, x)
x_for_ax3 = np.ma.masked_where(y_outside, x)
x_for_ax4 = np.ma.masked_where(z_outside, x)

fig, ((ax1, ax3), (ax2, ax4)) = plt.subplots(ncols=2, nrows=2)

# (axes, plotted values, y label, y limits, masked x giving the x limits, title)
panels = [
    (ax1, y, 'y', [ymin, ymax], x_for_ax1, 'x limited to y and z range'),
    (ax3, y, 'y', [ymin, ymax], x_for_ax3, 'x limited to y range'),
    (ax2, z, 'z', [zmin, zmax], x_for_ax1, 'x limited to y and z range'),
    (ax4, z, 'z', [zmin, zmax], x_for_ax4, 'x limited to z range'),
]
for axes, values, value_label, value_limits, x_visible, title in panels:
    axes.set_xlabel('x')
    axes.set_ylabel(value_label)
    axes.plot(x, values)
    axes.set_ylim(value_limits)
    # min()/max() of the masked array ignore the masked x values.
    axes.set_xlim([x_visible.min(), x_visible.max()])
    axes.set_title(title)

plt.tight_layout(w_pad=1)
plt.show()

Data-Visualization Python

Plot 4 different line plots for the 4 companies in dataframe open_prices. Year would be on X-axis, stock price on Y axis, you will need (2,2) plot. Set figure size to 10, 8 and share X-axis for better visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nsepy import get_history
import datetime as dt
%matplotlib inline
# Date range passed to every get_history call: 1 Jan 2015 up to now.
start = dt.datetime(2015, 1, 1)
end = dt.datetime.today()
# For each symbol: fetch its history and convert the index to datetimes.
infy = get_history(symbol='INFY', start = start, end = end)
infy.index = pd.to_datetime(infy.index)
hdfc = get_history(symbol='HDFC', start = start, end = end)
hdfc.index = pd.to_datetime(hdfc.index)
reliance = get_history(symbol='RELIANCE', start = start, end = end)
reliance.index = pd.to_datetime(reliance.index)
wipro = get_history(symbol='WIPRO', start = start, end = end)
wipro.index = pd.to_datetime(wipro.index)
# Put the four 'Open' columns side by side and name them.
open_prices = pd.concat([infy['Open'], hdfc['Open'],reliance['Open'],
wipro['Open']], axis = 1)
open_prices.columns = ['Infy', 'Hdfc', 'Reliance', 'Wipro']
# NOTE(review): this creates a 1x2 grid bound to ax1/ax2, while the calls below
# index a 2x2 `axes` that is not defined in the visible code.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
# NOTE(review): INFY/HDB/TTM/WIT do not match the column names assigned above
# ('Infy', 'Hdfc', 'Reliance', 'Wipro').
axes[0, 0].plot(open_prices.index.year,open_prices.INFY)
axes[0, 1].plot(open_prices.index.year,open_prices.HDB)
axes[1, 0].plot(open_prices.index.year,open_prices.TTM)
axes[1, 1].plot(open_prices.index.year,open_prices.WIT)
Blank graph is coming.Please help....?!??
Below code works fine , I have changed the following things
a) axes should be ax b) the DataFrame column names were incorrect c) anyone who wants to try this example also needs to install the lxml library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nsepy import get_history
import datetime as dt
# Date range passed to every get_history call: 1 Jan 2015 up to now.
start = dt.datetime(2015, 1, 1)
end = dt.datetime.today()

# Column label used in open_prices -> symbol passed to get_history.
symbols = {'Infy': 'INFY', 'Hdfc': 'HDFC', 'Reliance': 'RELIANCE', 'Wipro': 'WIPRO'}

# Fetch each history once, convert its index to datetimes, keep the 'Open' column.
opens = []
for symbol in symbols.values():
    history = get_history(symbol=symbol, start=start, end=end)
    history.index = pd.to_datetime(history.index)
    opens.append(history['Open'])

open_prices = pd.concat(opens, axis=1)
open_prices.columns = list(symbols)
print(open_prices.columns)

# 2x2 grid on a 10x8 figure with a shared x-axis, as the exercise asks
# (the earlier version shared the y-axis instead and set no figure size).
f, ax = plt.subplots(2, 2, figsize=(10, 8), sharex=True)

# Grid position -> column plotted there (same placement as before).
panels = {(0, 0): 'Infy', (1, 0): 'Hdfc', (0, 1): 'Reliance', (1, 1): 'Wipro'}
for position, label in panels.items():
    ax[position].plot(open_prices.index.year, open_prices[label])
    # Title each panel so the four companies can be told apart.
    ax[position].set_title(label)
plt.show()

Binning data and plotting

I have a dataframe of essentially random numbers, (except for one column), some of which are NaNs. MWE:
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
# Seeded generator so the example data is reproducible.
randomNumberGenerator = np.random.RandomState(1000)

# z is uniform on [0, 5); A-D are noisy linear functions of z.
z = 5 * randomNumberGenerator.rand(101)
A = 4 * z - 3 + randomNumberGenerator.randn(101)
B = 4 * z - 2 + randomNumberGenerator.randn(101)
C = 4 * z - 1 + randomNumberGenerator.randn(101)
D = 4 * z - 4 + randomNumberGenerator.randn(101)

# Knock out a few entries to simulate missing data.
A[50] = np.nan
A[:3] = np.nan
B[12:20] = np.nan

# Rows containing NaN are kept (no dropna); the count below skips them instead.
sources = pd.DataFrame({'z': z, 'A': A, 'B': B, 'C': C, 'D': D})

x = sources.z
y1 = sources.A
y2 = sources.B
y3 = sources.C
y4 = sources.D

for column in [y1, y2, y3, y4]:
    # Count the points where neither coordinate is NaN.
    count = np.count_nonzero(~np.logical_or(np.isnan(x), np.isnan(column)))
    label = 'Points plotted: %d' % count
    plt.scatter(x, column, label=label)
plt.legend()
I need to bin the data according to x and plot different columns in each bin, in 3 side-by-side subplots:
x_1 <= 1 plot A-B | 1 < x_2 < 3 plot B+C | 3 < x_3 plot C-D
I've tried to bin the data with
# Split the rows into three z ranges, then keep only the z column of each.
x1 = sources[sources['z']<1] # z < 1
x2 = sources[sources['z']<3]
x2 = x2[x2['z']>=1] # 1<= z < 3
# `<=` (not `<`) so that the row holding max(z) stays in the last bin.
x3 = sources[sources['z']<=max(z)]
x3 = x3[x3['z']>=3] # 3 <= z <= max(z)
x1 = x1['z']
x2 = x2['z']
x3 = x3['z']
but there's got to be a better way to go about it. What's the best way to produce something like this?
In pandas, binning is done with cut, so a solution is:
# Rows containing NaN are kept (no dropna); the counts below skip them instead.
sources = pd.DataFrame({'z': z, 'A': A, 'B': B, 'C': C, 'D': D})

# Label every row with its z bin: (-inf, 1] -> 1, (1, 3] -> 2, (3, max(z)] -> 3.
bins = pd.cut(sources['z'], [-np.inf, 1, 3, max(z)], labels=[1, 2, 3])

# Bin label -> the two columns shown in that bin's subplot.
columns_per_bin = {1: ('A', 'B'), 2: ('B', 'C'), 3: ('C', 'D')}

fig, ax = plt.subplots(1, 3)
for axes, (bin_label, columns) in zip(ax, columns_per_bin.items()):
    in_bin = sources[bins == bin_label]
    for column in columns:
        # x is z (as in the question's scatter), not the plotted column itself;
        # only points with both coordinates present are counted.
        count = np.count_nonzero(in_bin['z'].notna() & in_bin[column].notna())
        axes.scatter(in_bin['z'], in_bin[column], label='Points plotted: %d' % count)
    axes.legend()