controlling the number of x ticks in pyplot - matplotlib

I want to display all 13 x ticks, but the graph shows only 7 of them, spaced two days apart.
plt.locator_params(axis='x',nbins=13)
Why doesn't the code above work?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as dates
# Thirteen sample values, one for each date generated below.
y = [0, 0.86, 0.826, 0.816, 0.807, 0.803, 0.804, 0.803, 0.802,0.81, 0.813, 0.813, 0.813]
# Thirteen consecutive dates starting on 2015-02-25.
times = pd.date_range('2015-02-25', periods=13)
fig, ax = plt.subplots(1)
fig.autofmt_xdate()
# Major tick labels are rendered as day-month-year.
xfmt = dates.DateFormatter('%d-%m-%y')
ax.xaxis.set_major_formatter(xfmt)
# Asks the x-axis locator for 13 bins; this is the call the question is about.
plt.locator_params(axis='x',nbins=13)
ax.plot_date(times.to_pydatetime(), y, 'v-')
# Minor ticks on weekday 1 of every week; note that (1) is a plain int, not a tuple.
ax.xaxis.set_minor_locator(dates.WeekdayLocator(byweekday=(1),
interval=1))
# Minor labels show the day of month and, on a second line, the weekday name.
ax.xaxis.set_minor_formatter(dates.DateFormatter('%d\n%a'))
# Vertical grid lines follow the minor ticks; the y axis uses the default grid.
ax.xaxis.grid(True, which="minor")
ax.yaxis.grid()
plt.tight_layout()
plt.show()

The warning should give you some clue why this is happening:
UserWarning: 'set_params()' not defined for locator of type <class 'pandas.tseries.converter.PandasAutoDateLocator'>
str(type(self)))
Use plt.xticks(times.to_pydatetime()) instead:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as dates
# Thirteen daily samples starting on 2015-02-25.
y = [0, 0.86, 0.826, 0.816, 0.807, 0.803, 0.804, 0.803, 0.802, 0.81, 0.813, 0.813, 0.813]
times = pd.date_range('2015-02-25', periods=13)

fig, ax = plt.subplots(1)
# plot() accepts datetime x values directly; plot_date() is deprecated in
# recent Matplotlib releases and only forwarded to plot() anyway.
ax.plot(times.to_pydatetime(), y, 'v-')

# Major ticks: one per data point, labelled day-month-year.
plt.xticks(times.to_pydatetime())
ax.xaxis.set_major_formatter(dates.DateFormatter('%d-%m-%y'))

# Minor ticks: weekday 1 of every week, labelled "day<newline>weekday name".
ax.xaxis.set_minor_locator(dates.WeekdayLocator(byweekday=1, interval=1))
ax.xaxis.set_minor_formatter(dates.DateFormatter('%d\n%a'))

ax.xaxis.grid(True, which="minor")
ax.yaxis.grid()

# Rotate the date labels only once all 13 major ticks are in place, so the
# rotation is applied to the labels that are actually drawn.
fig.autofmt_xdate()
plt.tight_layout()
plt.show()

Related

How to annotate in 2 decimal places using Matplotlib

I am trying to create a heatmap that displays correlation coefficient values. I'm quite new to this; the code below annotates each cell with four decimal places, whereas I'm trying to narrow it down to two decimal places.
Does anyone have experience with this?
import pandas_datareader.data as web
import pandas as pd
import datetime as dt
import csv
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import seaborn as sns
style.use('ggplot')
def visualize_data():
    """Plot an annotated correlation heatmap for 'sti_joined.csv'.

    Reads the CSV, indexes it by its 'Date' column, computes the correlation
    matrix of the percentage changes, prints its head, and shows it as a
    pcolor heatmap whose cells are annotated with the coefficient values.
    """
    df = pd.read_csv('sti_joined.csv')
    df.set_index('Date', inplace=True)
    df_corr = df.pct_change().corr()
    print(df_corr.head())

    data = df_corr.values
    n_rows, n_cols = data.shape

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)

    # Ticks sit at cell centres: columns run along x, rows along y.
    ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
    ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    # Annotate every cell; the number of decimals is set by the format spec.
    for y in range(n_rows):
        for x in range(n_cols):
            ax.text(x + 0.5, y + 0.5, '%.4f' % data[y, x],
                    horizontalalignment='center',
                    verticalalignment='center',
                    )

    ax.set_xticklabels(df_corr.columns)
    ax.set_yticklabels(df_corr.index)
    plt.xticks(rotation=90)
    # Correlation coefficients lie in [-1, 1]; pin the colour scale to that.
    heatmap.set_clim(-1, 1)
    plt.tight_layout()
    plt.show()


visualize_data()
Instead of '%.4f' % data[y, x], format the value with two decimal places, for example:
'{0:.2f}'.format(data[y,x])

How to plot multiple graphs stacked above each other

I need to plot a set of 9 or more data sets with a common x-axis. I was able to do it for 2 of them, but the rest just don't appear. They have to be stacked one above the other, sharing a common x-axis. I have attached an image of what I have been able to do so far.
stack of plot
I have used the following code
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.gridspec as gridspec
from matplotlib.lines import Line2D
import matplotlib.lines as mlines
file1 = '1.dat'
file2 = '10.dat'
# Whitespace-separated files without a header row. The separator is a regex,
# so it is written as a raw string: '\s' is not a valid Python string escape.
data1 = pd.read_csv(file1, delimiter=r'\s+', header=None, engine='python')
data1.columns = ['M', 'B', 'C']
data2 = pd.read_csv(file2, delimiter=r'\s+', header=None, engine='python')
data2.columns = ['N', 'A', 'D']
def fit_data():
    """Plot data1 above data2 in two stacked panels sharing one x axis.

    Uses the module-level frames ``data1`` (columns M, B, C) and ``data2``
    (columns N, A, D). Each panel shows a thin connecting line, the points,
    and error bars taken from the frame's third column. The figure is saved
    to '1.pdf', shown, and then closed.
    """
    fig = plt.figure(1, figsize=(12, 11))

    # Top panel.
    ax1 = fig.add_subplot(211)
    ax1.plot(data1['M'], data1['B'], color='cornflowerblue', linestyle='-', lw=0.5)
    ax1.scatter(data1['M'], data1['B'], marker='o', color='red', s=25)
    ax1.errorbar(data1['M'], data1['B'], data1['C'], fmt='.', ecolor='red',
                 color='red', elinewidth=1, capsize=3)

    # Bottom panel: position 212, not 211, otherwise it is drawn on top of
    # the first panel instead of below it.
    ax2 = fig.add_subplot(212, sharex=ax1)
    ax2.plot(data2['N'], data2['A'], color='cornflowerblue', linestyle='-', lw=0.5)
    ax2.scatter(data2['N'], data2['A'], marker='o', color='blue', s=25)
    ax2.errorbar(data2['N'], data2['A'], data2['D'], fmt='.', ecolor='blue',
                 color='blue', elinewidth=1, capsize=3)

    # Only the bottom panel keeps its x tick labels; the panels touch.
    plt.setp(ax1.get_xticklabels(), visible=False)
    fig.subplots_adjust(hspace=0)

    # Same tick styling on both panels.
    for ax in (ax1, ax2):
        ax.tick_params(axis='both', which='minor', length=5, width=2, labelsize=18)
        ax.tick_params(axis='both', which='major', length=8, width=2, labelsize=18)

    plt.savefig("1.pdf")
    plt.show()
    plt.close()


fit_data()
I read through stacking of plots but wasn't able to apply the same here.
I modified the code to this but this is what I get. modified code.
I need the stacking to be done to do a comparative study. Something like this image. comparative study
This is the part of the code I have modified and used.
plt.setp(ax1.get_xticklabels(), visible=False) # hide labels
fig.subplots_adjust(hspace=0.0) # remove vertical space between subplots
Should it be done separately for ax1, ax2 and so on?
plt.subplots_adjust(hspace=0.0) removes the space between them.
You can have as many plots as you want:
from matplotlib import pyplot as plt
import numpy as np
number_of_plots = 9
# One row of 50 random samples per panel.
X = np.random.random((number_of_plots, 50))

# sharex=True gives all panels the common x axis the question asks for;
# squeeze=False keeps `axs` two-dimensional even when only one plot is requested.
fig, axs = plt.subplots(nrows=number_of_plots, ncols=1, sharex=True, squeeze=False)
for ax, x in zip(axs[:, 0], X):
    ax.plot(range(50), x)

# Remove the vertical gap so the panels sit directly on top of each other.
plt.subplots_adjust(hspace=0.0)
plt.show()

How to add droplines to a seaborn scatterplot?

Using the following example code in a Jupyter notebook:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Five random points in the unit square.
points = np.random.rand(5, 2)
df = pd.DataFrame(points, columns=['a', 'b'])
sns.set()
g = sns.relplot(data=df, x='a', y='b', kind='scatter')
# Clamp both axes to [0, 1]; the trailing semicolon hides the notebook echo.
g.set(xlim=(0, 1), ylim=(0, 1));
The resulting plot shows the data points, but I would also like to have vertical drop lines and occasionally horizontal ones as well. To clarify what I mean by droplines, here is a mockup of the actual vs. the desired output:
Update: A little more complex input that makes it harder to manually draw the lines:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Twenty random rows; 'c' drives the hue and 'd' selects the facet.
values = np.random.rand(20, 3)
df = pd.DataFrame(values, columns=['a', 'b', 'c'])
df['d'] = ['apples', 'bananas', 'cherries', 'dates'] * 5
sns.set()
g = sns.relplot(
    data=df, x='a', y='b', hue='c',
    col='d', col_wrap=2, kind='scatter',
)
# Clamp both axes to [0, 1]; the trailing semicolon hides the notebook echo.
g.set(xlim=(0, 1), ylim=(0, 1));
There are several ways to plot vertical/horizontal lines. One of them is to use hlines or vlines. For simplicity, this can be done in a loop.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(121)
fig, ax = plt.subplots()
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
sns.set()

# scatterplot() is the axes-level function and draws on the axes it is given.
# relplot() is figure-level: it builds its own figure and does not draw on a
# passed `ax`, which is why the original had to close a spare figure afterwards.
sns.scatterplot(data=df, x='a', y='b', color='blue', ax=ax)

# Drop lines: from each point horizontally to x=0 and vertically to y=0.
for x, y in zip(df['a'], df['b']):
    ax.hlines(y, 0, x, color='blue')
    ax.vlines(x, 0, y, color='blue')

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

How can I draw scatter trend line on matplot? Python-Pandas

I want to draw a trend line on a matplotlib scatter plot. How can I do that?
Python
import pandas as pd
import matplotlib.pyplot as plt
# Load the table and keep only the two columns that get plotted.
table = pd.read_csv('/tmp/test.csv')
data = table[['fee', 'time']]
x, y = data['fee'], data['time']

# Plain scatter of time against fee.
plt.scatter(x, y)
plt.show()
CSV
fee,time
100,650
90,700
80,860
70,800
60,1000
50,1200
time is integer value.
Scatter chart
I'm sorry I found the answer by myself.
How to add trendline in python matplotlib dot (scatter) graphs?
Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the table and keep only the two columns that get plotted.
table = pd.read_csv('/tmp/test.csv')
data = table[['fee', 'time']]
x, y = data['fee'], data['time']
plt.scatter(x, y)

# Degree-1 least-squares fit, drawn as a dashed red line over the points.
coefficients = np.polyfit(x, y, 1)
trend = np.poly1d(coefficients)
plt.plot(x, trend(x), "r--")
plt.show()
Chart
With text:
from sklearn.metrics import r2_score
plt.plot(x, y, "+", ms=10, mec="k")

# Degree-1 least-squares fit and its predictions at the sample points.
z = np.polyfit(x, y, 1)
y_hat = np.poly1d(z)(x)
plt.plot(x, y_hat, "r--", lw=1)

# Mathtext label with the fitted line and R^2. The backslash before ';' is
# doubled because "\;" is not a valid Python escape sequence; the text handed
# to matplotlib is unchanged.
text = f"$y={z[0]:0.3f}\\;x{z[1]:+0.3f}$\n$R^2 = {r2_score(y,y_hat):0.3f}$"

# Axes-fraction coordinates: anchor the label in the top-left corner.
ax = plt.gca()
ax.text(0.05, 0.95, text, transform=ax.transAxes,
        fontsize=14, verticalalignment='top')
You also can use Seaborn lmplot:
import seaborn as sns
import pandas as pd
from io import StringIO
# The sample table from the question, inlined as CSV text.
csv_text = """fee,time
100,650
90,700
80,860
70,800
60,1000
50,1200"""
df = pd.read_csv(StringIO(csv_text))

# Scatter plus fitted regression line; ci=None leaves out the confidence band.
_ = sns.lmplot(x='fee', y='time', data=df, ci=None)
Output:

name "plot" is not defined

I'm trying to plot some data using matplotlib with the code below.
import matplotlib.pyplot as plt
import numpy as np
# One name for the table file, so the write and the read cannot drift apart
# (the original wrote 'tabelle1.txt' but read back 'tabelle1').
table_path = 'tabelle1.txt'

# Sample an exponential decay and store it as a two-column text table.
data_x = np.linspace(0, 10, 100)
data_y = 10 * np.exp(-data_x)
np.savetxt(table_path, np.column_stack([data_x, data_y]), header='U I')

# Read the table back, one array per column.
x, y = np.genfromtxt(table_path, unpack=True)

plt.plot(x, y, 'rx')
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.yscale('log')
plt.tight_layout()
plt.savefig('loesung.pdf')
However, this raises the error NameError: name 'plot' is not defined.
How can I fix this?
please try
#Add this script
import matplotlib
#Before
import matplotlib.pyplot as plt