pandas dataframe bar plot put space between bars - pandas

So I want my image look like this
But now my image look like this
How do I reduce the space between bars without making the bar width into 1?
Here is my code:
plot=repeat.loc['mean'].plot(kind='bar',rot=0,alpha=1,cmap='Reds',
yerr=repeat.loc['std'],error_kw=dict(elinewitdh=0.02,ecolor='grey'),
align='center',width=0.2,grid=None)
plt.ylabel('')
plt.grid(False)
plt.title(cell,ha='center')
plt.xticks([])
plt.yticks([])
plt.ylim(0,120)
plt.tight_layout()`

make the plot from scratch if the toplevel functions from pandas or seaborn do not give you the desired result! :)
import seaborn.apionly as sns
import scipy as sp
import matplotlib.pyplot as plt
# some fake data
data = sp.randn(10,10) + 1
data = data[sp.argsort(sp.average(data,axis=1))[::-1],:]
avg = sp.average(data,axis=1)
std = sp.std(data,axis=1)
# a practical helper from seaborn to quickly generate the colors
colors = sns.color_palette('Reds',n_colors = data.shape[0])
fig, ax = plt.subplots()
pos = range(10)
ax.bar(pos,avg,width=1)
for col,patch in zip(colors,ax.patches):
patch.set_facecolor(col)
patch.set_edgecolor('k')
for i,p in enumerate(pos):
ax.plot([p,p],[avg[i],avg[i]+std[i]],color='k',lw=2, zorder=-1)

Related

Equivalent of Hist()'s Layout hyperparameter in Sns.Pairplot?

Am trying to find hist()'s figsize and layout parameter for sns.pairplot().
I have a pairplot that gives me nice scatterplots between the X's and y. However, it is oriented horizontally and there is no equivalent layout parameter to make them vertical to my knowledge. 4 plots per row would be great.
This is my current sns.pairplot():
sns.pairplot(X_train,
x_vars = X_train.select_dtypes(exclude=['object']).columns,
y_vars = ["SalePrice"])
This is what I would like it to look like: Source
num_mask = train_df.dtypes != object
num_cols = train_df.loc[:, num_mask[num_mask == True].keys()]
num_cols.hist(figsize = (30,15), layout = (4,10))
plt.show()
What you want to achieve isn't currently supported by sns.pairplot, but you can use one of the other figure-level functions (sns.displot, sns.catplot, ...). sns.lmplot creates a grid of scatter plots. For this to work, the dataframe needs to be in "long form".
Here is a simple example. sns.lmplot has parameters to leave out the regression line (fit_reg=False), to set the height of the individual subplots (height=...), to set its aspect ratio (aspect=..., where the subplot width will be height times aspect ratio), and many more. If all y ranges are similar, you can use the default sharey=True.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# create some test data with different y-ranges
np.random.seed(20230209)
X_train = pd.DataFrame({"".join(np.random.choice([*'uvwxyz'], np.random.randint(3, 8))):
np.random.randn(100).cumsum() + np.random.randint(100, 1000) for _ in range(10)})
X_train['SalePrice'] = np.random.randint(10000, 100000, 100)
# convert the dataframe to long form
# 'SalePrice' will get excluded automatically via `melt`
compare_columns = X_train.select_dtypes(exclude=['object']).columns
long_df = X_train.melt(id_vars='SalePrice', value_vars=compare_columns)
# create a grid of scatter plots
g = sns.lmplot(data=long_df, x='SalePrice', y='value', col='variable', col_wrap=4, sharey=False)
g.set(ylabel='')
plt.show()
Here is another example, with histograms of the mpg dataset:
import matplotlib.pyplot as plt
import seaborn as sns
mpg = sns.load_dataset('mpg')
compare_columns = mpg.select_dtypes(exclude=['object']).columns
mpg_long = mpg.melt(value_vars=compare_columns)
g = sns.displot(data=mpg_long, kde=True, x='value', common_bins=False, col='variable', col_wrap=4, color='crimson',
facet_kws={'sharex': False, 'sharey': False})
g.set(xlabel='')
plt.show()

Barplot per each ax in matplotlib

I have the following dataset, ratings in stars for two fictitious places:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'id':['A','A','A','A','A','A','A','B','B','B','B','B','B'],
'rating':[1,2,4,5,5,5,3,1,3,3,3,5,2]})
Since the rating is a category (is not a continuous data) I convert it to a category:
df['rating_cat'] = pd.Categorical(df['rating'])
What I want is to create a bar plot per each fictitious place ('A or B'), and the count per each rating. This is the intended plot:
I guess using a for per each value in id could work, but I have some trouble to decide the size:
fig, ax = plt.subplots(1,2,figsize=(6,6))
axs = ax.flatten()
cats = df['rating_cat'].cat.categories.tolist()
ids_uniques = df.id.unique()
for i in range(len(ids_uniques)):
ax[i].bar(df[df['id']==ids_uniques[i]], df['rating'].size())
But it returns me an error TypeError: 'int' object is not callable
Perhaps it's something complicated what I am doing, please, could you guide me with this code
The pure matplotlib way:
from math import ceil
# Prepare the data for plotting
df_plot = df.groupby(["id", "rating"]).size()
unique_ids = df_plot.index.get_level_values("id").unique()
# Calculate the grid spec. This will be a n x 2 grid
# to fit one chart by id
ncols = 2
nrows = ceil(len(unique_ids) / ncols)
fig = plt.figure(figsize=(6,6))
for i, id_ in enumerate(unique_ids):
# In a figure grid spanning nrows x ncols, plot into the
# axes at position i + 1
ax = fig.add_subplot(nrows, ncols, i+1)
df_plot.xs(id_).plot(axes=ax, kind="bar")
You can simplify things a lot with Seaborn:
import seaborn as sns
sns.catplot(data=df, x="rating", col="id", col_wrap=2, kind="count")
If you're ok with installing a new library, seaborn has a very helpful countplot. Seaborn uses matplotlib under the hood and makes certain plots easier.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'id':['A','A','A','A','A','A','A','B','B','B','B','B','B'],
'rating':[1,2,4,5,5,5,3,1,3,3,3,5,2]})
sns.countplot(
data = df,
x = 'rating',
hue = 'id',
)
plt.show()
plt.close()

Plot multiple mplfinance plots sharing x axis

I am trying to plot 5 charts one under the other with mplfinance.
This works:
for coin in coins:
mpf.plot(df_coins[coin], title=coin, type='line', volume=True, show_nontrading=True)
However each plot is a separate image in my Python Notebook cell output. And the x-axis labelling is repeated for each image.
I try to make a single figure containing multiple subplot/axis, and plot one chart into each axis:
from matplotlib import pyplot as plt
N = len(df_coins)
fig, axes = plt.subplots(N, figsize=(20, 5*N), sharex=True)
for i, ((coin, df), ax) in zip(enumerate(df_coins.items()), axes):
mpf.plot(df, ax=ax, title=coin, type='line', volume=True, show_nontrading=True)
This displays subfigures of the correct dimensions, however they are not getting populated with data. Axes are labelled from 0.0 to 1.0 and the title is not appearing.
What am I missing?
There are two ways to subplot. One is to set up a figure with mplfinance objects. The other way is to use your adopted matplotlib subplot to place it.
yfinace data
import matplotlib.pyplot as plt
import mplfinance as mpf
import yfinance as yf
tickers = ['AAPL','GOOG','TSLA']
data = yf.download(tickers, start="2021-01-01", end="2021-03-01", group_by='ticker')
aapl = data[('AAPL',)]
goog = data[('GOOG',)]
tsla = data[('TSLA',)]
mplfinance
fig = mpf.figure(style='yahoo', figsize=(12,9))
#fig.subplots_adjust(hspace=0.3)
ax1 = fig.add_subplot(3,1,1, sharex=ax3)
ax2 = fig.add_subplot(3,1,2, sharex=ax3)
ax3 = fig.add_subplot(3,1,3)
mpf.plot(aapl, type='line', ax=ax1, axtitle='AAPL', xrotation=0)
mpf.plot(goog, type='line', ax=ax2, axtitle='GOOG', xrotation=0)
mpf.plot(tsla, type='line', ax=ax3, axtitle='TSLA', xrotation=0)
ax1.set_xticklabels([])
ax2.set_xticklabels([])
matplotlib
N = len(tickers)
fig, axes = plt.subplots(N, figsize=(20, 5*N), sharex=True)
for df,t,ax in zip([aapl,goog,tsla], tickers, axes):
mpf.plot(df, ax=ax, axtitle=t, type='line', show_nontrading=True)# volume=True
In addition to the techniques mentioned by #r-beginners there is another technique that may work for you in the case where all plots share the same x-axis. That is to use mpf.make_addplot().
aps = []
for coin in coins[1:]:
aps.append(mpf.make_addplot(df_coins[coin]['Close'], title=coin, type='line'))
coin = coins[0]
mpf.plot(df_coins[coin],axtitle=coin,type='line',volume=True,show_nontrading=True,addplot=aps)
If you choose to do type='candle' instead of 'line', then change
df_coins[coin]['Close']
to simply
df_coins[coin]

how to set the distance between bars and axis using matplot lib [duplicate]

So currently learning how to import data and work with it in matplotlib and I am having trouble even tho I have the exact code from the book.
This is what the plot looks like, but my question is how can I get it where there is no white space between the start and the end of the x-axis.
Here is the code:
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Get dates and high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
#for index, column_header in enumerate(header_row):
#print(index, column_header)
dates, highs = [], []
for row in reader:
current_date = datetime.strptime(row[0], "%Y-%m-%d")
dates.append(current_date)
high = int(row[1])
highs.append(high)
# Plot data.
fig = plt.figure(dpi=128, figsize=(10,6))
plt.plot(dates, highs, c='red')
# Format plot.
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()
There is an automatic margin set at the edges, which ensures the data to be nicely fitting within the axis spines. In this case such a margin is probably desired on the y axis. By default it is set to 0.05 in units of axis span.
To set the margin to 0 on the x axis, use
plt.margins(x=0)
or
ax.margins(x=0)
depending on the context. Also see the documentation.
In case you want to get rid of the margin in the whole script, you can use
plt.rcParams['axes.xmargin'] = 0
at the beginning of your script (same for y of course). If you want to get rid of the margin entirely and forever, you might want to change the according line in the matplotlib rc file:
axes.xmargin : 0
axes.ymargin : 0
Example
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
tips.plot(ax=ax1, title='Default Margin')
tips.plot(ax=ax2, title='Margins: x=0')
ax2.margins(x=0)
Alternatively, use plt.xlim(..) or ax.set_xlim(..) to manually set the limits of the axes such that there is no white space left.
If you only want to remove the margin on one side but not the other, e.g. remove the margin from the right but not from the left, you can use set_xlim() on a matplotlib axes object.
import seaborn as sns
import matplotlib.pyplot as plt
import math
max_x_value = 100
x_values = [i for i in range (1, max_x_value + 1)]
y_values = [math.log(i) for i in x_values]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
sn.lineplot(ax=ax1, x=x_values, y=y_values)
sn.lineplot(ax=ax2, x=x_values, y=y_values)
ax2.set_xlim(-5, max_x_value) # tune the -5 to your needs

how to customize color legend when using for loop in matplotlib, scatter

I want to draw a 3D scatter, in which the data is colored by group. Here is the data sample:
aa=pd.DataFrame({'a':[1,2,3,4,5],
'b':[2,3,4,5,6],
'c':[1,3,4,6,9],
'd':[0,0,1,2,3],
'e':['abc','sdf','ert','hgf','nhkm']})
Here, a, b, c are axis x, y, z. e is the text shown in the scatter. I need d to group the data and show different colors.
Here is my code:
fig = plt.figure()
ax = fig.gca(projection='3d')
zdirs = aa.loc[:,'e'].__array__()
xs = aa.loc[:,'a'].__array__()
ys = aa.loc[:,'b'].__array__()
zs = aa.loc[:,'c'].__array__()
colors = aa.loc[:,'d'].__array__()
colors1=np.where(colors==0,'grey',
np.where(colors==1,'yellow',
np.where(colors==2,'green',
np.where(colors==3,'pink','red'))))
for i in range(len(zdirs)): #plot each point + it's index as text above
ax.scatter(xs[i],ys[i],zs[i],color=colors1[i])
ax.text(xs[i],ys[i],zs[i], '%s' % (str(zdirs[i])), size=10, zorder=1, color='k')
ax.set_xlabel('a')
ax.set_ylabel('b')
ax.set_zlabel('c')
plt.show()
But I do not know how to put a legend on the plot. I hope my legend is like:
The colors and the numbers should match and be ordered.
Could anyone help me with how to customize the color bar?
First of all, I've taken the liberty to reduce your code a bit:
I'd suggest to create a ListedColormap to map integer->color, which allows you to pass the color column via c=aa['d'] (note it's c=, not color=!)
you don't need to use __array__() here, in the code below you can directly use aa['a']
finally, you can add an empty scatter plot for each color in the ListedColormap, and this can then be rendered correctly by ax.legend()
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches
aa=pd.DataFrame({'a':[1,2,3,4,5],
'b':[2,3,4,5,6],
'c':[1,3,4,6,9],
'd':[0,0,1,2,3],
'e':['abc','sdf','ert','hgf','nhkm']})
fig = plt.figure()
ax = fig.gca(projection='3d')
cmap = ListedColormap(['grey', 'yellow', 'green', 'pink','red'])
ax.scatter(aa['a'],aa['b'],aa['c'],c=aa['d'],cmap=cmap)
for x,y,z,label in zip(aa['a'],aa['b'],aa['c'],aa['e']):
ax.text(x,y,z,label,size=10,zorder=1)
# Create a legend through an *empty* scatter plot
[ax.scatter([], [], c=cmap(i), label=str(i)) for i in range(len(aa))]
ax.legend()
ax.set_xlabel('a')
ax.set_ylabel('b')
ax.set_zlabel('c')
plt.show()