Last tick label missing after changing tick frequency - matplotlib

I would like to change the x-tick frequency to every 5, but the last tick is missing (20 in this case)!
#!/usr/bin/env python3
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Minimal example: plot two random series against x and place an x tick every 5 units.
r = np.random.RandomState(10)  # fixed seed so the figure is reproducible
df = pd.DataFrame({
"x": np.linspace(0, 20, 10),
"y1": r.uniform(1, 10, 10),
"y2": r.uniform(5, 15, 10),
})
fig, ax = plt.subplots(figsize=(8, 4))
df.plot(x='x',ax=ax)
# np.arange excludes its stop value, so with x running from 0 to 20 this
# yields 0, 5, 10, 15 and the tick at 20 is dropped. Using max(df['x']) + 5
# as the stop would include the last tick.
ax.set_xticks(np.arange(min(df['x']),max(df['x']),5))
plt.legend()
plt.show()
Output:

Related

Whiskers instead of bars in matplotlib

For a given bar plot, like
import numpy as np
import matplotlib.pyplot as plt
# Ten bars at x = 0..9: the bottoms rise linearly from 30 to 50 while the
# heights shrink linearly from 10 to 5.
n_bars = 10
x = np.arange(n_bars)
y_bot = np.linspace(30, 50, n_bars)
y_dif = np.linspace(10, 5, n_bars)
plt.bar(x, height=y_dif, bottom=y_bot)
I would like to have whiskers (like in a boxplot), instead of bars:
How can I edit the bars to appear as whiskers?
You can use plt.errorbar() as follows:
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(10)
y_bot = np.linspace(30, 50, 10)
y_dif = np.linspace(10, 5, 10)
plt.bar(x, y_dif, bottom=y_bot, color='skyblue')
# Asymmetric error bars anchored at the bar bottoms: nothing below y_bot and
# y_dif above it, so each whisker covers the same span as its bar.
no_lower_error = np.zeros_like(y_bot)
plt.errorbar(
    x,
    y_bot,
    yerr=(no_lower_error, y_dif),
    ls='',
    lw=5,
    ecolor='black',
    capsize=10,
    capthick=5,
)
current_axes = plt.gca()
current_axes.use_sticky_edges = False  # remove stickyness due to plt.bar()
plt.xticks(x)
plt.tight_layout()
plt.show()

How to plot stats.probplot in a grid?

I have a data frame with four columns. I would like to plot the normality test for each column in a 2*2 grid, but it only plots one figure, and the rest are empty.
import random
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Build a 2x2 grid of subplots and attempt one probability plot per column.
fig, axs = plt.subplots(2, 2, figsize=(15, 6), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace=.5, wspace=.001)
# Four columns (col1..col4) of 1000 random integers drawn from range(1, 50).
data = {
    f'col{k}': [random.randrange(1, 50, 1) for _ in range(1000)]
    for k in range(1, 5)
}
df = pd.DataFrame(data)
for ax, d in zip(axs.ravel(), df):
    # NOTE: plot=plt hands probplot the pyplot module instead of this loop's
    # Axes, and the assignment rebinds ax to probplot's return value. This is
    # the behaviour the question asks about, so it is kept as posted.
    ax = stats.probplot(df[d], plot=plt)
    # ax.set_title(str(d))
plt.show()
Is there a way to construct the subplot and the stats.probplot within a loop?
In your code, you need to change the for loop to this:
# Pair each subplot with one column name (axs and df come from the question's code).
for ax, d in zip(axs.ravel(), df):
    # Pass the subplot's own Axes so each column is drawn in its own panel.
    stats.probplot(df[d], plot=ax)
    # Optional: ax.set_title(str(d)) labels the panel with its column name.
plt.show()
I hope this will help you move on.

Matplotlib--scatter plot with half filled markers

Question: Using a scatter plot in matplotlib, is there a simple way to get a half-filled marker?
I know half-filled markers can easily be done using a line plot, but I would like to use 'scatter' because I want to use marker size and color (i.e., alternate marker face color) to represent other data. (I believe this will be easier with a scatter plot since I want to automate making a large number of plots from a large data set.)
I can't seem to make half-filled markers properly using a scatter plot. That is to say, instead of a half-filled marker, the plot shows half of a marker. I've been using matplotlib.markers.MarkerStyle, but that seems to only get me halfway there. I'm able to get the following output using the code below.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
# One large circle per fill style, placed along the diagonal at (1, 1) .. (5, 5).
fill_styles = ('full', 'left', 'right', 'top', 'bottom')
for position, fill in enumerate(fill_styles, start=1):
    plt.scatter(position, position, marker=MarkerStyle('o', fillstyle=fill), edgecolors='k', s=500)
plt.show()
As mentioned in the comments, I don't see why you have to use plt.scatter but if you want to, you can fake a combined marker:
from matplotlib.markers import MarkerStyle
from matplotlib import pyplot as plt
#data generation
import pandas as pd
import numpy as np
# Seeded sample data: positions X/Y, marker sizes S, and one categorical
# colour per marker half (C1 and C2).
np.random.seed(123)
n = 10
columns = {
    "X": np.random.randint(1, 20, n),
    "Y": np.random.randint(10, 30, n),
    "S": np.random.randint(50, 500, n),
    "C1": np.random.choice(["red", "blue", "green"], n),
    "C2": np.random.choice(["yellow", "grey"], n),
}
df = pd.DataFrame(columns)
fig, ax = plt.subplots()
# Draw every point twice at the same position, once per half, so the two
# half-filled markers combine into one two-coloured marker.
for colour_column, half in (("C1", "right"), ("C2", "left")):
    ax.scatter(
        df.X,
        df.Y,
        s=df.S,
        c=df[colour_column],
        edgecolor="black",
        marker=MarkerStyle("o", fillstyle=half),
    )
plt.show()
Sample output:
This works, of course, also for continuous data:
from matplotlib import pyplot as plt
from matplotlib.markers import MarkerStyle
import pandas as pd
import numpy as np
# Seeded sample data: positions X/Y, marker sizes S, and one numeric value
# per marker half (C1 and C2) to be mapped through a colormap.
np.random.seed(123)
n = 10
columns = {
    "X": np.random.randint(1, 20, n),
    "Y": np.random.randint(10, 30, n),
    "S": np.random.randint(100, 1000, n),
    "C1": np.random.randint(1, 100, n),
    "C2": np.random.random(n),
}
df = pd.DataFrame(columns)
fig, ax = plt.subplots(figsize=(10,8))
# Both scatters are drawn before any colorbar is added, each half with its own colormap.
half_specs = (("C1", "right", "autumn"), ("C2", "left", "winter"))
im1, im2 = [
    ax.scatter(
        df.X,
        df.Y,
        s=df.S,
        c=df[value_column],
        edgecolor="black",
        marker=MarkerStyle("o", fillstyle=half),
        cmap=cmap_name,
    )
    for value_column, half, cmap_name in half_specs
]
# One colorbar per half, labelled with the half it describes.
cbar1 = plt.colorbar(im1, ax=ax)
cbar1.set_label("right half", rotation=90)
cbar2 = plt.colorbar(im2, ax=ax)
cbar2.set_label("left half", rotation=90)
plt.show()
Sample output:
But be reminded that plt.plot with marker definitions might be faster for large-scale datasets: The plot function will be faster for scatterplots where markers don't vary in size or color.

Coloring minimum bars in seaborn FacetGrid barplot

Any easy way to automatically color (or mark in any way) the minimum/maximum bars for each plot of a FacetGrid?
For example, how to mark the minimal Z value on each one of the following 16 plots?
# Sixteen rows; 'A' and 'Y' pick the facet row/column, 'W' and 'Z' are the bar x/y values.
records = {
    'A': [10, 20, 30, 40]*4,
    'Y': [1,2,3,4]*4,
    'W': range(16),
    'Z': range(16),
}
df = pd.DataFrame(records)
# One facet per (A, Y) pair, each with its own y axis.
g = sns.FacetGrid(df, row="A", col="Y", sharey=False)
g.map(sns.barplot, "W", "Z")
plt.show()
The following approach loops through the diagonal axes, for each ax searches the minimum height of the bars and then colors those:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df = pd.DataFrame({'A': [10, 20, 30, 40] * 4, 'Y': [1, 2, 3, 4] * 4, 'W': range(16), 'Z': range(16)})
g = sns.FacetGrid(df, row="A", col="Y", sharey=False)
g.map(sns.barplot, "W", "Z")
# 'A' and 'Y' cycle together in this data, so only the diagonal facets
# receive data; iterate those and recolour the shortest bar(s) in each.
for ax in g.axes.diagonal():
    min_height = min(p.get_height() for p in ax.patches)
    for p in ax.patches:
        if p.get_height() == min_height:
            p.set_color('red')
plt.tight_layout()
plt.show()

pandas scatter plot and groupby does not work

I am trying to do a scatter plot with pandas. Unfortunately kind='scatter' doesn't work. If I change this to kind='line' it works as expected. What can I do to fix this?
# One scatter series per value of 'm', all drawn on the same Axes
# (df and ax are assumed to be defined earlier in the asker's script).
for label, d in df.groupby('m'):
    d[['te','n']].sort_values(by='n', ascending=False).plot(kind="scatter", x='n', y='te', ax=ax, label='m = '+str(label))
Use plot.scatter instead:
# Seven points lying on the line y == x.
values = [0, 5, 7,3, 2, 4, 6]
df = pd.DataFrame({'x': values, 'y': values})
df.plot.scatter(x='x', y='y')
Use this snippet if you want individual labels and colours:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 100 random points, each assigned to one of five groups (m in 0..4).
df = pd.DataFrame({
    'm': np.random.randint(0, 5, size=100),
    'x': np.random.uniform(size=100),
    'y': np.random.uniform(size=100),
})
fig, ax = plt.subplots()
# Draw each group as its own scatter series on the shared Axes so that every
# group gets a separate legend label and colour.
for label, d in df.groupby('m'):
    # generate a random color:
    color = list(np.random.uniform(size=3))
    d.plot.scatter('x', 'y', label=f'group {label}', ax=ax, c=[color])