Circular dot on matplotlib barh graph - matplotlib

import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'y':['a','b','c','d','e','f','g','h','i']\
,'x':[10,9,9,8,7,6,10,6,7]})
df.sort_values(by='x',inplace=True,ascending = True)
plt.barh(bottom=list(range(1,10)), width=df.x, height = 0.15, align='center',color = 'blue')
plt.xlim([0,11])
plt.yticks(list(range(1,10)),skills.y)
plt.show()
This code gives me a horizontal bar graph.
I want to add a circular dot at the edge of each bars.
Can someone please help me with that.
Tableau graph
I did this in tableau, I want to replicate the same in python.
Also, please let me know if there a better way of coding the same.
I am using Anaconda Python 3.5, Matplotlib library, Windows 10, Idlex IDE

You could just add a scatterplot on top of your bars, using matplotlib scatter function.
Also, note that you could use the numpy.arange function to generate your x values, instead of your current list(range(1,10)).
See example below
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df = pd.DataFrame({'y':['a','b','c','d','e','f','g','h','i'],
'x':[10,9,9,8,7,6,10,6,7]})
df.sort_values(by='x',inplace=True,ascending = True)
plt.barh(bottom=np.arange(len(df)), width=df.x, height = 0.15, align='center',color = 'blue')
plt.scatter(df.x.values, y=np.arange(df.shape[0]), color='b', s=40)
plt.xlim([0,11])
plt.yticks(np.arange(len(df)),df.y)
plt.show()

Related

Seaborn hue not showing every values [duplicate]

I am trying to plot some data with the following code
from sklearn.datasets import make_blobs
import seaborn as sns
import numpy as np
X, y = make_blobs(n_samples=1000, n_features=2, centers=10, cluster_std=1.0, center_box=(-10.0, 10.0), shuffle=True, random_state=None)
palette = np.array(sns.color_palette("bright", 10)) #Chossing color
sns.scatterplot(X[:,0],X[:,1],legend='full',c=palette[y])
The color is beautiful, but the legend is missing.
When I check the documentation, I see:
How to draw the legend. If “brief”, numeric hue and size variables
....
So it seems I also need to include the hue argument.
But when I try the hue argument with the following codes, the following graph is created instead...
sns.scatterplot(X[:,0],X[:,1],legend='full',hue=y,c=palette[y])
The legend is showing, but the color is not what I want. After adding the hue argument, it seems it overwrite the palette argument. No matter what palette I choose, the color is still ugly as hell...
My question is:
How to show legend while maintaining the color that I want?
You would need to use the palette kwarg, and specify the hues with your y values.
from sklearn.datasets import make_blobs
import seaborn as sns
import matplotlib.pyplot as plt
X, y = make_blobs(n_samples=1000, n_features=2, centers=10, cluster_std=1.0,
center_box=(-10.0, 10.0), shuffle=True, random_state=None)
palette = sns.color_palette("bright", 10) #Choosing color
sns.scatterplot(X[:, 0], X[:, 1], palette=palette, hue=y, legend='full')
plt.show()

Data Points not fully shown Seaborn

import matplotlib.pyplot as plt
import seaborn as sb
from matplotlib import style
style.use("dark_background")
tips = sb.load_dataset("tips")
plt.figure(figsize=(8,4))
sb.set_context('paper', font_scale=1.2)
sb.lmplot(x="total_bill", y="tip", hue="sex", data=tips,markers=['o', '^'],
scatter_kws={'s':50, "linewidth" : 0.2, "edgecolor" : 'w'}, aspect=1.5)
I am using dataset tips(seaborn dataset). When am plotting the graph some data points are not fully shown like one at (0,1) the triangle data point is not fully shown and other at (51,10) the circle data point is half visible.
Graph : Graph Image
Where am I doing wrong?
In addition to the correct answer by #r-beginners, you can pass clip_on=False to the plotting function to prevent artists to being clipped at the edge of the axes.
sb.lmplot(x="total_bill", y="tip", hue="sex", data=tips,markers=['o', '^'],
scatter_kws={'s':50, "linewidth" : 0.2, "edgecolor" : 'w', 'clip_on':False}, aspect=1.5)
You can use ax.set_xlim() in such a case. You can use ax.set_xlim() to expand the area over which the image is displayed. Also, seaborn gives priority to height and aspect ratio over figsize.
import matplotlib.pyplot as plt
import seaborn as sb
from matplotlib import style
style.use("dark_background")
tips = sb.load_dataset("tips")
# plt.figure(figsize=(8,4))
sb.set_context('paper', font_scale=1.2)
sb.lmplot(x="total_bill", y="tip", hue="sex", data=tips,markers=['o', '^'],
scatter_kws={'s':50, "linewidth" : 0.2, "edgecolor" : 'w'}, aspect=2, legend_out=False, height=4)
ax = plt.gca()
ax.set_xlim((0,55))

Control gridline spacing in seaborn

I'd like to change the spacing of the horizontal grid lines on a seaborn chart, I've tried setting the style with no luck:
seaborn.set_style("whitegrid", {
"ytick.major.size": 0.1,
"ytick.minor.size": 0.05,
'grid.linestyle': '--'
})
bar(range(len(data)),data,alpha=0.5)
plot(avg_line)
The gridlines are set automatically desipite me trying to overide the tick size
Any suggestions? Thanks!
you can set the tick locations explicitly later, and it will draw the grid at those locations.
The neatest way to do this is to use a MultpleLocator from the matplotlib.ticker module.
For example:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
sns.set_style("whitegrid", {'grid.linestyle': '--'})
fig,ax = plt.subplots()
ax.bar(np.arange(0,50,1),np.random.rand(50)*0.016-0.004,alpha=0.5)
ax.yaxis.set_major_locator(ticker.MultipleLocator(0.005))
plt.show()
The OP asked about modifying tick distances in Seaborn.
If you are working in Seaborn and you use a plotting feature that returns an Axes object, then you can work with that just like any other Axes object in matplotlib. For example:
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from matplotlib.ticker import MultipleLocator
df = sm.datasets.get_rdataset("Guerry", "HistData").data
ax = sns.scatterplot('Literacy', 'Lottery', data=df)
ax.yaxis.set_major_locator(MultipleLocator(10))
ax.xaxis.set_major_locator(MultipleLocator(10))
plt.show()
Put if you are working with one of the Seaborn processes that involve FacetGrid objects, you will see precious little help on how to modify the tick marks without manually setting them. You have dig out the Axes object from the numpy array inside FacetGrid.axes .
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MultipleLocator
tips = sns.load_dataset("tips")
g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips, )
g.axes[0][0].yaxis.set_major_locator(MultipleLocator(3))
Note the double subscript required. g is a FacetGrid object, which holds a two-dimensional numpy array of dtype=object, whose entries are matplotlib AxesSubplot objects.
If you are working with a FacetGrid that has multiple axes, then each one will have to be extracted and modified.

My pandas-generated subplots are layouted incorrectly

I ran the following code to get two plots next to each other (it is a minimal working example that you can copy):
import pandas as pd
import numpy as np
from matplotlib.pylab import plt
comp1 = np.random.normal(0,1,size=200)
values = pd.Series(comp1)
plt.close("all")
f = plt.figure()
plt.show()
sp1 = f.add_subplot(2,2,1)
values.hist(bins=100, alpha=0.5, color="r", normed=True)
sp2 = f.add_subplot(2,2,2)
values.plot(kind="kde")
Unfortunately, I then get the following image:
This is also an interesting layout, but I wanted the figures to be next to each other. What did I do wrong? How can I correct it?
For clarity, I could also use this:
import pandas as pd
import numpy as np
from matplotlib.pylab import plt
comp1 = np.random.normal(0,1,size=200)
values = pd.Series(comp1)
plt.close("all")
fig, axes = plt.subplots(2,2)
plt.show()
axes[0,0].hist(values, bins=100, alpha=0.5, color="r", normed=True) # Until here, it works. You get a half-finished correct image of what I was going for (though it is 2x2 here)
axes[0,1].plot(values, kind="kde") # This does not work
Unfortunately, in this approach axes[0,1] refers to the subplot that has a plot method but does not know kind="kde". Please take into consideration that the in the first version plot is executed on the pandas object, whereas in the second version plot is executed on the subplot, which does not work with the kind="kde" parameter.
use ax= argument to set which subplot object to plot:
import pandas as pd
import numpy as np
from matplotlib.pylab import plt
comp1 = np.random.normal(0,1,size=200)
values = pd.Series(comp1)
plt.close("all")
f = plt.figure()
sp1 = f.add_subplot(2,2,1)
values.hist(bins=100, alpha=0.5, color="r", normed=True, ax=sp1)
sp2 = f.add_subplot(2,2,2)
values.plot(kind="kde", ax=sp2)

Matplotlib Color Palette

Is it possible to change what colors Matplotlib cycles through when it is generating its own colors for a graph's lines? I'm using the pylab module.
from pylab import *
import matplotlib.cm as cm
x=[1,2,3,4]
y=[5,6,7,8]
fig1 = Figure()
plot1 = fig1.add_subplot(311)
plot1.plot(x,y)
plot2 = fig1.add_subplot(312)
plot2.plot(x,y)
plot3 = fig1.add_subplot(313)
plot3.plot(x,y)
Yes, of course. Since it accept many kinds of color definition. It's easy to define your own color map. Here I just get colors from the colormap hot
import pylab as py
import numpy as np
import matplotlib.cm as cm
a = np.arange(0,10)
py.figure()
for i in np.arange(10):
c = cm.hot(i/10.,1)
py.plot(a,i*a,color=c)
py.show()
The colors are extracted from color maps. You can use one of the predefined colormaps, or define your own.
Unfortunately there is no way to use multiple colormaps per figure, you have to do it manually:
import pylab as pl
import matplotlib.cm as cm
xval = pl.arange(0, 20, 0.2)
pl.subplot(311)
pl.plot(xval, pl.sin(xval), c=cm.summer(0))
pl.subplot(312)
pl.plot(xval, pl.cos(xval), c=cm.spring(0))
pl.subplot(313)
pl.plot(xval, pl.arctan(xval), xval, pl.fabs(xval))
pl.show()