Plotting class with different marker python - numpy

I have data sets columns of features x1 and x2 and class y which has value either 0 or 1. I want to plot x1 and x2 in scatter plot such that values y == 1 will appear as "+" and values y == 0 will appear as "o".
x1 = np.array(100)
x2 = np.array(100)
#y = array of length 100 either with value 1 or 0
plt.scatter(x1, x2, y=1, marker='+')
plt.scatter(x1, x2, y=0, marker='o')
plt.show()
Any suggestions?

You can just index your x1 and x2 arrays using the condition of y==0 or y==1:
plt.scatter(x1[y==1], x2[y==1], marker='+')
plt.scatter(x1[y==0], x2[y==0], marker='o')

Use np.where to get the indices of where the y-array is 0 or 1, and then plot them accordingly. Below is an example
import matplotlib.pyplot as plt
import numpy as np
plt.close('all')
x = np.arange(100)
y = np.random.randint(0, 2, 100)
arg_0 = np.where(y == 0)
arg_1 = np.where(y == 1)
fig, ax = plt.subplots()
ax.scatter(x[arg_0], y[arg_0], marker='o')
ax.scatter(x[arg_1], y[arg_1], marker='+')
ax.set_ylim(-0.1, 1.1)
fig.show()

Related

Showing Matplotlib pie chart only top 3 item's percentage [duplicate]

I have the following code:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(123456)
import pandas as pd
df = pd.DataFrame(3 * np.random.rand(4, 4), index=['a', 'b', 'c', 'd'],
columns=['x', 'y','z','w'])
plt.style.use('ggplot')
colors = plt.rcParams['axes.color_cycle']
fig, axes = plt.subplots(nrows=2, ncols=3)
for ax in axes.flat:
ax.axis('off')
for ax, col in zip(axes.flat, df.columns):
ax.pie(df[col], labels=df.index, autopct='%.2f', colors=colors)
ax.set(ylabel='', title=col, aspect='equal')
axes[0, 0].legend(bbox_to_anchor=(0, 0.5))
fig.savefig('your_file.png') # Or whichever format you'd like
plt.show()
Which produce the following:
My question is, how can I remove the label based on a condition. For example I'd only want to display labels with percent > 20%. Such that the labels and value of a,c,d won't be displayed in X, etc.
The autopct argument from pie can be a callable, which will receive the current percentage. So you only would need to provide a function that returns an empty string for the values you want to omit the percentage.
Function
def my_autopct(pct):
return ('%.2f' % pct) if pct > 20 else ''
Plot with matplotlib.axes.Axes.pie
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
for ax, col in zip(axes.flat, df.columns):
ax.pie(df[col], labels=df.index, autopct=my_autopct)
ax.set(ylabel='', title=col, aspect='equal')
fig.tight_layout()
Plot directly with the dataframe
axes = df.plot(kind='pie', autopct=my_autopct, figsize=(8, 6), subplots=True, layout=(2, 2), legend=False)
for ax in axes.flat:
yl = ax.get_ylabel()
ax.set(ylabel='', title=yl)
fig = axes[0, 0].get_figure()
fig.tight_layout()
If you need to parametrize the value on the autopct argument, you'll need a function that returns a function, like:
def autopct_generator(limit):
def inner_autopct(pct):
return ('%.2f' % pct) if pct > limit else ''
return inner_autopct
ax.pie(df[col], labels=df.index, autopct=autopct_generator(20), colors=colors)
For the labels, the best thing I can come up with is using list comprehension:
for ax, col in zip(axes.flat, df.columns):
data = df[col]
labels = [n if v > data.sum() * 0.2 else ''
for n, v in zip(df.index, data)]
ax.pie(data, autopct=my_autopct, colors=colors, labels=labels)
Note, however, that the legend by default is being generated from the first passed labels, so you'll need to pass all values explicitly to keep it intact.
axes[0, 0].legend(df.index, bbox_to_anchor=(0, 0.5))
For labels I have used:
def my_level_list(data):
list = []
for i in range(len(data)):
if (data[i]*100/np.sum(data)) > 2 : #2%
list.append('Label '+str(i+1))
else:
list.append('')
return list
patches, texts, autotexts = plt.pie(data, radius = 1, labels=my_level_list(data), autopct=my_autopct, shadow=True)
You can make the labels function a little shorter using list comprehension:
def my_autopct(pct):
return ('%1.1f' % pct) if pct > 1 else ''
def get_new_labels(sizes, labels):
new_labels = [label if size > 1 else '' for size, label in zip(sizes, labels)]
return new_labels
fig, ax = plt.subplots()
_,_,_ = ax.pie(sizes, labels=get_new_labels(sizes, labels), colors=colors, autopct=my_autopct, startangle=90, rotatelabels=False)

How do I plot a contour from a table of values?

I have a table that has 2 features (x,y) - and a vector with the same length that contains their corresponding values (z).
I'm trying to use matplotlib to print this as a 2D plot but I am get an error:
TypeError: Input z must be at least a (2, 2) shaped array, but has shape (5797, 1)
Is there any way to solve this? (since I am trying to use 1d arrays instead of 2d arrays)
The relevant code:
x, y = train_features[:,0], train_features[:,1]
z = train_predictions.detach()
print(x.size())
print(y.size())
print(z.size())
plt.figure()
CS = plt.contour(x, y, z)
CS = plt.contourf(x, y, z)
plt.clabel(CS, fontsize=8, colors='black')
cbar = plt.colorbar(CS)
The prints that result from the prints commands:
torch.Size([5797])
torch.Size([5797])
torch.Size([5797, 1])
EDIT:
I tried to implement this with a second method:
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np
npts = 200
ngridx = 100
ngridy = 200
x = train_features[:,0]
y = train_features[:,1]
z = train_predictions.detach().squeeze()
fig, ax1 = plt.subplots()
# -----------------------
# Interpolation on a grid
# -----------------------
# A contour plot of irregularly spaced data coordinates
# via interpolation on a grid.
# Create grid values first.
xi = np.linspace(1, 10, ngridx)
yi = np.linspace(1, 10, ngridy)
# Perform linear interpolation of the data (x,y)
# on a grid defined by (xi,yi)
triang = tri.Triangulation(x, y)
interpolator = tri.LinearTriInterpolator(triang, z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
ax1.contour(xi, yi, zi, levels=100, linewidths=0.5, colors='k')
cntr1 = ax1.contourf(xi, yi, zi, levels=14, cmap="RdBu_r")
fig.colorbar(cntr1, ax=ax1)
ax1.plot(x, y, 'ko', ms=3)
ax1.set_title('grid and contour (%d points, %d grid points)' %
(npts, ngridx * ngridy))
But the resulting image was the following:
even though z's values are:
tensor([-0.2434, -0.2155, -0.1900, ..., 64.7516, 65.2064, 65.6612])

Refreshing plot in matplotlib

As part of displaying the progression of a linear regression model fit, I need to be able to update/refresh an xy plot. Below is a simple script for 3 sets of y data, which need to be shown sequentially. However, they are piled up on top of each other. When fig.canvas.flush_events() is substituted with fig.clear() or fig.clf() the result is a blank plot. What am I - as a newbie -missing?
import torch as tc
import matplotlib.pyplot as plt
tc.manual_seed(1)
X=tc.linspace(-3,3,30)
y0=X.pow(2)+0.5*tc.randn(X.shape[0])
y1=y0/1.3
y2=y0/1.6
y=[y0,y1,y2]
fig=plt.figure()
ax=fig.add_subplot()
ax.set_xlim(-3.3,3.3)
ax.set_ylim(-0.5,9.5)
for i in range(3):
y_new=y[i]
ax.plot(X,y_new,'db')
fig.canvas.draw()
fig.canvas.flush_events()
plt.pause(1)
fig.show()
In your loop, you are creating a new line every time you call ax.plot. The better way is to create a Line2D artist and to update the coordinates of the point in the loop:
(NB i've converted your example to using numpy instead of torch)
import matplotlib.pyplot as plt
import numpy as np
X = np.linspace(-3, 3, 30)
y0 = np.power(X, 2) + 0.5 * np.random.randn(X.shape[0])
y1 = y0 / 1.3
y2 = y0 / 1.6
y = [y0, y1, y2]
fig = plt.figure()
ax = fig.add_subplot()
l, = ax.plot(X, y0, 'db')
ax.set_xlim(-3.3, 3.3)
ax.set_ylim(-0.5, 9.5)
for i in range(3):
y_new = y[i]
l.set_ydata(y_new)
fig.canvas.draw()
plt.pause(1)
plt.show()
For this kind of things, you'd be better off using the FuncAnimation module provided by maptlotlib though:
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
X = np.linspace(-3, 3, 30)
y0 = np.power(X, 2) + 0.5 * np.random.randn(X.shape[0])
y1 = y0 / 1.3
y2 = y0 / 1.6
y = [y0, y1, y2]
fig = plt.figure()
ax = fig.add_subplot()
l, = ax.plot(X, y0, 'db')
ax.set_xlim(-3.3, 3.3)
ax.set_ylim(-0.5, 9.5)
def animate(y_new):
l.set_ydata(y_new)
return l,
ani = animation.FuncAnimation(fig, func=animate, frames=y, interval=1000)
fig.show()

ValueError: Contour levels must be increasing - how to plot 3 feature data

import numpy as np
from matplotlib import pyplot as plt
data = np.random.normal(0,1,[100,3])
x = data[:,0]
y = data[:,1]
z = data[:,2]
plt.contour([x,y],z)
When I run this code with dummy data I get:
ValueError: Contour levels must be increasing
Do you have any idea what would this mean and how I could fix it?
plt.contour is a bit particular about its input, the z values must be on values on a rectangular 2D grid, see for example:
import matplotlib.pyplot as plt
import numpy as np
x = np.expand_dims(np.arange(1,11,1), axis=1)
y = np.expand_dims(np.arange(2,21,2), axis=0)
z = y * x
print(x.shape)
print(y.shape)
print(z.shape)
plt.figure()
plt.contour(z)
plt.show()
You can also provide x and y values for plt.contour by using np.meshgrid :
XX,YY = np.meshgrid(x,y)
plt.figure()
plt.contour(XX, YY, z)
plt.show()
If you have z-values with irregular values for x and y, you might use plt.tricontour, see the following example:
from matplotlib.tri import Triangulation
data = np.random.normal(0,1,[100,3])
x = data[:,0]
y = data[:,1]
#z = data[:,2]
z = x * y
tri = Triangulation(x,y)
plt.figure()
plt.tricontour(tri, z, )
plt.scatter(x,y, c=z)
plt.show()
Edit: from JohanC's comment i learned that this can be simplified without importing matplotlib.tri by:
plt.figure()
plt.tricontour(x,y,z)
plt.scatter(x,y, c=z)
plt.show()

Plotting multiple set of data in pcolor plot python

I have data sets like (x,y,(z1,z2,z3..)). I am trying
plt.pcolor(x,y,z1)
plt.pcolor(x,y,z2)
plt.pcolor(x,y,z3)
plt.colorbar()
plt.show()
This is showing only the pcolor plot of the last data set. How can I plot all in same plot and same colorbar scale?
You could try with subplots, and make sure all the images with the same intensity scale (use the same vmin and vmax arguments of pcolor() for all your images). Below is an example:
import numpy as np
import matplotlib.pyplot as plt
dx, dy = 0.15, 0.05
y, x = np.mgrid[slice(-3, 3 + dy, dy),
slice(-3, 3 + dx, dx)]
z = (1 - x / 2. + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)
z1 = z[:-1, :-1]
z2 = z[:-1, :-1]
z3 = z[:-1, :-1]
z_min, z_max = -np.abs(z).max(), np.abs(z).max()
data = [[x,y,z1],[x,y,z2],[x,y,z3]]
# Plot each slice as an independent subplot
fig, axes = plt.subplots(nrows=1, ncols=3)
for dat, ax in zip(data, axes.flat):
# The vmin and vmax arguments specify the color limits
pc = ax.pcolor(dat[0],dat[1],dat[2], vmin=z_min, vmax=z_max)
# Make an axis for the colorbar on the right side
cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
fig.colorbar(pc, cax=cax)
plt.show()
It will show like this: