Plot 2D array of (x,y,z) points in 3D space (Matplotlib) - numpy

I'm a beginner in Python and especially in Matplotlib. I have a 22797x3 array, built by multiplying two other arrays, one 22797x400 and the other 400x3. In the resulting array (22797x3), each row represents a point with (x, y, z) coordinates, hence the 3 columns. How can I plot that resulting array in 3D, so that I can see all 22797 points spread out in 3D space? This data is for future K-means clustering, so I need to visualise it.
So far I've tried:
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Build the 22797x3 point array.
# Load the first array (22797x400) from a .txt file.
# The paths are raw strings: in an ordinary Python 3 literal "\U" starts a
# Unicode escape, so 'C:\Users...' does not even compile. The raw form keeps
# every backslash literal and names exactly the same file.
array = np.loadtxt(r'C:\Users\Scripts/final_array.txt', usecols=range(400))
array = np.float32(array)
# Load the second array (400x3) from a .txt file.
small_vh2 = np.loadtxt(r'C:\Users\Scripts/small_vh2.txt', usecols=range(3))
small_vh2 = np.float32(small_vh2)
# Multiply to get the 22797x3 result; each row is one (x, y, z) point.
Y = np.array(np.matmul(array, small_vh2))
# I've checked Y's dimensions: it is 22797x3, so this part works fine.
# Now plot it in 3D:
fig = plt.figure()
# NOTE(review): kept as the asker wrote it. Newer Matplotlib releases may not
# attach an Axes3D built this way to the figure automatically;
# fig.add_subplot(111, projection='3d') is the documented route - confirm
# against the installed version.
ax = Axes3D(fig)
# One marker per row: columns 0, 1 and 2 are the x, y and z coordinates.
ax.scatter(Y[:, 0], Y[:, 1], Y[:, 2])
plt.show()
I keep getting the result shown in the image below:
https://i.stack.imgur.com/jRyHM.jpg
What I need is to see all 22797 points, but I keep getting only 4 points plotted. Does anybody know what is wrong with the code?

from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from matplotlib import pyplot as plt
# Stand-in data: two random arrays with the same shapes as in the question.
array = np.random.rand(22797, 400)
small_vh2 = np.random.rand(400, 3)
# Matrix product -> 22797x3, one (x, y, z) point per row.
Y = array @ small_vh2
# Draw the point cloud on a 3D subplot.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
xs, ys, zs = Y[:, 0], Y[:, 1], Y[:, 2]
# Low alpha keeps overlapping markers from hiding each other.
ax.scatter(xs, ys, zs, alpha=0.1)
ax.set(xlabel='X Label', ylabel='Y Label', zlabel='Z Label')
plt.show()

Related

How to remove offset of axis in matplotlib 3d plot?

import matplotlib.pyplot as plt
fig = plt.figure()
# Request the 3D axes through add_subplot: Figure.gca() no longer accepts
# keyword arguments such as projection= in current Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
# Ask for x limits of exactly [0, 1]; the question is about the margin that
# still appears around them.
ax.set_xlim([0, 1])
plt.show()
results in the following plot:
There is a small margin between the axes and their limits.
How can I get rid of this axis offset?
For example, ax.set_xlim([0, 1]) has no effect!

How to render a heatmap for a large array

I have a large dataset from which I derive a squared matrix I would like to visualize as a heatmap. I'm using Matplotlib and Seaborn. Unfortunately, it seems to work only for a relatively small amount of data.
# Side length of the square matrix to visualise.
size = 10000
# Random stand-in for the similarity matrix (size x size).
similarity_matrix = np.random.rand(size, size)
fig, ax = plt.subplots()
# Draw the heatmap with the colour scale pinned to the range 0..1.
sns.heatmap(similarity_matrix, vmin=0, vmax=1)
# Write the current figure to disk.
plt.savefig("matrix.png")
This stops working from approximately size=6000, resulting in a white heatmap.
imshow or matshow seems to work fine:
# Fixed seed so the random matrix is the same on every run.
np.random.seed(42)
size = 10000
similarity_matrix = np.random.rand(size, size)
# Render the matrix as an image and attach a colour bar to that image.
heat_image = plt.imshow(similarity_matrix, cmap='hot')
plt.colorbar(heat_image)
Output:
The original code didn't generate a plot for me
Changing fig, ax = plt.subplots() to plt.figure(figsize=(14, 14)), worked to create the plot.
At figsize=(10, 10), the figure didn't render in Jupyter, but the correct image did save to a file.
A figure smaller than figsize=(14, 14), wouldn't render in Jupyter.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Build a size x size matrix of random values.
size = 10000
similarity_matrix = np.random.rand(size, size)
# Open a 14x14 inch figure first so the heatmap below is drawn into it.
figure_inches = (14, 14)
plt.figure(figsize=figure_inches)
# Heatmap with the colour scale pinned to the range 0..1.
sns.heatmap(data=similarity_matrix, vmin=0, vmax=1)
# Save to disk at high resolution, then display (displaying was slow).
plt.savefig('test.png', dpi=600)
plt.show()

Triangular surface plot matplotlib

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
# Request the 3D axes through add_subplot: Figure.gca() no longer accepts
# keyword arguments such as projection= in current Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
# Make data.
X = np.arange(-5, 5, 0.25)
Y = np.arange(-5, 5, 0.25)
# meshgrid replaces the two 1-D ranges with 2-D coordinate grids.
X, Y = np.meshgrid(X, Y)
R = np.sqrt(X**2 + Y**2)
Z = np.sin(R)
# Left exactly as asked: X, Y and Z are 2-D here, which is what triggers the
# ValueError quoted in the question.
ax.plot_trisurf(X, Y, Z, cmap='viridis', edgecolor='none')
plt.show()
I tried to plot this data in form of triangular data but I get this error:
ValueError: x and y must be equal-length 1-D arrays
Can someone help me on it?
For a triangular surface plot, X, Y and Z must be one-dimensional arrays, rather than two-dimensional arrays as for wireframe and surface plots.
Don't use np.meshgrid().

unexpected constant color using matplotlib surface_plot and facecolors

I am plotting a function on the surface of a sphere. To test my code, I simply plot the spherical coordinate phi divided by pi. I get
Unexpectedly, half of the sphere is of the same color, and the colors on the other half aren't correct (at phi=pi, i should get 1, not 2). If I divide the data array by 2, the problem disappears. Can someone explain to me what is happening?
Here is the code I use:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
# prepare the sphere surface
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
# phi: 50 samples from 0 to 2*pi; theta: 25 samples from 0 to pi.
phi = np.linspace(0,2*np.pi, 50)
theta = np.linspace(0, np.pi, 25)
# Outer products of the angle vectors give 50x25 grids of x, y and z.
x=np.outer(np.cos(phi), np.sin(theta))
y=np.outer(np.sin(phi), np.sin(theta))
z=np.outer(np.ones(np.size(phi)), np.cos(theta))
# prepare function to plot
# PHI and THETA hold phi and theta at every grid position (50x25 each).
PHI=np.outer(phi,np.ones(np.size(theta)))
THETA=np.outer(np.ones(np.size(phi)),theta)
# The plotted quantity is phi / pi, so it runs from 0 to 2.
data = PHI/np.pi
# plot
# Face colours come from passing data directly to the jet colormap; a cmap
# is passed to plot_surface as well.
surface=ax.plot_surface(x, y, z, cstride=1, rstride=1,
facecolors=cm.jet(data),cmap=plt.get_cmap('jet'))
# add colorbar
# Separate mappable built from the surface's own cmap and norm, fed the data.
m = cm.ScalarMappable(cmap=surface.cmap,norm=surface.norm)
m.set_array(data)
plt.colorbar(m)
plt.show()
There is a little bit of chaos in the code.
When specifying facecolors, there is no reason to supply a colormap, because the facecolors do not need to be retrieved from a colormap.
Colormaps range from 0 to 1. Your data ranges from 0 to 2. Hence half of the facecolors are just the same. So you first need to normalize the data to the (0,1)-range, e.g. using a Normalize instance, then you can apply the colormap.
# Normalisation spanning the data's own minimum and maximum.
norm = plt.Normalize(vmin=data.min(), vmax=data.max())
# Colour each face from the normalised values instead of the raw data.
surface=ax.plot_surface(x, y, z, cstride=1, rstride=1,
facecolors=cm.jet(norm(data)))
For the colorbar you should then use the same colormap and the same normalization as for the plot itself.
# Mappable for the colorbar: same colormap (jet) and same norm as the faces.
m = cm.ScalarMappable(cmap=cm.jet,norm=norm)
m.set_array(data)
Complete code:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
# Prepare the figure and its 3D axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
# Angular samples: phi from 0 to 2*pi (50 values), theta from 0 to pi (25).
phi = np.linspace(0, 2 * np.pi, 50)
theta = np.linspace(0, np.pi, 25)
# Outer products give 50x25 grids of Cartesian coordinates on the sphere.
x = np.outer(np.cos(phi), np.sin(theta))
y = np.outer(np.sin(phi), np.sin(theta))
z = np.outer(np.ones(np.size(phi)), np.cos(theta))
# Function to plot: phi / pi at every grid position, so values run 0..2.
PHI = np.outer(phi, np.ones(np.size(theta)))
data = PHI / np.pi
# Colormaps expect input in [0, 1], so normalise the data before mapping it
# to face colours.
norm = plt.Normalize(vmin=data.min(), vmax=data.max())
surface = ax.plot_surface(x, y, z, cstride=1, rstride=1,
                          facecolors=cm.jet(norm(data)))
# Colorbar: same colormap and normalisation as the faces. The mappable is
# not attached to any Axes, so the target Axes is passed explicitly.
m = cm.ScalarMappable(cmap=cm.jet, norm=norm)
m.set_array(data)
fig.colorbar(m, ax=ax)
plt.show()

Plotting 3D Decision Boundary From Linear SVM

I've fit a 3-feature data set using sklearn.svm.SVC(). I can plot the point for each observation using matplotlib and Axes3D. I want to plot the decision boundary to see the fit. I've tried adapting the 2D examples for plotting the decision boundary, to no avail. I understand that clf.coef_ is a vector normal to the decision boundary. How can I plot this to see where it divides the points?
Here is an example on a toy dataset. Note that plotting in 3D is funky with matplotlib. Sometimes points that are behind the plane might appear as though they are in front of it, so you may have to fiddle with rotating the plot to ascertain what's going on.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.svm import SVC
rs = np.random.RandomState(1234)
# Generate some fake data.
n_samples = 200
# Integer half-size: a plain "/" yields a float in Python 3, which is not
# valid as a slice bound or as a sample count.
half = n_samples // 2
# X is the input features by row.
X = np.zeros((n_samples, 3))
X[:half] = rs.multivariate_normal(np.ones(3), np.eye(3), size=half)
X[half:] = rs.multivariate_normal(-np.ones(3), np.eye(3), size=half)
# Y is the class labels for each row of X: first half 0, second half 1.
Y = np.zeros(n_samples)
Y[half:] = 1
# Fit the data with an svm
svc = SVC(kernel='linear')
svc.fit(X, Y)


# The equation of the separating plane is given by all x in R^3 such that:
# np.dot(svc.coef_[0], x) + b = 0. We solve for the last coordinate
# to plot the plane in terms of x and y.
def z(x, y):
    """Return the plane's z coordinate for the given x and y."""
    w = svc.coef_[0]
    return (-svc.intercept_[0] - w[0] * x - w[1] * y) / w[2]


tmp = np.linspace(-2, 2, 51)
x, y = np.meshgrid(tmp, tmp)
# Plot the plane, then the two classes with different markers.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(x, y, z(x, y))
ax.plot3D(X[Y == 0, 0], X[Y == 0, 1], X[Y == 0, 2], 'ob')
ax.plot3D(X[Y == 1, 0], X[Y == 1, 1], X[Y == 1, 2], 'sr')
plt.show()
Output:
EDIT (Key Mathematical Linear Algebra Statement In Comment Above):
# The equation of the separating plane is given by all x in R^3 such that:
# np.dot(coefficients, x_vector) + intercept_value = 0.
# We should solve for the last coordinate: x_vector[2] == z
# to plot the plane in terms of x and y.
You cannot visualize the decision surface for a lot of features. This is because the dimensions will be too many and there is no way to visualize an N-dimensional surface.
However, you can use 2 features and plot nice decision surfaces as follows.
I have also written an article about this here:
https://towardsdatascience.com/support-vector-machines-svm-clearly-explained-a-python-tutorial-for-classification-problems-29c539f3ad8?source=friends_link&sk=80f72ab272550d76a0cc3730d7c8af35
Case 1: 2D plot for 2 features and using the iris dataset
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features.
# Class label for each row of X.
y = iris.target
def make_meshgrid(x, y, h=.02):
    """Build a 2-D coordinate grid covering x and y with a margin of 1.

    Parameters
    ----------
    x, y : array-like with ``min()`` and ``max()``
        Values whose range sets the grid extent along each axis.
    h : float, optional
        Step between neighbouring grid points.

    Returns
    -------
    xx, yy : ndarray
        Grids as returned by ``np.meshgrid``.
    """
    # Pad the data range by one unit on every side.
    x_min, x_max = x.min() - 1, x.max() + 1
    y_min, y_max = y.min() - 1, y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    return xx, yy
def plot_contours(ax, clf, xx, yy, **params):
    """Draw filled contours of a classifier's predictions over a grid.

    Parameters
    ----------
    ax : object providing ``contourf``
        Axes to draw on.
    clf : object providing ``predict``
        Classifier queried at every grid point.
    xx, yy : ndarray
        Coordinate grids of identical shape.
    **params
        Passed through unchanged to ``ax.contourf``.

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    # Flatten the grids into an (n_points, 2) array, predict, then restore
    # the grid shape so the predictions line up with xx and yy.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out
# Fit a linear SVM on the two selected features.
model = svm.SVC(kernel='linear')
clf = model.fit(X, y)
fig, ax = plt.subplots()
# title for the plots
title = ('Decision surface of linear SVC ')
# Set-up grid for plotting.
X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)
# Filled contours show the predicted class at every grid point.
plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
# Overlay the samples, coloured by their true class.
points = ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set_ylabel('y label here')
ax.set_xlabel('x label here')
ax.set_xticks(())
ax.set_yticks(())
ax.set_title(title)
# None of the artists carries a label, so a bare ax.legend() has nothing to
# show; build the legend entries from the scatter's colour classes instead.
ax.legend(*points.legend_elements())
plt.show()
Case 2: 3D plot for 3 features and using the iris dataset
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from mpl_toolkits.mplot3d import Axes3D
iris = datasets.load_iris()
X = iris.data[:, :3]  # keep only the first three features
Y = iris.target
# Turn it into a binary problem: keep the rows of classes 0 and 1 only.
binary_mask = (Y == 0) | (Y == 1)
X = X[binary_mask]
Y = Y[binary_mask]
model = svm.SVC(kernel='linear')
clf = model.fit(X, Y)


# The separating plane is the set of points x with
# np.dot(clf.coef_[0], x) + clf.intercept_[0] = 0; solving for the third
# coordinate gives z as a function of x and y.
def z(x, y):
    w = clf.coef_[0]
    return (-clf.intercept_[0] - w[0] * x - w[1] * y) / w[2]


# 30x30 grid over [-5, 5] in x and y on which the plane is evaluated.
tmp = np.linspace(-5, 5, 30)
x, y = np.meshgrid(tmp, tmp)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Class 0 as blue circles, class 1 as red squares, then the plane.
ax.plot3D(X[Y == 0, 0], X[Y == 0, 1], X[Y == 0, 2], 'ob')
ax.plot3D(X[Y == 1, 0], X[Y == 1, 1], X[Y == 1, 2], 'sr')
ax.plot_surface(x, y, z(x, y))
# Viewing angle: elevation 30, azimuth 60.
ax.view_init(30, 60)
plt.show()