Expand matrix based on vector - numpy

I want to turn matrix A into matrix B.
Is there a better/more efficient approach with NumPy than the following?
import numpy as np
a = np.array([[0.02, 0.05, 0.05],
[0.35, 0.10, 0.45],
[0.08, 0.25, 0.15]])
w = np.array([0.75, 0.25])
B = np.insert(a, 9, a[2, :]).reshape(4, 3)
B = np.insert(B.T, 12, B[:, 2]).reshape(4, 4).T
B[2:4, :] = np.multiply(B[2:4, :].T, w).T

`np.insert` isn't a good choice here because NumPy has to allocate memory for a whole new array every time you call it. Instead, pre-allocate an array of the size you need, and then assign to its slices.
a = np.array([[0.02, 0.05, 0.05],
[0.35, 0.10, 0.45],
[0.08, 0.25, 0.15]])
w = np.array([0.75, 0.25])
b_shape = tuple(s + 1 for s in a.shape) # We need one more row and column than a
b = np.zeros(b_shape) # Create zero array of required shape
b[:a.shape[0], :a.shape[1]] = a # Set a in the top left corner
b[:, -1] = b[:, -2] # Set last column from second-last column
b[-1, :] = b[-2, :] # Set last row from second-last row
b[-w.shape[0]:, :] = b[-w.shape[0]:, :] * w[:, None] # Multiply last two rows with `w`
w[:, None] makes w a column vector (a 2x1 matrix), and numpy broadcasts the shapes to do the correct elementwise multiplication.
This gives us the required b:
array([[0.02 , 0.05 , 0.05 , 0.05 ],
[0.35 , 0.1 , 0.45 , 0.45 ],
[0.06 , 0.1875, 0.1125, 0.1125],
[0.02 , 0.0625, 0.0375, 0.0375]])
Putting this in a function to compare runtimes against your approach:
import numpy as np
import timeit
from matplotlib import pyplot as plt
#% Define functions
def func_insert(a, w):
    """Expand ``a`` by one row and one column using ``np.insert``.

    The last row of ``a`` is appended as a new row, the last column of that
    intermediate result is appended as a new column, and the last
    ``w.shape[0]`` rows are then scaled by the entries of ``w``.

    Parameters
    ----------
    a : 2-D array of shape (n, m)
    w : 1-D array of row weights

    Returns
    -------
    2-D array of shape (n + 1, m + 1)
    """
    n_rows, n_cols = a.shape
    # Without an axis argument np.insert works on the flattened input, so
    # inserting at ``a.size`` appends the last row to the flat data.
    B = np.insert(a, a.size, a[-1, :]).reshape(n_rows + 1, n_cols)
    # B.T flattens column by column, so after appending the last column the
    # flat data has to be viewed as (n_cols + 1, n_rows + 1) and transposed
    # back.  Reshaping to (n_rows + 1, n_cols + 1) is only right when a is
    # square.
    B = np.insert(B.T, B.size, B[:, -1]).reshape(n_cols + 1, n_rows + 1).T
    k = w.shape[0]
    B[-k:, :] = np.multiply(B[-k:, :].T, w).T
    return B
def func_prealloc(a, w):
    """Expand ``a`` by one row and one column using a pre-allocated array.

    ``a`` is copied into the top-left corner of a zero array that has one
    more row and one more column, the last column and last row are filled
    from their neighbours, and the last ``w.shape[0]`` rows are scaled by
    the entries of ``w``.

    Parameters
    ----------
    a : 2-D array of shape (n, m)
    w : 1-D array of row weights

    Returns
    -------
    2-D array of shape (n + 1, m + 1)
    """
    n_rows, n_cols = a.shape
    b = np.zeros((n_rows + 1, n_cols + 1))
    b[:n_rows, :n_cols] = a
    b[:, -1] = b[:, -2]  # duplicate the last column of a
    b[-1, :] = b[-2, :]  # duplicate the (already widened) last row
    # w[:, None] is a column vector, so each weight scales one whole row.
    b[-w.shape[0]:, :] *= w[:, None]
    return b
#% Time function calls
# Benchmark both implementations on square random matrices of growing size.
sizes = [3, 10, 50, 100, 500, 1000, 5000, 10_000]
times = np.zeros((len(sizes), 2))  # column 0: insert, column 1: prealloc
for i, size in enumerate(sizes):
    # The timed statements are strings evaluated against globals(), so `a`
    # and `w` have to be rebound at module level on every iteration.
    a = np.random.random((size, size))
    w = np.random.random((2,))
    # Average over 10 calls to get the time per call.
    times[i, 0] = timeit.timeit("func_insert(a, w)", globals=globals(), number=10) / 10
    print(".")
    times[i, 1] = timeit.timeit("func_prealloc(a, w)", globals=globals(), number=10) / 10
    print("x")
#% Plot results
fig, ax = plt.subplots()
ax.plot(sizes, times[:, 0], label="Insert")
ax.plot(sizes, times[:, 1], label="Prealloc")
ax.set_xscale('log')
ax.set_yscale('log')
ax.legend()
ax.set_xlabel('Array size (NxN)')
ax.set_ylabel('Time per function call (s)')
ax.grid(True)
fig.tight_layout()
There's a consistent 3-5x speedup by preallocating.

Related

How to remove noise-only rows from a numpy array?

I want to remove rows from a Numpy array, where there is just noise and a random constant offset.
My data looks similar to this:
offset = np.array([0.2, 3.2])
signal = np.sin(np.arange(0, 2 * np.pi, 0.1))
null = np.zeros_like(signal)
data_block = np.array([signal, null]).T
padding = np.zeros((10, 2))
block = np.vstack((padding, data_block, padding)) + offset
# add noise
shape = block.shape
noise = np.random.rand(shape[0], shape[1]) * .01
everything = noise + block
In reality, there is no step but rather a smooth transition from the offset to the data. It is like first there is the offset, which starts to move, once the data block starts, and becomes another offset when it stops. The noise amplitude is much smaller than the data amplitude.
I would like to retrieve the rows with the data block from everything, preferably based on continuous, smooth change in the data block. How can I do that?
This is my best effort on identifying the data_block. I would be happy if it could be improved!
import numpy as np
offset = np.array([0.2, 3.2])
signal = np.sin(np.arange(0, 2 * np.pi, 0.1))
null = np.zeros_like(signal)
data_block = np.array([signal, null]).T
padding = np.zeros((10, 2))
block = np.vstack((padding, data_block, padding)) + offset
# add noise
shape = block.shape
noise = np.random.rand(shape[0], shape[1]) * .01
everything = noise + block
from matplotlib import pyplot as plt
x = np.arange(shape[0])
plt.plot(x, everything[:, 0])
plt.plot(x, everything[:, 1])
plt.show()
diff_everything = np.diff(everything, axis=0)
x = np.arange(shape[0] - 1)
plt.plot(x, diff_everything[:, 0])
plt.plot(x, diff_everything[:, 1])
plt.show()
mask = (np.linalg.norm(diff_everything[:, :], axis=1) > 0.01)
mask = np.append(mask, False)
data = everything[mask, :]
shape = data.shape
x=np.arange(0,shape[0])
plt.plot(x, data[:, 0])
plt.plot(x, data[:, 1])
plt.show()

How to plot same colors for same values in a map?

I'm creating a colorbar with the function make_colormap. Source: Create own colormap using matplotlib and plot color scale.
Also, I'm plotting many maps in a loop: for month, data in normals.groupby('MONTH'):
I want the colorbar to map the same values to the same colors in every map (so that values can be compared between maps), but in this call:
rvb = make_colormap(
[c('brown'), c('orange'), 0.10, c('orange'), c('yellow'), 0.20, c('green'), c('cyan'), 0.66, c('blue'), c('purple') ])
I can only specify percentages. Do you know how I can modify this to use exact values instead of percentages?
import matplotlib.colors as mcolors
def make_colormap(seq):
    """Return a LinearSegmentedColormap

    seq: a sequence of numbers and RGB-tuples. The numbers are the
    breakpoints between colour segments; they should be increasing and in
    the interval (0,1).
    """
    # Pad both ends so every breakpoint has a colour on each side; the
    # (None, None, None) entries fill the unused outer slots at 0.0 and 1.0.
    seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3]
    cdict = {'red': [], 'green': [], 'blue': []}
    for i, item in enumerate(seq):
        # Accept ints as well as floats: an integer breakpoint would
        # otherwise be skipped without any error.  RGB tuples never match.
        if isinstance(item, (int, float)):
            r1, g1, b1 = seq[i - 1]
            r2, g2, b2 = seq[i + 1]
            cdict['red'].append([item, r1, r2])
            cdict['green'].append([item, g1, g2])
            cdict['blue'].append([item, b1, b2])
    return mcolors.LinearSegmentedColormap('CustomMap', cdict)
c = mcolors.ColorConverter().to_rgb
rvb = make_colormap(
[c('brown'), c('orange'), 0.10, c('orange'), c('yellow'), 0.20, c('green'), c('cyan'), 0.66, c('blue'), c('purple') ])
# Draw one scatter map of the 'PP' values per month.
for month, data in normals.groupby('MONTH'):
    # `month` is the groupby key, so it does not need to be re-read from
    # the 'MONTH' column of the group.
    lons, lats = np.array(data['LONGITUDE']), np.array(data['LATITUDE'])
    ppvalues = np.array(data['PP']).astype(int)
    fig = plt.figure('map', figsize=(7, 7), dpi=200)
    ax = fig.add_axes([0.1, 0.12, 0.80, 0.75], projection=ccrs.PlateCarree())
    plt.xlabel('LONGITUDE')
    plt.ylabel('LATITUDE')
    # NOTE(review): newer cartopy releases appear to have replaced
    # `outline_patch` with `ax.spines['geo']` -- confirm against the
    # installed version.
    ax.outline_patch.set_linewidth(0.3)
    l = NaturalEarthFeature(category='cultural', name='admin_0_countries', scale='50m', facecolor='none')
    ax.add_feature(l, edgecolor='black', linewidth=0.25)
    # The colour values are `ppvalues`; the original referenced the
    # undefined name `ppvalores`.
    img = ax.scatter(lons, lats, s=7, c=ppvalues, cmap=rvb,
                     marker='o', transform=ccrs.PlateCarree())
    #ticks=[0,1,2,3,4,5,6,7,8,9,10]
    cb = plt.colorbar(img, extend='both',
                      spacing='proportional', orientation='horizontal',
                      cax=fig.add_axes([0.12, 0.12, 0.76, 0.02]))
    # Save before showing: once the window opened by plt.show() is closed
    # the figure may be empty.  The original file-name literal was
    # malformed; one file per month is assumed -- TODO confirm the path.
    fig.savefig("path/{}.png".format(month))
    plt.show()
    # The figure is looked up by the name 'map'; close it so the next month
    # starts from an empty figure instead of stacking axes.
    plt.close(fig)
I'm relatively new in python so would you mind to help me?
Thanks in advance.
You could apply a norm. Using the same norm for all plots would make the colors consistent. It is unclear what the range of your data['PP'] column is. Here is an example of the changes if you would like 100, 200 and 660 for the three values in the list given to make_colormap:
# Take the limits from the whole table (`normals`), not from a single
# month's group, so that every map shares the same value-to-colour mapping.
vmin = normals['PP'].min() # the overall minimum
vmax = normals['PP'].max() # the overall maximum
norm = plt.Normalize(vmin, vmax) # function that maps the range of normals['PP'] to the range [0,1]
rvb = make_colormap(
    [c('brown'), c('orange'), norm(100), c('orange'), c('yellow'), norm(200), c('green'), c('cyan'), norm(660), c('blue'), c('purple')])
for month, data in normals.groupby('MONTH'):
...
img = ax.scatter(..., cmap=rvb, norm=norm)
...

matplotlib heatmap, customize y axis

Right now my code looks like this:
#generate 262*20 elements
values = np.random.random(262*20).tolist()
# convert the list to a 2D NumPy array
values = np.array(values).reshape((262, 20))
h, w = values.shape
#h=262, w=20
fig = plt.figure(num=None, dpi=80,figsize=(9, 7), facecolor='w', edgecolor='k')
ax = fig.add_subplot(111)
#fig, ax = plt.subplots()
plt.imshow(values)
plt.colorbar()
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
ax.set_aspect(w/h)
plt.show()
The plot looks like this:
As you can see, the range of y axis is 0-261.
But I want my y axis to go from 26 to 290, missing 57, 239, and 253. So still 262 in total. I tried to generate a list like this:
mylist =[26, 27, ......missing 57, 239, 253, ....290]
plt.yticks(np.arange(h), mylist)
The Y axis just looks like everything squished together.
So I tried:
pylab.ylim([26, 290])
And It looks like this:
So it just feels like the data in first row always corresponds to [0], not to [26]
I suggest you use pcolormesh. If you want gaps, use a masked array (numpy.ma) for the area with gaps.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
# Rows are indexed by their y label, so labels 0..290 need 291 rows.
values = np.random.rand(291, 20)
# Blank out the rows that have no data: everything below 26 and the three
# missing labels.  (np.nan is used because the np.NaN alias no longer
# exists in NumPy 2.0.)
values[:26, :] = np.nan
values[[57, 239, 253], :] = np.nan
values = np.ma.masked_invalid(values)
h, w = values.shape
fig, ax = plt.subplots(figsize=(9,7))
# Make one larger so these values represent the edge of the data pixels.
y = np.arange(0, 291.5)
x = np.arange(0, 20.5)
pcm = ax.pcolormesh(x, y, values, rasterized=True) # you don't need rasterized=True
fig.colorbar(pcm)
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
plt.show()
Result
EDIT: If you want to just work w/ a 262x20 array:
values = np.random.rand(262,20)
h, w = values.shape
fig, ax = plt.subplots(figsize=(9,7))
# Make one larger so these values represent the edge of the data pixels.
# Edges 0..291 minus the three missing labels minus the first 26 leave 263
# edges, i.e. exactly one more than the 262 data rows.
y = np.arange(0, 291.5)
y = np.delete(y, [57, 239, 253])
y = np.delete(y, range(26))
x = np.arange(0, 20.5)
pcm = ax.pcolormesh(x, y, values, rasterized=True) # you don't need rasterized=True
fig.colorbar(pcm)
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
plt.show()
Note that this doesn't put a blank line at 57, 239 and 253. If you want that, you need to do:
values = np.random.rand(262,20)
h, w = values.shape
# Labels run from 26 to 290 inclusive, so the padded array needs 291 rows
# (indices 0..290); rows without data stay NaN and are drawn blank.
Z = np.ones((291, 20)) * np.nan
# Sort the target rows explicitly: a set has no guaranteed iteration
# order, and row nn of `values` must land on the nn-th smallest label.
inds = sorted(set(range(26, 291)) - {57, 239, 253})
Z[inds, :] = values
fig, ax = plt.subplots(figsize=(9,7))
# Make one larger so these values represent the edge of the data pixels.
y = np.arange(0, 291.5)
x = np.arange(0, 20.5)
pcm = ax.pcolormesh(x, y, Z, rasterized=True) # you don't need rasterized=True
fig.colorbar(pcm)
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
plt.show()

Pretty confusion matrix visualisation with matplotlib

I'm wondering if there are any templates for displaying confusion matrices in matplotlib with a similar rendering; I don't know what this style of plot is called.
I have tried doing something similar with your fig 2. Here is my code using hand written digits data.
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from sklearn import manifold, datasets
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import leaves_list, linkage
def get_small_Xy(X, y, n=8):
    """Return at most ``n`` samples of every class, grouped by class.

    Parameters
    ----------
    X : array whose first axis indexes samples
    y : 1-D array of class labels, one per row of ``X``
    n : maximum number of samples kept per class

    Returns
    -------
    (X_small, y_small) : the selected rows, ordered by class label, and
        their labels.
    """
    classes = np.unique(y)
    per_class = [X[y == label][:n] for label in classes]
    # Emit one label per row actually kept, so X and y stay aligned even
    # when a class has fewer than n samples.
    labels = [[label] * len(rows) for label, rows in zip(classes, per_class)]
    return np.vstack(per_class), np.hstack(labels)
# Load digit data
X_, y_ = datasets.load_digits(return_X_y=True)
# get a small set of data
X, y = get_small_Xy(X_, y_)
# Get similarity matrix
D = 1-squareform(pdist(X, metric='cosine'))
Z = linkage(D, method='ward')
ind = leaves_list(Z)
D = D[ind, :]
D = D[:, ind]
# labels and colors related
lbs = np.array([i if i==j else 10 for i in y for j in y])
colors = np.array(['C{}'.format(i) for i in range(10)]+['gray'])
colors[7] = '#413c39'
c = colors[lbs]
font1 = {'family': 'Arial',
'weight': 'normal',
'size': 8,
}
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
# Number of samples.  (np.product was removed in NumPy 2.0 and was a no-op
# on a single integer anyway.)
n = X.shape[0]
# One marker per (column, row) pair of the similarity matrix.
xx, yy = np.meshgrid(range(n), range(n))
xy = np.stack([xx.ravel(), yy.ravel()]).T
# Marker area grows with the 4th power of the similarity.
ax.scatter(xy[:, 0], xy[:, 1], s=D**4*30, fc=c, ec=None, alpha=0.8)
ax.set_xlim(-1, n)
ax.set_ylim(n, -1)  # inverted so that row 0 is at the top
ax.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=False)
# place text
for i, e in enumerate(y):
    ax.text(-1.2, i, e, ha='right', va='center', fontdict=font1, c=colors[e])
for i, e in enumerate(y):
    ax.text(i, -1, e, ha='center', va='bottom', fontdict=font1, c=colors[e])
# draw lines
# np.diff(y) is non-zero where the label changes, so a separator is drawn
# half a cell after each such position.
for e in np.where(np.diff(y))[0]:
    ax.axhline(e+0.5, color='gray', lw=0.5, alpha=0.8)
    ax.axvline(e+0.5, color='gray', lw=0.5, alpha=0.8)
One issue is the alpha of the points: it does not seem possible to give each point a different alpha value in a single scatter call.

Matplotlib using image for points on plot

I have the following matplotlib script. I want to replace the points on the plot with images. Let's say 'red.png' for the red points and 'blue.png' for the blue points. How can I adjust the following to plot these images instead of the default points?
from scipy import linalg
import numpy as np
import pylab as pl
import matplotlib as mpl
import matplotlib.image as image
from sklearn.qda import QDA
###############################################################################
# load sample dataset
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data[:, 0:2] # Take only 2 dimensions
y = iris.target
X = X[y > 0]
y = y[y > 0]
y -= 1
target_names = iris.target_names[1:]
###############################################################################
# QDA
qda = QDA()
y_pred = qda.fit(X, y, store_covariances=True).predict(X)
###############################################################################
# Plot results
# constants
dpi = 72; imageSize = (32,32)
# read in our png file
im_red = image.imread('red.png')
im_blue = image.imread('blue.png')
def plot_ellipse(splot, mean, cov, color):
    """Add a half-transparent ellipse derived from ``cov`` to ``splot``.

    splot: the subplot the ellipse is added to and clipped against.
    mean: centre of the ellipse.
    cov: covariance matrix; its eigen-decomposition gives the size and
        orientation of the ellipse.
    color: colour of the ellipse patch.
    """
    v, w = linalg.eigh(cov)
    u = w[0] / linalg.norm(w[0])
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi # convert to degrees
    # filled gaussian at 2 standard deviation
    # `angle` is passed by keyword: current matplotlib releases no longer
    # accept it as a positional argument.
    ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                              angle=180 + angle, color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)
xx, yy = np.meshgrid(np.linspace(4, 8.5, 200), np.linspace(1.5, 4.5, 200))
X_grid = np.c_[xx.ravel(), yy.ravel()]
zz_qda = qda.predict_proba(X_grid)[:, 1].reshape(xx.shape)
pl.figure()
splot = pl.subplot(1, 1, 1)
pl.contourf(xx, yy, zz_qda > 0.5, alpha=0.5)
pl.scatter(X[y == 0, 0], X[y == 0, 1], c='b', label=target_names[0])
pl.scatter(X[y == 1, 0], X[y == 1, 1], c='r', label=target_names[1])
pl.contour(xx, yy, zz_qda, [0.5], linewidths=2., colors='k')
print(xx)
pl.axis('tight')
pl.show()
You can plot images instead of markers in a figure using BboxImage as in this tutorial.
from matplotlib import pyplot as plt
from matplotlib.image import BboxImage
from matplotlib.transforms import Bbox, TransformedBbox
# Load images.
redMarker = plt.imread('red.jpg')
blueMarker = plt.imread('blue.jpg')
# Data
blueX = [1, 2, 3, 4]
blueY = [1, 3, 5, 2]
redX = [1, 2, 3, 4]
redY = [3, 2, 3, 4]
# Create figure
fig = plt.figure()
ax = fig.add_subplot(111)
# Plots an image at each x and y location.
def plotImage(xData, yData, im, size=1):
    """Draw the image ``im`` at every (x, y) position on the module-level ``ax``.

    xData, yData: coordinates, in data units, of the lower-left corner of
        each image.
    im: image array handed to ``BboxImage.set_data``.
    size: width and height of each image in data units (default 1, the
        value that was previously hard-coded).
    """
    for x, y in zip(xData, yData):
        bb = Bbox.from_bounds(x, y, size, size)
        # Tie the box to data coordinates so the image follows the axes.
        bb2 = TransformedBbox(bb, ax.transData)
        bbox_image = BboxImage(bb2,
                               norm=None,
                               origin=None,
                               clip_on=False)
        bbox_image.set_data(im)
        ax.add_artist(bbox_image)
plotImage(blueX, blueY, blueMarker)
plotImage(redX, redY, redMarker)
# Set the x and y limits
ax.set_ylim(0,6)
ax.set_xlim(0,6)
plt.show()