How to show the peaks of pmf by matplotlib and scipy? - matplotlib

this is the code(I want to know the peak of the picture but I don't know how to add this kind of code)
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy import stats
n=25
p=0.6
k=np.arange(0,50)
#the pmf forming
picture=stats.binom.pmf(k,n,p)
print(picture)
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False
mean,var,skew,kurt=stats.binom.stats(n,p,moments='mvsk')
print(mean,var,skew,kurt)
#the picture forming
plt.plot(k,picture,'o-')
plt.grid(True)
plt.show()

You can use scatter
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy import stats
n=25
p=0.6
k=np.arange(0,50)
#the pmf forming
picture=stats.binom.pmf(k,n,p)
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False
mean,var,skew,kurt=stats.binom.stats(n,p,moments='mvsk')
print(mean,var,skew,kurt)
#the picture forming
plt.plot(k,picture,'o-')
plt.grid(True)
# the two new lines
max_ind = np.argmax(picture)
plt.scatter(x=k[max_ind],y=picture[max_ind],c='r',s=100,zorder=10)
and this produces

Related

how to display netcdf raster values over map?

I'm trying to plot netcdf raster values of snowfall data in a text format overlaying what I currently have (mentioned further below). Example, something like this below:
Example
This is all the relevant code I have so far. I excluded the non relevant code. I tried plt.text and it gave me "ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"
What I have plotted so far
import numpy
from datetime import datetime
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.mpl.ticker as cticker
import matplotlib.pyplot as plt
from matplotlib import ticker, patheffects
from metpy.units import units
import numpy as np
import numpy.ma as ma
from scipy.ndimage import gaussian_filter, maximum_filter, minimum_filter
import xarray as xr
from metpy.plots import USCOUNTIES
from gradient import Gradient
import pandas as pd
import matplotlib.colors as col
#Open NOAA Snowfall dataset
ds = xr.open_dataset('sfav2_CONUS_2021093012_to_2022042512.nc')
ds
lat = ds.lat
lon = ds.lon
#converts snowfall data to inches
snowdata = ds['Data'] * 39
plt.text(lon, lat, snowdata, transform=datacrs)
As far as I know there isn't a vectorized way of plotting text (plt.text or plt.annotated). So you'll have to loop over the arrays and plot each point.
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import cartopy.crs as ccrs
import numpy as np
data = np.random.rand(18, 9)
lons, lats = np.mgrid[-17:18:2, 8:-9:-2]
lons = lons * 10
lats = lats * 10
fig, ax = plt.subplots(figsize=(10, 5), dpi=86, facecolor="w", subplot_kw=dict(projection=ccrs.EqualEarth()))
ax.pcolormesh(lons, lats, data, cmap="coolwarm", alpha=.2, transform=ccrs.PlateCarree())
ax.coastlines()
for val, lat, lon in zip(data.flat, lats.flat, lons.flat):
ax.text(
lon, lat, f"{val:1.1f}", ha="center", va="center", transform=ccrs.PlateCarree(),
path_effects=[PathEffects.withStroke(linewidth=3, foreground="w", alpha=.5)],
)

In Pandas, how can a DataFrame be binned by two columns, with the other columns changed to the means within those bins?

I've got the standard iris dataset projected down to two dimensions using UMAP, with the UMAP dimensions for the x and y positions of the 2D plot added as columns to the dataframe:
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap # pip install umap-learn
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target).map(dict(zip(range(3), iris.target_names)))
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:,0]
iris_df['UMAP_y'] = _umap[:,1]
iris_df.head()
I'd like to bin both the UMAP_x and UMAP_y columns into like 25 bins and then the other columns in the dataframe change to being the mean values of the columns in each of the bins. How might this be done? It feels like cut or resampling might lead to the answer, but I'm not sure how.
You can use cut to define bins and then use groupby with transform to calculate mean value for each bin.
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target).map(dict(zip(range(3), iris.target_names)))
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:,0]
iris_df['UMAP_y'] = _umap[:,1]
# Define bins for UMAP_x and UMAP_y params
iris_df['UMAP_x_bin'] = pd.cut(iris_df['UMAP_x'], bins=25)
iris_df['UMAP_y_bin'] = pd.cut(iris_df['UMAP_y'], bins=25)
# Calculate mean value for each bin
iris_df['UMAP_x_mean'] = iris_df.groupby('UMAP_x_bin')['UMAP_x'].transform('mean')
iris_df['UMAP_y_mean'] = iris_df.groupby('UMAP_y_bin')['UMAP_y'].transform('mean')
iris_df.head()

Some Matplotlib plots are blank/incomplete when run in dask (parallel)?

Some plots are showing up partially drawn. Looks like there is some global state that needs to be locked on?
import matplotlib.pyplot as plt
import numpy as np
import dask
import os
from dask.distributed import Client
client = Client(processes=False)
def oneplot(x):
fig = plt.figure(num=f'{x}')
ax = fig.subplots(1, 1)
ax.plot(np.random.randn(100))
plt.savefig(os.path.expanduser(f'~/test_{x}.png'))
def test():
d = [client.submit(oneplot, i) for i in range(10)]
return d
I had the same issue when working with dask and matplotlib. I solved it using fig.savefig(...) instead of using plt.savefig(...). It might work for you as well.

Understanding plt.show() in Matplotlib

import numpy as np
import os.path
from skimage.io import imread
from skimage import data_dir
img = imread(os.path.join(data_dir, 'checker_bilevel.png'))
import matplotlib.pyplot as plt
#plt.imshow(img, cmap='Blues')
#plt.show()
imgT = img.T
plt.figure(1)
plt.imshow(imgT,cmap='Greys')
#plt.show()
imgR = img.reshape(20,5)
plt.figure(2)
plt.imshow(imgR,cmap='Blues')
plt.show(1)
I read that plt.figure() will create or assign the image a new ID if not explicitly given one. So here, I have given the two figures, ID 1 & 2 respectively. Now I wish to see only one one of the image.
I tried plt.show(1) epecting ONLY the first image will be displayed but both of them are.
What should I write to get only one?
plt.clf() will clear the figure
import matplotlib.pyplot as plt
plt.plot(range(10), 'r')
plt.clf()
plt.plot(range(12), 'g--')
plt.show()
plt.show will show all the figures created. The argument you forces the figure to be shown in a non-blocking way. If you only want to show a particular figure you can write a wrapper function.
import matplotlib.pyplot as plt
figures = [plt.subplots() for i in range(5)]
def show(figNum, figures):
if plt.fignum_exists(figNum):
fig = [f[0] for f in figures if f[0].number == figNum][0]
fig.show()
else:
print('figure not found')

Add thousands comma separator to Seaborn Heatmap [duplicate]

I am trying to format my colorbar such the numbers are formatted with commas. Any help would be greatly appreciated
import numpy as np
import matplotlib.pyplot as plt
plt.matshow(np.array(([30000,8000],[12000,25000])))
plt.colorbar()
You can create and specify a custom formatter:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
comma_fmt = FuncFormatter(lambda x, p: format(int(x), ','))
plt.matshow(np.array(([30000,8000],[12000,25000])))
plt.colorbar(format=comma_fmt)
plt.show()