Difference in bins distribution between Matplotlib & Holoviews - matplotlib

ALL software version info
Python 3.7.4;
On iMac (21.5-inch, 2017);
Using IDLE.
Description of expected behavior and the observed behavior
Problem is: Different bins distribution between Matplotlib & Holoviews is obtained.
Complete, minimal, self-contained example code that reproduces the issue
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
wine = load_wine()
print("Feature Names : ", wine.feature_names)
print("\nTarget Names : ", wine.target_names)
wine_df = pd.DataFrame(wine.data, columns = wine.feature_names)
wine_df["Target"] = wine.target
wine_df["Target"] = ["Class_1" if typ==0 else "Class_2" if typ==1 else "Class_3" for typ in wine_df["Target"]]
print("\nDataset Size : ", wine_df.shape)
print(wine_df.head())
Target1=wine_df.query('Target == "Class_1"')
Target2=wine_df.query('Target == "Class_2"')
Target3=wine_df.query('Target == "Class_3"')
x = Target1['proline']
y = Target2['proline']
z = Target3['proline']
plt.hist(x, bins=20,histtype='bar',color='blue',alpha=0.7,label='Class_1')
plt.hist(y, bins=20,histtype='bar',color='red',alpha=0.7,label='Class_2')
plt.hist(z, bins=20,histtype='bar',color='orange',alpha=0.7,label='Class_3')
plt.xlabel('proline')
plt.ylabel('Frequency')
plt.title('Malic Acid Distribution')
plt.legend(frameon=False)
plt.tight_layout()
plt.savefig("Test", dpi=300)
plt.show()
import holoviews as hv
hv.extension('bokeh')
from bokeh.plotting import show
from holoviews import dim, opts
import hvplot.pandas
hist=wine_df.hvplot.hist(y="proline", by="Target", width=600, height=400, ylim=(0,16), alpha=0.7, bins=20, ylabel="Frequency", title="Malic Acid Distribution")
show(hv.render(hist))

Related

How to show the peaks of pmf by matplotlib and scipy?

this is the code(I want to know the peak of the picture but I don't know how to add this kind of code)
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy import stats
n=25
p=0.6
k=np.arange(0,50)
#the pmf forming
picture=stats.binom.pmf(k,n,p)
print(picture)
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False
mean,var,skew,kurt=stats.binom.stats(n,p,moments='mvsk')
print(mean,var,skew,kurt)
#the picture forming
plt.plot(k,picture,'o-')
plt.grid(True)
plt.show()
You can use scatter
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy import stats
n=25
p=0.6
k=np.arange(0,50)
#the pmf forming
picture=stats.binom.pmf(k,n,p)
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False
mean,var,skew,kurt=stats.binom.stats(n,p,moments='mvsk')
print(mean,var,skew,kurt)
#the picture forming
plt.plot(k,picture,'o-')
plt.grid(True)
# the two new lines
max_ind = np.argmax(picture)
plt.scatter(x=k[max_ind],y=picture[max_ind],c='r',s=100,zorder=10)
and this produces

In Pandas, how can a DataFrame be binned by two columns, with the other columns changed to the means within those bins?

I've got the standard iris dataset projected down to two dimensions using UMAP, with the UMAP dimensions for the x and y positions of the 2D plot added as columns to the dataframe:
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap # pip install umap-learn
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target).map(dict(zip(range(3), iris.target_names)))
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:,0]
iris_df['UMAP_y'] = _umap[:,1]
iris_df.head()
I'd like to bin both the UMAP_x and UMAP_y columns into like 25 bins and then the other columns in the dataframe change to being the mean values of the columns in each of the bins. How might this be done? It feels like cut or resampling might lead to the answer, but I'm not sure how.
You can use cut to define bins and then use groupby with transform to calculate mean value for each bin.
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target).map(dict(zip(range(3), iris.target_names)))
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:,0]
iris_df['UMAP_y'] = _umap[:,1]
# Define bins for UMAP_x and UMAP_y params
iris_df['UMAP_x_bin'] = pd.cut(iris_df['UMAP_x'], bins=25)
iris_df['UMAP_y_bin'] = pd.cut(iris_df['UMAP_y'], bins=25)
# Calculate mean value for each bin
iris_df['UMAP_x_mean'] = iris_df.groupby('UMAP_x_bin')['UMAP_x'].transform('mean')
iris_df['UMAP_y_mean'] = iris_df.groupby('UMAP_y_bin')['UMAP_y'].transform('mean')
iris_df.head()

How can I convert Arduino signal from Python to Fast Fourier transform?

I'm now trying to convert the signal into a Fast Fourier transform in Python and draw a graph. I have a problem with Len here. How can I fix this? And does anyone have any other ideas about converting Fast Fourier transform?
Exception has occurred: TypeError
object of type 'method' has no len()
That is my problem.
from PyQt5.QtWidgets import*
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import random
from PyQt5 import QtCore, QtGui, QtWidgets
import datetime
import serial
import time
import random
import numpy as np
from matplotlib import animation
from collections import deque
import threading
x = 0
value = [0]
ser = serial.Serial('com5', 9600)
class scope :
def data(self) :
if ser.readable() :
time.sleep(0.01)
reciving = ser.readline(ser.inWaiting())
str = reciving.decode()
if len(str) > 0 :
if str[:1] == 'X' :
value[0] = str[1:]
#print(float(value[5]))
time.sleep(0.5)
x = float(value[0])
return x
s = scope()
n = len(s.data)
Ts = 0.01
Fs = 1/Ts
# length of the signal
k = np.arange(n)
T = n/Fs
freq = k/T # two sides frequency range
freq = freq[range(int(n/2))] # one side frequency range
Y = np.fft.fft(x)/n # fft computing and normalization
Y = Y[range(int(n/2))]
fig, ax = plt.subplots(2, 1)
ax.plot(freq, abs(Y), 'r', linestyle=' ', marker='^')
ax.set_xlabel('Freq (Hz)')
ax.set_ylabel('|Y(freq)|')
#3ax.vlines(freq, [0], abs(Y))
ax.grid(True)
t = threading.Thread(target= s.data)
t.daemon = True
t.start()
plt.show()

Some Matplotlib plots are blank/incomplete when run in dask (parallel)?

Some plots are showing up partially drawn. Looks like there is some global state that needs to be locked on?
import matplotlib.pyplot as plt
import numpy as np
import dask
import os
from dask.distributed import Client
client = Client(processes=False)
def oneplot(x):
fig = plt.figure(num=f'{x}')
ax = fig.subplots(1, 1)
ax.plot(np.random.randn(100))
plt.savefig(os.path.expanduser(f'~/test_{x}.png'))
def test():
d = [client.submit(oneplot, i) for i in range(10)]
return d
I had the same issue when working with dask and matplotlib. I solved it using fig.savefig(...) instead of using plt.savefig(...). It might work for you as well.

Map offsite with matplotlib(using geopandas and cartopy)

I have created a map like this:
The problem with it is that on the right side of the map is always a little bit offsite. I have set the bounds to:
ax.set_xlim(-215800,
1000000)
ax.set_ylim(3402659,
4879248)
No matter how I increase the xlim, or set margin the right side is still outside the bounds of the canvas. Can somebody help?
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from osgeo import ogr
import matplotlib.pyplot as plt
import matplotlib as mpl
import rasterio
import cartopy.crs as ccrs
import geopandas
from geopandas import *
from matplotlib import colors
MediumApple='#55FF00'
Cantaloupe='#FFA77F'
Marsred='#FF0000'
crs = ccrs.UTM(zone=10)
ax = plt.axes(projection=crs)
import matplotlib.patches as mpatches
for county in CAcountylist:
with rasterio.drivers():
with rasterio.open(r"CA\%s \%s.tif"%(county,county),"r") as src:
meta = src.meta
im=src.read().astype('f')
im=np.transpose(im,[1,2,0])
print im.shape
print im.min(),im.max()
im[im==0]=np.nan
im=im.squeeze()
xmin = src.transform[0]
xmax = src.transform[0] + src.transform[1]*src.width
print src.width,src.height
ymin = src.transform[3] + src.transform[5]*src.height
ymax = src.transform[3]
colors=[MediumApple,Cantaloupe,Marsred]
cmap=mpl.colors.ListedColormap([MediumApple,Cantaloupe,Marsred])
bounds_color=[1,1,2,2,3,3]
norm=mpl.colors.BoundaryNorm(bounds_color,cmap.N)
print xmin,xmax,ymin,ymax
ax.imshow(im, origin='upper', extent=[xmin,xmax,ymin,ymax], transform=crs, interpolation='nearest',cmap=cmap,norm=norm)
df=GeoDataFrame.from_file(r"\CACounty.shp")
df=df.to_crs(epsg=26910)
df.plot(axes=ax,alpha=0)
bounds = df.geometry.bounds
ax.set_xlim(-215800,
1000000)
ax.set_ylim(3402659,
4879248)
low_patch = mpatches.Patch(color='#55FF00', label='Low')
Moderate_patch = mpatches.Patch(color='#FFA77F', label='Moderate')
High_patch = mpatches.Patch(color='#FF0000', label='High')
plt.legend(handles=[low_patch,Moderate_patch,High_patch],loc=3)
plt.show()