What kind of plot this is? [duplicate] - matplotlib

I'm trying to get the errorbars to show at the confidence interval's limits, and not in the center.
What I want is this:
but what I'm getting is this:
To plot the bar chart I used this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(32000,200000,3650),
np.random.normal(43000,100000,3650),
np.random.normal(43500,140000,3650),
np.random.normal(48000,70000,3650)],
index=[1992,1993,1994,1995])
df1 = df.T
df1.columns = ['1992', '1993','1994','1995']
a = df1.describe()
means = a.loc['mean'].values.tolist()
stdevs = a.loc['std'].values.tolist()
counts = a.loc['count'].values.tolist()
index = np.arange(len(df1.columns))
CI = []
for i in range(len(means)):
CIval = 1.96*stdevs[i]/(counts[i]**(0.5))
CI.append(CIval)
#print(means, CI)
plt.figure()
fig, ax = plt.subplots(figsize=(10,10))
ax.set_xticks(index)
ax.set_xticklabels(df1.columns)
plt.bar(index, means, xerr = 0.1, yerr=CI)
plt.tight_layout()
plt.show()

The error bars are showing as expected. You have set a 0.1 value for the x error, however in your expected result image, there is no x errorbar so we can remove that. Secondly, we can increase the capsize of your error bars so that they are actually visible by using the capsize= in the call to plt.bar():
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(32000,200000,3650),
np.random.normal(43000,100000,3650),
np.random.normal(43500,140000,3650),
np.random.normal(48000,70000,3650)],
index=[1992,1993,1994,1995])
df1 = df.T
df1.columns = ['1992', '1993','1994','1995']
a = df1.describe()
means = a.loc['mean'].values.tolist()
stdevs = a.loc['std'].values.tolist()
counts = a.loc['count'].values.tolist()
index = np.arange(len(df1.columns))
CI = []
for i in range(len(means)):
CIval = 1.96*stdevs[i]/(counts[i]**(0.5))
CI.append(CIval)
fig, ax = plt.subplots(figsize=(10,10))
ax.set_xticks(index)
ax.set_xticklabels(df1.columns)
plt.bar(index, means, yerr=CI, capsize=10)
plt.tight_layout()
plt.show()

Related

Flight Path by shapely LineString is not correct

I want to connect airplanes in origin (lat_1 lon_1) to dest(lat_2 lon_2). I use these data.
callsign
latitude_1
longitude_1
latitude_2
longitude_2
0
HBAL102
-4.82114
-76.3194
-4.5249
-79.0103
1
AUA1028
-33.9635
151.181
48.1174
16.55
2
ABW120
41.9659
-87.8832
55.9835
37.4958
3
CSN461
33.9363
-118.414
50.0357
8.5723
4
ETH3730
25.3864
55.4221
50.6342
5.43903
But unfortunately, I would get an incorrect result when creating LineString with shapely. I used everything like rotate and affine but it didn't correct.
Code:
cols = pd.read_csv("/content/dirct_lines.csv",sep=";")
line = cols[["callsign","latitude_1","longitude_1","latitude_2","longitude_2"]].dropna()
line['geometry'] = line.apply(lambda x: [(x['latitude_1'],
x['longitude_1']),
(x['latitude_2'],
x['longitude_2'])], axis = 1)
geoline = gpd.GeoDataFrame(line,geometry="geometry",
crs="EPSG:4326")
import matplotlib.pyplot as plt
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
ax = world.plot(figsize=(14,9),
color='white', edgecolor='black')
geoline.plot(figsize=(14,9),ax=ax,facecolor = 'lightgrey', linewidth = 1.75,
edgecolor = 'red',
alpha = 2)
plt.show()
Shapely Output:
something that was interesting for me was that when I use Matplotlib to create lines everything is correct.
Code:
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection=ccrs.PlateCarree())
ax.stock_img()
org_lon, org_lat = cols["longitude_1"], cols["latitude_1"]
dst_lon, dst_lat = cols["longitude_2"], cols["latitude_2"]
plt.plot([org_lon, dst_lon], [org_lat, dst_lat],
color='black', linewidth=0.5, marker='_',
transform=ccrs.PlateCarree()
)
plt.savefig(f"fight_path.png",dpi=60,facecolor = None, bbox_inches = 'tight', pad_inches = None)
plt.show()
Matplotlib Output:
What is the problem?
why isn't correct by shapely?
it's just the way you are creating the geometry. Below works correctly.
import io
import geopandas as gpd
import pandas as pd
import shapely.geometry
df = pd.read_csv(
io.StringIO(
"""callsign,latitude_1,longitude_1,latitude_2,longitude_2
HBAL102,-4.82114,-76.3194,-4.5249,-79.0103
AUA1028,-33.9635,151.181,48.1174,16.55
ABW120,41.9659,-87.8832,55.9835,37.4958
CSN461,33.9363,-118.414,50.0357,8.5723
ETH3730,25.3864,55.4221,50.6342,5.43903
"""
)
)
geoline = gpd.GeoDataFrame(
geometry=[
shapely.geometry.LineString(points)
for points in zip(
gpd.points_from_xy(df["longitude_1"], df["latitude_1"]),
gpd.points_from_xy(df["longitude_2"], df["latitude_2"]),
)
],
data=df,
)
import matplotlib.pyplot as plt
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
ax = world.plot(figsize=(14, 9), color="white", edgecolor="black")
geoline.plot(
figsize=(14, 9),
ax=ax,
facecolor="lightgrey",
linewidth=1.75,
edgecolor="red",
)
plt.show()

error bars at the limits in matplotlib barchart

I'm trying to get the errorbars to show at the confidence interval's limits, and not in the center.
What I want is this:
but what I'm getting is this:
To plot the bar chart I used this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(32000,200000,3650),
np.random.normal(43000,100000,3650),
np.random.normal(43500,140000,3650),
np.random.normal(48000,70000,3650)],
index=[1992,1993,1994,1995])
df1 = df.T
df1.columns = ['1992', '1993','1994','1995']
a = df1.describe()
means = a.loc['mean'].values.tolist()
stdevs = a.loc['std'].values.tolist()
counts = a.loc['count'].values.tolist()
index = np.arange(len(df1.columns))
CI = []
for i in range(len(means)):
CIval = 1.96*stdevs[i]/(counts[i]**(0.5))
CI.append(CIval)
#print(means, CI)
plt.figure()
fig, ax = plt.subplots(figsize=(10,10))
ax.set_xticks(index)
ax.set_xticklabels(df1.columns)
plt.bar(index, means, xerr = 0.1, yerr=CI)
plt.tight_layout()
plt.show()
The error bars are showing as expected. You have set a 0.1 value for the x error, however in your expected result image, there is no x errorbar so we can remove that. Secondly, we can increase the capsize of your error bars so that they are actually visible by using the capsize= in the call to plt.bar():
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(32000,200000,3650),
np.random.normal(43000,100000,3650),
np.random.normal(43500,140000,3650),
np.random.normal(48000,70000,3650)],
index=[1992,1993,1994,1995])
df1 = df.T
df1.columns = ['1992', '1993','1994','1995']
a = df1.describe()
means = a.loc['mean'].values.tolist()
stdevs = a.loc['std'].values.tolist()
counts = a.loc['count'].values.tolist()
index = np.arange(len(df1.columns))
CI = []
for i in range(len(means)):
CIval = 1.96*stdevs[i]/(counts[i]**(0.5))
CI.append(CIval)
fig, ax = plt.subplots(figsize=(10,10))
ax.set_xticks(index)
ax.set_xticklabels(df1.columns)
plt.bar(index, means, yerr=CI, capsize=10)
plt.tight_layout()
plt.show()

Figures names in Pandas Boxplots

I created 2 boxplots using pandas.
Then each figure gets referenced with plt.gcf()
When trying to show the plots, only the last boxplot gets shown. Its like fig1 is getting overwritten.
What is the correct way of showing both boxplots?
This is the sample code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dates = pd.date_range('20000101', periods=10)
df = pd.DataFrame(index=dates)
df['A'] = np.cumsum(np.random.randn(10))
df['B'] = np.random.randint(-1,2,size=10)
df['C'] = range(1,11)
df['D'] = range(12,22)
# first figure
ax_boxplt1 = df[['A','B']].boxplot()
fig1 = plt.gcf()
# second figure
ax_boxplt2 = df[['C','D']].boxplot()
fig2 = plt.gcf()
# print figures
figures = [fig1,fig2]
for fig in figures:
print(fig)
Create a figure with two axes and plot to each of them separately
fig, axes = plt.subplots(2)
dates = pd.date_range('20000101', periods=10)
df = pd.DataFrame(index=dates)
df['A'] = np.cumsum(np.random.randn(10))
df['B'] = np.random.randint(-1,2,size=10)
df['C'] = range(1,11)
df['D'] = range(12,22)
# first figure
df[['A','B']].boxplot(ax=axes[0]) # Added `ax` parameter
# second figure
df[['C','D']].boxplot(ax=axes[1]) # Added `ax` parameter
plt.show()
In order to get two figures, define the figure before plotting to it. You can use a number enumerate the figures.
plt.figure(1)
# do something with the first figure
plt.figure(2)
# do something with the second figure
Complete example:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dates = pd.date_range('20000101', periods=10)
df = pd.DataFrame(index=dates)
df['A'] = np.cumsum(np.random.randn(10))
df['B'] = np.random.randint(-1,2,size=10)
df['C'] = range(1,11)
df['D'] = range(12,22)
# first figure
fig1=plt.figure(1)
ax_boxplt1 = df[['A','B']].boxplot()
# second figure
fig2=plt.figure(2)
ax_boxplt2 = df[['C','D']].boxplot()
plt.show()

I want to add a "spheres" to my data cluster

I want to add a kind of "spheres" to my data cluster.
My data cluster is this, which does not have ""spheres".
And this is my code
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import pandas as pd
from sklearn.cluster import KMeans
MY_FILE='total_watt.csv'
date = []
consumption = []
df = pd.read_csv(MY_FILE, parse_dates=[0], index_col=[0])
df = df.resample('1D', how='sum')
df = df.dropna()
date = df.index.tolist()
date = [x.strftime('%Y-%m-%d') for x in date]
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
date_numeric = encoder.fit_transform(date)
consumption = df[df.columns[0]].values
X = np.array([date_numeric, consumption]).T
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
print(centroids)
print(labels)
fig, ax = plt.subplots(figsize=(10,8))
rect = fig.patch
rect.set_facecolor('#2D2B2B')
colors = ["b.","r.","g."]
for i in range(len(X)):
print("coordinate:",encoder.inverse_transform(X[i,0].astype(int)), X[i,1], "label:", labels[i])
ax.plot(X[i][0], X[i][1], colors[labels[i]], markersize = 10)
ax.scatter(centroids[:, 0],centroids[:, 1], marker = "x", s=150, linewidths = 5, zorder = 10)
a = np.arange(0, len(X), 5)
ax.set_xticks(a)
ax.set_xticklabels(encoder.inverse_transform(a.astype(int)))
ax.tick_params(axis='x', colors='lightseagreen')
ax.tick_params(axis='y', colors='lightseagreen')
plt.scatter(centroids[:, 0],centroids[:, 1], marker = "x", s=100, c="black", linewidths = 5, zorder = 10)
ax.set_title('Energy consumptions Clusters (high/medium/low)', color='gold')
ax.set_xlabel('time', color='gold')
ax.set_ylabel('date(year 2011)', color='gold')
plt.show()
"Spheres" is area which surroundings plot(cluster), as this picture.
I tried to google it.
But when I type "matplotlib spheres", I could not get any result..
The sample graph in your post looks like resulting from Generalized Gaussian Mixture where each sphere is a Gaussian 2-d density.
I'll write up a sample code shortly to demonstrate how to use GMM on your dataset and do this kind of plotting.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import pandas as pd
# code changes here
# ===========================================
from sklearn.mixture import GMM
# ===========================================
from sklearn.preprocessing import LabelEncoder
# replace it with you file path
MY_FILE='/home/Jian/Downloads/total_watt.csv'
df = pd.read_csv(MY_FILE, parse_dates=[0], index_col=[0])
df = df.resample('1D', how='sum')
df = df.dropna()
date = df.index.tolist()
date = [x.strftime('%Y-%m-%d') for x in date]
encoder = LabelEncoder()
date_numeric = encoder.fit_transform(date)
consumption = df[df.columns[0]].values
X = np.array([date_numeric, consumption]).T
# code changes here
# ===========================================
gmm = GMM(n_components=3, random_state=0)
gmm.fit(X)
y_pred = gmm.predict(X)
# the center is given by mean
gmm.means_
# ===========================================
import matplotlib as mpl
fig, ax = plt.subplots(figsize=(10,8))
for i, color in enumerate('rgb'):
# sphere background
width, height = 2 * 1.96 * np.sqrt(np.diagonal(gmm._get_covars()[i]))
ell = mpl.patches.Ellipse(gmm.means_[i], width, height, color=color)
ell.set_alpha(0.1)
ax.add_artist(ell)
# data points
X_data = X[y_pred == i]
ax.scatter(X_data[:,0], X_data[:,1], color=color)
# center
ax.scatter(gmm.means_[i][0], gmm.means_[i][1], marker='x', s=100, c=color)
ax.set_title('Energy consumptions Clusters (high/medium/low)', color='gold')
ax.set_xlabel('time', color='gold')
ax.set_ylabel('date(year 2011)', color='gold')
a = np.arange(0, len(X), 5)
ax.set_xticks(a)
ax.set_xticklabels(encoder.inverse_transform(a.astype(int)))
ax.tick_params(axis='x', colors='lightseagreen')
ax.tick_params(axis='y', colors='lightseagreen')

Using plot_date change node icon type

When using plot_date, how do you change some of the nodes in the set from a circle to an X?
For example all nodes are circles except the 3, 8, and 19 node, which are all Xs.
I have used a sample dataset, since you didnt provided any.
import pandas as pd
import matplotlib.pyplot as plt
data = {'2014-11-15':1, '2014-11-16':2, '2014-11-17':3, '2014-11-18':5, '2014-11-19':8, '2014-11-20': 19}
df = pd.DataFrame(list(data.iteritems()), columns=['Date', 'val'])
df = df.set_index(pd.to_datetime(df.Date, format='%Y-%m-%d'))
o_list = []
x_list = []
check_list = [3,8,19]
for index in df.index:
if df.val[index] in check_list:
o_list.append(index)
else:
x_list.append(index)
df_o = df.ix[o_list]
df_x = df.ix[x_list]
fig = plt.figure()
plt.plot_date(df_o.index, df_o.val, 'bo')
plt.plot_date(df_x.index, df_x.val, 'bx')
plt.show()