For a given bar plot, like
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(10)
y_bot = np.linspace(30, 50, 10)
y_dif = np.linspace(10, 5, 10)
plt.bar(x, y_dif, bottom=y_bot)
I would like to have whiskers (like in a boxplot), instead of bars:
How can I edit the bars to appear as whiskers?
You can use plt.errorbar() as follows:
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(10)
y_bot = np.linspace(30, 50, 10)
y_dif = np.linspace(10, 5, 10)
plt.bar(x, y_dif, bottom=y_bot, color='skyblue')
plt.errorbar(x, y_bot, yerr=(np.zeros_like(y_bot), y_dif), capsize=10, ecolor='black', ls='', lw=5, capthick=5)
plt.gca().use_sticky_edges = False # remove stickyness due to plt.bar()
plt.xticks(x)
plt.tight_layout()
plt.show()
Related
I want to connect airplanes in origin (lat_1 lon_1) to dest(lat_2 lon_2). I use these data.
callsign
latitude_1
longitude_1
latitude_2
longitude_2
0
HBAL102
-4.82114
-76.3194
-4.5249
-79.0103
1
AUA1028
-33.9635
151.181
48.1174
16.55
2
ABW120
41.9659
-87.8832
55.9835
37.4958
3
CSN461
33.9363
-118.414
50.0357
8.5723
4
ETH3730
25.3864
55.4221
50.6342
5.43903
But unfortunately, I would get an incorrect result when creating LineString with shapely. I used everything like rotate and affine but it didn't correct.
Code:
cols = pd.read_csv("/content/dirct_lines.csv",sep=";")
line = cols[["callsign","latitude_1","longitude_1","latitude_2","longitude_2"]].dropna()
line['geometry'] = line.apply(lambda x: [(x['latitude_1'],
x['longitude_1']),
(x['latitude_2'],
x['longitude_2'])], axis = 1)
geoline = gpd.GeoDataFrame(line,geometry="geometry",
crs="EPSG:4326")
import matplotlib.pyplot as plt
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
ax = world.plot(figsize=(14,9),
color='white', edgecolor='black')
geoline.plot(figsize=(14,9),ax=ax,facecolor = 'lightgrey', linewidth = 1.75,
edgecolor = 'red',
alpha = 2)
plt.show()
Shapely Output:
something that was interesting for me was that when I use Matplotlib to create lines everything is correct.
Code:
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection=ccrs.PlateCarree())
ax.stock_img()
org_lon, org_lat = cols["longitude_1"], cols["latitude_1"]
dst_lon, dst_lat = cols["longitude_2"], cols["latitude_2"]
plt.plot([org_lon, dst_lon], [org_lat, dst_lat],
color='black', linewidth=0.5, marker='_',
transform=ccrs.PlateCarree()
)
plt.savefig(f"fight_path.png",dpi=60,facecolor = None, bbox_inches = 'tight', pad_inches = None)
plt.show()
Matplotlib Output:
What is the problem?
why isn't correct by shapely?
it's just the way you are creating the geometry. Below works correctly.
import io
import geopandas as gpd
import pandas as pd
import shapely.geometry
df = pd.read_csv(
io.StringIO(
"""callsign,latitude_1,longitude_1,latitude_2,longitude_2
HBAL102,-4.82114,-76.3194,-4.5249,-79.0103
AUA1028,-33.9635,151.181,48.1174,16.55
ABW120,41.9659,-87.8832,55.9835,37.4958
CSN461,33.9363,-118.414,50.0357,8.5723
ETH3730,25.3864,55.4221,50.6342,5.43903
"""
)
)
geoline = gpd.GeoDataFrame(
geometry=[
shapely.geometry.LineString(points)
for points in zip(
gpd.points_from_xy(df["longitude_1"], df["latitude_1"]),
gpd.points_from_xy(df["longitude_2"], df["latitude_2"]),
)
],
data=df,
)
import matplotlib.pyplot as plt
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
ax = world.plot(figsize=(14, 9), color="white", edgecolor="black")
geoline.plot(
figsize=(14, 9),
ax=ax,
facecolor="lightgrey",
linewidth=1.75,
edgecolor="red",
)
plt.show()
I am trying to create a heatmap displaying correlation coefficient values. I'm quite new at this, but the code below would annotate in multiple decimal places, whereas i'm trying to narrow down to 2 d.p.
Does anyone have experience with this?
import pandas_datareader.data as web
import pandas as pd
import datetime as dt
import csv
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import seaborn as sns
style.use('ggplot')
def visualize_data():
df = pd.read_csv('sti_joined.csv')
df.set_index('Date', inplace=True)
df_corr = df.pct_change().corr()
print(df_corr.head())
data = df_corr.values
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
# heatmap = ax.pcolor(data, cmap=plt.cm.get_cmap('RdYlGn'))
heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
fig.colorbar(heatmap)
ax.set_xticks(np.arange(data.shape[0]) + 0.5, minor=False)
ax.set_yticks(np.arange(data.shape[1]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
for y in range(data.shape[0]):
for x in range(data.shape[1]):
plt.text(x + 0.5, y + 0.5, '%.4f' % data[y, x],
horizontalalignment='center',
verticalalignment='center',
)
column_labels = df_corr.columns
row_labels = df_corr.index
ax.set_xticklabels(column_labels)
ax.set_yticklabels(row_labels)
plt.xticks(rotation=90)
heatmap.set_clim(-1,1)
plt.tight_layout()
plt.show()
visualize_data()
Instead of '%.4f' % data[y, x], you can try using something like
'{0:.2f}'.format(data[y,x])
I would like to plot the same as shown in the picture( but only the red part). The curve is a kernel density estimate based only on the X-values (the y-values are irrelevant and actually all 1,2 or 3. It is here just plotted like this to distinguish between red an blue. I have plotted the scatterplot, but how can I include the kernel density curve on the scatterplot? (the black dotted lines in the curve are just the quartiles and the median).
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KernelDensity
%matplotlib inline
# Change plotting style to ggplot
plt.style.use('ggplot')
from matplotlib.font_manager import FontProperties
X_plot = np.linspace(0, 30, 1000)[:, np.newaxis]
X1 = df[df['Zustandsklasse']==1]['Verweildauer'].values.reshape(-1,1)
X2 = df[df['Zustandsklasse']==2]['Verweildauer'].values.reshape(-1,1)
X3 = df[df['Zustandsklasse']==3]['Verweildauer'].values.reshape(-1,1)
#print(X1)
ax=sns.scatterplot(x="Verweildauer", y="CS_bandwith", data=df, legend="full", alpha=1)
kde=KernelDensity(kernel='gaussian').fit(X1)
log_dens = kde.score_samples(X_plot)
ax.plot(X_plot[:,0], np.exp(log_dens), color ="blue", linestyle="-", label="Gaussian Kernel")
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.invert_yaxis()
plt.ylim(5.5, .5)
ax.set_ylabel("Zustandsklasse")
ax.set_xlabel("Verweildauer in Jahren")
handles, labels = ax.get_legend_handles_labels()
# create the legend again skipping this first entry
leg = ax.legend(handles[1:], labels[1:], loc="lower right", ncol=2, facecolor='silver', fontsize= 7)
ax.set_xticks(np.arange(0, 30, 5))
ax2 = ax.twinx()
#get the ticks at the same heights as the left axis
ax2.set_ylim(ax.get_ylim())
s=[(df["Zustandsklasse"] == t).sum() for t in range(1, 6)]
s.insert(0, 0)
print(s)
ax2.set_yticklabels(s)
ax2.set_ylim(ax.get_ylim())
ax2.set_ylabel("Anzahl Beobachtungen")
ax2.grid(False)
#plt.tight_layout()
plt.show()
Plotting target
Whats is plotted with the code above
It's much easier if you use subplots. Here is an example with seaborn's Titanic dataset:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
titanic = sns.load_dataset('titanic')
fig, ax = plt.subplots(nrows=3, sharex=True)
ax[2].set_xlabel('Age')
for i in [1, 2, 3]:
age_i = titanic[titanic['pclass'] == i]['age']
ax[i-1].scatter(age_i, [0] * len(age_i))
sns.kdeplot(age_i, ax=ax[i-1], shade=True, legend=False)
ax[i-1].set_yticks([])
ax[i-1].set_ylim(-0.01)
ax[i-1].set_ylabel('Class ' + str(i))
Using the following example code in a Jupyter notebook:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
sns.set()
g = sns.relplot(data=df, x='a', y='b', kind='scatter');
g.set(xlim=(0, 1))
g.set(ylim=(0, 1));
The resulting plot shows the data points, but I would also like to have vertical drop lines and occasionally horizontal ones as well. To clarify what I mean by droplines, here is a mockup of the actual vs. the desired output:
Update: A little more complex input that makes it harder to manually draw the lines:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
df = pd.DataFrame(np.random.rand(20, 3), columns=['a', 'b', 'c'])
df['d'] = ['apples', 'bananas', 'cherries', 'dates'] * 5
sns.set()
g = sns.relplot(data=df, x='a', y='b', hue='c', col='d', col_wrap=2, kind='scatter');
g.set(xlim=(0, 1))
g.set(ylim=(0, 1));
There are several ways to plot vertical/horizontal lines. One of the is to use hlines or vlines. This can be done using a loop for sake of ease.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(121)
fig, ax = plt.subplots()
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
sns.set()
g = sns.relplot(data=df, x='a', y='b', kind='scatter', color='blue', ax=ax);
for x, y in zip(df['a'], df['b']):
ax.hlines(y, 0, x, color='blue')
ax.vlines(x, 0, y, color='blue')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.close(g.fig)
I am using scientific notation in a colorbar within a 2D plot. I want to write 10^{-3} instead of e-3. I tried to change that (see code below) but it does not work...
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker
x = np.random.rand(100)
y = np.random.rand(100)
z = np.random.rand(100)*0.001
x=x.reshape((10,10))
y=y.reshape((10,10))
z=z.reshape((10,10))
fig, ax = plt.subplots(figsize=(8,6))
cs = ax.contourf(x,y,z, 10)
plt.xticks(fontsize=16,rotation=0)
plt.yticks(fontsize=16,rotation=0)
cbar = plt.colorbar(cs,)
cbar.set_label("test",fontsize = 22)
cbar.formatter.set_scientific(True)
cbar.formatter.set_powerlimits((0, 0))
cbar.ax.tick_params(labelsize=16)
cbar.ax.yaxis.get_offset_text().set_fontsize(22)
cbar.ax.xaxis.major.formatter._useMathText = True
cbar.update_ticks()
plt.savefig("test.png")
It seems you want a ScalarFormatter with mathtext in use.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker
x = np.tile(np.arange(10), 10).reshape((10,10))
y = np.repeat(np.arange(10),10).reshape((10,10))
z = np.sort(np.random.rand(100)*0.001).reshape((10,10))
fig, ax = plt.subplots(figsize=(8,6))
cs = ax.contourf(x,y,z, 10)
fmt = matplotlib.ticker.ScalarFormatter(useMathText=True)
fmt.set_powerlimits((0, 0))
cbar = plt.colorbar(cs,format=fmt)
plt.show()