In a matplotlib plot consisting of histogram subplots, how can the height and bar edges of one histogram be changed? - matplotlib

I've got a little function that generates a plot of two subplots. One subplot is two histograms overlaid and the other subplot is the results of dividing one histogram by the other.
For the second subplot, I don't know how to remove the edges between histogram bars (like the one above it) and I don't know how to reduce its height (such that it is, say, half the height of the one above it). I'm also not sure how to set the title to the very top of the plot.
How could these things be done?
My code is as follows:
import numpy
import matplotlib.pyplot
import datavision # sudo pip install datavision
import shijian # sudo pip install shijian
def main():
    """
    Generate two random samples and save a comparison plot of them.

    Each sample holds 120 values drawn with numpy.random.normal(2, 2); the
    resulting figure is written to "test.png".
    """
    sample_a = numpy.random.normal(2, 2, size = 120)
    sample_b = numpy.random.normal(2, 2, size = 120)
    # Collect the presentation options separately from the data arguments.
    options = dict(
        label_1       = "a",
        label_2       = "b",
        normalize     = True,
        label_ratio_x = "frequency",
        label_y       = "",
        title         = "comparison of a and b",
        filename      = "test.png"
    )
    save_histogram_comparison_matplotlib(
        values_1 = sample_a,
        values_2 = sample_b,
        **options
    )
def save_histogram_comparison_matplotlib(
    values_1       = None,
    values_2       = None,
    filename       = None,
    number_of_bins = None,
    normalize      = True,
    label_x        = "",
    label_y        = None,
    label_ratio_x  = "frequency",
    label_ratio_y  = "ratio",
    title          = None,
    label_1        = "1",
    label_2        = "2",
    overwrite      = True,
    LaTeX          = False
    ):
    """
    Save a two-panel histogram comparison of two samples to an image file.

    The upper axes show both samples as overlaid step-filled histograms on
    common bins. The lower axes show, bin by bin, the first histogram
    divided by the second, drawn as a bar chart.

    Parameters:
        values_1, values_2: the two samples to histogram.
        filename: output path. When None, a name derived from the title is
            requested from shijian.propose_filename.
        number_of_bins: number of bins. When None, the integer mean of the
            two values returned by datavision.propose_number_of_bins.
        normalize: forwarded to the histogram as its density flag.
        label_x, label_y: axis labels of the upper (histogram) axes.
        label_ratio_x, label_ratio_y: axis labels of the lower (ratio) axes.
        title: plot title; skipped when None.
        label_1, label_2: legend labels of the two samples.
        overwrite: forwarded to shijian.propose_filename.
        LaTeX: when True, switch matplotlib text rendering to LaTeX with a
            serif font.
    """
    matplotlib.pyplot.ioff()
    if LaTeX is True:
        matplotlib.pyplot.rc("text", usetex = True)
        matplotlib.pyplot.rc("font", family = "serif")
    if number_of_bins is None:
        number_of_bins_1 = datavision.propose_number_of_bins(values_1)
        number_of_bins_2 = datavision.propose_number_of_bins(values_2)
        number_of_bins = int((number_of_bins_1 + number_of_bins_2) / 2)
    if filename is None:
        # A missing title used to raise AttributeError here; fall back to a
        # generic base name instead.
        if title is None:
            base_name = "histogram_comparison"
        else:
            base_name = title.replace(" ", "_")
        filename = shijian.propose_filename(
            filename  = base_name + ".png",
            overwrite = overwrite
        )
    values = [values_1, values_2]
    bar_width = 0.8
    figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(nrows = 2)
    try:
        # "density" replaces the "normed" keyword, which matplotlib removed.
        ns, bins, _patches = axis_1.hist(
            values,
            density   = normalize,
            histtype  = "stepfilled",
            bins      = number_of_bins,
            alpha     = 0.5,
            label     = [label_1, label_2],
            rwidth    = bar_width,
            linewidth = 0
        )
        axis_1.legend()
        # NOTE(review): bins that are empty in the second sample make this
        # ratio inf or nan -- confirm that is acceptable for callers.
        axis_2.bar(
            bins[:-1],
            ns[0] / ns[1],
            edgecolor = "#ffffff", # "none"
            alpha     = 1,
            width     = bins[1] - bins[0]
        )
        axis_1.set_xlabel(label_x)
        axis_1.set_ylabel(label_y)
        axis_2.set_xlabel(label_ratio_x)
        axis_2.set_ylabel(label_ratio_y)
        if title is not None:
            matplotlib.pyplot.title(title)
        matplotlib.pyplot.savefig(filename)
    finally:
        # Release the figure even when plotting or saving fails.
        matplotlib.pyplot.close(figure)
if __name__ == "__main__":
main()

You have 3 questions:
1. How to remove the edges between histogram bars
Here, you can set the linewidth to 0 for the call to bar:
axis_2.bar(
bins[:-1],
ns[0] / ns[1],
linewidth=0,
alpha = 1,
width = bins[1] - bins[0]
)
2. How to reduce the height of the second subplot
Here, we can send kwargs to gridspec when we create the subplots. The relevant option is height_ratios. We send them using the gridspec_kw option to subplots. If we set it to (2,1), that makes the first subplot twice the height of the second one.
figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(
nrows = 2,
gridspec_kw={'height_ratios':(2,1)}
)
3. How to set the title to the very top of the plot
When you call matplotlib.pyplot.title(title), that is actually setting the title of the currently active subplot axes, which in this case is axis_2. To set the title of the overall figure, you can set the suptitle:
matplotlib.pyplot.suptitle(title)
Or alternatively, since you already named your figure, you can use:
figure.suptitle(title)
And likewise, you could use:
figure.savefig(filename)
to save a few keystrokes.
Putting it all together:

Related

Series plot - Geopandas

I don't have fully working code, but a snippet of my code is as follows. I'm trying to use geopandas with matplotlib to plot a map with links and points.
shape_file = os.path.join(os.getcwd(), "Healthboard")
healthboard = gp.read_file(os.path.join(shape_file, "healthboard.shp"))
healthboard = healthboard.to_crs({'init': 'epsg:4326'}) # re-projection
geo_df1 = geo_df1[geo_df1['HealthBoardArea2019Code'] == string1]
geo = geo_df[geo_df['Healthboard '] == string2]
new_shape_file = os.path.join(os.getcwd(), "Council_Shapefile")
council_to_healtboard = pd.read_csv("council_to_healthboard.csv")
council_to_healthboard = council_to_healtboard.rename(columns = {'CA': 'Council_area_code'})
council = gp.read_file(os.path.join(new_shape_file, "Council_shapefile.shp"))
council = council.to_crs({'init': 'epsg:4326'})
council = council.rename(columns = {'la_s_code':'Council_area_code'})
df = council.merge(council_to_healthboard, on = 'Council_area_code', how ='inner')
# Plotting stuff
fig, ax = plt.subplots(figsize=(15,15))
geo_df1.plot(ax = ax, markersize=35, color = "blue", marker = "*", label = "Postcode Sector")
geo.geometry.plot(ax = ax, color = "red", markersize=20, alpha = 0.8, label = 'SiteName')
#healthboard[healthboard["HBName"]=="Lothian"].plot(ax = ax, alpha = 0.6)
#healthboard[healthboard["HBName"]=="Lothian"].boundary.plot(ax = ax, color = "black", alpha = 0.6)
df[df["HB"]=="S08000024"].boundary.plot(ax =ax, color = "black", alpha = 0.1)
df[df["HB"]=="S08000024"].plot(ax =ax, cmap = "viridis", alpha = 0.1)
links_gp.plot(ax =ax, alpha = 0.25, color='brown', linestyle = "-")
My links_gp data covers 40 time periods, so I want to make a single plot with a button to adjust the time period or, if that is not possible, a series of 40 plots. I've tried numerous approaches but keep failing. I would really appreciate it if someone could guide me on this.
I'm aware that you are using matplotlib, but if you don't mind using bokeh instead, you could use the following. To create an interactive plot with a possibility to adjust a parameter, bokeh provides a slider widget which can be used to change the plot based on a custom filter function.
An example from a geopandas dataframe with LineString geometries similar to the one you posted:
import geopandas as gpd
from bokeh.io import show, output_notebook
from bokeh.models import (CDSView, ColumnDataSource, CustomJS,
CustomJSFilter, Slider, Column)
from bokeh.layouts import column
from bokeh.plotting import figure
# prepare data source
links_gp['x'] = links_gp.apply(lambda row: list(row['geometry'].coords.xy[0]), axis=1)
links_gp['y'] = links_gp.apply(lambda row: list(row['geometry'].coords.xy[1]), axis=1)
# drop geometry column, because it can't be serialized to ColumnDataSource
links_gp.drop('geometry', axis=1, inplace=True)
linesource = ColumnDataSource(links_gp)
p = figure(title = 'Bokeh Time Slider',
plot_height = 500,
plot_width = 600,
toolbar_location = 'below',
tools = "pan, wheel_zoom, box_zoom, reset")
slider = Slider(title='Time Period', start=1, end=40, step=1, value=1)
# Callback triggers the filter when the slider moves
callback = CustomJS(args=dict(source=linesource),
code="""source.change.emit();""")
slider.js_on_change('value', callback)
# Custom filter that selects all lines of the time period based on the slider value
custom_filter = CustomJSFilter(args=dict(slider=slider),
code="""
var indices = [];
// iterate through rows of data source and check if time period value equals the slider value
for (var i = 0; i < source.get_length(); i++){
if (source.data['Time Period'][i] == slider.value){
indices.push(true);
} else {
indices.push(false);
}
}
return indices;
""")
# Use filter to determine which lines are visible
view = CDSView(source=linesource, filters=[custom_filter])
# plot lines to map
p.multi_line('x', 'y', source=linesource, color='red', line_width=3, view=view)
layout = column(p, slider)
show(layout)
This will be the result of the above code.

Shrink matplotlib parasite axis horizontally to take up approximately 25% of the image length

I have an image like the one below:
The issue is I need the curves to only take up about 25% - 30% of the image. In other words I need to shrink the size of the two parasite axes horizontally. Is this even possible?
Here is what I have so far:
"""
Plotting _____________________________________________________________________________________________________________
"""
fig = plt.figure(figsize=(20,15))
host1 = host_subplot(211, axes_class=AA.Axes)
plt.subplots_adjust(right=0.75)
#Create custom axes
cax1 = plt.axes(frameon=False)
# Now create parasite axis
par11 = host1.twiny()
par12 = host1.twiny()
top_offset = 50
new_fixed_axis1 = par12.get_grid_helper().new_fixed_axis
par12.axis["top"] = new_fixed_axis1(loc="top",
axes=par11,
offset=(0, top_offset))
par11.axis["top"].toggle(all=True)
par12.axis["top"].toggle(all=True)
# Bottom Axis
bottom_offset1 = -50
bottom_offset2 = -100
par21 = host1.twiny()
par22 = host1.twiny()
new_fixed_axis2 = par21.get_grid_helper().new_fixed_axis
par21.axis["bottom"] = new_fixed_axis2(loc="bottom",
axes=par12,
offset=(0, bottom_offset1))
# Set Host Axis Labels
host1.set_xlabel("UTC Time")
host1.set_ylabel("Elevation (km")
# Set Top Axis Labels
par11.set_xlabel("Sonde Potential Temperature (K)")
par12.set_xlabel("Sonde Relative Humidity %")
vmin, vmax = np.min(chan_1064), np.max(chan_1064)
im = host1.imshow(chan_1064, aspect="auto", cmap=get_a_color_map(), vmin=-2e-4, vmax=0.6e-2,
extent=(min(xs), max(xs), min(bin_alt_array), max(bin_alt_array)))
scatter = host1.scatter(xs, ys, s=100, color='gold')
host1.set_xlim(min(xs), max(xs))
fig.colorbar(im)
plt.draw()
leg = plt.legend( loc = 'lower right')
# Adjust Fonts
font = {'family' : 'normal',
'weight' : 'bold',
'size' : 12}
mpl.rc('font', **font)
plt.tight_layout()
plt.show()
Sorry if it's a simple solution but, I have not been able to figure it out for the life of me.

Setting z_order when working with ConnectionPatch

I am trying to produce a figure where the red line is not visible inside the green rectangle (i.e. within the middle panel). Setting the z-order seems to take effect only for the lower panel (subplot) and is ignored for the upper and middle ones. Can anyone help with this, please?
Alternatively, it would also work for me to plot only two subplots, top and bottom, such that the piece of the line that connects point X (in the bottom subplot) and point Y (in the top subplot) is not drawn in the region between the two panels. In other words, the line looks like a broken line going from X to the top of the bottom panel, then skipping the gap, and then continuing from min_y in the top panel all the way to Y.
I am planning to achieve this by setting color of rectangle to be white so that it overwrites these lines (but this does not work).
from matplotlib.patches import ConnectionPatch
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
fig, (ax_upper, ax_middle, ax_lower) = plt.subplots(3, 1, sharey = False)
ax_upper.spines['top'].set_visible(False)
ax_upper.spines['bottom'].set_visible(False)
ax_upper.spines['right'].set_visible(False)
ax_upper.get_xaxis().set_ticks([])
ax_middle.spines['left'].set_visible(False)
ax_middle.spines['right'].set_visible(False)
ax_middle.spines['top'].set_visible(False)
ax_middle.spines['bottom'].set_visible(False)
ax_middle.get_xaxis().set_ticks([])
ax_middle.get_yaxis().set_ticks([])
ax_lower.spines['top'].set_visible(False)
ax_lower.spines['right'].set_visible(False)
ax_upper.set_ylim(10, 100)
ax_lower.set_ylim(0, 10)
ax_lower.set_xlim(0, 100)
ax_upper.set_xlim(0, 100)
con = ConnectionPatch(xyA = (2, 2), xyB = (80,90), coordsA = "data", coordsB = "data", axesA = ax_lower, axesB = ax_upper)
ax_lower.add_artist(con)
con.set_zorder(1)
con.set_color("red")
con.set_linewidth(3)
con = ConnectionPatch(xyA = (95,5), xyB = (80, 90), coordsA = "data", coordsB = "data", axesA = ax_lower, axesB = ax_upper, lw=1)
ax_lower.add_artist(con)
con.set_zorder(1)
con.set_color("red")
con.set_linewidth(3)
ax_lower.plot([2], [2], marker="o", color = "red")
ax_upper.plot([80], [90], marker="o", color = "red", zorder = 2)
ax_lower.plot([95], [5], marker="o", color = "red")
ax_upper.plot([0,0],[0,0], label="class A", color = "red", zorder=1, marker = "o", )
ax_upper.legend(loc='upper left')
rect = Rectangle((0,0), 1, 1, linewidth=1, edgecolor='black', facecolor='green', zorder = 3)
ax_middle.add_patch(rect)
plt.show()
example output

how to define the layout for subplot using plotly

I used the code below to create a subplot with one row and two columns using the plotly package, but the two x-axes overlap (see the attached screenshot): the first plot's axis covers both plots. How can I solve this issue? I don't see a good example on the plotly site. Also, how can I remove the legend?
import plotly
import plotly.graph_objs as go
def plot(plot_dic, width=1000, **kwargs):
    """
    Render a plotly figure offline as a div and print it.

    plot_dic is handed to plotly.offline.plot together with any extra
    keyword arguments; output_type is always forced to 'div'. The returned
    markup is printed inside an '%angular' div whose width is `width` pixels.
    """
    div_options = dict(kwargs, output_type='div')
    rendered = plotly.offline.plot(plot_dic, **div_options)
    template = '%%angular <div style=" width: %spx"> %s </div>'
    print(template % (width, rendered))
# Create a trace
trace0 = go.Scatter(
x = np.arange(100)+1,
y = np.round(df[df['']=='value'].iloc[:,1:]*100, 2).values.reshape(100),
mode = 'lines+markers',
)
trace1 = go.Scatter(
x = np.arange(100)+1,
y = np.cumsum(np.round(df[df['']=='value'].iloc[:,1:]*100, 2).values).reshape(100),
mode = 'lines+markers',
)
fig = plotly.tools.make_subplots(rows=1, cols=2, subplot_titles=('Variance', 'Cumulative Variance')
# ,shared_xaxes=True, shared_yaxes=True
)
fig['layout'].update(height=500, width=800, title="Plot title"
,xaxis=dict(range = [1, 400],
dtick = 40,
showticklabels=True,
tickfont=dict(family='Arial, sans-serif', size=14, color='black'),
exponentformat='e',
showexponent='all')
,xaxis1=dict(range = [1, 400],
dtick = 40,
showticklabels=True,
tickfont=dict(family='Arial, sans-serif', size=14, color='black'),
exponentformat='e',
showexponent='all')
)
fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 1, 2)
plot(fig, width=600, show_link=False)
An alternative way to create the subplots while avoiding the overlapping x-axes is to use 2 rows and 1 column instead:
fig = plotly.tools.make_subplots(rows=2, cols=1, subplot_titles=('Variance', 'Cumulative Variance')
# ,shared_xaxes=True, shared_yaxes=True)

Adjust space between tick labels in matplotlib

I based my heatmap off of: Heatmap in matplotlib with pcolor?
I checked out How to change separation between tick labels and axis labels in Matplotlib but it wasn't what I needed
How do I fix the positions of the labels so they align with the ticks?
#!/usr/bin/python
import matplotlib.pyplot as plt
import numpy as np
import random
in_path = '/path/to/data'
in_file = open(in_path,'r').read().split('\r')
wd = '/'.join(in_path.split('/')[:-1]) + '/'
column_labels = [str(random.random()) + '_dsiu' for i in in_file[0].split('\t')[2:]]
row_labels = []
#Organize data for matrix population
D_cyano_counts = {}
for line in in_file[2:]:
t = line.split('\t')
D_cyano_counts[(t[0],t[1])] = [int(x) for x in t[2:]]
#Populate matrix
matrix = []
for entry in sorted(D_cyano_counts.items(), key = lambda x: (np.mean([int(bool(y)) for y in x[-1]]), np.mean(x[-1]))):#, np.mean(x[-1]))):
(taxon_id,cyano), counts = entry
normalized_counts = []
for i,j in zip([int(bool(y)) for y in counts], counts):
if i > 0:
normalized_counts.append(i * (5 + np.log(j)))
else:
normalized_counts.append(0)
#Labels
label_type = 'species'
if label_type == 'species': label = cyano
if label_type == 'taxon_id': label = taxon_id
row_labels.append(str(random.random()))
#Fill in matrix
matrix.append(normalized_counts)
matrix = np.array(matrix)
#Fig
fig, ax = plt.subplots()
heatmap = ax.pcolor(matrix, cmap=plt.cm.Greens, alpha = 0.7)
#Format
fig = plt.gcf()
#
ax.set_frame_on(False)
#
font = {'size':3}
ax.xaxis.tick_top()
ax.set_xticks([i + 0.5 for i in range(len(column_labels))])
ax.set_yticks([i + 0.5 for i in range(len(row_labels))])
ax.set_xticklabels(column_labels, rotation = (45), fontsize = 10, va='bottom')#, fontweight = 'demi')
ax.set_yticklabels(row_labels, fontsize = 9, fontstyle='italic')
cbar = plt.colorbar(heatmap)
help(ax.set_xticklabels)
ax.margins(x=0.01,y=0.01)
fig.set_size_inches(20, 13)
plt.savefig('figure.png')
You have to set the horizontal alignment of the labels to left in your case; they are centered by default.
The link from @Jean-Sébastien contains your answer:
ax.set_xticklabels(column_labels, rotation = (45), fontsize = 10, va='bottom', ha='left')