I am trying to create a bar chart with a trendline. I can do this in excel and would like to automate the process. xlswriter is pretty easy to use and I have replicated the bar chart it is just the trend line that does not work for me. It seems to add 2 elements the line and an additional bar on the top of each stack.
This is the code to create the chart on the left
import xlsxwriter
# create worbook, workseet and chart
workbook = xlsxwriter.Workbook("Example.xlsx")
worksheet = workbook.add_worksheet()
chart1 = workbook.add_chart({'type': 'column', 'subtype': 'stacked'})
# Add the worksheet data
headings = ['Model 1', 'Model 2', 'Capacity']
data = [
[10, 40, 50, 20, 10, 50],
[30, 60, 70, 50, 40, 30],
[20, 30, 40, 40, 30, 30]
]
worksheet.write_row('A1', headings)
worksheet.write_column('A2', data[0])
worksheet.write_column('B2', data[1])
worksheet.write_column('C2', data[2])
# Configure the first series.
chart1.add_series({
'name': '=Sheet1!$A$1',
'values': '=Sheet1!$A$2:$A$7',
})
# Configure the first series.
chart1.add_series({
'name': '=Sheet1!$B$1',
'values': '=Sheet1!$B$2:$B$7',
})
chart1.add_series({
'name': '=Sheet1!$C$1',
'values': '=Sheet1!$C$2:$C$7',
'trendline': {'type': 'linear'},
})
# Set an Excel chart style.
chart1.set_style(11)
# Add a chart title
chart1.set_title ({'name': 'xlsxwriter chart'})
# Insert the chart into the worksheet (with an offset).
worksheet.insert_chart('F1', chart1)
# Finally, close the Excel file
workbook.close()
The bars select the data I am trying to insert as a trend line. Any help would be appreciated.
It looks like what you are trying to do is to add a secondary line chart rather than a trendline. You can do this with the XlsxWriter chart.combine() method.
Like this:
import xlsxwriter
# create worbook, workseet and chart
workbook = xlsxwriter.Workbook("Example.xlsx")
worksheet = workbook.add_worksheet()
chart1 = workbook.add_chart({'type': 'column', 'subtype': 'stacked'})
# Add the worksheet data
headings = ['Model 1', 'Model 2', 'Capacity']
data = [
[10, 40, 50, 20, 10, 50],
[30, 60, 70, 50, 40, 30],
[20, 30, 40, 40, 30, 30]
]
worksheet.write_row('A1', headings)
worksheet.write_column('A2', data[0])
worksheet.write_column('B2', data[1])
worksheet.write_column('C2', data[2])
# Configure the first series.
chart1.add_series({
'name': '=Sheet1!$A$1',
'values': '=Sheet1!$A$2:$A$7',
})
# Configure the first series.
chart1.add_series({
'name': '=Sheet1!$B$1',
'values': '=Sheet1!$B$2:$B$7',
})
# Add a chart title
chart1.set_title ({'name': 'xlsxwriter chart'})
# Create a second line chart.
chart2 = workbook.add_chart({'type': 'line'})
chart2.add_series({
'name': '=Sheet1!$C$1',
'values': '=Sheet1!$C$2:$C$7',
})
# Combine the charts.
chart1.combine(chart2)
# Insert the chart into the worksheet (with an offset).
worksheet.insert_chart('F1', chart1)
# Finally, close the Excel file
workbook.close()
Output:
Related
I want to create a stacked barplot using Seaborn with this MiltiIndex DataFrame
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
The code I'm using for the plot is:
fontP = FontProperties()
fontP.set_size('medium')
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
kwargs = {'alpha':0.5}
plt.figure(figsize=(12, 9))
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[0]], '#'],
color=colors[df2.index[0][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[1]], '#'],
color=colors[df2.index[1][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[2]], '#'],
color=colors[df2.index[2][1]], **kwargs)
bottom_plot = sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[3]], '#'],
color=colors[df2.index[3][1]], **kwargs)
bar1 = plt.Rectangle((0, 0), 1, 1, fc='green', edgecolor="None")
bar2 = plt.Rectangle((0, 0), 0, 0, fc='yellow', edgecolor="None")
bar3 = plt.Rectangle((0, 0), 2, 2, fc='red', edgecolor="None")
bar4 = plt.Rectangle((0, 0), 3, 3, fc='blue', edgecolor="None")
l = plt.legend([bar1, bar2, bar3, bar4], [
"TE", "M",
'MR', 'SS'
],
bbox_to_anchor=(0.95, 1),
loc='upper left',
prop=fontP)
l.draw_frame(False)
sns.despine()
bottom_plot.set_ylabel("#")
axes = plt.gca()
axes.yaxis.grid()
And I get:
My problem is the order of the colors in the second bar ('TTo'), I want the colors to be automatically selected based on the level 1 index value (['TE', 'SS', 'M', 'MR']) so that they are ordered correctly. Further down the one with the highest value with its corresponding color, in front the next one with the next highest value and its color and so on, as the first bar shows ('JC).
Maybe there is a simpler way to do this in Seaborn than the one I'm using...
I'm not sure how to create such a plot with seaborn. Here is a way to create it with a loop through the rows and adding one matplotlib bar at each step:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
sns.set()
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
prev_index0 = None
for (index0, index1), quantity in df.itertuples():
if index0 != prev_index0:
bottom = 0
plt.bar(index0, quantity, fc=colors[index1], ec='none', bottom=bottom, label=index1)
bottom += quantity
prev_index0 = index0
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c) for c in colors]
plt.legend(handles=legend_handles)
plt.show()
To plot the bars back to front without stacking, the code can be simplified:
colors = {'TE': 'forestgreen', 'SS': 'cornflowerblue', 'M': 'gold', 'MR': 'crimson'}
for (index0, index1), quantity in df.itertuples():
plt.bar(index0, quantity, fc=colors[index1], ec='none', label=index1)
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c, ec='black') for c in colors]
plt.legend(handles=legend_handles, bbox_to_anchor=(1.02, 1.02), loc='upper left')
plt.tight_layout()
I use a histogram to display the distribution. Everything works fine if the spacing of the bins is uniform. But if the interval is different, then the bar width is appropriate (as expected). Is there a way to set the width of the bar independent of the size of the bins ?
This is what i have
This what i trying to draw
from matplotlib import pyplot as plt
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
ax1.set_xticks(my_bins)
ax1.hist(my_data, my_bins, histtype='bar', rwidth=0.9,)
fig1.show()
I cannot mark your question as a duplicate, but I think my answer to this question might be what you are looking for?
I'm not sure how you'll make sense of the result, but you can use numpy.histogram to calculate the height of your bars, then plot those directly against an arbitrary x-scale.
x = np.random.normal(loc=50, scale=200, size=(2000,))
bins = [0,1,10,20,30,40,50,75,100]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(x, bins=bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(x, bins=bins)
ax.bar(range(len(bins)-1),h, width=1, edgecolor='k')
EDIT Here's with the adjustment to the x-tick labels so that the correspondence is easier to see.
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(my_data, bins=my_bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(my_data, bins=my_bins)
ax.bar(range(len(my_bins)-1),h, width=1, edgecolor='k')
ax.set_xticks(range(len(my_bins)-1))
ax.set_xticklabels(my_bins[:-1])
I'm trying to generate a stacked horizontal bar chart in matplotlib. The issue I am facing is that the width of the bars does not fully fill the available width of the plotting area (additional space on the right).
Unfortunately I couldn't find any information on this online.
What could I do to resolve this?
Chart with additional space on the right of the bars
measures = ("A", "B", "C", "D", "A", "B", "C", "D", "A", "B")
measure_bars = y_pos = np.arange(len(measures))
yes_data = [10, 10, 10, 10, 15, 10, 10, 10, 10, 10]
number_of_answers = [20, 30, 20, 20, 20, 20, 20, 20, 20, 20]
font = {'fontname': 'Arial', 'color': '#10384f'}
yes_data = [i / j * 100 for i, j in zip(yes_data, number_of_answers)]
no_data = [100 - i for i in yes_data]
bar_width = 0.6
plt.rcParams['xtick.top'] = plt.rcParams['xtick.labeltop'] = True
plt.rcParams['xtick.bottom'] = plt.rcParams['xtick.labelbottom'] = False
fig = plt.figure()
plt.barh(measure_bars, yes_data, color='#89d329', height=bar_width, zorder=2)
plt.barh(measure_bars, no_data, left=yes_data, color='#ff3162', height=bar_width, zorder=3)
plt.grid(color=font["color"], zorder=0)
plt.yticks(measure_bars, measures, **font)
plt.title("TECHNICAL AND ORGANIZATIONAL MEASURES", fontweight="bold", size="16", x=0.5, y=1.1, **font)
ax = plt.axes()
ax.xaxis.set_major_formatter(PercentFormatter())
ax.spines['bottom'].set_color(font["color"])
ax.spines['top'].set_color(font["color"])
ax.spines['right'].set_color(font["color"])
ax.spines['left'].set_color(font["color"])
ax.xaxis.label.set_color(font["color"])
ax.tick_params(axis='x', colors=font["color"])
for tick in ax.get_xticklabels():
tick.set_fontname(font["fontname"])
ax.xaxis.set_ticks(np.arange(0.0, 100.1, 10))
plt.gca().legend(('Yes', 'No'), bbox_to_anchor=(0.7, 0), ncol=2, shadow=False)
plt.show()
Please add (somewhere in the middle)
ax.set_xlim(0, 1)
I'm using Python 3.6.3 and openpyxl 2.5.4
I wrote some code and noticed that setting my chart title with chart.title = "Test Heading" does nothing. As a sanity check I copied and running the example from here:
from openpyxl import Workbook
from openpyxl.chart import (
ScatterChart,
Reference,
Series,
)
wb = Workbook()
ws = wb.active
rows = [
['Size', 'Batch 1', 'Batch 2'],
[2, 40, 30],
[3, 40, 25],
[4, 50, 30],
[5, 30, 25],
[6, 25, 35],
[7, 20, 40],
]
for row in rows:
ws.append(row)
chart = ScatterChart()
chart.title = "Scatter Chart"
chart.style = 13
chart.x_axis.title = 'Size'
chart.y_axis.title = 'Percentage'
xvalues = Reference(ws, min_col=1, min_row=2, max_row=7)
for i in range(2, 4):
values = Reference(ws, min_col=i, min_row=1, max_row=7)
series = Series(values, xvalues, title_from_data=True)
chart.series.append(series)
ws.add_chart(chart, "A10")
wb.save("scatter.xlsx")
Sadly the title in my sample output is still missing:
Oddly, changing title_from_data=True to title_from_data=False also seems to have no effect on the contents of the chart.
This looks very much like a bug in the application you're using to view the file, which I suspect is LibreOffice.
I have a frequency analysis of words said in episodes of my favorite show. I'm making a plot.barh(s1e1_y, s1e1_x) but it's sorting by words instead of values.
The output of >>> s1e1_y
is
['know', 'go', 'now', 'here', 'gonna', 'can', 'them', 'think', 'come', 'time', 'got', 'elliot', 'talk', 'out', 'night', 'been', 'then', 'need', 'world', "what's"]
and >>>s1e1_x
[42, 30, 26, 25, 24, 22, 20, 19, 19, 18, 18, 18, 17, 17, 15, 15, 14, 14, 13, 13]
When the plots are actually plotted, the graph's y axis ticks are sorted alphabetically even though the plotting list is unsorted...
s1e1_wordlist = []
s1e1_count = []
for word, count in s1e01:
if((word[:-1] in excluded_words) == False):
s1e1_wordlist.append(word[:-1])
s1e1_count.append(int(count))
s1e1_sorted = sorted(list(sorted(zip(s1e1_count, s1e1_wordlist))),
reverse=True)
s1e1_20 = []
for i in range(0,20):
s1e1_20.append(s1e1_sorted[i])
s1e1_x = []
s1e1_y = []
for count, word in s1e1_20:
s1e1_x.append(word)
s1e1_y.append(count)
plot.figure(1, figsize=(20,20))
plot.subplot(341)
plot.title('Season1 : Episode 1')
plot.tick_params(axis='y',labelsize=8)
plot.barh(s1e1_x, s1e1_y)
From matplotlib 2.1 on you can plot categorical variables. This allows to plot plt.bar(["apple","cherry","banana"], [1,2,3]). However in matplotlib 2.1 the output will be sorted by category, hence alphabetically. This was considered as bug and is changed in matplotlib 2.2 (see this PR).
In matplotlib 2.2 the bar plot would hence preserve the order.
In matplotlib 2.1, you would plot the data as numeric data as in any version prior to 2.1. This means to plot the numbers against their index and to set the labels accordingly.
w = ['know', 'go', 'now', 'here', 'gonna', 'can', 'them', 'think', 'come',
'time', 'got', 'elliot', 'talk', 'out', 'night', 'been', 'then', 'need',
'world', "what's"]
n = [42, 30, 26, 25, 24, 22, 20, 19, 19, 18, 18, 18, 17, 17, 15, 15, 14, 14, 13, 13]
import matplotlib.pyplot as plt
import numpy as np
plt.barh(range(len(w)),n)
plt.yticks(range(len(w)),w)
plt.show()
Ok you seem to have a lot of spurious code in your example which isn't relevant to the problem as you've described it but assuming you don't want the y axis to sort alphabetically then you need to zip your two lists into a dataframe then plot the dataframe as follows
df = pd.DataFrame(list(zip(s1e1_y,s1e1_x))).set_index(1)
df.plot.barh()
This then produces the following