plotnine two legend labels - ggplot2

I have the following code:
import pandas as pd
import numpy as np
import plotnine as p9
d = {'Scenario': ['SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO','SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO', 'SO','SO', 'SO', 'SO', 'SO'],
'database': ['ei1','ei1','ei1','ei1','ei1','ei1','ei1','ei2','ei2','ei2','ei2','ei2','ei2','ei2','ei1','ei1','ei1','ei1','ei1','ei1','ei2','ei2','ei2','ei2','ei2','ei2'],
'LCIA': ['Back','Back','Back','Back','Back','Back','Back','Back','Back','Back','Back','Back','Back','Back','Fore','Fore','Fore','Fore','Fore','Fore','Fore','Fore','Fore','Fore','Fore','Fore'],
'product':['X','X','Y','Y','Z','Z','Z','X','X','Y','Y','Z','Z','Z','W','W','S','S','T','R','W','W','S','S','T','R'],
'impact':['Climate','Marine','Climate','Marine','Climate','Marine','Freshwater','Climate','Marine','Climate','Marine','Climate','Marine','Freshwater','Climate','Marine','Climate','Marine','Marine','Marine','Climate','Marine','Climate','Marine','Marine','Marine'],
'value': [1,2,5,9,3,2,1,3,6,7,8,0,2,4,8,3,3,5,7,2,8,3,3,5,7,2]}
total_impacts = pd.DataFrame(d)
# Column chart of `value` per `database`, filled by `product`, with the axes
# flipped and one panel per (Scenario, impact) combination.
chart = (
    p9.ggplot(total_impacts, p9.aes('database', 'value', fill='product'))
    + p9.geom_col()
    + p9.coord_flip()
    + p9.facet_wrap(['Scenario', 'impact'], scales='free_x', ncol=2)
    + p9.labels.ylab('')
    + p9.theme_light(15)
    + p9.scale_fill_brewer(palette='Blues')
    + p9.theme(
        figure_size=(15, 15),
        subplots_adjust={'hspace': 0.2},
        legend_position="bottom",
    )
)
# Render without showing the figure, keeping both the figure and the plot object.
fig, plot = chart.draw(show=False, return_ggplot=True)
which yields this figure
I would like to have the Back and Fore legend titles (as shown in the desired outcome) and, if possible, with different color scales. Is this possible in plotnine ggplot? Thanks!

Related

Plot rectangle using defined x-axis in Matplotlib

I'd like to plot rectangles using defined xticks and ylim using Matplotlib, as shown in the example below:
import matplotlib.pyplot as plt
x = ['00:00', '01:00', '02:00', '03:00', '04:00' , '05:00', '06:00', '07:00', '08:00' ,'09:00' ,'10:00', '11:00', '12:00',
'13:00', '14:00', '15:00', '16:00', '17:00', '18:00', '19:00', '20:00', '21:00', '22:00', '23:00']
plt.ylim([1,10])
With those limits, use the x index to print a rectangle following this:
rect = Rectangle((x[4], x[7]), 4, 8, color='yellow')
Finally, the idea is to have multiple rectangles. Is there a way to do that without using date/time functions?
The parameters for plt.Rectangle are ((x, y), width, height). You can draw a rectangle for example as follows:
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgba
# Tick labels for the x axis; ticks are placed at the list indices, so the
# rectangle below is positioned in index units rather than as times.
x = ['00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00', '07:00', '08:00', '09:00', '10:00', '11:00',
     '12:00', '13:00', '14:00', '15:00', '16:00', '17:00', '18:00', '19:00', '20:00', '21:00', '22:00', '23:00']

plt.figure(figsize=(15, 5))
ax = plt.gca()
ax.set_xticks(range(len(x)))
ax.set_xticklabels(x)
ax.set_ylim([1, 10])

# Corners of the rectangle: x in tick-index units, y in data units.
x_start, x_end = 4, 7
y_start, y_end = 4, 8

# Rectangle takes its anchor corner followed by width and height.
rect = plt.Rectangle(
    (x_start, y_start),
    x_end - x_start,
    y_end - y_start,
    facecolor=to_rgba('crimson', 0.5),
    edgecolor='black',
    lw=2,
)
ax.add_patch(rect)
plt.show()

Isin across 2 columns for groupby

How to use isin with or (?), when I know that my data to match in df1 will be distributed across 2 columns (Title, ID).
Below code works if you delete ' or df1[df1.ID.isin(df2[column])] '
import pandas as pd
# df1 holds the rows to search; a match may sit in either the Title or the ID column.
df1 = pd.DataFrame({'Title': ['A1', 'A2', 'A3', 'C1', 'C2', 'C3'],
'ID': ['B1', 'B2', 'B3', 'D1', 'D2', 'D3'],
'Whole': ['full', 'full', 'full', 'semi', 'semi', 'semi']})
# Each column of df2 is one group of values to look up in df1.
df2 = pd.DataFrame({'Group1': ['A1', 'A2', 'A3'],
'Group2': ['B1', 'B2', 'B3']})
df = pd.DataFrame()
for column in df2.columns:
# NOTE: this is the line the question is about - per the text above, the code
# only works once the `or df1[df1.ID.isin(df2[column])]` clause is deleted.
d_group = (df1[df1.Title.isin(df2[column])] or df1[df1.ID.isin(df2[column])])
# Count rows per 'Whole' value, name the result after the group column, and
# fill categories that have no rows with '-'.
df3 = d_group.groupby('Whole')['Whole'].count()\
.rename(column, inplace=True)\
.reindex(['part', 'full', 'semi'], fill_value='-')
df = df.append(df3, ignore_index=False, sort=False)
print(df)
Desired output:
| full | part | semi
--------+---------+----------+----------
Group1 | 3 | - | -
Group2 | 3 | - | -
You need to use | instead of or, and make sure you use the [] correctly to sub-select from the df you want. In general the notation is df[selection_filter].
import pandas as pd
# df1 holds the rows to search; a match may sit in either the Title or the ID column.
df1 = pd.DataFrame({'Title': ['A1', 'A2', 'A3', 'C1', 'C2', 'C3'],
                    'ID': ['B1', 'B2', 'B3', 'D1', 'D2', 'D3'],
                    'Whole': ['full', 'full', 'full', 'semi', 'semi', 'semi']})
# Each column of df2 is one group of values to look up in df1.
df2 = pd.DataFrame({'Group1': ['A1', 'A2', 'A3'],
                    'Group2': ['B1', 'B2', 'B3']})

# Collect one Series per group and build the result once at the end;
# DataFrame.append was removed in pandas 2.0.
rows = []
for column in df2.columns:
    # Combine the two boolean masks element-wise with `|`, then select once.
    d_group = df1[df1.Title.isin(df2[column]) | df1.ID.isin(df2[column])]
    # Count rows per 'Whole' value, name the Series after the group so it
    # becomes the row label, and fill categories that have no rows with '-'.
    df3 = (
        d_group.groupby('Whole')['Whole'].count()
        .rename(column)
        .reindex(['part', 'full', 'semi'], fill_value='-')
    )
    rows.append(df3)

# Each Series becomes one row, labelled by its name.
df = pd.DataFrame(rows)
print(df)

Matplotlib scatter plot color-coded by text, how to add legend?

I'm trying to color-code a scatter plot based on the string in a column. I can't figure out how to set up the legend.
Repeatable Example
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
## Dummy Data
# NOTE: x and y each hold 21 values but label holds 22; zip() below stops at
# the shortest input, so the final label is never used.
x = [0, 0.03, 0.075, 0.108, 0.16, 0.26, 0.37, 0.49, 0.76, 1.05, 1.64,
0.015, 0.04, 0.085, 0.11, 0.165, 0.29, 0.37, 0.6, 0.78, 1.1]
y = [16.13, 0.62, 2.15, 41.083, 59.97, 13.30, 7.36, 6.80, 4.97, 3.53, 11.77,
30.21, 64.47, 57.64, 56.83, 46.69, 4.22, 30.35, 35.12, 5.22, 25.32]
label = ['a', 'a', 'c', 'a', 'c', 'b', 'c', 'c', 'c', 'b', 'c',
'c', 'c', 'a', 'b', 'a', 'a', 'a', 'b', 'c', 'c', 'c']
# One row per (x, y, label) triple.
df = pd.DataFrame(
list(zip(x, y, label)),
columns =['x', 'y', 'label']
)
## Set up colors dictionary
# Maps each label value to the color its points are drawn in.
mydict = {'a': 'darkviolet',
'b': 'darkgoldenrod',
'c': 'olive'}
## Plotting
# Translate every row's label into its color before handing it to scatter.
plt.scatter(df.x, df.y, c=df['label'].map(mydict))
# No handles or labels are passed here and scatter() above was given no
# label=, which is the gap the question asks how to fill.
plt.legend(loc="upper right", frameon=True)
Current Output
Desired Output
Same plot as above, I just want to define the legend handle.
Thanks for any help
You can use matplotlib.patches.Patch (the matplotlib.patches module is imported below as mpatches).
Just add these lines of code to your script
import matplotlib.patches as mpatches
# One colored patch per entry of the color dictionary, used as stand-in
# legend handles; the dictionary keys supply the matching legend text.
fake_handles = [mpatches.Patch(color=shade) for shade in mydict.values()]
label = mydict.keys()
plt.legend(
    handles=fake_handles,
    labels=label,
    loc='upper right',
    prop={'size': 10},
)
and you will get
You can make a list of legend handles as shown below. plt.plot returns a list of lines, so each entry of legendhandle takes the first element of that list.
import matplotlib.pyplot as plt
import pandas as pd
## Dummy Data
x = [0, 0.03, 0.075, 0.108, 0.16, 0.26, 0.37, 0.49, 0.76, 1.05, 1.64,
0.015, 0.04, 0.085, 0.11, 0.165, 0.29, 0.37, 0.6, 0.78, 1.1]
y = [16.13, 0.62, 2.15, 41.083, 59.97, 13.30, 7.36, 6.80, 4.97, 3.53, 11.77,
30.21, 64.47, 57.64, 56.83, 46.69, 4.22, 30.35, 35.12, 5.22, 25.32]
label = ['a', 'a', 'c', 'a', 'c', 'b', 'c', 'c', 'c', 'b', 'c',
'c', 'c', 'a', 'b', 'a', 'a', 'a', 'b', 'c', 'c', 'c']
df = pd.DataFrame(
list(zip(x, y, label)),
columns =['x', 'y', 'label']
)
## Set up colors dictionary
mydict = {'a': 'darkviolet',
'b': 'darkgoldenrod',
'c': 'olive'}
# plt.plot returns a list of lines; plotting no data once per color yields a
# marker-only line whose first element serves as that color's legend handle.
marker_only = {"marker": "o", "ls": ""}
legendhandle = [
    plt.plot([], color=color, **marker_only)[0]
    for color in mydict.values()
]

plt.scatter(df.x, df.y, c=df['label'].map(mydict))
# Handles and dictionary keys are in the same (dict) order, so they pair up.
plt.legend(legendhandle, list(mydict), loc="upper right", frameon=True)
plt.show()
Are you open to seaborn:
import seaborn as sns
# Scatter df's x against y, grouping points by the 'label' column (hue) and
# passing mydict as the palette - presumably seaborn then builds the legend
# from the hue levels itself; confirm against the seaborn docs.
sns.scatterplot(data=df, x='x',y='y',hue='label', palette=mydict)
Output:
With pandas/matplotlib only, you can do a loop:
fig, ax = plt.subplots()
# Draw each label's rows as a separate scatter series on the shared axes,
# passing the label as the series' legend text and its color from mydict.
for name, group in df.groupby('label'):
    group.plot.scatter(x='x', y='y', label=name, c=mydict[name], ax=ax)
plt.legend()
Output:

How can rows of a pandas DataFrame all be plotted together as lines?

Let's say we have the following DataFrame:
import pandas as pd
# One record per country: name, run type, then the six scores A-F in order.
records = [
    ('Norway', 'beta', 30.0, 31.0, 32.0, 32.4, 32.5, 32.1),
    ('Denmark', 'beta', 75.7, 49.1, 51.0, 52.3, 50.0, 47.9),
    ('Switzerland', 'beta', 46.9, 44.0, 43.5, 42.3, 41.8, 43.4),
    ('Finland', 'beta', 29.0, 29.8, 27.0, 26.0, 25.3, 24.8),
    ('Netherlands', 'beta', 30.2, 30.1, 28.5, 28.2, 28.0, 28.0),
]
# Column names 'score A' .. 'score F'.
score_names = ['score ' + letter for letter in 'ABCDEF']
df = pd.DataFrame(records, columns=['country', 'run_type'] + score_names)
# Bare expression: displays the frame when run in a notebook cell.
df
How could the score values be plotted as lines, where each line corresponds to a country?
Since you tagged matplotlib, here is a solution using plt.plot(). The idea is to plot the lines row wise using iloc
import matplotlib.pyplot as plt
# define DataFrame here
# Keep only the columns whose name contains 'score'.
df1 = df.filter(like='score')
# Plot one line per row; rows are taken by position so the country name and
# the score row stay paired regardless of the frame's index labels.
for i, country in enumerate(df['country']):
    plt.plot(df1.iloc[i], label=country)
plt.legend()
plt.show()
Try to plot the transpose of the dataframe:
# Pick the score columns by name; adjust the pattern if the columns differ.
is_score = df.columns.str.contains('score')
score_cols = df.columns[is_score]
# Index by country and transpose so each country becomes a column, which
# DataFrame.plot then draws as its own line.
by_country = df.set_index('country')
by_country[score_cols].T.plot()
Output:

Looking to add multiple xtick labels for two parallel bar charts

I have a dataset that looks like this:
I want to do the following:
Make sure the bars do not overlap.
Treat each bar like a separate dataset, i.e. the labels on the x axis should be separate: one for the yellow series and one for the red series. These labels should be the words (I want to have two series of xtick labels in this chart): one for words_2 and one for words_1.
Current code:
import matplotlib.pyplot as plt
import numpy as np
import copy
import random
from random import randint
random.seed(11)
word_freq_1 = [('test', 510), ('Hey', 362), ("please", 753), ('take', 446), ('herbert', 325), ('live', 222), ('hate', 210), ('white', 191), ('simple', 175), ('harry', 172), ('woman', 170), ('basil', 153), ('things', 129), ('think', 126), ('bye', 124), ('thing', 120), ('love', 107), ('quite', 107), ('face', 107), ('eyes', 107), ('time', 106), ('himself', 105), ('want', 105), ('good', 105), ('really', 103), ('away',100), ('did', 100), ('people', 99), ('came', 97), ('say', 97), ('cried', 95), ('looked', 94), ('tell', 92), ('look', 91), ('world', 89), ('work', 89), ('project', 88), ('room', 88), ('going', 87), ('answered', 87), ('mr', 87), ('little', 87), ('yes', 84), ('silly', 82), ('thought', 82), ('shall', 81), ('circle', 80), ('hallward', 80), ('told', 77), ('feel', 76), ('great', 74), ('art', 74), ('dear',73), ('picture', 73), ('men', 72), ('long', 71), ('young', 70), ('lady', 69), ('let', 66), ('minute', 66), ('women', 66), ('soul', 65), ('door', 64), ('hand',63), ('went', 63), ('make', 63), ('night', 62), ('asked', 61), ('old', 61), ('passed', 60), ('afraid', 60), ('night', 59), ('looking', 58), ('wonderful', 58), ('gutenberg-tm', 56), ('beauty', 55), ('sir', 55), ('table', 55), ('turned', 54), ('lips', 54), ("one's", 54), ('better', 54), ('got', 54), ('vane', 54), ('right',53), ('left', 53), ('course', 52), ('hands', 52), ('portrait', 52), ('head', 51), ("can't", 49), ('true', 49), ('house', 49), ('believe', 49), ('black', 49), ('horrible', 48), ('oh', 48), ('knew', 47), ('curious', 47), ('myself', 47)]
# Pair every word from word_freq_1 with a random frequency in [1, 500].
# The whole list is traversed (not just the first N entries) so the sequence
# of randint() calls is unchanged.
word_freq_2 = [(word, randint(1, 500)) for word, _ in word_freq_1]

N = 25  # number of words shown per series
ind = np.arange(N)  # the x locations for the groups
width = 0.35  # the width of the bars
fig, ax = plt.subplots()

# Split the first N (word, frequency) pairs of each list into labels and heights.
words_1 = [word for word, _ in word_freq_1[:N]]
values_1 = [int(count) for _, count in word_freq_1[:N]]
words_2 = [word for word, _ in word_freq_2[:N]]
values_2 = [int(count) for _, count in word_freq_2[:N]]
print(words_2)

# NOTE: rects1 is drawn without an explicit width while rects2 gets `width`;
# this is left exactly as asked, since it is the overlap the question is about.
rects1 = ax.bar(ind, values_1, color='r')
rects2 = ax.bar(ind + width, values_2, width, color='y')

# add some text for labels, title and axes ticks
ax.set_ylabel('Words')
ax.set_title('Word Frequencies by Test and Training Set')
ax.set_xticks(ind + width)
ax.set_xticklabels(words_2, rotation=90)
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=6)
fig.tight_layout()
ax.legend((rects1[0], rects2[0]), ('Test', 'Train'))
plt.savefig('test.png')
I found a solution to this. The key is to set two types of xticks, minor and major. In addition, the overlapping bars were due to my not specifying the bar width for rects1.
# Both series get the same explicit bar width; the second is shifted right
# by one bar width.
rects1 = ax.bar(ind, values_1, width=width, color='r')
rects2 = ax.bar(ind + width, values_2, width=width, color='y')

ax.set_ylabel('Words')
ax.set_title('Word Frequencies by Test and Training Set')

# Major ticks sit at the first series' positions, minor ticks at the second's.
ax.set_xticks(ind, minor=False)
ax.set_xticks(ind + width, minor=True)
# Each tick set carries its own series' words as labels.
ax.set_xticklabels(words_1, rotation=90, minor=False, ha='left')
ax.set_xticklabels(words_2, rotation=90, minor=True, ha='left')

# Same label size for major and minor ticks on both axes.
ax.tick_params(axis='both', which='both', labelsize=6)
Which results in: