Jupyter notebook display two pandas tables side by side - pandas

I have two pandas dataframes and I would like to display them in Jupyter notebook.
Doing something like:
display(df1)
display(df2)
Shows them one below another:
I would like to have a second dataframe on the right of the first one. There is a similar question, but it looks like there a person is satisfied either with merging them in one dataframe of showing the difference between them.
This will not work for me. In my case dataframes can represent completely different (non-comparable elements) and the size of them can be different. Thus my main goal is to save space.

I have ended up writing a function that can do this:
[update: added titles based on suggestions (thnx #Antony_Hatchkins et al.)]
from IPython.display import display_html
from itertools import chain,cycle
def display_side_by_side(*args,titles=cycle([''])):
html_str=''
for df,title in zip(args, chain(titles,cycle(['</br>'])) ):
html_str+='<th style="text-align:center"><td style="vertical-align:top">'
html_str+=f'<h2 style="text-align: center;">{title}</h2>'
html_str+=df.to_html().replace('table','table style="display:inline"')
html_str+='</td></th>'
display_html(html_str,raw=True)
Example usage:
df1 = pd.DataFrame(np.arange(12).reshape((3,4)),columns=['A','B','C','D',])
df2 = pd.DataFrame(np.arange(16).reshape((4,4)),columns=['A','B','C','D',])
display_side_by_side(df1,df2,df1, titles=['Foo','Foo Bar']) #we left 3rd empty...

You could override the CSS of the output code. It uses flex-direction: column by default. Try changing it to row instead. Here's an example:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
CSS = """
.output {
flex-direction: row;
}
"""
HTML('<style>{}</style>'.format(CSS))
You could, of course, customize the CSS further as you wish.
If you wish to target only one cell's output, try using the :nth-child() selector. For example, this code will modify the CSS of the output of only the 5th cell in the notebook:
CSS = """
div.cell:nth-child(5) .output {
flex-direction: row;
}
"""

Starting from pandas 0.17.1 the visualization of DataFrames can be directly modified with pandas styling methods
To display two DataFrames side by side you must use set_table_attributes with the argument "style='display:inline'" as suggested in ntg answer. This will return two Styler objects. To display the aligned dataframes just pass their joined HTML representation through the display_html method from IPython.
With this method is also easier to add other styling options. Here's how to add a caption, as requested here:
import numpy as np
import pandas as pd
from IPython.display import display_html
df1 = pd.DataFrame(np.arange(12).reshape((3,4)),columns=['A','B','C','D',])
df2 = pd.DataFrame(np.arange(16).reshape((4,4)),columns=['A','B','C','D',])
df1_styler = df1.style.set_table_attributes("style='display:inline'").set_caption('Caption table 1')
df2_styler = df2.style.set_table_attributes("style='display:inline'").set_caption('Caption table 2')
display_html(df1_styler._repr_html_()+df2_styler._repr_html_(), raw=True)

Combining approaches of gibbone (to set styles and captions) and stevi (adding space) I made my version of function, which outputs pandas dataframes as tables side-by-side:
from IPython.core.display import display, HTML
def display_side_by_side(dfs:list, captions:list):
"""Display tables side by side to save vertical space
Input:
dfs: list of pandas.DataFrame
captions: list of table captions
"""
output = ""
combined = dict(zip(captions, dfs))
for caption, df in combined.items():
output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
output += "\xa0\xa0\xa0"
display(HTML(output))
Usage:
display_side_by_side([df1, df2, df3], ['caption1', 'caption2', 'caption3'])
Output:

My solution just builds a table in HTML without any CSS hacks and outputs it:
import pandas as pd
from IPython.display import display,HTML
def multi_column_df_display(list_dfs, cols=3):
html_table = "<table style='width:100%; border:0px'>{content}</table>"
html_row = "<tr style='border:0px'>{content}</tr>"
html_cell = "<td style='width:{width}%;vertical-align:top;border:0px'>{{content}}</td>"
html_cell = html_cell.format(width=100/cols)
cells = [ html_cell.format(content=df.to_html()) for df in list_dfs ]
cells += (cols - (len(list_dfs)%cols)) * [html_cell.format(content="")] # pad
rows = [ html_row.format(content="".join(cells[i:i+cols])) for i in range(0,len(cells),cols)]
display(HTML(html_table.format(content="".join(rows))))
list_dfs = []
list_dfs.append( pd.DataFrame(2*[{"x":"hello"}]) )
list_dfs.append( pd.DataFrame(2*[{"x":"world"}]) )
multi_column_df_display(2*list_dfs)

Here's another variation of the display_side_by_side() function introduced by #Anton Golubev that combines gibbone (to set styles and captions) and stevi (adding space), I added an extra argument to change spacing between tables at run-time.
from IPython.core.display import display, HTML
def display_side_by_side(dfs:list, captions:list, tablespacing=5):
"""Display tables side by side to save vertical space
Input:
dfs: list of pandas.DataFrame
captions: list of table captions
"""
output = ""
for (caption, df) in zip(captions, dfs):
output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
output += tablespacing * "\xa0"
display(HTML(output))
display_side_by_side([df1, df2, df3], ['caption1', 'caption2', 'caption3'])
The tablespacing=5 default argument value (shown = 5 here) determines the vertical spacing between tables.

This adds (optional) headers, index and Series support to #nts's answer:
from IPython.display import display_html
def mydisplay(dfs, names=[], index=False):
def to_df(x):
if isinstance(x, pd.Series):
return pd.DataFrame(x)
else:
return x
html_str = ''
if names:
html_str += ('<tr>' +
''.join(f'<td style="text-align:center">{name}</td>' for name in names) +
'</tr>')
html_str += ('<tr>' +
''.join(f'<td style="vertical-align:top"> {to_df(df).to_html(index=index)}</td>'
for df in dfs) +
'</tr>')
html_str = f'<table>{html_str}</table>'
html_str = html_str.replace('table','table style="display:inline"')
display_html(html_str, raw=True)

Here is Jake Vanderplas' solution I came across just the other day:
import numpy as np
import pandas as pd
class display(object):
"""Display HTML representation of multiple objects"""
template = """<div style="float: left; padding: 10px;">
<p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
</div>"""
def __init__(self, *args):
self.args = args
def _repr_html_(self):
return '\n'.join(self.template.format(a, eval(a)._repr_html_())
for a in self.args)
def __repr__(self):
return '\n\n'.join(a + '\n' + repr(eval(a))
for a in self.args)
Credit: https://github.com/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/03.08-Aggregation-and-Grouping.ipynb

#zarak code is pretty small but affects the layout of the whole notebook. Other options are a bit messy for me.
I've added some clear CSS to this answer affecting only current cell output. Also you are able to add anything below or above dataframes.
from ipywidgets import widgets, Layout
from IPython import display
import pandas as pd
import numpy as np
# sample data
df1 = pd.DataFrame(np.random.randn(8, 3))
df2 = pd.DataFrame(np.random.randn(8, 3))
# create output widgets
widget1 = widgets.Output()
widget2 = widgets.Output()
# render in output widgets
with widget1:
display.display(df1.style.set_caption('First dataframe'))
df1.info()
with widget2:
display.display(df2.style.set_caption('Second dataframe'))
df1.info()
# add some CSS styles to distribute free space
box_layout = Layout(display='flex',
flex_flow='row',
justify_content='space-around',
width='auto'
)
# create Horisontal Box container
hbox = widgets.HBox([widget1, widget2], layout=box_layout)
# render hbox
hbox

I ended up using HBOX
import ipywidgets as ipyw
def get_html_table(target_df, title):
df_style = target_df.style.set_table_attributes("style='border:2px solid;font-size:10px;margin:10px'").set_caption(title)
return df_style._repr_html_()
df_2_html_table = get_html_table(df_2, 'Data from Google Sheet')
df_4_html_table = get_html_table(df_4, 'Data from Jira')
ipyw.HBox((ipyw.HTML(df_2_html_table),ipyw.HTML(df_4_html_table)))

Gibbone's answer worked for me! If you want extra space between the tables go to the code he proposed and add this "\xa0\xa0\xa0" to the following code line.
display_html(df1_styler._repr_html_()+"\xa0\xa0\xa0"+df2_styler._repr_html_(), raw=True)

I decided to add some extra functionality to Yasin's elegant answer, where one can choose both the number of cols and rows; any extra dfs are then added to the bottom.
Additionally one can choose in which order to fill the grid (simply change fill keyword to 'cols' or 'rows' as needed)
import pandas as pd
from IPython.display import display,HTML
def grid_df_display(list_dfs, rows = 2, cols=3, fill = 'cols'):
html_table = "<table style='width:100%; border:0px'>{content}</table>"
html_row = "<tr style='border:0px'>{content}</tr>"
html_cell = "<td style='width:{width}%;vertical-align:top;border:0px'>{{content}}</td>"
html_cell = html_cell.format(width=100/cols)
cells = [ html_cell.format(content=df.to_html()) for df in list_dfs[:rows*cols] ]
cells += cols * [html_cell.format(content="")] # pad
if fill == 'rows': #fill in rows first (first row: 0,1,2,... col-1)
grid = [ html_row.format(content="".join(cells[i:i+cols])) for i in range(0,rows*cols,cols)]
if fill == 'cols': #fill columns first (first column: 0,1,2,..., rows-1)
grid = [ html_row.format(content="".join(cells[i:rows*cols:rows])) for i in range(0,rows)]
display(HTML(html_table.format(content="".join(grid))))
#add extra dfs to bottom
[display(list_dfs[i]) for i in range(rows*cols,len(list_dfs))]
list_dfs = []
list_dfs.extend((pd.DataFrame(2*[{"x":"hello"}]),
pd.DataFrame(2*[{"x":"world"}]),
pd.DataFrame(2*[{"x":"gdbye"}])))
grid_df_display(3*list_dfs)
test output

Extension of antony's answer If you want to limit de visualization of tables to some numer of blocks by row, use the maxTables variable.
def mydisplay(dfs, names=[]):
count = 0
maxTables = 6
if not names:
names = [x for x in range(len(dfs))]
html_str = ''
html_th = ''
html_td = ''
for df, name in zip(dfs, names):
if count <= (maxTables):
html_th += (''.join(f'<th style="text-align:center">{name}</th>'))
html_td += (''.join(f'<td style="vertical-align:top"> {df.to_html(index=False)}</td>'))
count += 1
else:
html_str += f'<tr>{html_th}</tr><tr>{html_td}</tr>'
html_th = f'<th style="text-align:center">{name}</th>'
html_td = f'<td style="vertical-align:top"> {df.to_html(index=False)}</td>'
count = 0
if count != 0:
html_str += f'<tr>{html_th}</tr><tr>{html_td}</tr>'
html_str += f'<table>{html_str}</table>'
html_str = html_str.replace('table','table style="display:inline"')
display_html(html_str, raw=True)

Related

How to display all the informations of a scatterplot dot with pick event when using Seaborn and facet-grid

I have a Pandas database (below I am creating a random database to mimic mine).
I use Seaborn, facet-grid and scatterplot to plot the data the way I want : Epsilon1 as a function of no, I distinguish the data from the sub categories A and B using different subplots and colors. This part of the code works correctly.
Then I want that the user can click on any dot in order to display in the IPython console and in the status bar of the Matplotlib figure (as here) all the informations about this dot : that is to say all the values of the corresponding dataframe row: something like:
'no':5, 'Date':1997-12-15 03:50:41, 'A':A6, 'B':B4, 'Epsilon1':0.670635, 'Epsilon2':0.756461, 'Epsilon3':0.530825
I have made first tests using onpick event (not shown here) but all were unsuccessful.
Actually I can't get by with the function onpick(event) because I do not understand why print(event.ind) gives me a list of integers...
Here is my code
import pandas as pd
import numpy as np
import seaborn as sns
import random
# size of the database
n = 1000
nA = 6
nB = 5
no = np.arange(n)
date = np.random.randint(1e9, size=n).astype('datetime64[s]')
A = [''.join(['A',str(random.randint(1, nA))]) for j in range(n)]
B = [''.join(['B',str(random.randint(1, nB))]) for j in range(n)]
Epsilon1 = np.random.random_sample((n,))
Epsilon2 = np.random.random_sample((n,))
Epsilon3 = np.random.random_sample((n,))
data = pd.DataFrame({'no':no,
'Date':date,
'A':A,
'B':B,
'Epsilon1':Epsilon1,
'Epsilon2':Epsilon2,
'Epsilon3':Epsilon3})
def onpick(event):
print(event.ind)
def plot_Epsilon1_seaborn():
sns.set_theme()
g = sns.FacetGrid(data,
col="A",
col_wrap=4,
hue='B',
hue_order=data['B'].sort_values().drop_duplicates().to_list(),
palette="viridis",
col_order=data['A'].sort_values().drop_duplicates().to_list())
g.map(sns.scatterplot,
'no',
'Epsilon1',
picker=True)
g.add_legend()
g.fig.canvas.mpl_connect("pick_event", onpick)
if __name__ == '__main__':
plot_Epsilon1_seaborn()

How to make pandas DataFrame plot's appear at the right point in a Jupyter notebook?

I have a Jupyter notebook with %matplotlib widget as the first line.
The notebook contains several markdown cells providing a header structure and some explaining texts.
Also there I am generating some plots from pandas.DataFrames, which are grouped using dynamically generated sections.
Extracted (not executable in this way), it looks like:
%matplotlib widget
import pandas
from IPython.display import display_markdown
dictionary: dict[str, pandas.DataFrame] = {
"DataFrame 1": pandas.util.testing.makeDataFrame(),
"DataFrame 2": pandas.util.testing.makeDataFrame(),
}
group: str
dataframe: pandas.DataFrame
for group, dataframe in dictionary.items():
display_markdown("## %s" % (group), raw=True)
dataframe.plot()
However, when running the notebook, it first shows me all the created sub-sections and then, after the last one, all the plots.
How can I bring them in the intended order?
For the case that this is relevant: I am using the Jupyter extension of Visual Studio Code.
Minimal exeutable/ runnable example: https://colab.research.google.com/drive/1iTefKtR93MuzStgpNB3zIxx9S0pAhAO8#scrollTo=yRqBQywrCr7T
You are seeing the plots last because of the way matplotlib and Jupyter interact. Modern Jupyter puts the plots generated in a cell as a separate entity. To interweave them with markdown produced in the course of looping as the code runs procedurally you can suppress the output using %%capture in that cell, collect the plots, and arrange to show them how you want in another cell using display for both.
Demonstration:
You can the code the follows in sessions launched from here after running %pip install ipympl in a cell first:
Top cell
%%capture
import pandas
from IPython.display import display_markdown
dictionary = {
"DataFrame 1": pandas.util.testing.makeDataFrame(),
"DataFrame 2": pandas.util.testing.makeDataFrame(),
}
group: str
dataframe: pandas.DataFrame
title_n_plots =[]
for group, dataframe in dictionary.items():
#display_markdown("## %s" % (group), raw=True)
title_n_plots.append([group,dataframe.plot()])
That should display nothing.
Next cell
# Display how they should be associated
for x in title_n_plots:
display_markdown("## %s" % (x[0]), raw=True)
display(x[1].figure)
Option(s) for still using a single cell and code more like originally posted by adding text as a plot title instead of separate markdown
Of course, an option using the original code layout along the lines of your posted MRE and not suppressing anything could be achieved by adding real titles in the plots that would have stayed associated with the appropriate plot. Like so:
import pandas
from IPython.display import display_markdown
dictionary = {
"DataFrame 1": pandas.util.testing.makeDataFrame(),
"DataFrame 2": pandas.util.testing.makeDataFrame(),
}
group: str
dataframe: pandas.DataFrame
title_n_plots =[]
for group, dataframe in dictionary.items():
#ax = dataframe.plot(title = r"$\bf{" + group + "}$")
ax = dataframe.plot(title = r"$\bf{" + group[:-1] + "\ "+ group[-1:] + "}$")
#bold in title based on https://stackoverflow.com/a/44123579/8508004
#hack to fix space showing up before number in `group` based on https://stackoverflow.com/a/34703257/8508004
ax.title.set_size(40) # based on https://stackoverflow.com/a/67154403/8508004
Or, if you don't want the title centered, you can make it more like the 'display_markdown' example like so:
import pandas
from IPython.display import display_markdown
dictionary = {
"DataFrame 1": pandas.util.testing.makeDataFrame(),
"DataFrame 2": pandas.util.testing.makeDataFrame(),
}
group: str
dataframe: pandas.DataFrame
title_n_plots =[]
for group, dataframe in dictionary.items():
#ax = dataframe.plot(title = r"$\bf{" + group + "}$")
ax = dataframe.plot(title = r"$\bf{" + group[:-1] + "\ "+ group[-1:] + "}$")
#bold in title based on https://stackoverflow.com/a/44123579/8508004
#hack to fix space showing up before number in `group` based on https://stackoverflow.com/a/34703257/8508004
ax.title.set_size(27) # based on https://stackoverflow.com/a/67154403/8508004
ax.title.set_horizontalalignment("right") # based on https://stackoverflow.com/a/67154403/8508004 and
# https://stackoverflow.com/a/44411195/8508004 and that it shows on left-aligned when "right" supplied & vice versa

It is possible to insert image into pandas data frame?

I wanted to save a test image dataset into a pandas data frame. The Panda data frame contains the input image, input image class, and predicted output class.
Do you need something like this?
import pandas as pd
from IPython.core.display import display,HTML
# empty dataframe
df = pd.DataFrame()
# your images
df['images1'] = ['https://a.cdn-hotels.com/gdcs/production180/d124/9dc35ac0-af3d-4cce-a7cf-02132213f43a.jpg?impolicy=fcrop&w=800&h=533&q=medium',
'https://upload.wikimedia.org/wikipedia/commons/2/2b/NYC_Downtown_Manhattan_Skyline_seen_from_Paulus_Hook_2019-12-20_IMG_7347_FRD_%28cropped%29.jpg']
df['images2'] = ['https://post.medicalnewstoday.com/wp-content/uploads/sites/3/2020/02/322868_1100-800x825.jpg',
'https://i.guim.co.uk/img/media/684c9d087dab923db1ce4057903f03293b07deac/205_132_1915_1150/master/1915.jpg?width=1200&height=1200&quality=85&auto=format&fit=crop&s=14a95b5026c1567b823629ba35c40aa0']
display(df) # <-- At this point you have a dataframe with paths of images
# convert your links to html tags
def path_to_image_html(path):
return '<img src="'+ path + '" width="60" >'
pd.set_option('display.max_colwidth', None)
image_cols = ['images1', 'images2'] # If you have many columns define which columns will be used to convert to html
# Create the dictionariy to be passed as formatters
format_dict = {}
for image_col in image_cols:
format_dict[image_col] = path_to_image_html
display(HTML(df.to_html(escape=False ,formatters=format_dict)))

Plotly chart percentage with smileys

I would like o add a plot figure based on smileys like this one:
dat will come from a dataframe pandas : dataframe.value_counts(normalize=True)
Can some one give me some clues.
use colorscale in normal way for a heatmap
use anotation_text to assign an emoji to a value
import plotly.figure_factory as ff
import plotly.graph_objects as go
import pandas as pd
import numpy as np
df = pd.DataFrame([[j*10+i for i in range(10)] for j in range(10)])
e=["😃","🙂","😐","☚ī¸"]
fig = go.Figure(ff.create_annotated_heatmap(
z=df.values, colorscale="rdylgn", reversescale=False,
annotation_text=np.select([df.values>75, df.values>50, df.values>25, df.values>=0], e),
))
fig.update_annotations(font_size=25)
# allows emoji to use background color
fig.update_annotations(opacity=0.7)
update coloured emoji
fundamentally you need emojicons that can accept colour styling
for this I switched to Font Awesome. This then also requires switching to dash, plotly's cousin so that external CSS can be used (to use FA)
then build a dash HTML table applying styling logic for picking emoticon and colour
from jupyter_dash import JupyterDash
import dash_html_components as html
import pandas as pd
import branca.colormap
# Load Data
df = pd.DataFrame([[j*10+i for i in range(10)] for j in range(10)])
external_stylesheets = [{
'href': 'https://use.fontawesome.com/releases/v5.8.1/css/all.css',
'rel': 'stylesheet', 'crossorigin': 'anonymous',
'integrity': 'sha384-50oBUHEmvpQ+1lW4y57PTFmhCaXp0ML5d60M1M7uH2+nqUivzIebhndOJK28anvf',
}]
# possibly could use a a different library for this - simple way to map a value to a colormap
cm = branca.colormap.LinearColormap(["red","yellow","green"], vmin=0, vmax=100, caption=None)
def mysmiley(v):
sm = ["far fa-grin", "far fa-smile", "far fa-meh", "far fa-frown"]
return html.Span(className=sm[3-(v//25)], style={"color":cm(v),"font-size": "2em"})
# Build App
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
html.Table([html.Tr([html.Td(mysmiley(c)) for c in r]) for r in df.values])
])
# Run app and display result inline in the notebook
app.run_server(mode='inline')

Time series plot of categorical or binary variables in pandas or matplotlib

I have data that represent a time series of categorical variables. I want to display the transitions in categories below a traditional line plot of related continuous time series to show off context as time evolves. I'd like to know the best way to do this. My attempt was in terms of Rectangles. The appearance is a bit weird, and importantly the axis labels for the x axis don't render as dates.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from pandas.plotting import register_matplotlib_converters
import matplotlib.dates as mdates
register_matplotlib_converters()
t0 = pd.DatetimeIndex(["2017-06-01 00:00","2017-06-17 00:00","2017-07-03 00:00","2017-08-02 00:00","2017-08-09 00:00","2017-09-01 00:00"])
t1 = pd.DatetimeIndex(["2017-06-01 00:00","2017-08-15 00:00","2017-09-01 00:00"])
df0 = pd.DataFrame({"cat":[0,2,1,2,0,1]},index = t0)
df1 = pd.DataFrame({"op":[0,1,0]},index=t1)
# Create new plot
fig,ax = plt.subplots(1,figsize=(8,3))
data_layout = {
"cat" : {0: ('bisque','Low'),
1: ('lightseagreen','Medium'),
2: ('rebeccapurple','High')},
"op" : {0: ('darkturquoise','Open'),
1: ('tomato','Close')}
}
vars =("cat","op")
dfs = [df0,df1]
all_ticks = []
leg = []
for j,(v,d) in enumerate(zip(vars,dfs)):
dvals = d[v][:].astype("d")
normal = mpl.colors.Normalize(vmin=0, vmax=2.)
colors = plt.cm.Set1(0.75*normal(dvals.as_matrix()))
handles = []
for i in range(d.count()-1):
s = d[v].index.to_pydatetime()
level = d[v][i]
base = d[v].index[i]
w = s[i+1] - s[i]
patch=mpl.patches.Rectangle((base,float(j)),width=w,color=data_layout[v][level][0],height=1,fill=True)
ax.add_patch(patch)
for lev in data_layout[v]:
print data_layout[v][level]
handles.append(mpl.patches.Patch(color=data_layout[v][lev][0],label=data_layout[v][lev][1]))
all_ticks.append(j+0.5)
leg.append( plt.legend(handles=handles,loc = (3-3*j+1)))
plt.axhline(y=1.,linewidth=3,color="gray")
plt.xlim(pd.Timestamp(2017,6,1).to_pydatetime(),pd.Timestamp(2017,9,1).to_pydatetime())
plt.ylim(0,2)
ax.add_artist(leg[0]) # two legends on one axis
ax.format_xdata = mdates.DateFormatter('%Y-%m-%d') # This fails
plt.yticks(all_ticks,vars)
plt.show()
which produces this with no dates and has jittery lines:. How do I fix this? Is there a better way entirely?
This is a way to display dates on x-axis:
In your code substitute the line that fails with this one:
ax.xaxis.set_major_formatter((mdates.DateFormatter('%Y-%m-%d')))
But I don't remember how it should look like, can you show us the end-result again?