I have a Pandas database (below I am creating a random database to mimic mine).
I use Seaborn, facet-grid and scatterplot to plot the data the way I want : Epsilon1 as a function of no, I distinguish the data from the sub categories A and B using different subplots and colors. This part of the code works correctly.
Then I want that the user can click on any dot in order to display in the IPython console and in the status bar of the Matplotlib figure (as here) all the informations about this dot : that is to say all the values of the corresponding dataframe row: something like:
'no':5, 'Date':1997-12-15 03:50:41, 'A':A6, 'B':B4, 'Epsilon1':0.670635, 'Epsilon2':0.756461, 'Epsilon3':0.530825
I have made first tests using onpick event (not shown here) but all were unsuccessful.
Actually I can't get by with the function onpick(event) because I do not understand why print(event.ind) gives me a list of integers...
Here is my code
import pandas as pd
import numpy as np
import seaborn as sns
import random
# size of the database
n = 1000
nA = 6
nB = 5
no = np.arange(n)
date = np.random.randint(1e9, size=n).astype('datetime64[s]')
A = [''.join(['A',str(random.randint(1, nA))]) for j in range(n)]
B = [''.join(['B',str(random.randint(1, nB))]) for j in range(n)]
Epsilon1 = np.random.random_sample((n,))
Epsilon2 = np.random.random_sample((n,))
Epsilon3 = np.random.random_sample((n,))
data = pd.DataFrame({'no':no,
'Date':date,
'A':A,
'B':B,
'Epsilon1':Epsilon1,
'Epsilon2':Epsilon2,
'Epsilon3':Epsilon3})
def onpick(event):
print(event.ind)
def plot_Epsilon1_seaborn():
sns.set_theme()
g = sns.FacetGrid(data,
col="A",
col_wrap=4,
hue='B',
hue_order=data['B'].sort_values().drop_duplicates().to_list(),
palette="viridis",
col_order=data['A'].sort_values().drop_duplicates().to_list())
g.map(sns.scatterplot,
'no',
'Epsilon1',
picker=True)
g.add_legend()
g.fig.canvas.mpl_connect("pick_event", onpick)
if __name__ == '__main__':
plot_Epsilon1_seaborn()
I need two Windows that maintain size and focus when data changes.
Size and focus are intended to be adjusted manually .
I managed myself to do what I want, but a Warning is shown :
"MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
ax1 = window1.add_subplot(111)".
Can someone explain what is wrong with my code?
Thanks
import matplotlib.pyplot as plt
import numpy as np
def Creo_ventana(primero,data):
plt.ion()
window1=plt.figure('WINDOW1')
ax1 = window1.add_subplot(111)
window2=plt.figure('WINDOW2')
ax2 = window2.add_subplot(111)
y_min1, y_max1 = ax1.get_ylim()
x_min1, x_max1 = ax1.get_xlim()
size1=ax1.figure.get_size_inches()
y_min2, y_max2 = ax2.get_ylim()
x_min2, x_max2 = ax2.get_xlim()
size2=ax2.figure.get_size_inches()
if primero ==False:
ax1.cla()
ax1.set_xlim(x_min1, x_max1)
ax1.set_ylim(y_min1, y_max1)
ax1.figure.set_size_inches(size1)
ax1.autoscale(False)
ax1.autoscale_view (True,False,False)
ax1.set_adjustable('box')
ax1.plot(data)
window1.canvas.draw()
window1.canvas.flush_events()
if primero ==False:
ax2.cla()
ax2.set_xlim(x_min2, x_max2)
ax2.set_ylim(y_min2, y_max2)
ax2.figure.set_size_inches(size2)
ax2.autoscale(False)
ax2.autoscale_view (True,False,False)
ax2.set_adjustable('box')
ax2.plot(data)
window2.canvas.draw()
window2.canvas.flush_events()
if __name__ == "__main__":
primero= True
correr = 'y'
while correr =='y':
data = np.random.rand(100)*10
Creo_ventana(primero,data)
primero= False
correr=input ('otro? y/n :')
if not correr == 'y':
break
As a beginner in Python3, I’ve been following the Python Crash Course book. The following code prints the graph but does not ask for an input as intended. I have tried placing plt.show() after the if statement but the program ends up loading for a long time, eventually failing to display the graph. Is there a way to fix this?
Code:
import matplotlib.pyplot as plt
from random_walk import RandomWalk
while True:
rw = RandomWalk()
rw.fill_walk()
plt.style.use('classic')
fig, ax = plt.subplots()
point_numbers = range(rw.num_points)
ax.scatter(rw.x_values, rw.y_values,c=point_numbers, cmap=plt.cm.Blues, edgecolors='none', s=15)
plt.show()
The program does not print the following for input
keep_running = input("Make another walk? (y/n):")
if keep_running == 'n':
break
The following is saved on a separate file
from random import choice
class RandomWalk:
def __init__(self, num_points=5000):
"""Initialize attributes of a walk"""
self.num_points = num_points
self.x_values = [0]
self.y_values = [0]
def fill_walk(self):
""" calculating all the points in the walk"""
# Keep taking steps until the walk reaches the desired length.
while len(self.x_values) < self.num_points:
# Decide which direction to go and how far to go in that direction.
x_direction = choice([1,-1])
x_distance = choice([0,1,2,3,4])
x_step = x_direction*x_distance
y_direction = choice([1,-1])
y_distance = choice([0,1,2,3,4])
y_step = y_direction*y_distance
# Reject moves that go nowhere
if x_step == 0 and y_step == 0:
continue
# Calculate the new position
x = self.x_values[-1] + x_step
y = self.y_values[-1] + y_step
self.x_values.append(x)
self.y_values.append(y)
Im using a csv file as my data source. I want the graph to update based on the radio button selection i make, please find my source code below.
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input,Output
df = pd.read_csv('population2.csv')
fst_yvalues = df['PopEstimate2010']/1000000
scd_yvalues = df['PopEstimate2011']/1000000
trd_yvalues = df['PopEstimate2012']/1000000
app = dash.Dash()
app.layout = html.Div(children=[
html.H1('My first Interactive Graph'),
html.Div(dcc.RadioItems(id='radio_items',
options=[{'label':'PopEstimate2010','value':'pop2010'},
{'label':'PopEstimate2011','value': 'pop2011'},
{'label':'PopEstimate2011' ,'value':'pop2012'}],
value='pop2010')),
html.Br(),
html.Div(children=[
dcc.Graph(id='int_bar')])])
#app.callback(Output('int_bar','figure'),[Input('radio_items','value')])
def bar_chart(value):
trace = []`
if value == 'pop2010':
trarce = [go.Bar(x=df['Name'],y=fst_yvalues)]
elif value == 'pop2011':
trarce = [go.Bar(x=df['Name'],y=scd_yvalues)]
else:
trarce = [go.Bar(x=df['Name'],y=trd_yvalues)]
layout = go.Layout(title='MY FIRST GRAPH',
xaxis=dict(title='MY X-AXIS'),
yaxis=dict(title='MY Y-AXIS'),hovermode='closest')
figure = go.Figure(data=trace,layout=layout)enter code here
return figure
if __name__ == '__main__':
app.run_server(debug=True)
When i try to run this, it only gives me the layout but not the actual graph.
Below is the approach i have taken to get this to work :
#app.callback(Output('int_bar','figure'),[Input('radio_items','value')])
def make_bar_chart(value):
if value == 'pop2010':
figure = {'data': [go.Bar(x=df['Name'],y=fst_yvalues)],
'layout': go.Layout(title='MY FIRST GRAPH',
xaxis=dict(title='MY X-AXIS'),
yaxis=dict(title='MY Y-AXIS'),hovermode='closest')
}
...
...
return figure
if __name__ == '__main__':
app.run_server(debug=True)
I have two pandas dataframes and I would like to display them in Jupyter notebook.
Doing something like:
display(df1)
display(df2)
Shows them one below another:
I would like to have a second dataframe on the right of the first one. There is a similar question, but it looks like there a person is satisfied either with merging them in one dataframe of showing the difference between them.
This will not work for me. In my case dataframes can represent completely different (non-comparable elements) and the size of them can be different. Thus my main goal is to save space.
I have ended up writing a function that can do this:
[update: added titles based on suggestions (thnx #Antony_Hatchkins et al.)]
from IPython.display import display_html
from itertools import chain,cycle
def display_side_by_side(*args,titles=cycle([''])):
html_str=''
for df,title in zip(args, chain(titles,cycle(['</br>'])) ):
html_str+='<th style="text-align:center"><td style="vertical-align:top">'
html_str+=f'<h2 style="text-align: center;">{title}</h2>'
html_str+=df.to_html().replace('table','table style="display:inline"')
html_str+='</td></th>'
display_html(html_str,raw=True)
Example usage:
df1 = pd.DataFrame(np.arange(12).reshape((3,4)),columns=['A','B','C','D',])
df2 = pd.DataFrame(np.arange(16).reshape((4,4)),columns=['A','B','C','D',])
display_side_by_side(df1,df2,df1, titles=['Foo','Foo Bar']) #we left 3rd empty...
You could override the CSS of the output code. It uses flex-direction: column by default. Try changing it to row instead. Here's an example:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
CSS = """
.output {
flex-direction: row;
}
"""
HTML('<style>{}</style>'.format(CSS))
You could, of course, customize the CSS further as you wish.
If you wish to target only one cell's output, try using the :nth-child() selector. For example, this code will modify the CSS of the output of only the 5th cell in the notebook:
CSS = """
div.cell:nth-child(5) .output {
flex-direction: row;
}
"""
Starting from pandas 0.17.1 the visualization of DataFrames can be directly modified with pandas styling methods
To display two DataFrames side by side you must use set_table_attributes with the argument "style='display:inline'" as suggested in ntg answer. This will return two Styler objects. To display the aligned dataframes just pass their joined HTML representation through the display_html method from IPython.
With this method is also easier to add other styling options. Here's how to add a caption, as requested here:
import numpy as np
import pandas as pd
from IPython.display import display_html
df1 = pd.DataFrame(np.arange(12).reshape((3,4)),columns=['A','B','C','D',])
df2 = pd.DataFrame(np.arange(16).reshape((4,4)),columns=['A','B','C','D',])
df1_styler = df1.style.set_table_attributes("style='display:inline'").set_caption('Caption table 1')
df2_styler = df2.style.set_table_attributes("style='display:inline'").set_caption('Caption table 2')
display_html(df1_styler._repr_html_()+df2_styler._repr_html_(), raw=True)
Combining approaches of gibbone (to set styles and captions) and stevi (adding space) I made my version of function, which outputs pandas dataframes as tables side-by-side:
from IPython.core.display import display, HTML
def display_side_by_side(dfs:list, captions:list):
"""Display tables side by side to save vertical space
Input:
dfs: list of pandas.DataFrame
captions: list of table captions
"""
output = ""
combined = dict(zip(captions, dfs))
for caption, df in combined.items():
output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
output += "\xa0\xa0\xa0"
display(HTML(output))
Usage:
display_side_by_side([df1, df2, df3], ['caption1', 'caption2', 'caption3'])
Output:
My solution just builds a table in HTML without any CSS hacks and outputs it:
import pandas as pd
from IPython.display import display,HTML
def multi_column_df_display(list_dfs, cols=3):
html_table = "<table style='width:100%; border:0px'>{content}</table>"
html_row = "<tr style='border:0px'>{content}</tr>"
html_cell = "<td style='width:{width}%;vertical-align:top;border:0px'>{{content}}</td>"
html_cell = html_cell.format(width=100/cols)
cells = [ html_cell.format(content=df.to_html()) for df in list_dfs ]
cells += (cols - (len(list_dfs)%cols)) * [html_cell.format(content="")] # pad
rows = [ html_row.format(content="".join(cells[i:i+cols])) for i in range(0,len(cells),cols)]
display(HTML(html_table.format(content="".join(rows))))
list_dfs = []
list_dfs.append( pd.DataFrame(2*[{"x":"hello"}]) )
list_dfs.append( pd.DataFrame(2*[{"x":"world"}]) )
multi_column_df_display(2*list_dfs)
Here's another variation of the display_side_by_side() function introduced by #Anton Golubev that combines gibbone (to set styles and captions) and stevi (adding space), I added an extra argument to change spacing between tables at run-time.
from IPython.core.display import display, HTML
def display_side_by_side(dfs:list, captions:list, tablespacing=5):
"""Display tables side by side to save vertical space
Input:
dfs: list of pandas.DataFrame
captions: list of table captions
"""
output = ""
for (caption, df) in zip(captions, dfs):
output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
output += tablespacing * "\xa0"
display(HTML(output))
display_side_by_side([df1, df2, df3], ['caption1', 'caption2', 'caption3'])
The tablespacing=5 default argument value (shown = 5 here) determines the vertical spacing between tables.
This adds (optional) headers, index and Series support to #nts's answer:
from IPython.display import display_html
def mydisplay(dfs, names=[], index=False):
def to_df(x):
if isinstance(x, pd.Series):
return pd.DataFrame(x)
else:
return x
html_str = ''
if names:
html_str += ('<tr>' +
''.join(f'<td style="text-align:center">{name}</td>' for name in names) +
'</tr>')
html_str += ('<tr>' +
''.join(f'<td style="vertical-align:top"> {to_df(df).to_html(index=index)}</td>'
for df in dfs) +
'</tr>')
html_str = f'<table>{html_str}</table>'
html_str = html_str.replace('table','table style="display:inline"')
display_html(html_str, raw=True)
Here is Jake Vanderplas' solution I came across just the other day:
import numpy as np
import pandas as pd
class display(object):
"""Display HTML representation of multiple objects"""
template = """<div style="float: left; padding: 10px;">
<p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
</div>"""
def __init__(self, *args):
self.args = args
def _repr_html_(self):
return '\n'.join(self.template.format(a, eval(a)._repr_html_())
for a in self.args)
def __repr__(self):
return '\n\n'.join(a + '\n' + repr(eval(a))
for a in self.args)
Credit: https://github.com/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/03.08-Aggregation-and-Grouping.ipynb
#zarak code is pretty small but affects the layout of the whole notebook. Other options are a bit messy for me.
I've added some clear CSS to this answer affecting only current cell output. Also you are able to add anything below or above dataframes.
from ipywidgets import widgets, Layout
from IPython import display
import pandas as pd
import numpy as np
# sample data
df1 = pd.DataFrame(np.random.randn(8, 3))
df2 = pd.DataFrame(np.random.randn(8, 3))
# create output widgets
widget1 = widgets.Output()
widget2 = widgets.Output()
# render in output widgets
with widget1:
display.display(df1.style.set_caption('First dataframe'))
df1.info()
with widget2:
display.display(df2.style.set_caption('Second dataframe'))
df1.info()
# add some CSS styles to distribute free space
box_layout = Layout(display='flex',
flex_flow='row',
justify_content='space-around',
width='auto'
)
# create Horisontal Box container
hbox = widgets.HBox([widget1, widget2], layout=box_layout)
# render hbox
hbox
I ended up using HBOX
import ipywidgets as ipyw
def get_html_table(target_df, title):
df_style = target_df.style.set_table_attributes("style='border:2px solid;font-size:10px;margin:10px'").set_caption(title)
return df_style._repr_html_()
df_2_html_table = get_html_table(df_2, 'Data from Google Sheet')
df_4_html_table = get_html_table(df_4, 'Data from Jira')
ipyw.HBox((ipyw.HTML(df_2_html_table),ipyw.HTML(df_4_html_table)))
Gibbone's answer worked for me! If you want extra space between the tables go to the code he proposed and add this "\xa0\xa0\xa0" to the following code line.
display_html(df1_styler._repr_html_()+"\xa0\xa0\xa0"+df2_styler._repr_html_(), raw=True)
I decided to add some extra functionality to Yasin's elegant answer, where one can choose both the number of cols and rows; any extra dfs are then added to the bottom.
Additionally one can choose in which order to fill the grid (simply change fill keyword to 'cols' or 'rows' as needed)
import pandas as pd
from IPython.display import display,HTML
def grid_df_display(list_dfs, rows = 2, cols=3, fill = 'cols'):
html_table = "<table style='width:100%; border:0px'>{content}</table>"
html_row = "<tr style='border:0px'>{content}</tr>"
html_cell = "<td style='width:{width}%;vertical-align:top;border:0px'>{{content}}</td>"
html_cell = html_cell.format(width=100/cols)
cells = [ html_cell.format(content=df.to_html()) for df in list_dfs[:rows*cols] ]
cells += cols * [html_cell.format(content="")] # pad
if fill == 'rows': #fill in rows first (first row: 0,1,2,... col-1)
grid = [ html_row.format(content="".join(cells[i:i+cols])) for i in range(0,rows*cols,cols)]
if fill == 'cols': #fill columns first (first column: 0,1,2,..., rows-1)
grid = [ html_row.format(content="".join(cells[i:rows*cols:rows])) for i in range(0,rows)]
display(HTML(html_table.format(content="".join(grid))))
#add extra dfs to bottom
[display(list_dfs[i]) for i in range(rows*cols,len(list_dfs))]
list_dfs = []
list_dfs.extend((pd.DataFrame(2*[{"x":"hello"}]),
pd.DataFrame(2*[{"x":"world"}]),
pd.DataFrame(2*[{"x":"gdbye"}])))
grid_df_display(3*list_dfs)
test output
Extension of antony's answer If you want to limit de visualization of tables to some numer of blocks by row, use the maxTables variable.
def mydisplay(dfs, names=[]):
count = 0
maxTables = 6
if not names:
names = [x for x in range(len(dfs))]
html_str = ''
html_th = ''
html_td = ''
for df, name in zip(dfs, names):
if count <= (maxTables):
html_th += (''.join(f'<th style="text-align:center">{name}</th>'))
html_td += (''.join(f'<td style="vertical-align:top"> {df.to_html(index=False)}</td>'))
count += 1
else:
html_str += f'<tr>{html_th}</tr><tr>{html_td}</tr>'
html_th = f'<th style="text-align:center">{name}</th>'
html_td = f'<td style="vertical-align:top"> {df.to_html(index=False)}</td>'
count = 0
if count != 0:
html_str += f'<tr>{html_th}</tr><tr>{html_td}</tr>'
html_str += f'<table>{html_str}</table>'
html_str = html_str.replace('table','table style="display:inline"')
display_html(html_str, raw=True)