using ipywidgets SelectMultiple on a dataframe - dataframe

import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
# Sector labels; used below as the DataFrame index and as the widget options.
a = ['Banking', 'Auto', 'Life', 'Electric', 'Technology', 'Airlines',
'Healthcare']
# 7 rows x 4 columns (A-D) of random values drawn with np.random.randn.
df = pd.DataFrame(np.random.randn(7, 4), columns = list('ABCD'))
df.index = a
# Preview all seven rows.
df.head(7)
# Multi-select list offering one entry per index label of df.
dropdown = widgets.SelectMultiple(
options=df.index,
description='Sector',
disabled=False,
layout={'height':'100px', 'width':'40%'})
display(dropdown)
I want to create a function that filters the df by Sector. For example, if I select Airlines, Banking and Electric in the displayed dropdown, it should return a dataframe containing only the selected sectors.

Try something like this, I have used a global variable to demonstrate in this case, but I would normally wrap up the functionality in a class so you always have access to the filtered dataframe.
Rather than use interact I have used .observe on the Selection widget.
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
# Sector labels; used as the DataFrame index and as the widget options.
a = ['Banking', 'Auto', 'Life', 'Electric', 'Technology', 'Airlines',
     'Healthcare']
df = pd.DataFrame(np.random.randn(7, 4), columns=list('ABCD'), index=a)

# Result of the most recent selection; rebound by filter_dataframe.
filtered_df = None

dropdown = widgets.SelectMultiple(
    options=df.index,
    description='Sector',
    disabled=False,
    layout={'height': '100px', 'width': '40%'})

# Output area the callback renders into.
out = widgets.Output()


def filter_dataframe(widget):
    """Show the rows of ``df`` matching the current selection.

    Registered with ``dropdown.observe``. ``widget`` is the change
    notification handed to the callback; only its ``'new'`` entry (the
    newly selected labels) is read. The filtered frame is stored in the
    module-level ``filtered_df`` and displayed inside ``out``.
    """
    global filtered_df
    selection = list(widget['new'])
    # Look the rows up once and reuse the result for display and storage.
    filtered_df = df.loc[selection]
    with out:
        # Replace the previously shown frame instead of appending to it.
        clear_output()
        display(filtered_df)


# Only react to changes of the widget's `value` trait.
dropdown.observe(filter_dataframe, names='value')
display(dropdown)
display(out)

Related

Creating 3D scatter chart in Taipy

I was wondering how one would create a 3D scatter chart in Taipy.
I tried this code initially:
import pandas as pd
import numpy as np
from taipy import Gui
# 100 random integer points in [0, 100) with columns x, y, z.
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
# Random cluster id (0, 1 or 2) per point, stored in column 'cluster1'.
df['cluster1']=np.random.randint(0,3,100)
# NOTE(review): the chart definition below refers to `color=cluster`, while the
# column created above is named 'cluster1'.
my_page ="""
Creation of a 3-D chart:
<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|mode=markers|color=cluster|>
"""
Gui(page=my_page).run()
This does indeed display a 3D plot, but the colors (clusters) do not show up.
Any hint?
Yes, you need some massaging of your dataframes to do it.
Here's a sample code that achieves this:
import pandas as pd
import numpy as np
from taipy import Gui
# 100 random integer points in [0, 100) with a random cluster id (0, 1 or 2).
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 3)), columns=list('xyz'))
df['cluster1'] = np.random.randint(0, 3, 100)

# Create a list of 3 dataframes, one per cluster
datas = [df[df['cluster1'] == i] for i in range(3)]

# create dynamically the property list.
# str(i) points to a dataframe index
# "/x" points to the column value in the selected dataframe
properties = {}
for i in range(len(datas)):
    # Property indices are 1-based while the dataframe list is 0-based.
    properties[f"x[{i+1}]"] = str(i)+"/x"
    properties[f"y[{i+1}]"] = str(i)+"/y"
    properties[f"z[{i+1}]"] = str(i)+"/z"
    properties[f'name[{i+1}]'] = str(i+1)
print(properties)

chart = "<|{datas}|chart|type=Scatter3D|properties={properties}|mode=markers|height=800px|>"
Gui(page=chart).run()
In fact, with the new release: Taipy 1.1, this is very easy to do in a few lines of code:
import pandas as pd
import numpy as np
from taipy import Gui
# Explicit colour for each cluster id.
color_map = {0: "blue", 1: 'green', 2: "red"}

# 100 random integer points in [0, 100) with a random cluster id (0, 1 or 2).
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 3)), columns=list('xyz'))
df['cluster1'] = np.random.randint(0, 3, 100)

# Translate each cluster id to its colour name with a vectorised lookup
# rather than calling a Python lambda once per row.
df['cluster_colors'] = df['cluster1'].map(color_map)

# The marker colour is taken from the 'cluster_colors' column.
marker = {"color": "cluster_colors"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()
If you want to leave it to Taipy to pick the colors for you, then you can simply use:
import pandas as pd
import numpy as np
from taipy import Gui
# 100 random integer points in [0, 100) with columns x, y, z.
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
# Random cluster id (0, 1 or 2) per point.
df['cluster1'] = np.random.randint(0,3,100)
# Point the marker colour at the numeric cluster column itself; no explicit
# colour names are supplied here.
marker = {"color":"cluster1"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()

Plotly chart percentage with smileys

I would like to add a plot figure based on smileys, like this one:
The data will come from a pandas DataFrame: dataframe.value_counts(normalize=True)
Can someone give me some clues?
Use colorscale in the normal way for a heatmap.
Use annotation_text to assign an emoji to a value.
import plotly.figure_factory as ff
import plotly.graph_objects as go
import pandas as pd
import numpy as np
# 10 x 10 grid holding the values 0..99.
df = pd.DataFrame([[j*10+i for i in range(10)] for j in range(10)])

# Emoji per value band, happiest first: >75, >50, >25, >=0.
e = ["😃", "🙂", "😐", "☹️"]

fig = go.Figure(ff.create_annotated_heatmap(
    z=df.values, colorscale="rdylgn", reversescale=False,
    # np.select picks the emoji of the first matching condition per cell.
    # default="" keeps the choices and the fallback in one string dtype;
    # newer NumPy releases may reject string choices combined with the
    # integer default 0.
    annotation_text=np.select(
        [df.values > 75, df.values > 50, df.values > 25, df.values >= 0],
        e,
        default="",
    ),
))
fig.update_annotations(font_size=25)
# allows emoji to use background color
fig.update_annotations(opacity=0.7)
update coloured emoji
fundamentally you need emojicons that can accept colour styling
for this I switched to Font Awesome. This then also requires switching to dash, plotly's cousin so that external CSS can be used (to use FA)
then build a dash HTML table applying styling logic for picking emoticon and colour
from jupyter_dash import JupyterDash
import dash_html_components as html
import pandas as pd
import branca.colormap
# Load Data
# 10 x 10 grid holding the values 0..99.
df = pd.DataFrame([[j*10+i for i in range(10)] for j in range(10)])

# Font Awesome stylesheet, loaded as external CSS so the icon classes used in
# mysmiley are available.
external_stylesheets = [{
    'href': 'https://use.fontawesome.com/releases/v5.8.1/css/all.css',
    'rel': 'stylesheet', 'crossorigin': 'anonymous',
    'integrity': 'sha384-50oBUHEmvpQ+1lW4y57PTFmhCaXp0ML5d60M1M7uH2+nqUivzIebhndOJK28anvf',
}]

# possibly could use a different library for this - simple way to map a value to a colormap
cm = branca.colormap.LinearColormap(["red","yellow","green"], vmin=0, vmax=100, caption=None)


def mysmiley(v):
    """Return a Font Awesome face icon for value ``v``, coloured via ``cm``.

    Values are grouped into bands of 25: the lowest band gets the frown
    icon and the highest band the grin icon.
    """
    sm = ["far fa-grin", "far fa-smile", "far fa-meh", "far fa-frown"]
    # Clamp the band to 0..3 so that v == 100 (the colormap's vmax) maps to
    # the grin icon instead of wrapping around to index -1, and negative
    # values cannot index past the end of the list.
    band = min(max(int(v) // 25, 0), 3)
    return html.Span(className=sm[3 - band], style={"color":cm(v),"font-size": "2em"})


# Build App
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
# One table row per DataFrame row, one icon cell per value.
app.layout = html.Div([
    html.Table([html.Tr([html.Td(mysmiley(c)) for c in r]) for r in df.values])
])
# Run app and display result inline in the notebook
app.run_server(mode='inline')

In Pandas, how can a DataFrame be binned by two columns, with the other columns changed to the means within those bins?

I've got the standard iris dataset projected down to two dimensions using UMAP, with the UMAP dimensions for the x and y positions of the 2D plot added as columns to the dataframe:
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap # pip install umap-learn
iris = load_iris()
# One column per iris feature, named after iris.feature_names.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Translate the integer targets 0..2 into the matching entries of iris.target_names.
iris_df['species'] = pd.Series(iris.target).map(dict(zip(range(3), iris.target_names)))
# UMAP embedding of the feature matrix; its columns 0 and 1 are stored as the
# x and y plot coordinates.
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:,0]
iris_df['UMAP_y'] = _umap[:,1]
iris_df.head()
I'd like to bin both the UMAP_x and UMAP_y columns into like 25 bins and then the other columns in the dataframe change to being the mean values of the columns in each of the bins. How might this be done? It feels like cut or resampling might lead to the answer, but I'm not sure how.
You can use cut to define bins and then use groupby with transform to calculate mean value for each bin.
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap
iris = load_iris()
# One column per iris feature, named after iris.feature_names.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Translate the integer targets 0..2 into the matching entries of iris.target_names.
iris_df['species'] = pd.Series(iris.target).map(dict(zip(range(3), iris.target_names)))
# UMAP embedding of the feature matrix; columns 0 and 1 become x and y.
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:, 0]
iris_df['UMAP_y'] = _umap[:, 1]
# Define bins for UMAP_x and UMAP_y params
iris_df['UMAP_x_bin'] = pd.cut(iris_df['UMAP_x'], bins=25)
iris_df['UMAP_y_bin'] = pd.cut(iris_df['UMAP_y'], bins=25)
# Calculate mean value for each bin
# The bin columns are categorical; observed=True groups only by bins that
# actually contain rows and avoids pandas' warning about the changing default.
# transform() keeps the original row index, so the per-row results are the same.
iris_df['UMAP_x_mean'] = iris_df.groupby('UMAP_x_bin', observed=True)['UMAP_x'].transform('mean')
iris_df['UMAP_y_mean'] = iris_df.groupby('UMAP_y_bin', observed=True)['UMAP_y'].transform('mean')
iris_df.head()

Make a vbar with stdev from dataframes for mean and stdeviation

I have 2 data frames, one for the mean and one for the standard deviation, and I am trying to turn them into a bar chart in bokeh with error bars, but I am stuck on how to group by the 'Design' and 'Treatment' columns.
Basically, I am trying to get 3 bars per x-value (T0 to T2). The legend should show something like: 'mouse-yes', 'mouse-no', and 'cat-no'. How do I restructure the dict or dataframe to convert to something for vbar? And then how do I couple that with the stdev dataframe?
Also, is there a way to make the x_range in the figure automatically taking all original values from the 'Time' column? I'd like to be able to interchange the 'Time', 'Design', and 'Treatment' columns for the x-axis. I'm guessing this is where a pivot table comes in handy.
from bokeh.core.properties import value
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import dodge
output_file("dodged_bars.html")
import pandas as pd
# Rows of (Time, Design, Treatment, mean value).
dat_mean=[['T0','mouse','yes',25],['T0','mouse','no',24],['T0','cat','no',23],['T1','mouse','yes',15],['T1','mouse','no',14],['T1','cat','no',13],['T2','mouse','yes',5],['T2','mouse','no',4],['T2','cat','no',3]]
df_mean= pd.DataFrame(dat_mean,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])
# Rows of (Time, Design, Treatment, standard deviation).
# NOTE(review): the value column of df_std is also labelled "Mean for Cmpd1".
dat_std=[['T0','mouse','yes',5],['T0','mouse','no',5],['T0','cat','no',5],['T1','mouse','yes',2.5],['T1','mouse','no',2.5],['T1','cat','no',2.5],['T2','mouse','yes',1],['T2','mouse','no',1],['T2','cat','no',1]]
df_std= pd.DataFrame(dat_std,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])

# Column-oriented dict (column name -> list of values) for ColumnDataSource.
# A stray trailing '*' that made this line a syntax error has been removed.
data = df_mean.to_dict(orient='list')
dates = df_mean['Time'].tolist()
source = ColumnDataSource(data=data)

p = figure(x_range=['T0', 'T1', 'T2'], y_range=(0, 30), plot_height=250, title="Bokeh plot",
           toolbar_location=None, tools="")
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
# No glyphs are added in this snippet, so there is no legend content yet.
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
show(p)
from bokeh.core.properties import value
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import dodge
output_file("dodged_bars.html")
import pandas as pd
# Rows of (Time, Design, Treatment, mean value).
dat_mean=[['T0','mouse','yes',25],['T0','mouse','no',24],['T0','cat','no',23],['T1','mouse','yes',15],['T1','mouse','no',14],['T1','cat','no',13],['T2','mouse','yes',5],['T2','mouse','no',4],['T2','cat','no',3]]
df_mean= pd.DataFrame(dat_mean,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])
# Rows of (Time, Design, Treatment, standard deviation).
dat_std=[['T0','mouse','yes',5],['T0','mouse','no',5],['T0','cat','no',5],['T1','mouse','yes',2.5],['T1','mouse','no',2.5],['T1','cat','no',2.5],['T2','mouse','yes',1],['T2','mouse','no',1],['T2','cat','no',1]]
df_std= pd.DataFrame(dat_std,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])

# Distinct time labels in order of first appearance; used as x categories.
time_seq = df_mean['Time'].drop_duplicates()
time_vals = time_seq.tolist()

# {(design, treatment): [mean per row of that group, in row order]}
MEANs = df_mean.groupby(["Design", "Treatment"])["Mean for Cmpd1"].apply(list).to_dict()

# One data-source column / legend entry per group, e.g. "mouse_yes".
keys = [design + '_' + treatment for design, treatment in MEANs]

# Build the source dict for however many groups exist instead of
# hard-coding exactly three entries.
results = {'time_vals': time_vals}
results.update(zip(keys, MEANs.values()))
source = ColumnDataSource(data=results)

# Bar colours; the original referenced `color` without defining it.
color = ["#c9d9d3", "#718dbf", "#e84d60"]

p = figure(x_range=time_vals, y_range=(0, 30), plot_height=250, title="Bokeh plot",
           toolbar_location=None, tools="")

for hh, key in enumerate(keys):
    # Shift each group's bars sideways so they sit next to each other
    # within the same x category; reuse colours if there are more groups
    # than palette entries.
    p.vbar(x=dodge('time_vals', -0.25+.2*hh, range=p.x_range), top=key, width=0.2,
           source=source, color=color[hh % len(color)], legend=value(key))

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
show(p)
This code works. Errorbars can be done with add_layout and Whisker functions

pd.describe() does not work

from abupy import ABuSymbolPd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
tsla_df = ABuSymbolPd.make_kl_df('usTSLA', n_folds=8)
# Plot close and volume in separate subplots (red and green lines).
tsla_df[['close', 'volume']].plot(subplots=True, style=['r', 'g'],
                                  grid=True)
# Call form of print: valid on Python 3 and, with a single argument,
# on Python 2 as well.
print(tsla_df[['close', 'volume']])
plt.show()
tsla_df.info()
# NOTE(review): the result of describe() is neither printed nor assigned;
# outside an interactive session the value of a bare expression is discarded.
tsla_df.describe(include="all")
In the Python code above, I expected the last line to list the statistics of tsla_df, but it prints nothing and never gives any error message. Does anybody have any idea why?