I was wondering how one would create a 3D scatter chart in Taipy.
I tried this code initially:
import pandas as pd
import numpy as np
from taipy import Gui
# Sample data: 100 rows of random integers in [0, 100) for the x, y and z columns.
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
# Random cluster id (0, 1 or 2) per row, stored in the 'cluster1' column.
df['cluster1']=np.random.randint(0,3,100)
# Page definition: a Scatter3D chart bound to df.
# NOTE(review): the chart below asks for color=cluster, but the column created
# above is named 'cluster1' - confirm which name is intended.
my_page ="""
Creation of a 3-D chart:
<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|mode=markers|color=cluster|>
"""
# Build the Gui from the page and run it.
Gui(page=my_page).run()
This does indeed display a 3D plot, but the colors (clusters) do not show up.
Any hint?
Yes, you need some massaging of your dataframes to do it.
Here's a sample code that achieves this:
import pandas as pd
import numpy as np
from taipy import Gui
# Sample data: 100 random (x, y, z) points, each tagged with a cluster id in {0, 1, 2}.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 3)), columns=list('xyz'))
df['cluster1'] = np.random.randint(0, 3, 100)

# Create a list of 3 dataframes, one per cluster
datas = [df[df['cluster1'] == i] for i in range(3)]

# Create the property list dynamically, one x/y/z/name entry per dataframe.
# str(i) points to a dataframe index
# "/x" points to the column value in the selected dataframe
properties = {}
for i in range(len(datas)):
    n = i + 1  # the indexed property names are 1-based, the dataframe index is 0-based
    properties[f"x[{n}]"] = f"{i}/x"
    properties[f"y[{n}]"] = f"{i}/y"
    properties[f"z[{n}]"] = f"{i}/z"
    properties[f"name[{n}]"] = str(n)
# Show the generated mapping once it is complete.
print(properties)

chart = "<|{datas}|chart|type=Scatter3D|properties={properties}|mode=markers|height=800px|>"
Gui(page=chart).run()
In fact, with the new release: Taipy 1.1, this is very easy to do in a few lines of code:
import pandas as pd
import numpy as np
from taipy import Gui
# Fixed colour name for each cluster id.
color_map = {0: "blue", 1: "green", 2: "red"}

# 100 random points with integer coordinates, plus a random cluster id per point.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 3)), columns=["x", "y", "z"])
df['cluster1'] = np.random.randint(0, 3, 100)

# Translate each cluster id into its colour name.
df['cluster_colors'] = df['cluster1'].map(color_map)

# The marker settings name the dataframe column that holds the colours.
marker = {"color": "cluster_colors"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()
If you want to leave it to Taipy to pick the colors for you, then you can simply use:
import pandas as pd
import numpy as np
from taipy import Gui
# Sample data: 100 rows of random integers in [0, 100) for the x, y and z columns.
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
# Random cluster id (0, 1 or 2) per row.
df['cluster1'] = np.random.randint(0,3,100)
# Marker settings: the colour entry names the numeric 'cluster1' column directly,
# so no explicit colour names are supplied here.
marker = {"color":"cluster1"}
# Scatter3D chart bound to df; `df` and `marker` are referenced by name in the template.
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
# Build the Gui from the page and run it.
Gui(page=chart).run()
I would like to add a plot figure based on smileys, like this one:
The data will come from a pandas DataFrame: dataframe.value_counts(normalize=True)
Can someone give me some clues?
Use colorscale in the normal way for a heatmap.
Use annotation_text to assign an emoji to a value.
import plotly.figure_factory as ff
import plotly.graph_objects as go
import pandas as pd
import numpy as np
# 10x10 grid holding the values 0..99 (row j, column i -> j*10 + i).
df = pd.DataFrame([[j*10+i for i in range(10)] for j in range(10)])
# One emoji per value band, in the same order as the conditions passed to np.select below.
# NOTE(review): the original literals were mis-encoded. The last one decodes
# unambiguously to the frowning face; the first three are reconstructed - confirm.
e = ["😃", "🙂", "😐", "☹️"]
fig = go.Figure(ff.create_annotated_heatmap(
    z=df.values, colorscale="rdylgn", reversescale=False,
    # np.select uses the first condition that matches, giving the bands >75, >50, >25 and >=0.
    annotation_text=np.select([df.values>75, df.values>50, df.values>25, df.values>=0], e),
))
fig.update_annotations(font_size=25)
# allows emoji to use background color
fig.update_annotations(opacity=0.7)
update coloured emoji
Fundamentally, you need emoticons that can accept colour styling.
for this I switched to Font Awesome. This then also requires switching to dash, plotly's cousin so that external CSS can be used (to use FA)
then build a dash HTML table applying styling logic for picking emoticon and colour
from jupyter_dash import JupyterDash
import dash_html_components as html
import pandas as pd
import branca.colormap
# Load Data
# 10x10 grid holding the values 0..99 (row j, column i -> j*10 + i).
df = pd.DataFrame([[j*10+i for i in range(10)] for j in range(10)])
# Font Awesome stylesheet, later handed to the app so the icon CSS classes are available.
external_stylesheets = [{
'href': 'https://use.fontawesome.com/releases/v5.8.1/css/all.css',
'rel': 'stylesheet', 'crossorigin': 'anonymous',
'integrity': 'sha384-50oBUHEmvpQ+1lW4y57PTFmhCaXp0ML5d60M1M7uH2+nqUivzIebhndOJK28anvf',
}]
# possibly could use a different library for this - simple way to map a value to a colormap
# Red -> yellow -> green colormap over the range 0..100.
cm = branca.colormap.LinearColormap(["red","yellow","green"], vmin=0, vmax=100, caption=None)
def mysmiley(v):
    """Return a Font Awesome face icon for the value *v*.

    The icon class is chosen per band of 25 (frown for the lowest band up to
    grin for the highest) and the icon colour is ``cm(v)``.
    """
    sm = ["far fa-grin", "far fa-smile", "far fa-meh", "far fa-frown"]
    # Clamp the band to 0..3: without this, v == 100 yields index -1 and wraps
    # round to the frown, and negative values index past the end of the list.
    band = min(max(int(v // 25), 0), 3)
    return html.Span(className=sm[3 - band], style={"color": cm(v), "font-size": "2em"})
# Build App
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# One table row per dataframe row, one smiley cell per value in that row.
table_rows = [
    html.Tr([html.Td(mysmiley(value)) for value in row])
    for row in df.values
]
app.layout = html.Div([html.Table(table_rows)])

# Run app and display result inline in the notebook
app.run_server(mode='inline')
I've got the standard iris dataset projected down to two dimensions using UMAP, with the UMAP dimensions for the x and y positions of the 2D plot added as columns to the dataframe:
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap # pip install umap-learn
# Load iris into a dataframe with one column per feature.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Replace the integer target codes with the matching species names.
species_by_code = dict(enumerate(iris.target_names))
iris_df['species'] = pd.Series(iris.target).map(species_by_code)

# Fit UMAP on the raw features and keep the first two embedding columns.
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'], iris_df['UMAP_y'] = _umap[:, 0], _umap[:, 1]
iris_df.head()
I'd like to bin both the UMAP_x and UMAP_y columns into like 25 bins and then the other columns in the dataframe change to being the mean values of the columns in each of the bins. How might this be done? It feels like cut or resampling might lead to the answer, but I'm not sure how.
You can use cut to define bins and then use groupby with transform to calculate mean value for each bin.
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap
# Load iris into a dataframe with one column per feature.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Replace the integer target codes with the matching species names.
species_by_code = dict(enumerate(iris.target_names))
iris_df['species'] = pd.Series(iris.target).map(species_by_code)

# Fit UMAP on the raw features and keep the first two embedding columns.
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'], iris_df['UMAP_y'] = _umap[:, 0], _umap[:, 1]

# Define bins for UMAP_x and UMAP_y params (25 equal-width intervals per coordinate).
for axis in ('UMAP_x', 'UMAP_y'):
    iris_df[f'{axis}_bin'] = pd.cut(iris_df[axis], bins=25)

# Calculate mean value for each bin: every row receives the mean of its own
# coordinate over the rows that share its bin.
for axis in ('UMAP_x', 'UMAP_y'):
    iris_df[f'{axis}_mean'] = iris_df.groupby(f'{axis}_bin')[axis].transform('mean')
iris_df.head()
I have two data frames, one for the mean and one for the standard deviation, and I am trying to turn them into a bar chart in Bokeh with error bars, but I am stuck on how to "groupby" the 'Design' and 'Treatment' columns.
Basically, I am trying to get 3 bars per x-value (T0 to T2). The legend should show something like: 'mouse-yes', 'mouse-no', and 'cat-no'. How do I restructure the dict or dataframe to convert to something for vbar? And then how do I couple that with the stdev dataframe?
Also, is there a way to make the x_range in the figure automatically take all the original values from the 'Time' column? I'd like to be able to interchange the 'Time', 'Design', and 'Treatment' columns for the x-axis. I'm guessing this is where a pivot table comes in handy.
from bokeh.core.properties import value
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import dodge
output_file("dodged_bars.html")
import pandas as pd

# Long-format input: one row per (Time, Design, Treatment) combination with its value.
dat_mean=[['T0','mouse','yes',25],['T0','mouse','no',24],['T0','cat','no',23],['T1','mouse','yes',15],['T1','mouse','no',14],['T1','cat','no',13],['T2','mouse','yes',5],['T2','mouse','no',4],['T2','cat','no',3]]
df_mean= pd.DataFrame(dat_mean,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])
dat_std=[['T0','mouse','yes',5],['T0','mouse','no',5],['T0','cat','no',5],['T1','mouse','yes',2.5],['T1','mouse','no',2.5],['T1','cat','no',2.5],['T2','mouse','yes',1],['T2','mouse','no',1],['T2','cat','no',1]]
df_std= pd.DataFrame(dat_std,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])

# Column-oriented dict of the mean dataframe (a stray trailing '*' was removed here;
# it made the line a syntax error).
data = df_mean.to_dict(orient='list')
dates = df_mean['Time'].tolist()
source = ColumnDataSource(data=data)

# Empty categorical figure: no glyphs are added to it in this snippet.
p = figure(x_range=['T0', 'T1', 'T2'], y_range=(0, 30), plot_height=250, title="Bokeh plot",
           toolbar_location=None, tools="")
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
show(p)
from bokeh.core.properties import value
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import dodge
output_file("dodged_bars.html")
import pandas as pd

# Long-format input: one row per (Time, Design, Treatment) combination with its value.
dat_mean=[['T0','mouse','yes',25],['T0','mouse','no',24],['T0','cat','no',23],['T1','mouse','yes',15],['T1','mouse','no',14],['T1','cat','no',13],['T2','mouse','yes',5],['T2','mouse','no',4],['T2','cat','no',3]]
df_mean= pd.DataFrame(dat_mean,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])
dat_std=[['T0','mouse','yes',5],['T0','mouse','no',5],['T0','cat','no',5],['T1','mouse','yes',2.5],['T1','mouse','no',2.5],['T1','cat','no',2.5],['T2','mouse','yes',1],['T2','mouse','no',1],['T2','cat','no',1]]
df_std= pd.DataFrame(dat_std,columns = ["Time", "Design", "Treatment", "Mean for Cmpd1"])

# x-axis categories: the distinct Time values, in first-seen order.
time_seq = df_mean['Time'].drop_duplicates()
time_vals = time_seq.tolist()

# For each (Design, Treatment) pair, the list of its means in row order.
MEANs = df_mean.groupby(["Design", "Treatment"])["Mean for Cmpd1"].apply(list).to_dict()

# One data column per pair, named "<Design>_<Treatment>", for however many pairs exist.
series = {f"{design}_{treatment}": means for (design, treatment), means in MEANs.items()}
keys = list(series)
results = {'time_vals': time_vals, **series}
source = ColumnDataSource(data=results)

# Bar colours (the original snippet indexed a `color` list that was never defined);
# the palette is reused cyclically if there are more series than colours.
colors = ["#c9d9d3", "#718dbf", "#e84d60"]

# NOTE(review): `plot_height=` and `legend=value(...)` are kept to match the Bokeh
# version this file targets; newer releases use `height=` and `legend_label=` - confirm.
p = figure(x_range=time_vals, y_range=(0, 30), plot_height=250, title="Bokeh plot",
           toolbar_location=None, tools="")
for hh, key in enumerate(keys):
    # Shift each series sideways by 0.2 so the bars of one time point sit side by side.
    p.vbar(x=dodge('time_vals', -0.25 + .2*hh, range=p.x_range), top=key, width=0.2,
           source=source, color=colors[hh % len(colors)], legend=value(key))
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
show(p)
This code works. Error bars can be added with add_layout and a Whisker annotation.