Creating 3D scatter chart in Taipy - pandas

I was wondering how one would create a 3D scatter chart in Taipy.
I tried this code initially:
import pandas as pd
import numpy as np
from taipy import Gui
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
df['cluster1']=np.random.randint(0,3,100)
my_page ="""
Creation of a 3-D chart:
<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|mode=markers|color=cluster|>
"""
Gui(page=my_page).run()
This does indeed display a 3D plot, but the colors (clusters) do not show up.
Any hint?

Yes, you need some massaging of your dataframes to do it.
Here's a sample code that achieves this:
import pandas as pd
import numpy as np
from taipy import Gui
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
df['cluster1']=np.random.randint(0,3,100)
# Create a list of 3 dataframes, one per cluster
datas = [df[df['cluster1']==i] for i in range(3)]
properties = {
}
# create dynamically the property list.
# str(i) points to a dataframe index
# "/x" points to the column value in the selected dataframe
for i in range(len(datas)):
properties[f"x[{i+1}]"] = str(i)+"/x"
properties[f"y[{i+1}]"] = str(i)+"/y"
properties[f"z[{i+1}]"] = str(i)+"/z"
properties[f'name[{i+1}]'] = str(i+1)
print(properties)
chart = "<|{datas}|chart|type=Scatter3D|properties={properties}|mode=markers|height=800px|>"
Gui(page=chart).run()

In fact, with the new release: Taipy 1.1, this is very easy to do in a few lines of code:
import pandas as pd
import numpy as np
from taipy import Gui
color_map={0:"blue",1:'green', 2:"red"}
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
df['cluster1'] = np.random.randint(0,3,100)
df['cluster_colors'] = df.apply(lambda row: color_map[row.cluster1], axis=1)
marker = {"color":"cluster_colors"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()
If you want to leave it to Taipy to pick the colors for you, then you can simply use:
import pandas as pd
import numpy as np
from taipy import Gui
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
df['cluster1'] = np.random.randint(0,3,100)
marker = {"color":"cluster1"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()

Related

How make scatterplot in pandas readable

I've been playing with Titanic dataset and working through some visualisations in Pandas using this tutorial. https://www.kdnuggets.com/2023/02/5-pandas-plotting-functions-might-know.html
I have a visual of scatterplot having used this code.
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('train.csv')
I was confused by bootstrap plot result so went on to scatterplot.
pd.plotting.scatter_matrix(df, figsize=(10,10), )
plt.show()
I can sort of interpret it but I'd like to put the various variables at top and bottom of every column. Is that doable?
You can use:
fig, ax = plt.subplots(4, 3, figsize=(20, 15))
sns.scatterplot(x = 'bedrooms', y = 'price', data = dataset, whis=1.5, ax=ax[0, 0])
sns.scatterplot(x = 'bathrooms', y = 'price', data = dataset, whis=1.5, ax=ax[0, 1])

Building a histogram

How can a distribution histogram similar to this one be constructed based on the data from the table?
enter image description here
enter image description here
Code python:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_excel('Data.xlsx')
print(df)
df.plot.hist(df)
plt.show()
It isn't clear exactly what the x and y axes of your desired plot are. Hopefully this will get you started. Sometimes trying to comeup with a MRE will help you solve your own problem.
import random
import pandas as pd
import matplotlib.pyplot as plt
#######################################
# generate some random data for a MWE #
#######################################
random.seed(22)
data = [random.randint(0, 100) for _ in range(0, 10)]
data = pd.Series(sorted(data))
freqs = [random.uniform(0, 1) for _ in range(0, 10)]
freqs = sorted(freqs)
freqs = pd.Series(freqs)
df = pd.DataFrame()
df['data'] = data
df['frequencies'] = freqs
###############################################
# Desired bar plot using pandas built in plot #
###############################################
df.plot(x='data', y='frequencies', kind='bar')
plt.show()

How to wrap text in a dataframe's table (converted to .png)

I am having an issue where I cannot format my tables. The text is too long to just edit the dimensions or the text size. How can I quickly change this so you can see all the text when I have the data for each column more filled in? I am looking for a wrap text kind of function but I don't know if that is possible the way I'm doing it. Is there another way you'd recommend? I'm changing the table into a .png to insert into an Excel file. It has to be a .png so it's an object and doesn't mess with the size of the rows and columns in Excel.
import matplotlib.pyplot as plt
import xlsxwriter as xl
import numpy as np
import yfinance as yf
import pandas as pd
import datetime as dt
import mplfinance as mpf
import pandas_datareader
from pandas_datareader import data as pdr
yf.pdr_override()
import numpy as np
Individualreport = "C:\\Users\\Ashley\\FromPython.xlsx"
Ticklist = pd.read_excel("C:\\Users\\Ashley\\Eyes Trial Data Center.xlsx",sheet_name='Tickers', header=None)
stocks = Ticklist.values.ravel()
PipelineData = pd.read_excel("C:\\Users\\Ashley\\Eyes Trial Data Center.xlsx", sheet_name='Pipeline', header=None)
writer = pd.ExcelWriter(Individualreport, engine='xlsxwriter')
for i in stocks:
#write pipeline data
t = PipelineData.loc[(PipelineData[0]==i)]
print(t)
def render_mpl_table(data, col_width=10, row_height=1, font_size=10, wrap=True,
header_color='#40466e', row_colors=['#f1f1f2', 'w'], edge_color='w',
bbox=[0, 0, 1, 1], header_columns=0,
ax=None, **kwargs):
if ax is None:
size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
fig, ax = plt.subplots(figsize=size)
ax.axis('off')
mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
mpl_table.auto_set_font_size(False)
#mpl_table.set_fontsize(font_size)
for k, cell in mpl_table._cells.items():
cell.set_edgecolor(edge_color)
if k[0] == 0 or k[1] < header_columns:
cell.set_text_props(weight='bold', color='w')
cell.set_facecolor(header_color)
else:
cell.set_facecolor(row_colors[k[0]%len(row_colors) ])
return ax.get_figure(), ax
fig,ax = render_mpl_table(t, header_columns=0, col_width=2.0)
fig.savefig(str(i)+'pipe.png')
I think I needed to use an additional package, haven't tried with this example, but worked in another similar example I did.
from textwrap import wrap
label = ("label text that is getting put in the graph")
label = [ '\n'.join(wrap(l, 20)) for l in label ]
#20 is number of characters per line

using ipywidgets SelectMultiple on a dataframe

import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
a = ['Banking', 'Auto', 'Life', 'Electric', 'Technology', 'Airlines',
'Healthcare']
df = pd.DataFrame(np.random.randn(7, 4), columns = list('ABCD'))
df.index = a
df.head(7)
dropdown = widgets.SelectMultiple(
options=df.index,
description='Sector',
disabled=False,
layout={'height':'100px', 'width':'40%'})
display(dropdown)
I want to create a function where I can filter the df by Sector. i.e say I select Airlines, Banking and Electric from the display(dropdown) and it returns a dataframe of the selected sectors only.
Try something like this, I have used a global variable to demonstrate in this case, but I would normally wrap up the functionality in a class so you always have access to the filtered dataframe.
Rather than use interact I have used .observe on the Selection widget.
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
a = ['Banking', 'Auto', 'Life', 'Electric', 'Technology', 'Airlines',
'Healthcare']
df = pd.DataFrame(np.random.randn(7, 4), columns = list('ABCD'), index=a)
filtered_df = None
dropdown = widgets.SelectMultiple(
options=df.index,
description='Sector',
disabled=False,
layout={'height':'100px', 'width':'40%'})
def filter_dataframe(widget):
global filtered_df
selection = list(widget['new'])
with out:
clear_output()
display(df.loc[selection])
filtered_df = df.loc[selection]
out = widgets.Output()
dropdown.observe(filter_dataframe, names='value')
display(dropdown)
display(out)

Add a category without data in it to a plot in seaborn

I am making plotting some data as a catplot like this:
ax = sns.catplot(x='Kind', y='VAF', hue='Sample', jitter=True, data=df, legend=False)
The trouble is that some of the categories of 'VAF' contain no data, and the corresponding label is not added to the plot. Is there a way to retain the label but just not plot any points for it?
Here is a reproducible example to help explain:
x=pd.DataFrame({'Data':[1,3,4,6,3,2],'Number':['One','One','One','One','Three','Three']})
plt.figure()
ax = sns.catplot(x='Number', y='Data', jitter=True, data=x)
In this plot you can see that on the x-axis, samples One and Three are displayed. But imagine that there is also a sample Two that just had no data points in it. How can I display One, Two, and Three on the x-axis?
Order parameter
Of course one would need to know which categories are expected. Given a list of expected categories, one can use the order parameter to supply the expected categories.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.DataFrame({'Data':[1,3,4,6,3,2],
'Number':['One','One','One','One','Three','Three']})
exp_cats = ["One", "Two", "Three"]
ax = sns.stripplot(x='Number', y='Data', jitter=True, data=df, order=exp_cats)
plt.show()
Alternatives
The above works with matplotlib 2.2.3, but not with 3.0. It works again with the current development version (hence 3.1). For the moment, there are the following alternatives:
A. Looping over categories
Given a list of expected categories, one can just loop over them and plot a scatter of each category.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'Data':[1,3,4,6,3,2],
'Number':['One','One','One','One','Three','Three']})
exp_cats = ["One", "Two", "Three"]
for i, cat in enumerate(exp_cats):
cdf = df[df["Number"] == cat]
x = np.zeros(len(cdf))+i+.2*(np.random.rand(len(cdf))-0.5)
plt.scatter(x, cdf["Data"].values)
plt.xticks(range(len(exp_cats)), exp_cats)
plt.show()
B. Map categories to numbers.
You can map the expected categories to numbers and plot numbers instead of categories.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'Data':[1,3,4,6,3,2],
'Number':['One','One','One','One','Three','Three']})
exp_cats = ["One", "Two", "Three"]
df["IntNumber"] = df["Number"].map(dict(zip(exp_cats, range(len(exp_cats)))))
plt.scatter(df["IntNumber"] + .2*(np.random.rand(len(df))-0.5), df["Data"].values,
c = df["IntNumber"].values.astype(int))
plt.xticks(range(len(exp_cats)), exp_cats)
plt.show()
C. Appending missing categories to the dataframe
Finally you may append nan values to the dataframe to make sure each expected category appears in it.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.DataFrame({'Data':[1,3,4,6,3,2],
'Number':['One','One','One','One','Three','Three']})
exp_cats = ["One", "Two", "Three"]
dfa = df.append(pd.DataFrame({'Data':[np.nan]*len(exp_cats), 'Number':exp_cats}))
ax = sns.stripplot(x='Number', y='Data', jitter=True, data=dfa, order=exp_cats)
plt.show()