Related
I want to build an interactive graph controlled by two widgets (a radio button and a selectbox), but I have two problems. First, the radio button (gender classification) has no effect: when I choose a gender, the graph stays the same. Second, I want to fill the selectbox with the values of the 'City' column, but I get a "DuplicateWidgetID" error message.
this is my code:
# Question code as posted: draws a product histogram inside an expander, with a
# gender radio button and a city selectbox.
with st.expander('Favorite product by Gender within city'):
column1, column2 = st.columns([3,1])
#Variables
#male_product = df[df['gender'] == 'Male'].groupby(['product_line','gender']).count()['quantity'].sort_values(ascending=False).reset_index()
#female_product = df[df['gender'] == 'Female'].groupby(['product_line','gender']).count()['quantity'].sort_values(ascending=False).reset_index()
#Callback
selected_gender = st.radio('What is your Gender:', ['Male', 'Female'], index = 0)
# NOTE: select_city is assigned but never read below, so the chosen city does
# not influence either figure.
select_city = column2.selectbox('Select City', df.sort_values('City').City.unique())
# NOTE: both figures are built from the same unfiltered df with identical
# arguments, so the radio selection only changes the caption, not the graph.
male_product=px.histogram(df.sort_values('product_line') ,x='product_line', y='gross_income', color = 'product_line',)
female_product=px.histogram(df.sort_values('product_line') ,x='product_line', y='gross_income', color = 'product_line',)
if selected_gender == 'Male':
st.write('What men buy most!')
st.plotly_chart(male_product, use_container_width=True)
else:
st.write('What female buy most!')
st.plotly_chart(female_product, use_container_width=True)
And the displayed graph is:
But an error occurs when I add "select_city" to the code, and a notification like this appears:
Thanks for your attention. Can someone help me?
This involves creating a dataframe filtered by both gender and city. I created some sample data to demonstrate the solution.
Code
import streamlit as st
import plotly.express as px
import pandas as pd
# Sample data used to demonstrate the gender and city filters.
data = {
    'City': ['c1', 'c2', 'c3', 'c1', 'c3', 'c2', 'c1'],
    'product_line': ['p1', 'p2', 'p3', 'p3', 'p2', 'p1', 'p4'],
    'quantity': [8, 4, 3, 12, 5, 6, 4],
    'gross_income': [250, 150, 300, 250, 300, 400, 500],
    'gender': ['Male', 'Female', 'Male', 'Male', 'Female', 'Female', 'Male'],
}
df = pd.DataFrame(data)
st.write(df)

with st.expander('Favorite product by Gender within city'):
    column1, column2 = st.columns([3, 1])

    # Allow the user to select a gender.
    selected_gender = st.radio('What is your Gender:', df.gender.unique(), index=0)

    # Apply gender filter.
    gender_product = df[df['gender'] == selected_gender]

    # Allow the user to select a city.
    select_city = column2.selectbox('Select City', df.sort_values('City').City.unique())

    # Apply city filter on top of the gender filter.
    city_gender_product = gender_product[gender_product['City'] == select_city]

    # Use the city_gender_product dataframe as it has filters for gender and city.
    fig = px.histogram(
        city_gender_product.sort_values('product_line'),
        x='product_line',
        y='gross_income',
        color='product_line',
    )

    # Only the caption depends on the gender; the single figure is drawn for
    # either choice because it is already built from the filtered data.
    if selected_gender == 'Male':
        st.write('What men buy most!')
    else:
        st.write('What female buy most!')
    st.plotly_chart(fig, use_container_width=True)
Output
1. Initial view or Male/city c1
2. Male/c3
3. Female/c2
I have this DataFrame to groupby key:
# Input frame: data1 mixes plain strings and lists of strings per row.
df = pd.DataFrame({
    'key': ['1', '1', '1', '2', '2', '3', '3', '4', '4', '5'],
    'data1': [['A', 'B', 'C'], 'D', 'P', 'E', ['F', 'G', 'H'], ['I', 'J'], ['K', 'L'], 'M', 'N', 'O'],
    'data2': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
})
df
I want to make the groupby key and sum data2, it's ok for this part.
But concerning data1, I want to :
If a list doesn't exist yet:
Single values don't change when key was not duplicated
Single values assigned to a key are combined into a new list
If a list already exist:
Other single values are append to it
Other lists values are append to it
The resulting DataFrame should then be :
# Expected result: one row per key, data1 values merged, data2 summed.
dfgood = pd.DataFrame({
    'key': ['1', '2', '3', '4', '5'],
    'data1': [['A', 'B', 'C', 'D', 'P'], ['F', 'G', 'H', 'E'], ['I', 'J', 'K', 'L'], ['M', 'N'], 'O'],
    'data2': [6, 9, 13, 17, 10],
})
dfgood
In fact, I don't really care about the order of data1 values into the lists, it could also be any structure that keep them together, even a string with separators or a set, if it's easier to make it go the way you think best to do this.
I thought about two solutions :
Going that way :
# First attempt (kept as posted): groups by key, sums data2 and tries to merge
# the data1 values of each group.
dfgood = df.groupby('key', as_index=False).agg({
# NOTE: x.iloc[1] is evaluated whenever the first value of the group is a list,
# including groups that contain a single row. list.append also returns None
# instead of the extended list, and only the second value is ever considered.
'data1' : lambda x: x.iloc[0].append(x.iloc[1]) if type(x.iloc[0])==list else list(x),
'data2' : sum,
})
dfgood
It doesn't work because of index out of range in x.iloc[1].
I also tried, because data1 was organized like this in another groupby from the question on this link:
# Second attempt: a group with a single row keeps its value unchanged; any
# other group has its values collected into a new list, so lists that were
# already present end up nested instead of being merged.
dfgood = df.groupby('key', as_index=False).agg({
    'data1': lambda g: g.iloc[0] if len(g) == 1 else list(g),
    'data2': sum,
})
dfgood
But it's creating new lists from preexisting lists or values and not appending data to already existing lists.
Another way to do it, but I think it's more complicated and there should be a better or faster solution :
Turning data1 lists and single values into individual series with apply,
use wide_to_long to keep single values for each key,
Then groupby applying :
# Group by key: a group with a single row keeps its value unchanged, any other
# group has its values collected into a new list; data2 is summed.
dfgood = df.groupby('key', as_index=False).agg({
    'data1': lambda g: g.iloc[0] if len(g) == 1 else list(g),
    'data2': sum,
})
dfgood
I think my problem is that I don't know how to use lambdas correctly and I try stupid things like x.iloc[1] in the previous example. I've looked at a lot of tutorial about lambdas, but it's still fuzzy in my mind.
The problem is the combination of lists with scalars. A possible solution is to first wrap the scalars in lists and then flatten them in groupby.agg:
# Wrap every scalar data1 value in a one-element list so that each value is a
# list, then concatenate the lists of each key.
dfgood = (
    df.assign(data1=df['data1'].apply(lambda y: y if isinstance(y, list) else [y]))
    .groupby('key', as_index=False)
    .agg({
        'data1': lambda lists: [item for sub in lists for item in sub],
        # The string alias is used instead of the builtin ``sum``, which newer
        # pandas versions warn about when it is passed to agg.
        'data2': 'sum',
    })
)
print(dfgood)
key data1 data2
0 1 [A, B, C, D, P] 6
1 2 [E, F, G, H] 9
2 3 [I, J, K, L] 13
3 4 [M, N] 17
4 5 [O] 10
Another idea is to use a flatten function that flattens only lists, not strings:
#https://stackoverflow.com/a/5286571/2901002
def flatten(foo):
    """Yield the leaf items of an arbitrarily nested iterable, depth-first.

    Any item exposing ``__iter__`` is descended into recursively. Strings and
    bytes are treated as atomic values and yielded unchanged, so text is
    never split into characters (or integers).
    """
    for x in foo:
        if hasattr(x, '__iter__') and not isinstance(x, (str, bytes)):
            yield from flatten(x)
        else:
            yield x
# Flatten the mixed list/scalar values of each key into one list; sum data2.
dfgood = df.groupby('key', as_index=False).agg({
    'data1': lambda x: list(flatten(x)),
    # The string alias is used instead of the builtin ``sum``, which newer
    # pandas versions warn about when it is passed to agg.
    'data2': 'sum',
})
You could explode to get individual rows, then aggregate again with groupby+agg after taking care of masking the duplicated values in data2 (to avoid summing duplicates):
# explode() repeats the original row label for every element of a list, so a
# repeated label marks an extra row whose data2 must not be summed again.
# Masking on the label (rather than on equal key/data2 pairs) keeps genuine
# equal data2 values that come from different original rows.
# NOTE: assumes df has a unique index before exploding (e.g. the default one).
(df.explode('data1')
   .assign(data2=lambda d: d['data2'].mask(d.index.duplicated(), 0))
   .groupby('key')
   .agg({'data1': list, 'data2': 'sum'})
)
output:
data1 data2
key
1 [A, B, C, D, P] 6
2 [E, F, G, H] 9
3 [I, J, K, L] 13
4 [M, N] 17
5 [O] 10
The data frame :
# One (customer, flag) pair per original row; flags are the strings
# 'true'/'false', not booleans.
_rows = [
    ('cust1', 'true'),
    ('cust1', 'true'),
    ('cust2', 'true'),
    ('cust1', 'false'),
    ('cust2', 'false'),
    ('cust1', 'false'),
    ('cust2', 'false'),
    ('cust2', 'true'),
    ('cust2', 'false'),
    ('cust1', 'true'),
]
df = pd.DataFrame({
    'A': [customer for customer, _ in _rows],
    'B': [flag for _, flag in _rows],
})
Output : ['cust2']
First get the counts with crosstab, then filter the index values by comparing the columns with boolean indexing; Series.gt is used for the greater-than comparison:
# Count how often each value of B occurs for every customer in A.
df1 = pd.crosstab(df['A'], df['B'])
print(df1)
# B      false  true
# A
# cust1      2     3
# cust2      3     2

# Keep the customers whose 'false' count is greater than their 'true' count.
c = df1.index[df1['false'].gt(df1['true'])].tolist()
# If the values in B are booleans rather than strings:
# c = df1.index[df1[False].gt(df1[True])].tolist()
print(c)
# ['cust2']
# Keep only the 'false' rows, count them per customer (the counts land in
# column 'B') and take the customer with the highest count.
# NOTE(review): this returns the customer with the most 'false' rows; it does
# not compare that count against the customer's 'true' rows.
df[df['B'] == 'false'].groupby(['A']).count().sort_values(by='B', ascending=False).index[0]
Explanation: keep only the rows whose value is 'false', group by 'A' and count. Then sort the counts in descending order and take the first index ('A') value.
This looks like a case of multi-indexing, so you can use the index to isolate the rows where the 'false' value is greater:
# Index labels of the rows whose 'false' value exceeds their 'true' value.
# The result is not named ``list`` so the builtin stays usable afterwards.
# NOTE(review): presumably ``dataframe`` is a table of counts per customer with
# 'false' and 'true' columns - confirm against the caller.
result = list(dataframe.index[dataframe['false'].gt(dataframe['true'])])
I want to apply this list of names ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] to my data frame, which is loaded from this URL https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv
I want to create a dataframe or list with these names.
Thanks
# Load the CSV from the URL, supplying the column labels explicitly via
# ``names``.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
column_names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = pd.read_csv(url, names=column_names)
If I set the format of the first column in a ListCtrl to align centre (or align right) nothing happens. It works for the other columns.
This only happens on Windows - I have tested it on Linux and it works fine.
Does anyone know if there is a work-round or other solution?
Here is an example based on code found at http://zetcode.com/wxpython/
import wx
import sys
# Rows shown in the list control: (name, place, year), all as strings.
packages = [
    ('jessica alba', 'pomona', '1981'),
    ('sigourney weaver', 'new york', '1949'),
    ('angelina jolie', 'los angeles', '1975'),
    ('natalie portman', 'jerusalem', '1981'),
    ('rachel weiss', 'london', '1971'),
    ('scarlett johansson', 'new york', '1984'),
]
class Actresses(wx.Frame):
    """Frame showing the ``packages`` rows in a three-column report list."""

    def __init__(self, parent, id, title):
        """Build the list control, fill it from ``packages`` and show the frame."""
        wx.Frame.__init__(self, parent, id, title, size=(380, 230))

        hbox = wx.BoxSizer(wx.HORIZONTAL)
        panel = wx.Panel(self, -1)

        self.list = wx.ListCtrl(panel, -1, style=wx.LC_REPORT)
        # Every column, including column 0, requests centred alignment.
        self.list.InsertColumn(0, 'name', wx.LIST_FORMAT_CENTRE, width=140)
        self.list.InsertColumn(1, 'place', wx.LIST_FORMAT_CENTRE, width=130)
        self.list.InsertColumn(2, 'year', wx.LIST_FORMAT_CENTRE, 90)

        for i in packages:
            # Append each row at the end of the list. The current item count
            # is used as the insertion index instead of sys.maxint, which
            # only exists on Python 2.
            index = self.list.InsertStringItem(self.list.GetItemCount(), i[0])
            self.list.SetStringItem(index, 1, i[1])
            self.list.SetStringItem(index, 2, i[2])

        hbox.Add(self.list, 1, wx.EXPAND)
        panel.SetSizer(hbox)

        self.Centre()
        self.Show(True)
# Create the application object, build the frame (it shows itself in its
# constructor) and enter the event loop.
app = wx.App()
Actresses(parent=None, id=wx.ID_ANY, title='actresses')
app.MainLoop()
I have found that this works (notice I start inserting the columns at 1 rather than 0):
# Columns are inserted starting at index 1 rather than 0.
self.list = wx.ListCtrl(panel, wx.ID_ANY, style=wx.LC_REPORT)
self.list.InsertColumn(1, 'name', wx.LIST_FORMAT_CENTRE, width=140)
self.list.InsertColumn(2, 'place', wx.LIST_FORMAT_CENTRE, width=130)
self.list.InsertColumn(3, 'year', wx.LIST_FORMAT_CENTRE, width=90)
Not sure why this works, but it does. Hopefully, there will be no repercussions from doing this.
Thanks to robots.jpg for inspiring the idea.
Windows definitely treats the first column differently. One workaround is to create an empty column 0 and hide it:
class Actresses(wx.Frame):
    """Frame whose list keeps an empty, zero-width column 0 before the data.

    The real data lives in columns 1-3, all of which request centred text.
    """

    def __init__(self, parent, id, title):
        """Build the list with a hidden first column and fill columns 1-3."""
        wx.Frame.__init__(self, parent, id, title, size=(380, 230))
        #...
        self.list = wx.ListCtrl(panel, -1, style=wx.LC_REPORT)
        # Empty placeholder column with zero width, so it is not visible.
        self.list.InsertColumn(0, '', width=0)
        self.list.InsertColumn(1, 'name', wx.LIST_FORMAT_CENTRE, width=140)
        self.list.InsertColumn(2, 'place', wx.LIST_FORMAT_CENTRE, width=130)
        self.list.InsertColumn(3, 'year', wx.LIST_FORMAT_CENTRE, width=90)

        for i in packages:
            # Column 0 gets an empty string; the current item count is used
            # as the insertion index instead of sys.maxint, which only
            # exists on Python 2.
            index = self.list.InsertStringItem(self.list.GetItemCount(), '')
            self.list.SetStringItem(index, 1, i[0])
            self.list.SetStringItem(index, 2, i[1])
            self.list.SetStringItem(index, 3, i[2])

        # catch resize event
        self.list.Bind(wx.EVT_LIST_COL_BEGIN_DRAG, self.OnColDrag)
        #...

    def OnColDrag(self, evt):
        """Veto the start of a drag on column 0 so it cannot be resized."""
        # GetColumn() is the accessor form of the m_col attribute.
        if evt.GetColumn() == 0:
            evt.Veto()
I can't think of any major side-effects from doing this, but let me know if I'm wrong. I guess GetItemText() or anything else that assumes there is useful data in the first column would no longer be useful.
Edit - added code to prevent resizing column 0.