Why doesn't the class attribute update when I change the input value? Tkinter Python 3 - dataframe

I am trying to develop a simple application with tkinter and I want to create a class that performs a search in a dataframe, the search's value it is linked to the information typed in an entry that belongs to another class, however when I run the code, the value of the search is not modified when changing the value of the input. Here the code so far
from tkinter import *
import numpy as np
import pandas as pd
cat = {'N°_Cel': pd.Series(['3007441467', '3164992937', '3212400100', '3258172402', '3169995802'], index= [0, 1, 2, 3, 4]),
'Names': pd.Series(['Jairo Moreno', 'Laura Mesa', 'Camilo Baquero', 'Daniela Smith', 'Ximena Sterling'], index= [0, 1, 2, 3, 4]),
'Adress': pd.Series(['Cll 158 # 96A 25','Cll 132 # 100A 63', 'Cll 158 # 100A 63', 'Cll 148 # 97A 41', 'Cll 172A # 8 20'], index= [0, 1, 2, 3, 4]),
'Schedule': pd.Series(['N.A.', 'Evening.', 'N.A.', 'After 14', 'Morning'], index= [0, 1, 2, 3, 4]),
'Payment': pd.Series(['Credit', 'Cash', 'Credit', 'Credit', 'Cash'], index= [0, 1, 2, 3, 4])}
customer_cat = pd.DataFrame(cat)
class Search_Client():
def __init__(self, info, dataframe, colname):
self.info = info
self.dataframe = dataframe
self.colname = colname
def search(self):
if self.info == '':
result = ''
else:
result = self.dataframe.loc[self.dataframe[self.colname]==self.info]
if result.empty == True:
result = ''
else:
results = result
print(results)
class Order_Document():
def __init__(self, parent):
self.parent = parent
self.parent.geometry('')
self.parent.title('Order document')
self.cellphone_number = StringVar()
Label(self.parent, text= 'Order Document', bg= 'light gray', font= 'Verdana 13', relief= RIDGE).grid(row= 0, column= 0, columnspan= 3) # Se debe centrar al finalizar el diseño de la ventana
Label(self.parent, text= 'Cellphone Number: ', bg = 'white smoke', font= 'Arial 11 bold', justify= 'left').grid(row= 5, column= 0, sticky= 'we')
Entry(self.parent, textvariable= self.cellphone_number, font= 'Arial 11').grid(row=5, column= 1, sticky= 'we')
self.search_client = Search_Client(self.cellphone_number.get(), customer_cat, 'N°_Cel')
Button(self.parent, text= 'Buscar', command= self.search_client.search, bg= 'gold', font= 'Verdana 13').grid(row= 5, column= 3, sticky= 'we')
app = Tk()
ejec = Order_Document(app)
app.mainloop()
I don't know what I could be doing wrong, I hope you can help me.

Related

How can I create a legend for my scatter plot which matches the colours used in the plot?

I've created a scatter plot (actually two similar subplots) using matplotlib.pyplot which I'm using for stylometric text analysis. The code I'm using to make the plot is as follows:
import matplotlib.pyplot as plt
import numpy as np
clusters = 4
two_d_matrix = np.array([[0.00617068, -0.53451777], [-0.01837677, -0.47131886], ...])
my_labels = [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
fig, (plot1, plot2) = plt.subplots(1, 2, sharex=False, sharey=False, figsize=(20, 10))
plot1.axhline(0, color='#afafaf')
plot1.axvline(0, color='#afafaf')
for i in range(clusters):
try:
plot1.scatter(two_d_matrix[i:, 0], two_d_matrix[i:, 1], s=30, c=my_labels, cmap='viridis')
except (KeyError, ValueError) as e:
pass
plot1.legend(my_labels)
plot1.set_title("My First Plot")
plot2.axhline(0, color='#afafaf')
plot2.axvline(0, color='#afafaf')
for i in range(clusters):
try:
plot2.scatter(two_d_matrix[i:, 0], two_d_matrix[i:, 1], s=30, c=my_labels, cmap='viridis')
except (KeyError, ValueError) as e:
pass
plot2.legend(my_labels)
plot2.set_title("My Second Plot")
plt.show()
Because there are four distinct values in my_labels there are four colours which appear on the plot, these should correspond to the four clusters I expected to find.
The problem is that the legend only has three values, corresponding to the first three values in my_labels. It also appears that the legend isn't displaying a key for each colour, but for each of the axes and then for one of the colours. This means that the colours appearing in the plot are not matched to what appears in the legend, so the legend is inaccurate. I have no idea why this is happening.
Ideally, the legend should display one colour for each unique value in my_labels, so it should look like this:
How can I get the legend to accurately display all the values it should be showing, i.e. one for each colour which appears in the plot?
Before calling plot1.legend or plot2.legend, you can pass label = None to plot1.axhline or axvline (and similarly to plot2.axhline or plot2.axvline.) This will make sure it doesn't interfere with plotting legends of the scatter points and also not label those lines.
To get labels for all categories of scatter points, you'll have to call plot1.scatter or plot2.scatter by passing the label and choosing only values from two_d_matrix whose index matches with the index of label in my_labels.
You can do it as follows:
import matplotlib.pyplot as plt
import numpy as np
# Generate some (pseudo) random data which is reproducible
generator = np.random.default_rng(seed=121)
matrix = generator.uniform(size=(40, 2))
matrix = np.sort(matrix)
clusters = 4
my_labels = np.array([0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
fig, ax = plt.subplots(1, 1)
# Select data points wisely
for i in range(clusters):
pos = np.where(my_labels == i)
ax.scatter(matrix[pos, 0], matrix[pos, 1], s=30, cmap='viridis', label=i)
ax.axhline(0, color='#afafaf', label=None)
ax.axvline(0, color='#afafaf', label=None)
ax.legend()
ax.set_title("Expected output")
plt.show()
This gives:
Comparison of current output and expected output
Observe how data points selection (done inside the for loops in the code below) affects the output:
Code:
import matplotlib.pyplot as plt
import numpy as np
# Generate some (pseudo) random data which is reproducible
generator = np.random.default_rng(seed=121)
matrix = generator.uniform(size=(40, 2))
matrix = np.sort(matrix)
clusters = 4
my_labels = np.array([0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
fig, ax = plt.subplots(1, 2)
# Question plot
for i in range(clusters):
ax[0].scatter(matrix[i:, 0], matrix[i:, 1], s=30, cmap='viridis', label=i)
ax[0].axhline(0, color='#afafaf', label=None)
ax[0].axvline(0, color='#afafaf', label=None)
ax[0].legend()
ax[0].set_title("Current output (with label = None)")
# Answer plot
for i in range(clusters):
pos = np.where(my_labels == i) # <- choose index of data points based on label position in my_labels
ax[1].scatter(matrix[pos, 0], matrix[pos, 1], s=30, cmap='viridis', label=i)
ax[1].axhline(0, color='#afafaf', label=None)
ax[1].axvline(0, color='#afafaf', label=None)
ax[1].legend()
ax[1].set_title("Expected output")
plt.show()

append one list Index to another list Index of different length

Getting this error : line 341, in upload
lst3 = lst1.extend(lst2)
AttributeError: 'Index' object has no attribute 'extend'
{'Date': ['2_April','3_May','1_June','5_July', '3_April', '9_July'],
'State': ['BR', 'JH', 'HR', 'JH', 'BR', 'PB'],
'Blank': ['nan','nan','nan','nan','nan','nan'],
'District' : ['BS', 'GW', 'AM', 'RN', 'PB', 'GR'],
'nan': [Sub-dist, 0, 2, 2, 9, 8],
'nan': [Month, 1, 1, 2, 3, 4],
'nan': [Year, 2, 0, 0, 0, 0],
'nan': [Facility,2, 3, 4, 0, 0],
}
# Partial list of headers
lst1 = df_.columns[:18]
# converting first row to list and picking elements after 18th column
lst2 = df_.iloc[1, 18:].to_numpy()
lst2 = pd.Index(lst2)
# extending lst to lst2
lst3 = lst1.extend(lst2)
print(lst3)
# After merging two list Indexes above updating column of dataframe
df_.columns = lst3
# Partial list of headers
lst1 = df_.columns[:18].values
# converting first row to list
lst2 = df_.iloc[1, 18:].to_numpy()
lst2 = pd.Index(lst2).values
import numpy
lst3 = numpy.concatenate((lst1, lst2))
print(lst3)
df_.columns = lst3
df_ = df_[2:]
Use values to convert indices/series into a list, like this
lst2 = pd.Index(lst2).values

Select appropriate colors in stacked Seaborn barplot

I want to create a stacked barplot using Seaborn with this MiltiIndex DataFrame
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
The code I'm using for the plot is:
fontP = FontProperties()
fontP.set_size('medium')
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
kwargs = {'alpha':0.5}
plt.figure(figsize=(12, 9))
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[0]], '#'],
color=colors[df2.index[0][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[1]], '#'],
color=colors[df2.index[1][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[2]], '#'],
color=colors[df2.index[2][1]], **kwargs)
bottom_plot = sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[3]], '#'],
color=colors[df2.index[3][1]], **kwargs)
bar1 = plt.Rectangle((0, 0), 1, 1, fc='green', edgecolor="None")
bar2 = plt.Rectangle((0, 0), 0, 0, fc='yellow', edgecolor="None")
bar3 = plt.Rectangle((0, 0), 2, 2, fc='red', edgecolor="None")
bar4 = plt.Rectangle((0, 0), 3, 3, fc='blue', edgecolor="None")
l = plt.legend([bar1, bar2, bar3, bar4], [
"TE", "M",
'MR', 'SS'
],
bbox_to_anchor=(0.95, 1),
loc='upper left',
prop=fontP)
l.draw_frame(False)
sns.despine()
bottom_plot.set_ylabel("#")
axes = plt.gca()
axes.yaxis.grid()
And I get:
My problem is the order of the colors in the second bar ('TTo'), I want the colors to be automatically selected based on the level 1 index value (['TE', 'SS', 'M', 'MR']) so that they are ordered correctly. Further down the one with the highest value with its corresponding color, in front the next one with the next highest value and its color and so on, as the first bar shows ('JC).
Maybe there is a simpler way to do this in Seaborn than the one I'm using...
I'm not sure how to create such a plot with seaborn. Here is a way to create it with a loop through the rows and adding one matplotlib bar at each step:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
sns.set()
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
prev_index0 = None
for (index0, index1), quantity in df.itertuples():
if index0 != prev_index0:
bottom = 0
plt.bar(index0, quantity, fc=colors[index1], ec='none', bottom=bottom, label=index1)
bottom += quantity
prev_index0 = index0
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c) for c in colors]
plt.legend(handles=legend_handles)
plt.show()
To plot the bars back to front without stacking, the code can be simplified:
colors = {'TE': 'forestgreen', 'SS': 'cornflowerblue', 'M': 'gold', 'MR': 'crimson'}
for (index0, index1), quantity in df.itertuples():
plt.bar(index0, quantity, fc=colors[index1], ec='none', label=index1)
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c, ec='black') for c in colors]
plt.legend(handles=legend_handles, bbox_to_anchor=(1.02, 1.02), loc='upper left')
plt.tight_layout()

Use pandas cut function in Dask

How can I use pd.cut() in Dask?
Because of the large dataset, I am not able to put the whole dataset into memory before finishing the pd.cut().
Current code that is working in Pandas but needs to be changed to Dask:
import pandas as pd
d = {'name': [1, 5, 1, 10, 5, 1], 'amount': [1, 5, 3, 8, 4, 1]}
df = pd.DataFrame(data=d)
#Groupby name and add column sum (of amounts) and count (number of grouped rows)
df = (df.groupby('name')['amount'].agg(['sum', 'count']).reset_index().sort_values(by='name', ascending=True))
print(df.head(15))
#Groupby bins and chnage sum and count based on grouped rows
df = df.groupby(pd.cut(df['name'],
bins=[0,4,8,100],
labels=['namebin1', 'namebin2', 'namebin3']))['sum', 'count'].sum().reset_index()
print(df.head(15))
Output:
name sum count
0 namebin1 5 3
1 namebin2 9 2
2 namebin3 8 1
I tried:
import pandas as pd
import dask.dataframe as dd
d = {'name': [1, 5, 1, 10, 5, 1], 'amount': [1, 5, 3, 8, 4, 1]}
df = pd.DataFrame(data=d)
df = dd.from_pandas(df, npartitions=2)
df = df.groupby('name')['amount'].agg(['sum', 'count']).reset_index()
print(df.head(15))
df = df.groupby(df.map_partitions(pd.cut,
df['name'],
bins=[0,4,8,100],
labels=['namebin1', 'namebin2', 'namebin3']))['sum', 'count'].sum().reset_index()
print(df.head(15))
Gives error:
TypeError("cut() got multiple values for argument 'bins'",)
The reason why you're seeing this error is that pd.cut() is being called with the partition as the first argument which it doesn't expect (see the docs).
You can wrap it in a custom function and call that instead, like so:
import pandas as pd
import dask.dataframe as dd
def custom_cut(partition, bins, labels):
result = pd.cut(x=partition["name"], bins=bins, labels=labels)
return result
d = {'name': [1, 5, 1, 10, 5, 1], 'amount': [1, 5, 3, 8, 4, 1]}
df = pd.DataFrame(data=d)
df = dd.from_pandas(df, npartitions=2)
df = df.groupby('name')['amount'].agg(['sum', 'count']).reset_index()
df = df.groupby(df.map_partitions(custom_cut,
bins=[0,4,8,100],
labels=['namebin1', 'namebin2', 'namebin3']))[['sum', 'count']].sum().reset_index()
df.compute()
name sum count
namebin1 5 3
namebin2 9 2
namebin3 8 1

matplotlib advanced stacked bar

matplotlib plot bars
It can be regular like http://matplotlib.org/examples/api/barchart_demo.html
Let's define this as [M, F]
It can be stacked like http://matplotlib.org/examples/pylab_examples/bar_stacked.html
Let's define this as [M + F]
Now how to plot [M, F + other]
If I understand you correctly, you want to have a stack plot with more than two elements stacked? If yes, that goes pretty straight forward as in the example you posted:
#!/usr/bin/env python
# a stacked bar plot with errorbars
import numpy as np
import matplotlib.pyplot as plt
N = 5
menMeans = [20, 35, 30, 35, 27]
womenMeans = [25, 32, 34, 20, 25]
otherMeans = [5, 2, 4, 8, 5]
menStd = [2, 3, 4, 1, 2]
womenStd = [3, 5, 2, 3, 3]
otherStd = [1, 1, 1, 1, 1]
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, menMeans, width, color='r', yerr=womenStd)
p2 = plt.bar(ind, womenMeans, width, color='y',
bottom=menMeans, yerr=menStd)
p3 = plt.bar(ind, otherMeans, width, color='b',
bottom=[menMeans[j] + womenMeans[j] for j in range(len(menMeans)) ],
yerr=otherStd)
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind+width/2., ('G1', 'G2', 'G3', 'G4', 'G5') )
plt.yticks(np.arange(0,81,10))
plt.legend( (p1[0], p2[0], p3[0]), ('Men', 'Women', 'Other') )
plt.show()