Groupby value_counts giving keyerror - pandas

I am trying to plot the countries whose scale has changed over time.
This is the dataset I am using: 'https://www.kaggle.com/datasets/whenamancodes/the-global-hunger-index'
# Load the child-wasting CSV from the Kaggle input directory.
wasting = pd.read_csv('/kaggle/input/the-global-hunger-index/share-of-children-with-a-weight-too-low-for-their-height-wasting.csv')
# Shorten the long indicator column name to 'Wasting'.
wasting.rename(columns={'Prevalence of wasting, weight for height (% of children under 5)':'Wasting'},inplace=True)
# Bucket 'Wasting' into five labelled bands with pd.cut; right=False makes
# every bin include its left edge and exclude its right edge.
bins = [0,9.9,19.99,34.99,49.99,np.inf]
labels = ['Low','Moderate','Serious','Alarming','Extremely Alarming']
wasting['W_Scale'] = pd.cut(wasting['Wasting'],bins=bins,labels=labels,right=False).astype('category')
wasting.head()
wasting.isna().sum()
# Keep only the entities that have more than one distinct W_Scale value.
wasting_entity_scale = wasting.groupby('Entity').filter(lambda x: x['W_Scale'].nunique()>1)
# Count the occurrences of each W_Scale value per (Year, Entity) pair.
# NOTE(review): the plotting code further down refers to the counted level as
# 'level_2'; the column name produced here may depend on the pandas version — confirm.
wasting_entity_scale = wasting_entity_scale.groupby(['Year','Entity'])['W_Scale'].value_counts().reset_index(name='count')
# Drop the rows whose count is zero, then renumber the index.
wasting_entity_scale = wasting_entity_scale[wasting_entity_scale['count']>0]
wasting_entity_scale = wasting_entity_scale.reset_index(drop=True)
# Until this point everything is fine.
# Plotly traces of each entity, keyed by entity name.
traces = {}
for i, (loc, d) in enumerate(wasting_entity_scale.groupby("Entity")):
# Use meta so that we know which country a trace belongs to; only the traces
# of the first entity (i == 0) start out visible.
# NOTE(review): if "level_2" is a categorical column, categories that never
# occur in this entity's rows may be what raises the reported
# KeyError: 'Serious' — confirm (e.g. by converting the column to str first).
fig = px.histogram(
d, x="Year", y="Entity", color="level_2"
).update_traces(meta=loc, visible=(i == 0))
traces[loc] = fig.data
l = fig.layout
# Integrate all the traces into one figure, reusing the layout stored in `l`.
fig = go.Figure([t for a in traces.values() for t in a]).update_layout(l)
# Now build the menu, using meta to know which traces should be visible per country:
# one "update" button per entity that shows only that entity's traces and
# sets the title to the entity name.
fig.update_layout(
updatemenus=[
{
"active": 0,
"buttons": [
{
"label": c,
"method": "update",
"args": [
{"visible": [t.meta == c for t in fig.data]},
{"title": c},
],
}
for c in traces.keys()
],
}
]
)
When I try to plot it, I get this error:
KeyError: 'Serious'
Can someone please explain what I am doing wrong?
Thank you.

Related

How to include a matplotlib graph for an interactive dashboard?

I want to include a line chart (constructed with matplotlib) in an interactive dashboard. My graph describes the evolution for one year of the frequency of the word "France" in 7 media for Central Africa. The database is called: "df_france_pivot".
What I've seen so far is that I first have to transform my plot into an object with the go.Figure function. So I tried this code:
`app = dash.Dash()
def update_graph():
# Draw one matplotlib line per column of df_france_pivot (all columns except
# 'month_year') against 'month_year', then return go.Figure(fig).
plt.style.use('seaborn-darkgrid')
fig, ax = plt.subplots()
# NOTE(review): these colour strings have no leading '#'; confirm matplotlib
# accepts them as hex colours.
ax.set_prop_cycle(color=['304558', 'FE9235', '526683', 'FE574B', 'FFD104', '6BDF9C'])
# NOTE(review): num is incremented in the loop but never read in the code shown.
num=0
for column in df_france_pivot.drop('month_year', axis=1):
num+=1
plt.plot(df_france_pivot['month_year'], df_france_pivot[column], marker='',
linewidth=1, alpha=0.9, label=column)
plt.xticks(rotation=45)
plt.legend(loc=0, prop={'size': 9},bbox_to_anchor=(1.05, 1.0), title='Media in South Africa')
plt.title("Frequency of the word 'France' in the media ", loc='left', fontsize=12, fontweight=0, color='orange')
plt.xlabel("Time")
plt.ylabel("Percentage")
# NOTE(review): fig is a matplotlib Figure here; go.Figure may not accept it
# directly — confirm against the plotly documentation before relying on this.
figure = go.Figure(fig)
return figure
# Page layout: a centred H1 heading followed by the graph built by update_graph().
app.layout = html.Div(id = 'parent', children = [
html.H1(id = 'H1', children = 'Styling using html components', style = {'textAlign':'center',\
'marginTop':40,'marginBottom':40}),
dcc.Graph(id = 'line_plot', figure = update_graph())
]
)`
When I run it, I get this response: "Output exceeds the size limit. Open the full output data in a text editor." Is it because my line chart is more complex, i.e. it has 7 lines?
Thank you in advance!

Handling errors within loops through exceptions

I wrote my first Python program to read a temperature sensor and write the readings to InfluxDB.
Occasionally the temperature sensor gives the error "IndexError: list index out of range" and the loop ends.
I want the loop to wait 15 seconds on this error and then continue (the sensor usually corrects itself by the next read).
My code:
import os
import glob
import time
import urllib
import urllib2
import httplib
import json
from influxdb import InfluxDBClient
# Connect to the InfluxDB server and select the database to write to.
client = InfluxDBClient(host='192.168.1.7', port=8086)
#client.get_list_database()
client.switch_database('influxdb1')
# Run modprobe for the w1-gpio and w1-therm modules.
os.system('modprobe w1-gpio')
os.system('modprobe w1-therm')
# Use the first directory under base_dir whose name starts with '28'.
base_dir = '/sys/devices/w1_bus_master1/'
device_folder = glob.glob(base_dir + '28*')[0]
# Main loop: read the sensor, write one point, wait 60 seconds, repeat.
while True:
device_file = device_folder + '/w1_slave'
def read_temp_raw():
# Return all lines of the sensor's w1_slave file.
f = open(device_file, 'r')
lines = f.readlines()
f.close()
return lines
def read_temp():
# Re-read every 0.2 s until the first line ends in 'YES', then parse the
# number after 't=' on the second line and return it divided by 1000.
# NOTE(review): lines[0] / lines[1] raise IndexError when the file yields
# fewer lines — presumably the intermittent error described above; confirm.
lines = read_temp_raw()
while lines[0].strip()[-3:] != 'YES':
time.sleep(0.2)
lines = read_temp_raw()
equals_pos = lines[1].find('t=')
if equals_pos != -1:
temp_string = lines[1][equals_pos+2:]
temp_c = float(temp_string) / 1000.0
return temp_c
temp = float(read_temp())
# Single point carrying the reading in the "outside_temp" field.
json_body = [
{
"measurement": "YOUR_MEASUREMENT",
"tags": {
"Device": "YOUR_DEVICE",
"ID": "YOUR_ID"
},
"fields": {
"outside_temp": temp,
}
}
]
client.write_points(json_body)
time.sleep(60)
******************************************************
which works ok :)
When I edit the code to catch the exception.....
******************************************************
# Attempted variant of the loop that should wait 15 seconds after an IndexError.
while True:
# NOTE: this `except` has no matching `try:` before it, which is what the
# SyntaxError reported below points at.
except IndexError:
time.sleep(15)
continue
device_file = device_folder + '/w1_slave' # store the details
def read_temp_raw():
# Return all lines of the sensor's w1_slave file.
f = open(device_file, 'r')
lines = f.readlines() # read the device details
f.close()
return lines
def read_temp():
# Re-read every 0.2 s until the first line ends in 'YES', then parse the
# number after 't=' on the second line and return it divided by 1000.
lines = read_temp_raw()
while lines[0].strip()[-3:] != 'YES':
time.sleep(0.2)
lines = read_temp_raw()
equals_pos = lines[1].find('t=')
if equals_pos != -1:
temp_string = lines[1][equals_pos+2:]
temp_c = float(temp_string) / 1000.0
return temp_c
temp = float(read_temp())
# Single point carrying the reading in the "outside_temp" field.
json_body = [
{
"measurement": "YOUR_MEASUREMENT",
"tags": {
"Device": "YOUR_DEVICE",
"ID": "YOUR_ID"
},
"fields": {
"outside_temp": temp,
}
}
]
client.write_points(json_body)
time.sleep(60)
************************************************************
I get following error...
File "temptoinfluxdb2.py", line 22
except IndexError:
^
SyntaxError: invalid syntax
Where am i going wrong please?
You will always need to use the except block in combination with a try block.
So the code in the try block is executed until an exception (in that case IndexError) occurs.
try:
    # Execution block
    ...
except IndexError:
    # Error handling
    ...
You could also use a more general approach with except Exception as e, which catches not just the IndexError but any exception.
Check the official documentation for further information.

How to Plot in 3D Principal Component Analysis Visualizations, using the fast PCA script from this answer

I found this fast script here on Stack Overflow for performing PCA on a given numpy array.
I don't know how to plot the result in 3D, and also how to plot the Cumulative Explained Variances and the Number of Components in 3D. This fast script uses the covariance method rather than singular value decomposition; maybe that's the reason why I can't get my Cumulative Variances?
I tried plotting with this, but it doesn't work.
This is the code and my output:
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show
def cov(X):
    """
    Covariance matrix
    note: specifically for mean-centered data
    note: numpy's `cov` uses N-1 as normalization

    Returns dot(X.T, X) divided by the number of rows of X.
    """
    return dot(X.T, X) / X.shape[0]
    # Equivalent explicit loop, kept for reference:
    # N = data.shape[1]
    # C = empty((N, N))
    # for j in range(N):
    #   C[j, j] = mean(data[:, j] * data[:, j])
    #   for k in range(j + 1, N):
    #       C[j, k] = C[k, j] = mean(data[:, j] * data[:, k])
    # return C


def pca(data, pc_count = None):
    """
    Principal component analysis using eigenvalues
    note: this mean-centers and auto-scales the data (in-place)

    data: 2-D array with one observation per row; it is modified in place.
    pc_count: number of leading components to keep; None keeps all of them.

    Returns (U, E, V): the data projected onto the kept eigenvectors, the
    kept eigenvalues from largest to smallest, and the matching eigenvectors
    as columns. The same values are also printed.
    """
    data -= mean(data, 0)
    # A constant column has zero standard deviation; dividing by it would fill
    # the column with NaN, so such columns are left at zero instead.
    scale = std(data, 0)
    scale[scale == 0] = 1.0
    data /= scale
    C = cov(data)
    E, V = eigh(C)
    # key holds the positions of the eigenvalues ordered from largest to
    # smallest, cut down to the first pc_count entries.
    key = argsort(E)[::-1][:pc_count]
    E, V = E[key], V[:, key]
    U = dot(data, V)
    print(f'Eigen Values: {E}')
    print(f'Eigen Vectors: {V}')
    print(f'Key: {key}')
    print(f'U: {U}')
    print(f'shape: {U.shape}')
    return U, E, V
# NOTE(review): the trailing comment says "convert to numpy", but only
# .transpose() is called here — confirm that data really is an array.
data = dftransformed.transpose() # df transpose and convert to numpy
# [0] selects U, the first element of the tuple returned by pca.
trans = pca(data, 3)[0]
# Left axes: first two columns of data; right axes: first two columns of trans.
# Rows before index 50 are drawn in red, the remaining rows in blue.
fig, (ax1, ax2) = subplots(1, 2)
ax1.scatter(data[:50, 0], data[:50, 1], c = 'r')
ax1.scatter(data[50:, 0], data[50:, 1], c = 'b')
ax2.scatter(trans[:50, 0], trans[:50, 1], c = 'r')
ax2.scatter(trans[50:, 0], trans[50:, 1], c = 'b')
show()
I understand the eigenvalues and eigenvectors, but I can't understand this key value; the user didn't comment that section of the code in the answer. Does anyone know what each printed variable means?
output:
Eigen Values: [126.30390621 68.48966957 26.03124927]
Eigen Vectors: [[-0.05998409 0.05852607 -0.03437937]
[ 0.00807487 0.00157143 -0.12352761]
[-0.00341751 0.03819162 0.08697668]
...
[-0.0210582 0.06601974 -0.04013712]
[-0.03558994 0.02953385 0.01885872]
[-0.06728424 -0.04162485 -0.01508154]]
Key: [439 438 437]
U: [[-12.70954048 8.97405411 -2.79812235]
[ -4.90853527 4.36517107 0.54129243]
[ -2.49370123 0.48341147 7.26682759]
[-16.07860635 6.16100749 5.81777637]
[ -1.81893291 6.48443689 -5.8655646 ]
[ 9.03939039 2.64196391 4.22056618]
[-14.71731064 9.19532016 -2.79275543]
[ 1.60998654 8.37866823 0.86207034]
[ -4.4503797 10.12688097 -5.12453656]
[ 12.16293556 2.2594413 -2.11730311]
[-15.76505125 9.48537581 -2.73906772]
[ -2.54289959 9.86768111 -4.84802992]
[ -5.78214902 9.21901651 -8.13594627]
[ -1.35428398 5.85550586 6.30553987]
[ 12.87261987 0.96283606 -3.26982121]
[ 24.57767477 -4.28214631 6.29510659]
[ 4.13941679 3.3688288 3.01194055]
[ -2.98318764 1.32775227 7.62610929]
[ -4.44461549 -1.49258339 1.39080386]
[ -0.10590795 -0.3313904 8.46363066]
[ 6.05960739 1.03091753 5.10875657]
[-21.27737352 -3.44453629 3.25115921]
[ -1.1183025 0.55238687 10.75611405]
[-10.6359291 7.58630341 -0.55088259]
[ 4.52557492 -8.05670864 2.23113833]
[-11.07822559 1.50970501 4.66555889]
[ -6.89542628 -19.24672805 -3.71322812]
[ -0.57831362 -17.84956249 -5.52002876]
[-12.70262277 -14.05542691 -2.72417438]
[ -7.50263129 -15.83723295 -3.2635125 ]
[ -7.52780216 -17.60790567 -2.00134852]
[ -5.34422731 -17.29394266 -2.69261597]
[ 9.40597893 0.21140292 2.05522806]
[ 12.12423431 -2.80281266 7.81182024]
[ 19.51224195 4.7624575 -11.20523383]
[ 22.38102384 0.82486072 -1.64716468]
[ -8.60947699 4.12597477 -6.01885407]
[ 9.56268414 1.18190655 -5.44074124]
[ 14.97675455 3.31666971 -3.30012109]
[ 20.47530869 -1.95896058 -1.91238615]]
shape: (40, 3)
trans = pca(data, 3)[0] is the U data, since [0] selects the first index of the returned data, and pca returns U, E, V
ax2.scatter(trans[:50, 0], trans[:50, 1], c = 'r') plots the first 50 rows of column 0 against the first 50 rows of column 1, and ax2.scatter(trans[50:, 0], trans[50:, 1], c = 'b') does the same for rows from 50 to the end. This slicing comes from the sample data used in this fast script, but your data only has shape: (40, 3) (i.e. only 40 rows of data).
In order to plot trans as a 3d scatter plot, extract each of the 3 columns into a separate variable and plot as a scatter plot.
# imports as shown in the linked answer
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show
# other imports
import numpy as np
import matplotlib.pyplot as plt  # plt.figure() is used below
# test data from linked answer (e.g. this fast script)
np.random.seed(365)  # makes data repeatable
data = array([randn(8) for k in range(150)])  # creates array with shape (150, 8)
data[:50, 2:4] += 5  # adds 5 to first 50 rows of columns 2:4
data[50:, 2:5] += 5  # adds 5 to rows from 50 of columns 2:5
# function call
trans = pca(data, 3)[0]  # [0] gets U returned by pca(...)
# extract each column to a separate variable
x = trans[:, 0]  # all rows of column 0
y = trans[:, 1]  # all rows of column 1
z = trans[:, 2]  # all rows of column 2
# plot 3d scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z)
# display the figure when this is run as a script
show()

Grouping and heading pandas dataframe

I have the following dataframe of securities and computed a 'liquidity score' in the last column, where 1 = liquid, 2 = less liquid, and 3 = illiquid. I want to group the securities (dynamically) by their liquidity. Is there a way to group them and include some kind of header for each group? How can this best be achieved? Below is the code and an example of how it is supposed to look.
import pandas as pd

# Securities with their liquidity score: 1 = liquid, 2 = less liquid, 3 = illiquid
securities = {
    'ID': ['XS123', 'US3312', 'DE405'],
    'Currency': ['EUR', 'EUR', 'USD'],
    'Liquidity score': [2, 3, 1],
}
# Build the frame and order it from most to least liquid.
df = pd.DataFrame(securities).sort_values(by="Liquidity score")
print(df)
Add labels for liquidity score
The following replaces labels for numbers in Liquidity score:
df['grp'] = df['Liquidity score'].replace({1:'Liquid', 2:'Less liquid', 3:'Illiquid'})
Headers for each group
As per your comment, find below a solution to do this.
Let's illustrate this with a small data example.
df = pd.DataFrame({'ID':['XS223', 'US934', 'US905', 'XS224', 'XS223'], 'Currency':['EUR', 'USD', 'USD','EUR','EUR',]})
Insert a header on specific rows using np.insert.
df = pd.DataFrame(np.insert(df.values, 0, values=["Liquid", ""], axis=0))
df = pd.DataFrame(np.insert(df.values, 2, values=["Less liquid", ""], axis=0))
df.columns = ['ID', 'Currency']
Using Pandas styler, we can add a background color, change font weight to bold and align the text to the left.
df.style.hide_index().set_properties(subset = pd.IndexSlice[[0,2], :], **{'font-weight' : 'bold', 'background-color' : 'lightblue', 'text-align': 'left'})
You can add a new column like this:
df['group'] = np.select(
[
df['Liquidity score'].eq(1),
df['Liquidity score'].eq(2)
],
[
'Liquid','Less liquid'
],
default='Illiquid'
)
And try setting as index, so you can filter using the index:
# The label column created above is named 'group', so index by it (together
# with 'ID'); rows can then be selected by their liquidity label.
df.set_index(['group','ID'], inplace=True)
df.loc['Less liquid',:]

Create a drop down menu in plotly

I have the following dataframe:
# Create DataFrame
df = pd.DataFrame({"Col_A_date":[2018-09-04,2018-09-05,2018-09-04,2018-09-05],
"Col_B_hour":[7,7,8,8],
"Col_C":[1,1,2,2],
"Col_value":[1.9,2.2,2.6,3.8]
})
I want to create a graph where Col_A is shown as a drop-down menu (2018-09-04 and 2018-09-05), Col_B is the x-axis, Col_value is the y-axis and Col_C gives the traces, so that I can see the data for different dates in the same graph. Is it possible to do this using plotly?
Yes, it is possible. Updated following your explanations.
If I understand correctly what you need, this code does what you want:
# import libraries
import pandas as pd
import plotly
import plotly.graph_objs as go
# Create DataFrame
df = pd.DataFrame({"Col_A_date":["2018-09-04","2018-09-05","2018-09-04","2018-09-05"],
                   "Col_B_hour":[7,7,8,8],
                   "Col_C":[1,1,2,2],
                   "Col_value":[1.9,2.2,2.6,3.8]
                   })

# One entry per trace: (date, Col_C value, trace name, bar colour, menu label).
# The order fixes each trace's position in `data`, which the "visible" masks
# of the dropdown buttons rely on. The dates are in September, so the labels
# say "Sep".
_TRACE_SPECS = [
    ("2018-09-04", 1, "1", "rgb(0,102,204)", "4 Sep 1"),
    ("2018-09-04", 2, "2", "rgb(255,128,0)", "4 Sep 2"),
    ("2018-09-05", 1, "3", "rgb(255,178,102)", "5 Sep 1"),
    ("2018-09-05", 2, "4", "rgb(255,255,153)", "5 Sep 2"),
]


def _make_trace(frame, name, color):
    """Return a bar trace of Col_value over Col_B_hour for the rows in *frame*."""
    values = list(frame["Col_value"])
    return go.Bar(x=list(frame["Col_B_hour"]),
                  y=values,
                  name=name,
                  text=values,
                  textposition="auto",
                  hoverinfo="name",
                  marker=dict(color=color)
                  )


def _make_button(label, visible):
    """Return an "update" button that applies *visible* and sets the title to *label*."""
    return dict(label=label,
                method="update",
                args=[{"visible": visible},
                      {"title": label}])


# Select the rows for each (date, Col_C) combination
subsets = [df.loc[(df["Col_A_date"] == date) & (df["Col_C"] == col_c)]
           for date, col_c, _, _, _ in _TRACE_SPECS]
print(*subsets)
# Create traces and pull them to data
data = [_make_trace(subset, name, color)
        for subset, (_, _, name, color, _) in zip(subsets, _TRACE_SPECS)]
# Specify dropdown parameters: one button per trace that shows only that
# trace, then "All" (every trace visible) and "Reset" (every trace hidden)
trace_count = len(data)
buttons = [_make_button(label, [i == j for j in range(trace_count)])
           for i, (_, _, _, _, label) in enumerate(_TRACE_SPECS)]
buttons.append(_make_button("All", [True] * trace_count))
buttons.append(_make_button("Reset", [False] * trace_count))
# active=-1 means no button is pre-selected
updatemenus = [dict(active=-1, buttons=buttons)]
# Set layout
layout = dict(title="Dropdown",
              showlegend=False,
              xaxis=dict(title="Hours"),
              yaxis=dict(title="Number"),
              updatemenus=updatemenus)
# Create fig
fig = dict(data=data, layout=layout)
# Plot the plotly plot
plotly.offline.plot(fig, filename="update_dropdown.html")
Here is how the "All" choice looks:
And the first trace:
Here are some useful links from the docs: about bar charts; hover text; dropdown menu. Do not be afraid to look at the plotly documentation - there are excellent examples of how to use this package correctly.