How to solve "KeyError: "None of [Index(['Umur'], dtype='object')] are in the [index]"? - pandas

I have a CSV file for the code I wrote.
I have an assignment to filter and plot data. I didn't really understand, so I just copied the code from my lecturer's presentation file, but I made the CSV file myself. When I tried to run the plot, it didn't work. This is what it said.
I want to make a bar chart to show the number of people with the same age. If it's possible, how do I make a pie chart instead, and show the percentage?
btw, "Umur" means Age
import pandas as pd
from pathlib import Path
# Load the hand-made data file and write it back out under a .csv file name.
df = pd.read_csv('inicsvanakbisdig.txt')
filepath = Path('tugaspertemuan12afk.csv')
df.to_csv(filepath)
# Column selection: pick the single column 'Nama'.
df1 = df['Nama']
print(df1)
# Select the rows where 'Umur' is equal to 20 (boolean mask passed to .loc).
df2 = df.loc[df['Umur'] == 20]
print(df2)
# Drop the 'Umur' column (axis=1 means columns).
df3 = df.drop(['Umur'], axis=1)
print(df3)
# Compute summary statistics.
df4 = df.describe()
print(df4)
# Plot
import matplotlib.pyplot as plt
# NOTE: this is the line that raises the KeyError quoted in the title.
# A list passed as the only indexer to .loc is looked up among the ROW labels,
# and there is no row labelled 'Umur'.
df5 = df.loc[['Umur']]
p = df5['Umur'].sort_index()
p.plot(kind = 'bar',title = 'Umur anak bisdig', xlabel = "Umur", ylabel = "Counter")
plt.show()

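A list passed as the only indexer to `.loc` selects rows by label, so `df.loc[['Umur']]` looks for a row named 'Umur'. To select the column instead, you might want: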
df5 = df.loc[:, ['Umur']]
# or
df5 = df[['Umur']]

Related

Create multiple DataFrames using data from an api

I'm using the world bank API to analyze data and I want to create multiple data frames with the same indicators for different countries.
import wbgapi as wb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time as t_lib
# Variables
# Indicator codes requested from the World Bank API for every country.
indicators = ['AG.PRD.LVSK.XD', 'AG.YLD.CREL.KG', 'NY.GDP.MINR.RT.ZS', 'SP.POP.TOTL.FE.ZS']
# NOTE: 'BRL' is the entry that triggers the APIResponseError quoted further
# down (the failing URL contains /country/BRL/); the code for Brazil is 'BRA'.
countries = ['PRT', 'BRL', 'ARG']
# Years 1995 through 2020 inclusive.
time = range(1995, 2021)
#Code
def create_df(country):
    """Fetch the configured indicators for one country and return them transposed.

    The request uses the module-level ``indicators`` and ``time`` values.

    Args:
        country: Economy code (or codes) passed straight to
            ``wb.data.DataFrame``.

    Returns:
        The fetched frame, transposed, with the values of its 'Series'
        column as column names, the 'Series' and 'series' rows removed and
        the index reset to a regular column.
    """
    df = wb.data.DataFrame(indicators, country, time, labels=True).reset_index()
    # Capture the series labels before transposing; they become the headers.
    columns = list(df['Series'])
    df = df.T
    df.columns = columns
    # After the transpose the former label columns are rows; drop them.
    df.drop(['Series', 'series'], axis=0, inplace=True)
    df = df.reset_index()
    return df
# One DataFrame per country, in the same order as ``countries``.
list_of_dfs = [create_df(country) for country in countries]
What I really wanted is to create a data frame with a different name for each country and to store them in a list or dict like: [df_1, df_2, df_3...]
EDIT:
I'm trying this now:
# Map "key0", "key1", ... to the DataFrame of the country at that position.
a_dictionary = {
    f"key{n}": create_df(country) for n, country in enumerate(countries)
}
It was supposed to work, but I still get the same error on the second iteration of the loop:
APIResponseError: APIError: JSON decoding error (https://api.worldbank.org/v2/en/sources/2/series/AG.PRD.LVSK.XD;AG.YLD.CREL.KG;NY.GDP.MINR.RT.ZS;SP.POP.TOTL.FE.ZS/country/BRL/time/YR1995;YR1996;YR1997;YR1998;YR1999;YR2000;YR2001;YR2002;YR2003;YR2004;YR2005;YR2006;YR2007;YR2008;YR2009;YR2010;YR2011;YR2012;YR2013;YR2014;YR2015;YR2016;YR2017;YR2018;YR2019;YR2020?per_page=1000&page=1&format=json)
UPDATE:
Thanks to notiv, I noticed the problem: the country code should be "BRA", not "BRL".
I'm also putting here a new approach that works as well by creating a master dataframe and then slicing it by country to create the desired dataframes:
# Fetch every country in a single request, then reshape as in create_df:
# transpose and use the 'Series' labels as column names.
df = wb.data.DataFrame(indicators, countries, time, labels=True).reset_index()
columns = list(df['Series'])
df = df.T
df.columns = columns
df.drop(['Series', 'series'], axis=0, inplace=True)
df = df.reset_index()

# Slice the master frame into one frame per country, stored under
# "eco_df0", "eco_df1", ...
a_dictionary = {}
for n, country in enumerate(countries):
    # Keep only the columns in which this country's code appears. Copy so
    # the assignments below modify an independent frame, not a view of df.
    new_df = df.loc[:, (df == country).any()].copy()
    new_df['index'] = df['index']
    new_df.set_index('index', inplace=True)
    new_df.drop(['economy', 'Country'], axis=0, inplace=True)
    a_dictionary[f"eco_df{n}"] = new_df

# The theme does not change between charts, so set it once up front.
sns.set_theme(style="dark")

# One line chart per column of each country's frame.
for loop, country in enumerate(countries):
    eco_df = a_dictionary[f'eco_df{loop}']
    for n in range(len(eco_df.columns)):
        g = sns.relplot(
            data=eco_df,
            x=eco_df.index,
            y=eco_df.iloc[:, n],
            kind="line",
            palette="crest",
            height=5,
            aspect=1.61,
            legend=False,
        ).set(title=country)
        g.set_axis_labels("Years")
        g.set_xticklabels(rotation=45)
        g.tight_layout()
At the end I used the dataframes to create a chart for each indicator for each country.
Many thanks for the help

How to make Line graph using csv file. with 4 columns. and the year is by decade

I'm a newbie and wanna do a line graph.
I want a line graph with decades on the x-axis and the number of schools on the y-axis, with two lines: one for religious schools and one for non-religious schools.
here is my csv file.
https://drive.google.com/file/d/16XuvoQKSrSMaUPsfHOWY6cBy1ry6UNz6/view?usp=sharing
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('ReligiousRate.csv', dtype='string')

# Take each (name, founding date) column pair as an independent copy so the
# renaming and date conversion below do not act on a slice of ``df``.
df_religious = df[['Religious', 'founded1']].copy()
df_non_religious = df[['Non-Religious', 'founded2']].copy()
dfs = [df_religious, df_non_religious]
names = ['Religious Schools', 'Non Religious Schools']

# Count, per 10-year bin, how many schools of each kind were founded.
counts = []
for frame, name in zip(dfs, names):
    frame.columns = ['Name', 'Founded']
    frame['Founded'] = pd.to_datetime(frame['Founded'], yearfirst=True)
    frame = frame.set_index('Founded')
    # NOTE: 'AS' (year start) is deprecated in pandas >= 2.2 in favour of
    # 'YS'; switch to '10YS' if the installed pandas rejects '10AS'.
    df_decades = frame.resample('10AS').count()
    df_decades.columns = [name]
    counts.append(df_decades)

# One line per school kind, drawn with the two palette colours.
sns.set_palette(["#090364", "#ff0000"])
sns.lineplot(data=pd.concat(counts), dashes=False)
plt.show()
Output:

How to plot a spectrum with plotly

I want to plot a spectrum that is given by an array of masses and intensities. For each pair I want to plot a thin line. When I zoom in, the width of the lines should not change. The bar plot does almost what I need.
import plotly.graph_objects as go
# Draw one bar per (mz_array, intensity) pair.
fig = go.Figure(data=[go.Bar(
x=df['mz_array'],
y=df['intensity'],
# NOTE: a bar width is given in x-axis units, so the bars grow on screen
# when zooming in; this is the behaviour the question complains about.
width = 1
)])
fig.show()
However, when I zoom in the bars change their widths.
I modified the dataframe and then used px.line to plot the spectrum:
def plot_spectrum(df, annot_threshold=1e4, threshold=0):
    """Plot a spectrum as thin vertical lines, one line per peak.

    Every kept row is duplicated with its intensity set to 0, so each peak
    becomes a two-point line from the baseline up to its intensity. The
    line width is set in the trace style rather than as a bar width.

    Args:
        df: DataFrame with ``mz_array`` and ``intensity`` columns.
        annot_threshold: Peaks with an intensity below this value get no
            text label.
        threshold: Only rows with an intensity strictly above this value
            are plotted.

    Returns:
        The plotly figure produced by ``px.line``.
    """
    # Imported here because the surrounding snippet only imports
    # plotly.graph_objects, which leaves ``px`` undefined.
    import plotly.express as px

    # reset_index() exposes the row number as an 'index' column; it is used
    # below to pair each peak with its baseline point.
    df = df[df.intensity > threshold].reset_index()

    # Peak tops, labelled with their m/z value when intense enough.
    df1 = df.copy()
    df1['Text'] = df1.mz_array.astype(str)
    df1.loc[df1.intensity < annot_threshold, 'Text'] = None

    # Matching baseline points at intensity 0.
    df2 = df.copy()
    df2['intensity'] = 0

    # Sorting by ('index', 'intensity') puts each baseline point directly
    # before its peak top; color='index' draws every pair as its own line.
    df3 = pd.concat([df1, df2]).sort_values(['index', 'intensity'])
    fig = px.line(df3, x='mz_array', y='intensity', color='index', text='Text')
    fig.update_layout(showlegend=False)
    fig.update_traces(
        line=dict(width=1, color='grey'),
        textposition='top center',
    )
    return fig

Python pie chart / Show several columns combined

I have dataframe with 2 columns:
Col1- managers' name
Col2 - their profit
I want to plot a pie chart that shows the 5 most profitable managers separately and all the others combined in one slice labelled 'others'.
How about that:
With automatic labeling of the pie pieces using autopct argument.
import pandas as pd
import matplotlib.pyplot as plt
# Example data: one row per manager with their profit.
data = {
    'managers': ['mike1', 'mike2', 'mike3', 'mike4', 'mike5', 'mike6', 'mike7'],
    'profit': [110, 60, 40, 30, 10, 5, 5],
}
df = pd.DataFrame(data)
df = df.sort_values(by='profit', ascending=False)

# The five most profitable managers keep their own rows; everyone else is
# summed into a single 'others' row.
top_5 = df.iloc[:5]
others = df.iloc[5:]['profit'].sum()
df2 = pd.DataFrame([['others', others]], columns=['managers', 'profit'])

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
all_data = pd.concat([top_5, df2], ignore_index=True)
# Index by manager name (the column itself is kept as well).
all_data.index = all_data['managers']
# Formatter used to label the pie pieces.
def auto_func(val):
    """Return ``val`` rounded with the built-in ``round``."""
    rounded = round(val)
    return rounded
# Draw the pie from the 'profit' column; auto_func is passed as autopct to
# produce the text shown on each slice.
all_data.plot.pie(y = 'profit', autopct = auto_func)
# ax = plt.gca()
plt.show()

Figures names in Pandas Boxplots

I created 2 boxplots using pandas.
Then each figure gets referenced with plt.gcf()
When trying to show the plots, only the last boxplot gets shown. It's as if fig1 is being overwritten.
What is the correct way of showing both boxplots?
This is the sample code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: four columns over ten consecutive dates.
dates = pd.date_range('20000101', periods=10)
df = pd.DataFrame(index=dates)
df['A'] = np.cumsum(np.random.randn(10))
df['B'] = np.random.randint(-1, 2, size=10)
df['C'] = range(1, 11)
df['D'] = range(12, 22)

# first figure
ax_boxplt1 = df[['A', 'B']].boxplot()
fig1 = plt.gcf()

# second figure
# NOTE: no new figure is created before this call, so plt.gcf() returns the
# same current figure again. This is the behaviour the question asks about:
# fig1 and fig2 refer to one figure.
ax_boxplt2 = df[['C', 'D']].boxplot()
fig2 = plt.gcf()

# print figures
figures = [fig1, fig2]
for fig in figures:
    print(fig)
Create a figure with two axes and plot to each of them separately
# One figure holding two axes; each boxplot is directed to its own axes.
fig, axes = plt.subplots(2)
# Sample data: four columns over ten consecutive dates.
dates = pd.date_range('20000101', periods=10)
df = pd.DataFrame(index=dates)
df['A'] = np.cumsum(np.random.randn(10))
df['B'] = np.random.randint(-1,2,size=10)
df['C'] = range(1,11)
df['D'] = range(12,22)
# first axes
df[['A','B']].boxplot(ax=axes[0]) # Added `ax` parameter
# second axes
df[['C','D']].boxplot(ax=axes[1]) # Added `ax` parameter
plt.show()
In order to get two figures, define the figure before plotting to it. You can use a number to enumerate the figures.
plt.figure(1)
# do something with the first figure
plt.figure(2)
# do something with the second figure
Complete example:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: four columns over ten consecutive dates.
dates = pd.date_range('20000101', periods=10)
df = pd.DataFrame(index=dates)
df['A'] = np.cumsum(np.random.randn(10))
df['B'] = np.random.randint(-1,2,size=10)
df['C'] = range(1,11)
df['D'] = range(12,22)
# first figure: create figure 1 before plotting, so the boxplot is drawn into it
fig1=plt.figure(1)
ax_boxplt1 = df[['A','B']].boxplot()
# second figure: create figure 2 before plotting, so the second boxplot gets its own figure
fig2=plt.figure(2)
ax_boxplt2 = df[['C','D']].boxplot()
# Show both figures.
plt.show()