Python program: convert the digits between a start position and an end position into their corresponding words (Python 3.8)

Take a string and two integers from the user, and convert the digits between the start and end positions into their corresponding English words.
def convert_digits(input_string, start_position, end_position):
    """Spell out the digits found between two positions of a string.

    Characters in the half-open, 0-based range
    ``[start_position, end_position)`` that are ASCII digits are replaced by
    their upper-case English names (``'4'`` -> ``'FOUR'``). Every character
    outside the range, and every non-digit inside it, is copied unchanged.

    Args:
        input_string: The text to process.
        start_position: 0-based index of the first character to convert.
        end_position: 0-based index one past the last character to convert.

    Returns:
        The whole string with the digits inside the range spelled out.
        Positions outside the string are clamped to its bounds, and an empty
        or reversed range leaves the string unchanged.
    """
    digit_mapping = {
        '0': 'ZERO',
        '1': 'ONE',
        '2': 'TWO',
        '3': 'THREE',
        '4': 'FOUR',
        '5': 'FIVE',
        '6': 'SIX',
        '7': 'SEVEN',
        '8': 'EIGHT',
        '9': 'NINE',
    }
    # Clamp both positions so that slicing never wraps around or overlaps.
    length = len(input_string)
    start = max(0, min(start_position, length))
    end = max(start, min(end_position, length))
    # Looking characters up in the mapping (rather than testing isdigit())
    # leaves non-ASCII digit characters untouched instead of raising KeyError.
    converted = "".join(
        digit_mapping.get(char, char) for char in input_string[start:end]
    )
    return input_string[:start] + converted + input_string[end:]

def convert_digits(input_string, start_position, end_position):
    """Return a slice of the input with its digits spelled out.

    Only the characters of ``input_string[start_position:end_position]`` are
    returned; each ASCII digit among them is replaced by its upper-case
    English name (``'1'`` -> ``'ONE'``) and other characters are kept as is.

    Args:
        input_string: The text to process.
        start_position: 0-based index where the slice starts. Negative
            values are treated as 0.
        end_position: 0-based index where the slice stops (exclusive).
            Values past the end of the string are treated as its length.

    Returns:
        The converted slice. When ``start_position`` is not below
        ``end_position`` the slice collapses to the single character just
        before ``end_position``.
    """
    length = len(input_string)
    if end_position > length:
        end_position = length
    if start_position < 0:
        start_position = 0
    elif start_position >= end_position:
        # Degenerate range: keep only the character just before the end.
        start_position = end_position - 1
    digit_mapping = {
        '0': 'ZERO',
        '1': 'ONE',
        '2': 'TWO',
        '3': 'THREE',
        '4': 'FOUR',
        '5': 'FIVE',
        '6': 'SIX',
        '7': 'SEVEN',
        '8': 'EIGHT',
        '9': 'NINE',
    }
    # str.isdigit() is also true for characters such as superscript digits,
    # which have no entry in the mapping; a .get() lookup passes them through
    # instead of raising KeyError.
    return "".join(
        digit_mapping.get(char, char)
        for char in input_string[start_position:end_position]
    )
# Demo: print the first 14 characters of the sample string with digits spelled out.
print(convert_digits("ajoegh12ioh12oih", 0, 14))

Related

How to replace column values using na.replace() method while using more than one dictionary?

When replacing the values of a column in a DataFrame with the replace method, how can a dictionary be used to do it? There are five dictionaries that have to be applied to a DataFrame, and I am having trouble with the syntax for doing this in one step. The dataset will keep growing, so I am trying to find the most efficient way to chain the replace method, or to build a list of the column names.
# Lookup tables that translate the coded string values of each column into
# readable labels.
ab_normaldict = {'0': 'Normal', '1': 'Abnormal', '999': 'Not Done'}
ethnicitydict = {'1': 'Hispanic or Latino', '2': 'Not Hispanic or Latino', '3': ' Unknown'}
racedict = {
    '1': 'American Indian or Alaska Native',
    '2': 'Asian',
    '3': 'Black or African American',
    '4': 'Native Hawaiian or Other Pacific Islander',
    '5': 'White',
    '7': 'More than one race',
    '6': 'Unknown/Other',
}
sexdict = {'1': 'Female', '2': 'Male', '888': 'Other', '999': 'Unknown'}

# Sample frame with one coded column per lookup table.
# NOTE(review): person1..person4 are not defined in this snippet; they are
# presumably placeholder row ids -- confirm before running.
df1 = spark.createDataFrame([
    (person1, "0", "1", "2", "1"),
    (person2, "1", "2", "1", "2"),
    (person3, "999", "2", "3", "1"),
    (person4, 'Null', "1", "6", "1"),
]).toDF("id", "abnormal", "ethnicity", "racedict", "sex")  # closing quote after "ethnicity" restored
I saw that the syntax is:
df1.na.replace(to_replace= ab_normaldict,'abnormal')
df2=df1
df2.na.replace(to_replace=sexdict, 'sex')
But I need something like the line below, so that I don't have to keep creating a new DataFrame:
df1.na.replace(to_replace= ab_normaldict,'abnormal').na.replace(to_replace=sexdict, 'sex')```
Your code works just fine with me (except the positional argument)
(df1
.na.replace(ab_normaldict, 'abnormal')
.na.replace(sexdict, 'sex')
.show()
)
+---+--------+---------+--------+--------+
| id|abnormal|ethnicity|racedict| sex|
+---+--------+---------+--------+--------+
| 1| Normal| Abnormal| Male|Abnormal|
| 2|Abnormal| Male|Abnormal| Male|
| 3|Not Done| Male| 3|Abnormal|
| 4| Null| Abnormal| 6|Abnormal|
+---+--------+---------+--------+--------+

Output shows only None instead of converting the numeric values to capitalized words

# Spells out the digits between two 1-based positions of a string and returns
# "INVALID" when the positions do not describe a usable range.
# NOTE(review): the indentation of this listing has been lost, so the nesting
# the author actually used cannot be read from it; the accompanying answer
# presumably attributes the `None` output to an over-indented line -- confirm.
def convert_digits(input_string, start_position, end_position):
# The ending index was required as it was not returning the whole sentence
# (only the length of this slice is read below)
new_string = input_string[:end_position]
# Output accumulator, seeded with a single space.
newstring = " "
# return new_string
# Upper-case English name for each ASCII digit.
digit_mapping = {
'0': 'ZERO',
'1': 'ONE',
'2': 'TWO',
'3': 'THREE',
'4': 'FOUR',
'5': 'FIVE',
'6': 'SIX',
'7': 'SEVEN',
'8': 'EIGHT',
'9': 'NINE'
}
# Positions are 1-based: the loop below starts at start_position - 1.
if start_position >= 1:
# new_string was cut at end_position, so this is false when end_position
# lies past the end of the input.
if end_position <= len(new_string):
if start_position < end_position:
for index in range(start_position - 1, end_position):
if input_string[index].isdigit():
mapped = digit_mapping[input_string[index]]
# Digits are emitted as their name with a space on each side.
newstring += " " + mapped + " "
else:
newstring += input_string[index]
else:
return "INVALID"
else:
return "INVALID"
else:
return "INVALID"
return newstring
# Run the demo only when the file is executed as a script. The double
# underscores are required: a bare `name` is undefined, and the module name
# of a script is '__main__', not 'main'.
if __name__ == '__main__':
    print(convert_digits("you are a 4king 5shole", 1, 21))
Use this code.
Your problem was on line 39: you used two tabs where there should be one.
def convert_digits(input_string, start_position, end_position):
    """Spell out the digits inside a 1-based, inclusive range of a string.

    Args:
        input_string: The text to process.
        start_position: 1-based position of the first character to include.
        end_position: 1-based position of the last character to include.

    Returns:
        A string that starts with a single space, followed by the selected
        characters, with every ASCII digit replaced by its upper-case English
        name padded with one space on each side. The literal string
        ``"INVALID"`` is returned when ``start_position`` is below 1, when
        ``end_position`` is past the end of the input, or when
        ``start_position`` is not below ``end_position``.
    """
    digit_mapping = {
        '0': 'ZERO',
        '1': 'ONE',
        '2': 'TWO',
        '3': 'THREE',
        '4': 'FOUR',
        '5': 'FIVE',
        '6': 'SIX',
        '7': 'SEVEN',
        '8': 'EIGHT',
        '9': 'NINE',
    }
    # Guard clauses replace the nested if/else pyramid; every rejected range
    # yields the same sentinel.
    if start_position < 1:
        return "INVALID"
    if end_position > len(input_string):
        return "INVALID"
    if start_position >= end_position:
        return "INVALID"

    # The result has always begun with one space; keep it so callers see the
    # same output.
    pieces = [" "]
    for char in input_string[start_position - 1:end_position]:
        # Membership in the mapping (not isdigit()) decides what is converted,
        # so non-ASCII digit characters are copied instead of raising KeyError.
        if char in digit_mapping:
            pieces.append(" " + digit_mapping[char] + " ")
        else:
            pieces.append(char)
    return "".join(pieces)
# Run the demo only when the file is executed as a script; the guarded call
# must be indented under the `if`.
if __name__ == '__main__':
    print(convert_digits("you are a 4king 5shole", 1, 21))

Converting dataframe to dictionary in pyspark without using pandas

Following up this question and dataframes, I am trying to convert a dataframe into a dictionary. In pandas I was using this:
dictionary = df_2.unstack().to_dict(orient='index')
However, I need to convert this code to pyspark. Can anyone help me with this? As I understand from previous questions such as this I would indeed need to use pandas, but the dataframe is way too big for me to be able to do this. How can I solve this?
EDIT:
I have now tried the following approach:
dictionary_list = map(lambda row: row.asDict(), df_2.collect())
dictionary = {age['age']: age for age in dictionary_list}
(reference) but it is not yielding what it is supposed to.
In pandas, what I was obtaining was the following:
df2 is the dataframe from the previous post. You can do a pivot first, and then convert to dictionary as described in your linked post.
import pyspark.sql.functions as F
# Pivot so that each distinct `siblings` value becomes a column holding the
# first `count` seen for every age.
df3 = (
    df2.groupBy('age')
    .pivot('siblings')
    .agg(F.first('count'))
)
# Collect the pivoted rows as plain dicts, then index them by age.
list_persons = list(map(lambda row: row.asDict(), df3.collect()))
dict_persons = dict((person['age'], person) for person in list_persons)
{15: {'age': 15, '0': 1.0, '1': None, '3': None}, 10: {'age': 10, '0': None, '1': None, '3': 1.0}, 14: {'age': 14, '0': None, '1': 1.0, '3': None}}
Or another way:
import json

# Collapse the pivoted frame into a single row whose columns are the ages,
# each holding a struct of the per-sibling values (nulls replaced by NaN first).
df4 = df3.fillna(float('nan')).groupBy().pivot('age').agg(F.first(F.struct(*df3.columns[1:])))
# to_json produces a JSON document, so parse it with json.loads instead of
# eval: eval executes arbitrary code and fails on JSON literals such as
# null, true and false.
result_dict = json.loads(df4.select(F.to_json(F.struct(*df4.columns))).head()[0])
{'10': {'0': 'NaN', '1': 'NaN', '3': 1.0}, '14': {'0': 'NaN', '1': 1.0, '3': 'NaN'}, '15': {'0': 1.0, '1': 'NaN', '3': 'NaN'}}

Pandas extract value from a key-value pair

I have a DataFrame with output as shown below, and I am trying to extract specific text from it.
id,value
101,*sample value as shown below*
I am trying to extract the value corresponding to key in this text
Expected output
id, key, id_new
101,Ticket-123, 1001
Given below is how the data looks like:
{
'fields': {
'status': {
'statusCategory': {
'colorName': 'yellow',
'name': 'In Progress',
'key': 'indeterminate',
'id': 4
},
'description': '',
'id': '11000',
'name': 'In Progress'
},
'summary': 'Sample Text'
},
'key': 'Ticket-123',
'id': '1001'
}
Use Series.str.get:
# Each cell of `value` holds a dict, so Series.str.get looks the given key up
# in that dict and returns one value per row.
df['key'] = df['value'].str.get('key')
# The top-level 'id' of the dict goes into a new column, leaving the frame's
# own `id` column untouched.
df['id_new'] = df['value'].str.get('id')
print (df)
id value key id_new
0 101 {'fields': {'status': {'statusCategory': {'col... Ticket-123 1001
Tested Dataframe:
# Sample record: a nested dict with 'key' and 'id' at the top level.
v = {
'fields': {
'status': {
'statusCategory': {
'colorName': 'yellow',
'name': 'In Progress',
'key': 'indeterminate',
'id': 4
},
'description': '',
'id': '11000',
'name': 'In Progress'
},
'summary': 'Sample Text'
},
'key': 'Ticket-123',
'id': '1001'
}
# One-row frame: the scalar id is paired with a single-element list so that
# the whole dict is stored in one `value` cell.
df = pd.DataFrame({'id':101, 'value':[v]})

pandas same attribute comparison

I have the following dataframe:
# Sample data: every name appears twice, once with label 'false' and once
# with label 'true', each row carrying its own score.
df = pd.DataFrame([{'name': 'a', 'label': 'false', 'score': 10},
{'name': 'a', 'label': 'true', 'score': 8},
{'name': 'c', 'label': 'false', 'score': 10},
{'name': 'c', 'label': 'true', 'score': 4},
{'name': 'd', 'label': 'false', 'score': 10},
{'name': 'd', 'label': 'true', 'score': 6},
])
I want to return the names whose "false" label score is at least double the score of their "true" label. In my example, only the name "c" should be returned.
First you can pivot the data, and look at the ratio, filter what you want:
# One row per name and one column per label value, with the score in each cell.
new_df = df.pivot(index='name', columns='label', values='score')
# Keep the names whose 'false' score is at least double the 'true' score;
# `ge` (not `gt`) so that an exact 2x ratio also qualifies.
new_df[new_df['false'].div(new_df['true']).ge(2)]
output:
label false true
name
c 10 4
If you only want the label, you can do:
# Names only: index entries whose 'false'/'true' ratio is at least 2
# (`ge`, so that an exact 2x ratio is included).
new_df.index[new_df['false'].div(new_df['true']).ge(2)].values
which gives
array(['c'], dtype=object)
Update: Since your data is result of orig_df.groupby().count(), you could instead do:
# Share of rows labelled 'true' within each name: the mean of a boolean mask.
# NOTE(review): grouping a Series by the string 'name' presumably requires
# 'name' to be an index level of orig_df -- confirm; if it is a column,
# group by orig_df['name'] instead.
orig_df['label'].eq('true').groupby('name').mean()
and look at the rows with values <= 1/3.