Constructing a pandas DataFrame with columns and sub-columns from dict - pandas

I have a dict of the following form
# Benchmark results keyed by model/run name. Each entry carries summary
# metrics ("CRR", "Sum Time", "Sum Detection Time") plus an "images" dict that
# maps an image file name to its per-image "Time" / "Time_detection" values.
# Value types are mixed: "CRR" and the per-image timings are strings, while
# the two "Sum ..." totals are ints.
# NOTE: the variable name `dict` shadows the built-in type of the same name.
dict = {
"Lightweight_model_20221103_downscale_1536px_RecOut": {
"CRR": "75.379",
"Sum Time": 33132,
"Sum Detection Time": 18406,
"images": {
"uk_UA_02 (1).jpg": {
"Time": "877",
"Time_detection": "469"
},
"uk_UA_02 (10).jpg": {
"Time": "914",
"Time_detection": "323"
},
"uk_UA_02 (11).jpg": {
"Time": "1169",
"Time_detection": "428"
},
"uk_UA_02 (12).jpg": {
"Time": "881",
"Time_detection": "371"
},
"uk_UA_02 (13).jpg": {
"Time": "892",
"Time_detection": "335"
}
}
},
"Lightweight_model_20221208_RecOut": {
"CRR": "71.628",
"Sum Time": 41209,
"Sum Detection Time": 25301,
"images": {
"uk_UA_02 (1).jpg": {
"Time": "916",
"Time_detection": "573"
},
"uk_UA_02 (10).jpg": {
"Time": "927",
"Time_detection": "442"
},
"uk_UA_02 (11).jpg": {
"Time": "1150",
"Time_detection": "513"
},
"uk_UA_02 (12).jpg": {
"Time": "1126",
"Time_detection": "531"
},
"uk_UA_02 (13).jpg": {
"Time": "921",
"Time_detection": "462"
}
}
}
}
and I want to build a DataFrame with sub-columns, like the one shown in this image:
[![enter image description here][1]][1]
but I don't understand how to expand the nested dicts stored under ['images'].
When I use this code:
# Build one row per top-level key; the "images" column then holds a whole
# nested dict in every row.
df = pd.DataFrame.from_dict(dict, orient='index')
# apply(pd.Series) expands only the first nesting level: one column per image
# file name, whose cells are still the inner {"Time", "Time_detection"} dicts.
df_full = pd.concat([df.drop(['images'], axis=1), df['images'].apply(pd.Series)], axis=1)
I get whole dictionaries in the columns named after the files:
[![result][2]][2]
How can I expand the nested dicts and convert them to sub-columns?
[1]: https://i.stack.imgur.com/hGrKo.png
[2]: https://i.stack.imgur.com/8LlUW.png

Here is one way to do it with the help of Pandas json_normalize, MultiIndex.from_product, and concat methods:
import pandas as pd
# Build one row per top-level key of the nested results mapping.
# NOTE: `dict` is the user's data variable from the question; it shadows the
# built-in type of the same name.
df = pd.DataFrame.from_dict(dict, orient='index')

# Save the scalar columns and add a second, empty header level so they line
# up with the two-level image columns built below. The empty level is sized
# from the column list, so the two cannot drift apart.
scalar_cols = ["CRR", "Sum Time", "Sum Detection Time"]
tmp = df[scalar_cols]
tmp.columns = [tmp.columns, [""] * len(scalar_cols)]
dfs = [tmp]

# Process the "images" column: one column per image file name, each cell
# still holding that image's {"Time", "Time_detection"} dict.
df = pd.DataFrame.from_dict(df["images"].to_dict(), orient='index')

# For each image, flatten its dicts into plain columns and put the image name
# on the first header level, the metric name on the second.
for col in df.columns:
    tmp = pd.json_normalize(df[col])
    # json_normalize returns a fresh 0..n-1 index; restore the model names so
    # the final concat aligns rows correctly.
    tmp.index = df.index
    tmp.columns = pd.MultiIndex.from_product([[col], tmp.columns])
    dfs.append(tmp)

# Concat everything side by side into a new dataframe.
new_df = pd.concat(dfs, axis=1)
Then:
print(new_df)
Outputs:

Related

Plotly Animation with slider

I want to add two moving points that represent the locations of two trains on each day. My day data, as shown in the picture, runs from 0 to 7. However, in the resulting animation the slider does not stop on the integer days: it jumps from 1.75 to 2.25, or from 2.75 to 3.25, automatically. Can anyone help me solve this?
trainpath info
import plotly.graph_objects as go
import pandas as pd
dataset = pd.read_csv('trainpath.csv')

# Distinct days, in order of first appearance (Series.unique keeps that order).
days = dataset['day'].unique().tolist()

# Outline of the track, drawn as a closed polyline.
t1 = [-1, 0, 1, 1, 1, 0, -1, -1, -1]
k1 = [-20, -20, -20, 0, 20, 20, 20, 0, -20]

# make list of trains (distinct values, in order of first appearance)
trains = dataset["train"].unique().tolist()


def _track_traces():
    """Return the two identical blue line traces that draw the track."""
    return [
        go.Scatter(x=t1, y=k1,
                   mode="lines",
                   line=dict(width=2, color="blue"))
        for _ in range(2)
    ]


def _train_trace(day, train):
    """Return the marker trace for the rows of `train` on `day`."""
    rows = dataset[(dataset['day'] == day) & (dataset['train'] == train)]
    return {
        'x': list(rows['x']),
        'y': list(rows['y']),
        'mode': 'markers',
        'text': train,
        'marker': {
            'sizemode': 'area',
            'sizeref': 20,
            'size': 20,
            # 'size': list(rows['quantity'])  # this section can be used to increase or decrease the marker size to reflect the material quantity
        },
        'name': train
    }


# make figure
fig_dict = {
    "data": _track_traces(),
    "layout": {},
    "frames": []
}

# fill in most of layout
fig_dict['layout']['title'] = {'text': 'Train Animation'}
fig_dict["layout"]["xaxis"] = {"range": [-10, 10], "title": "xlocation", 'autorange': False, 'zeroline': False}
fig_dict["layout"]["yaxis"] = {"range": [-22, 22], "title": "ylocation", 'autorange': False, 'zeroline': False}
fig_dict["layout"]["hovermode"] = "closest"
fig_dict["layout"]["updatemenus"] = [
    {
        "buttons": [
            {
                "args": [None, {"frame": {"duration": 500, "redraw": False},
                                "fromcurrent": True, "transition": {"duration": 300,
                                                                    "easing": "quadratic-in-out"}}],
                "label": "Play",
                "method": "animate"
            },
            {
                "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                  "mode": "immediate",
                                  "transition": {"duration": 0}}],
                "label": "Pause",
                "method": "animate"
            }
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }
]

sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Day:",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": []
}

# make data: the initial figure shows every train at day 0
day = 0
for train in trains:
    fig_dict['data'].append(_train_trace(day, train))

# make frames: one frame and one slider step per day
for day in days:
    # Frames are looked up by name, so the slider step must reference exactly
    # the same string. Passing the raw number instead can miss the frame when
    # the day column is float (Python names the frame "1.0", while a numeric
    # 1.0 is rendered as "1" on the JavaScript side).
    frame_name = str(day)
    frame = {'data': _track_traces(), 'name': frame_name}
    for train in trains:
        frame['data'].append(_train_trace(day, train))
    fig_dict['frames'].append(frame)
    slider_step = {'args': [
        [frame_name],
        # NOTE(review): the transition (3000 ms) is longer than the frame
        # duration (300 ms) - confirm this is intended.
        {'frame': {'duration': 300, 'redraw': False},
         'mode': 'immediate',
         'transition': {'duration': 3000}}
    ],
        'label': frame_name,
        'method': 'animate'}
    sliders_dict["steps"].append(slider_step)
    # Debug marker left in by the author: confirms day 7 was reached.
    if day == 7:
        print('H')

fig_dict["layout"]["sliders"] = [sliders_dict]
fig = go.Figure(fig_dict)
fig.show()

prevent text from overlapping data points and other text

I'm trying to find an intelligent solution to how text / annotations are placed into a matplotlib plt so they don't over lap with the data point being annotated. Code snip below. Apologies for long dict at the top. So far I've found adjustText which looks very promising, but I can't seem to get it working in this instance. The code below uses adjust_text(), but at the moment all text is being placed together in one part of the ax and I don't understand why. If you run without adjust_text() it places text roughly where it should be, but text is overlapping the data point in places, which I want to avoid. Grateful for any help.
# Single figure/axes pair that all circles and labels are drawn on.
fig, ax = plt.subplots(figsize=(10, 8))
# Circle specifications keyed by label name. The plotting loop that follows
# reads "axis" (circle centre x, y), "r" (radius), "colour" (face colour),
# "ec" (edge colour) and "text" (label text). The remaining keys ("tp", "wlc",
# "fill", "alignment") are not read by the plotting code shown here.
dl_data = {
"Center": {
"axis": (0, 0),
"tp": (0, 0),
"r": 21.37311395187889,
"colour": "#ffffff",
"text": "Center",
"fill": "solid",
"ec": "#808080",
"alignment": ("center", "center"),
},
"First": {
"r": 6.758772077825972,
"wlc": 45.681000000000004,
"text": "First",
"colour": "#FFFFFF",
"fill": "dashed",
"ec": "#808080",
"alignment": ("center", "center"),
"axis": (-68.82111180215705, -1.2642233142341064e-14),
"tp": (-68.82111180215705, -1.2642233142341064e-14),
},
"Second": {
"r": 18.979199140111263,
"wlc": 360.21000000000004,
"text": "Second",
"colour": "#FFFFFF",
"fill": "dashed",
"ec": "#808080",
"alignment": ("center", "center"),
"axis": (-34.41055590107855, 59.600831137357034),
"tp": (-34.41055590107855, 59.600831137357034),
},
"P1": {
"r": 4.779173568725037,
"wlc": 2.6,
"colour": "#92a700",
"text": "P1, £3",
"fill": "solid",
"ec": "#92a700",
"axis": (-80.83697480558055, -1.4849511367261418e-14),
"alignment": ("right", "top"),
"tp": (-87.6161483743056, -1.6094825349031936e-14),
},
"P2": {
"r": 4.779173568725037,
"wlc": 0,
"colour": "#ffba00",
"text": "P2 has a long\nName, £0\n\n",
"fill": "solid",
"ec": "#ffba00",
"axis": (-13.031791598544089, 30.17548646933409),
"alignment": ("left", "top"),
"tp": (-9.047093352116576, 24.691019844418072),
},
"P3": {
"r": 4.779173568725037,
"wlc": 0.21,
"colour": "#92a700",
"text": "P3 has a very,\nlong long long,\nname, £0 \n",
"fill": "solid",
"ec": "#92a700",
"axis": (-55.78932020361301, 30.175486469334082),
"alignment": ("right", "top"),
"tp": (-59.77401845004052, 24.691019844418065),
},
"P4": {
"r": 15.811388300841896,
"wlc": 250,
"colour": "#e77200",
"text": "P4 also\nhas a longish\nname, £250\n",
"fill": "solid",
"ec": "#e77200",
"axis": (-34.41055590107855, 95.97255740839438),
"alignment": ("center", "center"),
"tp": (-34.41055590107855, 113.78394570923628),
},
"P5": {
"r": 4.779173568725037,
"wlc": 6.6,
"colour": "#92a700",
"text": "P5 is medium,\n£7\n\n",
"fill": "solid",
"ec": "#92a700",
"axis": (-69.00212318005225, 70.8403126698613),
"alignment": ("right", "top"),
"tp": (-75.44950037768407, 72.9351925104148),
},
"P6": {
"r": 10.16857905510893,
"wlc": 103.4,
"colour": "#92a700",
"text": "P6 is a very long name\nlike P4 is also,\n£100\n",
"fill": "solid",
"ec": "#92a700",
"axis": (0.181011377895139, 70.8403126698613),
"alignment": ("left", "top"),
"tp": (11.754017782309209, 74.600610395285),
},
}
# Text artists to be repositioned, plus the x/y positions they annotate.
ts = []
x_list = []
y_list = []
# Draw one circle per dl_data entry; add a text label at the circle centre for
# every entry except "Center", "First" and "Second".
for c in dl_data.keys():
circle = plt.Circle(
dl_data[c]["axis"], # x, y position
radius=dl_data[c]["r"],
fc=dl_data[c]["colour"], # face colour
ec=dl_data[c]["ec"], # edge colour
zorder=2,
)
ax.add_patch(circle)
x = dl_data[c]["axis"][0]
y = dl_data[c]["axis"][1]
# `text` is assigned here but never read; the label below re-reads dl_data.
text = dl_data[c]["text"]
if c in ["Center", "First", "Second"]:
pass
else:
ts.append(ax.text(x, y, dl_data[c]["text"]))
x_list.append(x)
y_list.append(y)
# NOTE: adjust_text runs here, before plt.axis("scaled") below changes the
# axes limits, and the circles themselves are not passed to it.
adjust_text(
ts,
x=x_list,
y=y_list,
force_points=0.1,
arrowprops=dict(arrowstyle="->", color="red"),
)
plt.axis("scaled")
plt.axis("off")
plt.show()
There are two issues:
adjust_text must be called after all drawing is completed, i.e. plt.axis("scaled") must come before adjust_text, see docs:
Call adjust_text the very last, after all plotting (especially
anything that can change the axes limits) has been done.
You must pass your circles as additional objects to be avoided: add_objects=objects
# Text artists to reposition, the points they annotate, and the circle
# patches the labels must avoid.
ts = []
x_list = []
y_list = []
objects = []
for c, spec in dl_data.items():
    circle = plt.Circle(
        spec["axis"],  # x, y position
        radius=spec["r"],
        fc=spec["colour"],  # face colour
        ec=spec["ec"],  # edge colour
        zorder=2,
    )
    # Every circle is an obstacle, including the unlabelled ones.
    objects.append(circle)
    ax.add_patch(circle)
    # These three circles are drawn but not labelled.
    if c in ("Center", "First", "Second"):
        continue
    x, y = spec["axis"]
    # strip() drops the trailing newlines in the label strings so the text
    # extent used for overlap checks matches the visible text.
    ts.append(ax.text(x, y, spec["text"].strip()))
    # Not passed to adjust_text below; kept so the script still exposes the
    # same lists as before.
    x_list.append(x)
    y_list.append(y)
# Fix the axes limits first: adjust_text must run after everything that can
# change them.
plt.axis("scaled")
plt.axis("off")
adjust_text(
    ts,
    add_objects=objects,
    arrowprops=dict(arrowstyle="->", color="red"),
)
I couldn't manage to move the P6 text away from the green and orange circles, though.

tf.dataSync() does not return tensor from BlazeFaceModel in a readable form

I am using BlazeFaceModel to detect faces before sending the faces to another model using Tensorflow.js
When I am using a custom model and trying to get the tensor output I used the code below and it worked at returning the tensors.
// Flag passed as the second argument of estimateFaces.
const returnTensors = true;
const faces = await blazeModel.estimateFaces(tensor, returnTensors);
if (faces !== null) {
// Download the tensors to view the shape
// NOTE: per the logged output, `faces` is an Array of objects whose fields are
// Tensors, not a Tensor itself, so the next call fails with
// "faces.dataSync is not a function".
const face = faces.dataSync();
face.forEach((pred, i) => {
console.log(`x: ${i}, pred: ${pred}`);
});
}
But it throws the following error when applying on the tensor output from BlazeFaceModel:
faces.dataSync is not a function. (In 'faces.dataSync()', 'faces.dataSync' is undefined)
Output from console.log(faces)
Array [
Object {
"bottomRight": Tensor {
"dataId": Object {},
"dtype": "float32",
"id": 60793,
"isDisposedInternal": false,
"kept": false,
"rankType": "1",
"scopeId": 116528,
"shape": Array [
2,
],
"size": 2,
"strides": Array [],
},
"landmarks": Tensor {
"dataId": Object {},
"dtype": "float32",
"id": 60795,
"isDisposedInternal": false,
"kept": false,
"rankType": "2",
"scopeId": 116532,
"shape": Array [
6,
2,
],
"size": 12,
"strides": Array [
2,
],
},
"probability": Tensor {
"dataId": Object {},
"dtype": "float32",
"id": 60785,
"isDisposedInternal": false,
"kept": false,
"rankType": "1",
"scopeId": 116495,
"shape": Array [
1,
],
"size": 1,
"strides": Array [],
},
"topLeft": Tensor {
"dataId": Object {},
"dtype": "float32",
"id": 60792,
"isDisposedInternal": false,
"kept": false,
"rankType": "1",
"scopeId": 116526,
"shape": Array [
2,
],
"size": 2,
"strides": Array [],
},
},
]
faces is not a tensor. It is an array of objects whose values are tensors. If you would like to get all the tensors of the first face at once in an array, Object.values(faces[0]) can be used:
// Collect the tensors of the first detected face (topLeft, bottomRight,
// landmarks, probability). Fall back to an empty list when no face was
// detected so Object.values is never called on undefined.
const tensors = faces.length > 0 ? Object.values(faces[0]) : []; // array of tensor
// Download the value of each tensor to a js array, and keep the result
// instead of discarding it.
const values = tensors.map(t => t.dataSync());
// alternatively they can all be converted to a big tensor before using only once dataSync()

Filtering down a Karate test response object to get a sub-list?

Given this feature file:
Feature: test
Scenario: filter response
# Inline fixture standing in for a response body: a list of three objects,
# each with its own set of keys.
* def response =
"""
[
{
"a": "a",
"b": "a",
"c": "a",
},
{
"d": "ab",
"e": "ab",
"f": "ab",
},
{
"g": "ac",
"h": "ac",
"i": "ac",
}
]
"""
# Check that the second element (index 1) contains e == 'ab'.
* match response[1] contains { e: 'ab' }
How can I filter the response down so that it is equal to:
{
"d": "ab",
"e": "ab",
"f": "ab",
}
Is there a built-in way to do this? In the same way as you can filter a List using a Java stream?
Sample code:
Feature: test
Scenario: filter response
# Inline fixture standing in for a response body: a list of three objects,
# each with its own set of keys.
* def response =
"""
[
{
"a": "a",
"b": "a",
"c": "a",
},
{
"d": "ab",
"e": "ab",
"f": "ab",
},
{
"g": "ac",
"h": "ac",
"i": "ac",
}
]
"""
# Predicate: true only for elements whose "e" field equals 'ab'.
* def filt = function(x){ return x.e == 'ab' }
# All elements of the response list.
* def items = get response[*]
# Keep the elements for which the predicate returns true; the result is
# presumably a list of matches rather than a single object (verify against
# the karate.filter documentation).
* def res = karate.filter(items, filt)
* print res

C3JS: Access the value shown on the X axis

I have simple bar chart like this:
Here is my C3JS
// Bar chart with one category per site and the series A, B and C.
var chart = c3.generate({
data: {
// One object per site: "site" supplies the x category, A/B/C the bar values.
json:[{"A": 67, "B": 10, "site": "Google", "C": 12}, {"A": 10, "B": 20, "site": "Amazon", "C": 12}, {"A": 25, "B": 10, "site": "Stackoverflow", "C": 8}, {"A": 20, "B": 22, "site": "Yahoo", "C": 12}, {"A": 76, "B": 30, "site": "eBay", "C": 9}],
mimeType: 'json',
keys: {
x: 'site',
value: ['A','B','C']
},
type: 'bar',
selection: {
enabled: true
},
// Runs when an element is selected. It ignores its own arguments and reports
// x, value and name of the FIRST entry in chart.selected(); x is the numeric
// category index (e.g. 0), not the label shown on the axis.
onselected: function(d,element)
{
alert('selected x: '+chart.selected()[0].x+' value: '+chart.selected()[0].value+' name: '+chart.selected()[0].name);
},
// A, B and C are declared as one group.
groups: [
['A','B','C']
]
},
axis: {
x: {
// Category axis: x values are labels taken from the "site" key.
type: 'category'
}
}
});
After a chart element is selected (clicked), the alert shows the X, Value and Name attributes of the first selected element. For example, "selected x: 0 value: 67 name: A" after I click on the top-left chart element. How can I get the value shown on the X axis? In this case it is "Google".
The categories property is populated when the x-axis is declared to be of type category, as it is in this case. So to get the data from the x-axis you need to call the .categories() function.
onselected: function(d,element){alert(chart.categories()[d.index]);}
https://jsfiddle.net/4bos2qzx/1/