Merge two GeoJSON into one in a dataframe - pandas

I have a dataframe containing GeoJSON:
# Column-oriented input: each column maps a row label to its value.
# The 'geojson' column holds every LineString as a raw JSON string.
data = {
    'geojson': {
        0: '{"type":"LineString","coordinates":[[1,4],[2,5]]}',
        1: '{"type":"LineString","coordinates":[[3,6],[4,7]]}',
    },
    'checkpoint': {0: 6, 1: 0},
    'lom_name': {0: 'marathon19', 1: 'marathon19'},
}
# A dict of dicts is read column-wise by the plain constructor as well.
df = pd.DataFrame(data)
The desired result is:
geojson lom_name
{"type":"LineString","coordinates":[[1,4],[2,5],[3,6],[4,7]]} marathon19
I tried df = df.groupby(['geojson']).apply(list), but it does not give me what I need.

You could use the following approach, which applies the geopandas dissolve function to the GeoJSON data (a non-geometric approach would involve aggregating, concatenating and parsing the df):
import json
import geopandas as gpd
# Coordinates of the two LineString segments, in feature order
_segments = [
    [[3, 6], [4, 7]],
    [[1, 4], [2, 5]],
]
# Wrap every segment in a GeoJSON Feature; both features carry the same properties
data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "LineString", "coordinates": coords},
            "properties": {"checkpoint": 0, "prop1": "marathon19"},
        }
        for coords in _segments
    ],
}
# Build a GeoDataFrame from the features and dissolve the line strings
gdf = gpd.GeoDataFrame.from_features(data).dissolve()
# Convert the dissolved result back to GeoJSON
gdf.to_json()
If the input lines are non-contiguous (as in your example data), the dissolved geometry is a MultiLineString, so you still need to build a single LineString from it (see for example and caveats here):
import shapely
# Geometry of the first row of the dissolved frame (a MultiLineString when the
# input lines were non-contiguous)
_merged = gdf.iloc[0].geometry
# Put the sub-line coordinates into a list of sublists.
# Multi-part geometries are not directly iterable in Shapely 2.0+, so walk
# `.geoms`; a geometry without `.geoms` (already a single line) is used as-is.
outcoords = [list(part.coords) for part in getattr(_merged, "geoms", [_merged])]
# Flatten the list of sublists and use it to make a new line; the parts are
# concatenated in the order they are stored, no re-ordering is attempted
outline = shapely.geometry.LineString([pt for sublist in outcoords for pt in sublist])
# set geometry of gdf to linestring
gdf = gdf.set_geometry([outline])

Related

Groupby value_counts giving keyerror

I am trying to plot the countries whose scale has changed over time.
This is the dataset I am using: 'https://www.kaggle.com/datasets/whenamancodes/the-global-hunger-index'
wasting = pd.read_csv('/kaggle/input/the-global-hunger-index/share-of-children-with-a-weight-too-low-for-their-height-wasting.csv')
# rename the column
wasting.rename(columns={'Prevalence of wasting, weight for height (% of children under 5)':'Wasting'},inplace=True)
#create new column with pd.cut
bins = [0,9.9,19.99,34.99,49.99,np.inf]
labels = ['Low','Moderate','Serious','Alarming','Extremely Alarming']
wasting['W_Scale'] = pd.cut(wasting['Wasting'],bins=bins,labels=labels,right=False).astype('category')
wasting.head()
wasting.isna().sum()
#selecting countries with w_scale greater than 1
wasting_entity_scale = wasting.groupby('Entity').filter(lambda x: x['W_Scale'].nunique()>1)
wasting_entity_scale = wasting_entity_scale.groupby(['Year','Entity'])['W_Scale'].value_counts().reset_index(name='count')
wasting_entity_scale = wasting_entity_scale[wasting_entity_scale['count']>0]
wasting_entity_scale = wasting_entity_scale.reset_index(drop=True)
#until this point everything is fine.
traces = {}
for i, (loc, d) in enumerate(wasting_entity_scale.groupby("Entity")):
# use meta so that we know which country a trace belongs to
fig = px.histogram(
d, x="Year", y="Entity", color="level_2"
).update_traces(meta=loc, visible=(i == 0))
traces[loc] = fig.data
l = fig.layout
# integrate all the traces
fig = go.Figure([t for a in traces.values() for t in a]).update_layout(l)
# now build the menu, using meta to know which traces should be visible per country
fig.update_layout(
updatemenus=[
{
"active": 0,
"buttons": [
{
"label": c,
"method": "update",
"args": [
{"visible": [t.meta == c for t in fig.data]},
{"title": c},
],
}
for c in traces.keys()
],
}
]
)
when i try to plot it, it shows this error:
KeyError: 'Serious'
Can someone please explain what I am doing wrong?
Thank you.

Error: Size(XX) must match the product of shape x,x,x,x

This is a newbie question, but any help will be appreciated.
I'm having a problem with a 3D tensor in TensorFlow.JS (node), with the following code:
const tf = require('@tensorflow/tfjs-node');
(async ()=>{
let list = [
{
xs: [
[
[ 0.7910133603149169, 0.7923634491520086, 0.79166712455722, 0.7928027625311359, 0.4426631841175303, 0.018719529693542337 ],
[ 0.7890709817505044, 0.7943561081665688, 0.7915865358198619, 0.7905450669351226, 0.4413258183256521, 0.04449784810703526 ],
[ 0.7940229392692819, 0.7924745639669473, 0.7881395357356101, 0.7880208892359736, 0.40902353356570315, 0.14643954229459097 ],
[ 0.801474878324385, 0.8003822349633881, 0.7969969705961001, 0.7939094034872144, 0.40227041242732126, 0.03893523221469505 ],
[ 0.8022503526561848, 0.8011600386679555, 0.7974621873981194, 0.8011488339557422, 0.43008361179994464, 0.11210020422004835 ],
],
[
[ 0.8034111510684465, 0.7985390234525179, 0.7949321830852709, 0.7943788081438548, 0.5739870761673189, 0.13358267460835263 ],
[ 0.805714476773561, 0.8072996569653942, 0.8040745782073486, 0.8035592212810225, 0.5899031300445114, 0.03229758335964042 ],
[ 0.8103322733081704, 0.8114317495511435, 0.8073606480159334, 0.8057140734135828, 0.5842202187553198, 0.01986941729798157 ],
[ 0.815132106874313, 0.8122641403791668, 0.8104353115275772, 0.8103395749739932, 0.5838313552472632, 0.03332674037143093 ],
[ 0.8118480102237944, 0.8166500561770489, 0.8128943005604122, 0.8147644523703373, 0.601619389872815, 0.04807286626501376 ],
]
],
ys: 1
}
];
const ds = tf.data.generator(async () => {
let index = 0;
return {
next: async () => {
if(index >= list.length) return { done : true };
let doc = list[index];
index++;
return {
value: {
xs : doc.xs,
ys : doc.ys
},
done: false
};
}
};
}).batch(1);
let model = tf.sequential();
model.add(tf.layers.dense({units: 60, activation: 'relu', inputShape: [2, 5, 6]}));
model.compile({
optimizer: tf.train.adam(),
loss: 'sparseCategoricalCrossentropy',
metrics: ['accuracy']
});
await model.fitDataset(ds, {epochs: 1});
return true;
})().then(console.log).catch(console.error);
This code generates the following error:
Error: Size(60) must match the product of shape 1,2,5,60
at Object.inferFromImplicitShape
I don't understand why the layer is changing the last value of the inputShape from 6 to 60 (which is the number of output units for this layer).
Just to confirm: as far as I know, the units should be the product batchSize * x * y * z, which in the example case is 1 * 2 * 5 * 6 = 60.
Thank you!
Software specification:
tfjs-node: v1.2.11
Node JS: v11.2.0
OS: Ubuntu 18.04.2
OK, the problem is that a fully connected layer (tf.layers.dense) expects a tensor1d as input, as described in this other question: Why do we flatten the data before we feed it into tensorflow?
So, to do the trick, the tensor must be re-shaped before the fully connected layer, as:
return {
value: {
xs : ts.reshape(doc.xs, [-1]),
ys : doc.ys
},
done: false
};
Here the -1 in tf.reshape(tensor, [-1]) tells the reshape function to flatten the tensor.
For a visual demonstration, here is a YouTube video: CNN Flatten Operation Visualized

How do I create a surface plot with matplotlib of a closed loop revolve about an axis given coordinate data of the 2D profile?

I have the closed loop stored as a two column by N row numpy array.
The last row of the array is the same as the first row, implying that it is, indeed, a closed loop.
The number of angular divisions in the rotation (as in, "slices of pie" so to speak) ought to be set by a variable called 'angsteps'.
The profile in question is plotted in the x-y coordinate plane, and is rotated about the 'x-axis'.
You can find the profile in question plotted here. https://i.imgur.com/yJoKIEp.png
I apologize for the lack of code, but the profile data has so many interdependencies that I can't post the code that generates it without basically taking a shortcut to plugging the github page for it.
A downsampled version of the curve data looks like this:
bulkmat = [[ 5.2 0. ]
[ 0.381 0. ]
[ 0.381 3.164 ]
[ 2. 3.164 ]
[ 2. 4.1 ]
[ 3.78 4.1 ]
[ 3.78 6.477 ]
[ 1.898 6.477 ]
[ 1.898 7. ]
[ 3.18 7. ]
[ 3.18 9.6 ]
[ 1.898 9.6 ]
[ 1.898 9.6 ]
[ 2.31987929 12.42620027]
[ 3.4801454 15.24663923]
[ 5.22074074 17.97407407]
[ 7.38360768 20.521262 ]
[ 9.81068861 22.80096022]
[ 12.34392593 24.72592593]
[ 14.825262 26.20891632]
[ 17.09663923 27.16268861]
[ 19. 27.5 ]
[ 19. 27.5 ]
[ 19.62962963 27.44718793]
[ 20.18518519 27.29972565]
[ 20.66666667 27.07407407]
[ 21.07407407 26.7866941 ]
[ 21.40740741 26.45404664]
[ 21.66666667 26.09259259]
[ 21.85185185 25.71879287]
[ 21.96296296 25.34910837]
[ 22. 25. ]
[ 22. 25. ]
[ 21.12125862 24.17043472]
[ 18.91060645 23.59946824]
[ 15.97201646 22.9218107 ]
[ 12.84280513 21.85346069]
[ 9.96762011 20.14089993]
[ 7.67242798 17.51028807]
[ 6.13850192 13.61665735]
[ 5.37640942 7.99310742]
[ 5.2 0. ]]
The following would be an example of a solid of revolution plotted around the z axis. As input we take some points and then create the necessary 2D arrays from them.
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as axes3d
# 2D profile to revolve, one (x, y) pair per row
xy = np.array([[1,0],[2,1],[2,2],[1,1.5],[1,0]])
# distance from the rotation axis: the x column of the profile
r = xy[:, 0]
# 60 angles spanning one full revolution
phi = np.linspace(0, 2*np.pi, 60)
# pair every angle with every radius
R, Phi = np.meshgrid(r, phi)
# polar -> cartesian
X = np.cos(Phi) * R
Y = np.sin(Phi) * R
# heights: the y column of the profile, one copy per angle
Z = np.tile(xy[:, 1], (len(phi), 1))
# one figure with a single 3D axes for the revolved surface
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
# small extra axes placed at [left, bottom, width, height] in figure fractions,
# used to show the original 2D profile next to the surface
ax2 = fig.add_axes([0.05,0.7,0.15,.2])
ax2.plot(xy[:,0],xy[:,1], color="k")
# draw the surface from the X, Y, Z grids; stride 1 uses every grid row/column
ax.plot_surface(X, Y, Z, alpha=0.5, color='gold', rstride=1, cstride=1)
plt.show()

Python Pandas How to read json without sorting by the index?

If I have a JSON file like this and import it into a DataFrame, the columns are always sorted as -0.8, -0.9. I want the order to be maintained as it is defined in the JSON, which is -0.9, -0.8.
{
"-0.90": {
"A": 1.0,
"B": 0.4935585804
},
"-0.80": {
"A": 1.0,
"B": 0.4935585804
}
}
You can load your JSON data as an OrderedDict to preserve the order of the keys, and then use the DataFrame.from_dict constructor:
import json
from collections import OrderedDict
# The JSON document, assembled line by line; joining with newlines yields the
# exact text to be parsed.
s = "\n".join([
    '{',
    '"-0.90": {',
    '"A": 1.0,',
    '"B": 0.4935585804',
    '},',
    '"-0.80": {',
    '"A": 1.0,',
    '"B": 0.4935585804',
    '}',
    '}',
])
# Decode every JSON object into an OrderedDict so the key order of the
# document is kept.
data = json.loads(s, object_pairs_hook=OrderedDict)
# The outer keys become the columns, in document order.
pd.DataFrame(data)
-0.90 -0.80
A 1.000000 1.000000
B 0.493559 0.493559

transposing data in array using numpy

I have a list as follows, and it needs to be transposed into a numpy array:
samplelist= [ [ ['Name-1','Name-2','Name-3'] , ['Age-1','Age-2','Age-3'] ],
[ ['new_Name_1','new_Name_2','new_Name_3'], ['new_Age_1','new_Age_2','new_Age_3'] ]
]
Expected Result:
samplearray = [ [ ['Name-1','Age-1'], ['Name-2','Age-2'], ['Name-3','Age-3'] ],
[ ['new_Name_1','new_Age_1'], ['new_Name_2','new_Age_2'], ['new_Name_3','new_Age_3'] ]
]
np.transpose results:
np.transpose(a)
array([[['Name-1', 'new_Name_1'],
['Age-1', 'new_Age_1']],
[['Name-2', 'new_Name_2'],
['Age-2', 'new_Age_2']],
[['Name-3', 'new_Name_3'],
['Age-3', 'new_Age_3']]],
dtype='|S10')
samplelist is a 3-D array.
In [58]: samplelist.shape
Out[58]: (2, 2, 3)
Using transpose swaps the first and last axes (0 and 2):
In [55]: samplelist.T
Out[55]:
array([[['Name-1', 'new_Name_1'],
['Age-1', 'new_Age_1']],
[['Name-2', 'new_Name_2'],
['Age-2', 'new_Age_2']],
[['Name-3', 'new_Name_3'],
['Age-3', 'new_Age_3']]],
dtype='|S10')
In [57]: samplelist.swapaxes(0,2)
Out[57]:
array([[['Name-1', 'new_Name_1'],
['Age-1', 'new_Age_1']],
[['Name-2', 'new_Name_2'],
['Age-2', 'new_Age_2']],
[['Name-3', 'new_Name_3'],
['Age-3', 'new_Age_3']]],
dtype='|S10')
To get the desired array, swap axes 1 and 2:
import numpy as np
# 3-D array of shape (2, 2, 3)
samplelist = np.array([
    [
        ['Name-1', 'Name-2', 'Name-3'],
        ['Age-1', 'Age-2', 'Age-3'],
    ],
    [
        ['new_Name_1', 'new_Name_2', 'new_Name_3'],
        ['new_Age_1', 'new_Age_2', 'new_Age_3'],
    ],
])
# Keep axis 0 in place and exchange axes 1 and 2, giving shape (2, 3, 2)
print(np.transpose(samplelist, (0, 2, 1)))
# [[['Name-1' 'Age-1']
# ['Name-2' 'Age-2']
# ['Name-3' 'Age-3']]
# [['new_Name_1' 'new_Age_1']
# ['new_Name_2' 'new_Age_2']
# ['new_Name_3' 'new_Age_3']]]