Merge two GeoJSON into one in a dataframe

Merge two GeoJSON into one in a dataframe - pandas

I have a dataframe containing GeoJSON:
# Two LineString segments stored as raw GeoJSON text, plus their metadata.
# Each column is given as an {index label: value} mapping.
data = {
    'geojson': {
        0: '{"type":"LineString","coordinates":[[1,4],[2,5]]}',
        1: '{"type":"LineString","coordinates":[[3,6],[4,7]]}',
    },
    'checkpoint': {0: 6, 1: 0},
    'lom_name': {0: 'marathon19', 1: 'marathon19'},
}
# Outer keys become the columns, inner keys become the row index.
df = pd.DataFrame(data)
The desired result is:
geojson lom_name
{"type":"LineString","coordinates":[[1,4],[2,5],[3,6],[4,7]]} marathon19
I tried df = df.groupby(['geojson']).apply(list), but that does not give me the result I need.

You could use the following approach, which applies the geopandas dissolve function to the GeoJSON data (a non-geometric approach would involve aggregating, concatenating and parsing the df):
import json
import geopandas as gpd
# GeoJSON FeatureCollection holding the two line segments that should be merged.
data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {
                "type": "LineString",
                "coordinates": [[3, 6], [4, 7]],
            },
            "properties": {"checkpoint": 0, "prop1": 'marathon19'},
        },
        {
            "type": "Feature",
            "geometry": {
                "type": "LineString",
                "coordinates": [[1, 4], [2, 5]],
            },
            "properties": {"checkpoint": 0, "prop1": 'marathon19'},
        },
    ],
}
# Build a GeoDataFrame from the GeoJSON features.
gdf = gpd.GeoDataFrame.from_features(data)
# Dissolve the line strings into one geometry.
gdf = gdf.dissolve()
# Serialise the result back to GeoJSON text.
gdf.to_json()
If the input lines are non-contiguous (like in your example data) you still need to create a linestring out of the resulting multilinestring (see for example and caveats here):
import shapely
# The dissolved geometry of the first row is expected to be a MultiLineString.
# Its parts are read through ``.geoms`` because Shapely 2.x no longer allows
# iterating a multi-part geometry directly.  A plain LineString has no
# ``.geoms``, so it is handled as a single part.
merged = gdf.iloc[0].geometry
parts = getattr(merged, "geoms", [merged])
# Put the sub-line coordinates into a list of sublists
outcoords = [list(part.coords) for part in parts]
# Flatten the list of sublists (in the order the parts are stored) and use it
# to make a new line
outline = shapely.geometry.LineString(
    [point for sublist in outcoords for point in sublist]
)
# set geometry of gdf to linestring
gdf = gdf.set_geometry([outline])

Related

Groupby value_counts giving keyerror

I am trying to plot the countries whose scale has changed over time.
This is the dataset I am using: 'https://www.kaggle.com/datasets/whenamancodes/the-global-hunger-index'
# Load the "weight too low for height (wasting)" table from the Kaggle input folder.
wasting = pd.read_csv('/kaggle/input/the-global-hunger-index/share-of-children-with-a-weight-too-low-for-their-height-wasting.csv')
# Shorten the long prevalence column name to 'Wasting'.
wasting.rename(columns={'Prevalence of wasting, weight for height (% of children under 5)':'Wasting'},inplace=True)
# Bucket 'Wasting' into five labelled bands with pd.cut; right=False makes each
# bin closed on the left and open on the right, e.g. [0, 9.9).
bins = [0,9.9,19.99,34.99,49.99,np.inf]
labels = ['Low','Moderate','Serious','Alarming','Extremely Alarming']
wasting['W_Scale'] = pd.cut(wasting['Wasting'],bins=bins,labels=labels,right=False).astype('category')
# Quick inspection: first rows and the number of missing values per column.
wasting.head()
wasting.isna().sum()
# Keep only the entities that have more than one distinct W_Scale value.
wasting_entity_scale = wasting.groupby('Entity').filter(lambda x: x['W_Scale'].nunique()>1)
# Count the W_Scale values per (Year, Entity) group and store the counts in a 'count' column.
wasting_entity_scale = wasting_entity_scale.groupby(['Year','Entity'])['W_Scale'].value_counts().reset_index(name='count')
# Drop the combinations whose count is zero.
wasting_entity_scale = wasting_entity_scale[wasting_entity_scale['count']>0]
# Renumber the rows after the filtering above.
wasting_entity_scale = wasting_entity_scale.reset_index(drop=True)
#until this point everything is fine.
# Build one group of histogram traces per entity; only the traces of the first
# entity start out visible.
# NOTE(review): the reported "KeyError: 'Serious'" possibly comes from the
# colour column still being categorical, so a per-entity subset carries
# categories it has no rows for -- confirm, e.g. by casting that column to str.
traces = {}
for i, (loc, d) in enumerate(wasting_entity_scale.groupby("Entity")):
    # use meta so that we know which country a trace belongs to
    fig = px.histogram(
        d, x="Year", y="Entity", color="level_2"
    ).update_traces(meta=loc, visible=(i == 0))
    traces[loc] = fig.data
    # remember the layout so it can be applied to the combined figure
    l = fig.layout
# integrate all the traces
fig = go.Figure([t for a in traces.values() for t in a]).update_layout(l)
# now build menu using meta to know which traces should be visible per country
fig.update_layout(
    updatemenus=[
        {
            "active": 0,
            "buttons": [
                {
                    "label": c,
                    "method": "update",
                    "args": [
                        {"visible": [t.meta == c for t in fig.data]},
                        {"title": c},
                    ],
                }
                for c in traces
            ],
        }
    ]
)
When I try to plot it, it shows this error:
KeyError: 'Serious'
Can someone please explain what I am doing wrong?
Thank you.

Error: Size(XX) must match the product of shape x,x,x,x

This is a newbie question, but any help will be appreciated.
I'm having a problem with a 3D tensor in TensorFlow.JS (node), with the following code:
// Node.js binding of TensorFlow.js; the package lives in the "@tensorflow" npm scope.
const tf = require('@tensorflow/tfjs-node');
// Reproduction script: trains a single dense layer on one sample through
// fitDataset. As posted, it fails with
// "Size(60) must match the product of shape 1,2,5,60".
(async ()=>{
// One training sample: xs is a nested array of 2 x 5 x 6 numbers, ys is a single label.
let list = [
{
xs: [
[
[ 0.7910133603149169, 0.7923634491520086, 0.79166712455722, 0.7928027625311359, 0.4426631841175303, 0.018719529693542337 ],
[ 0.7890709817505044, 0.7943561081665688, 0.7915865358198619, 0.7905450669351226, 0.4413258183256521, 0.04449784810703526 ],
[ 0.7940229392692819, 0.7924745639669473, 0.7881395357356101, 0.7880208892359736, 0.40902353356570315, 0.14643954229459097 ],
[ 0.801474878324385, 0.8003822349633881, 0.7969969705961001, 0.7939094034872144, 0.40227041242732126, 0.03893523221469505 ],
[ 0.8022503526561848, 0.8011600386679555, 0.7974621873981194, 0.8011488339557422, 0.43008361179994464, 0.11210020422004835 ],
],
[
[ 0.8034111510684465, 0.7985390234525179, 0.7949321830852709, 0.7943788081438548, 0.5739870761673189, 0.13358267460835263 ],
[ 0.805714476773561, 0.8072996569653942, 0.8040745782073486, 0.8035592212810225, 0.5899031300445114, 0.03229758335964042 ],
[ 0.8103322733081704, 0.8114317495511435, 0.8073606480159334, 0.8057140734135828, 0.5842202187553198, 0.01986941729798157 ],
[ 0.815132106874313, 0.8122641403791668, 0.8104353115275772, 0.8103395749739932, 0.5838313552472632, 0.03332674037143093 ],
[ 0.8118480102237944, 0.8166500561770489, 0.8128943005604122, 0.8147644523703373, 0.601619389872815, 0.04807286626501376 ],
]
],
ys: 1
}
];
// Dataset backed by a generator that walks `list` once, grouped into batches of 1.
const ds = tf.data.generator(async () => {
let index = 0;
return {
next: async () => {
// signal the end of the data once every entry has been handed out
if(index >= list.length) return { done : true };
let doc = list[index];
index++;
return {
value: {
xs : doc.xs,
ys : doc.ys
},
done: false
};
}
};
}).batch(1);
// Model with a single dense layer of 60 units, declared for inputs of shape [2, 5, 6].
let model = tf.sequential();
model.add(tf.layers.dense({units: 60, activation: 'relu', inputShape: [2, 5, 6]}));
model.compile({
optimizer: tf.train.adam(),
loss: 'sparseCategoricalCrossentropy',
metrics: ['accuracy']
});
// Train for one epoch over the dataset.
await model.fitDataset(ds, {epochs: 1});
return true;
// log the resolved value, or the error if anything above rejects
})().then(console.log).catch(console.error);
This code generates the following error:
Error: Size(60) must match the product of shape 1,2,5,60
at Object.inferFromImplicitShape
I don't understand why the layer is changing the last value of the inputShape from 6 to 60 (which is the number of output units requested for this layer).
Just to confirm, as far as I know the units should be the product of: batchSize * x * y * z, in the example case: 1 * 2 * 5 * 6 = 60
Thank you!
Software specification:
tfjs-node: v1.2.11
Node JS: v11.2.0
OS: Ubuntu 18.04.2

Ok, the problem is that a fully connected layer (tf.layers.dense) expects a tensor1d as input, as described in this other question: Why do we flatten the data before we feed it into tensorflow?
So, to do the trick, the tensor must be reshaped before the fully connected layer, as:
return {
value: {
xs : ts.reshape(doc.xs, [-1]),
ys : doc.ys
},
done: false
};
Where the -1 in tf.reshape(tensor, [-1]) tells the reshape function to flatten the tensor.
For a visual demonstration, here is a YouTube video: CNN Flatten Operation Visualized

How do I create a surface plot with matplotlib of a closed loop revolve about an axis given coordinate data of the 2D profile?

I have the closed loop stored as a two column by N row numpy array.
The last row of the array is the same as the first row, implying that it is, indeed, a closed loop.
The number of angular divisions in the rotation (as in, "slices of pie" so to speak) ought to be set by a variable called 'angsteps'.
The profile in question is plotted in the x-y coordinate plane, and is rotated about the 'x-axis'.
You can find the profile in question plotted here. https://i.imgur.com/yJoKIEp.png
I apologize for the lack of code, but the profile data has so many interdependencies that I can't post the code that generates it without basically taking a shortcut to plugging the github page for it.
a downsampled version of the curve data looks like this.
bulkmat = [[ 5.2 0. ]
[ 0.381 0. ]
[ 0.381 3.164 ]
[ 2. 3.164 ]
[ 2. 4.1 ]
[ 3.78 4.1 ]
[ 3.78 6.477 ]
[ 1.898 6.477 ]
[ 1.898 7. ]
[ 3.18 7. ]
[ 3.18 9.6 ]
[ 1.898 9.6 ]
[ 1.898 9.6 ]
[ 2.31987929 12.42620027]
[ 3.4801454 15.24663923]
[ 5.22074074 17.97407407]
[ 7.38360768 20.521262 ]
[ 9.81068861 22.80096022]
[ 12.34392593 24.72592593]
[ 14.825262 26.20891632]
[ 17.09663923 27.16268861]
[ 19. 27.5 ]
[ 19. 27.5 ]
[ 19.62962963 27.44718793]
[ 20.18518519 27.29972565]
[ 20.66666667 27.07407407]
[ 21.07407407 26.7866941 ]
[ 21.40740741 26.45404664]
[ 21.66666667 26.09259259]
[ 21.85185185 25.71879287]
[ 21.96296296 25.34910837]
[ 22. 25. ]
[ 22. 25. ]
[ 21.12125862 24.17043472]
[ 18.91060645 23.59946824]
[ 15.97201646 22.9218107 ]
[ 12.84280513 21.85346069]
[ 9.96762011 20.14089993]
[ 7.67242798 17.51028807]
[ 6.13850192 13.61665735]
[ 5.37640942 7.99310742]
[ 5.2 0. ]]

The following would be an example of a solid of revolution plotted around the z axis. As input we take some points and then create the necessary 2D arrays from them.
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as axes3d
# input xy coordinates of the profile (first and last point are identical)
xy = np.array([[1,0],[2,1],[2,2],[1,1.5],[1,0]])
# number of angular divisions used for the revolution
angsteps = 60
# radial component is x values of input
r = xy[:, 0]
# angular component is one revolution sampled at angsteps angles
phi = np.linspace(0, 2*np.pi, angsteps)
# create grid: one row per angle, one column per profile point
R, Phi = np.meshgrid(r, phi)
# transform to cartesian coordinates
X = R*np.cos(Phi)
Y = R*np.sin(Phi)
# Z values are y values of input, repeated once per angle
Z = np.tile(xy[:, 1], (angsteps, 1))
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
# small inset axes showing the 2D profile that is being revolved
ax2 = fig.add_axes([0.05,0.7,0.15,.2])
ax2.plot(xy[:,0],xy[:,1], color="k")
ax.plot_surface(X, Y, Z, alpha=0.5, color='gold', rstride=1, cstride=1)
plt.show()

Python Pandas How to read json without sorting by the index?

If I have a JSON file like this and import it into a dataframe, the columns are always sorted as -0.8, -0.9. I want the order to be maintained as it is defined in the JSON, which is -0.9, -0.8.
{
"-0.90": {
"A": 1.0,
"B": 0.4935585804
},
"-0.80": {
"A": 1.0,
"B": 0.4935585804
}
}

You can load your json data as an OrderedDict to preserve the order of the keys, and then use the DataFrame.from_dict constructor:
import json
from collections import OrderedDict
# Raw JSON text; its top-level keys appear as "-0.90" followed by "-0.80".
s = """{
"-0.90": {
"A": 1.0,
"B": 0.4935585804
},
"-0.80": {
"A": 1.0,
"B": 0.4935585804
}
}"""
# Decode every JSON object into an OrderedDict so the keys keep the order in
# which they appear in the text.
decoder = json.JSONDecoder(object_pairs_hook=OrderedDict)
data = decoder.decode(s)
# Outer keys become the DataFrame columns.
pd.DataFrame.from_dict(data)
-0.90 -0.80
A 1.000000 1.000000
B 0.493559 0.493559

transposing data in array using numpy

I have a list as follows, and it needs to be transposed into a numpy array:
# Nested list: each outer entry holds a list of names followed by a list of ages.
samplelist = [
    [
        ['Name-1', 'Name-2', 'Name-3'],
        ['Age-1', 'Age-2', 'Age-3'],
    ],
    [
        ['new_Name_1', 'new_Name_2', 'new_Name_3'],
        ['new_Age_1', 'new_Age_2', 'new_Age_3'],
    ],
]
Expected Result:
# Target layout: within each outer entry, every name is paired with the age
# found at the same position.
samplearray = [
    [['Name-1', 'Age-1'], ['Name-2', 'Age-2'], ['Name-3', 'Age-3']],
    [['new_Name_1', 'new_Age_1'], ['new_Name_2', 'new_Age_2'], ['new_Name_3', 'new_Age_3']],
]
np.transpose results:
np.transpose(a)
array([[['Name-1', 'new_Name_1'],
['Age-1', 'new_Age_1']],
[['Name-2', 'new_Name_2'],
['Age-2', 'new_Age_2']],
[['Name-3', 'new_Name_3'],
['Age-3', 'new_Age_3']]],
dtype='|S10')

samplelist is a 3-D array.
In [58]: samplelist.shape
Out[58]: (2, 2, 3)
Using transpose swaps the first and last axes (0 and 2):
In [55]: samplelist.T
Out[55]:
array([[['Name-1', 'new_Name_1'],
['Age-1', 'new_Age_1']],
[['Name-2', 'new_Name_2'],
['Age-2', 'new_Age_2']],
[['Name-3', 'new_Name_3'],
['Age-3', 'new_Age_3']]],
dtype='|S10')
In [57]: samplelist.swapaxes(0,2)
Out[57]:
array([[['Name-1', 'new_Name_1'],
['Age-1', 'new_Age_1']],
[['Name-2', 'new_Name_2'],
['Age-2', 'new_Age_2']],
[['Name-3', 'new_Name_3'],
['Age-3', 'new_Age_3']]],
dtype='|S10')
To get the desired array, swap axes 1 and 2:
import numpy as np
# Build the 3-D string array of shape (2, 2, 3) from the nested lists.
samplelist = np.array(
    [
        [
            ['Name-1', 'Name-2', 'Name-3'],
            ['Age-1', 'Age-2', 'Age-3'],
        ],
        [
            ['new_Name_1', 'new_Name_2', 'new_Name_3'],
            ['new_Age_1', 'new_Age_2', 'new_Age_3'],
        ],
    ]
)
# Keep axis 0 in place and exchange axes 1 and 2, so each name ends up next to
# its age.
print(np.transpose(samplelist, axes=(0, 2, 1)))
# [[['Name-1' 'Age-1']
# ['Name-2' 'Age-2']
# ['Name-3' 'Age-3']]
# [['new_Name_1' 'new_Age_1']
# ['new_Name_2' 'new_Age_2']
# ['new_Name_3' 'new_Age_3']]]

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Merge two GeoJSON into one in a dataframe - pandas

Related

Groupby value_counts giving keyerror

Error: Size(XX) must match the product of shape x,x,x,x

How do I create a surface plot with matplotlib of a closed loop revolve about an axis given coordinate data of the 2D profile?

Python Pandas How to read json without sorting by the index?

transposing data in array using numpy

Categories

Resources