extract data from sql query json format - sql

I have a table sensor_measurements with the columns pollutants and measured_at (timestamp). The query
-- Raw readings: each row carries a timestamp and a JSON array of pollutants.
SELECT
    measured_at,
    pollutants
FROM sensor_measurements;
which gives:
October 22, 2019, 9:00 PM
[{"name": "NO", "units": "ppm", "concentration": 0.002161, "temporal_resolution": "1h"},
{"name": "NO2", "units": "ppm", "concentration": 0.002, "temporal_resolution": "1h"},
{"name": "TEMP", "units": "celsius", "concentration": 28, "temporal_resolution": "1h"},
{"name": "HUM", "units": "percent", "concentration": 38, "temporal_resolution": "1h"},
{"name": "PM10", "units": "µg/m3", "concentration": 8, "temporal_resolution": "1h"},
{"name": "PM25", "units": "µg/m3", "concentration": 7, "temporal_resolution": "1h"}]
October 22, 2019, 10:00 PM
[{"name": "NO", "units": "ppm", "concentration": 0.002205, "temporal_resolution": "1h"},
{"name": "NO2", "units": "ppm", "concentration": 0.008, "temporal_resolution": "1h"},
{"name": "TEMP", "units": "celsius", "concentration": 28, "temporal_resolution": "1h"},
{"name": "HUM", "units": "percent", "concentration": 38, "temporal_resolution": "1h"},
{"name": "PM10", "units": "µg/m3", "concentration": 9, "temporal_resolution": "1h"},
{"name": "PM25", "units": "µg/m3", "concentration": 8, "temporal_resolution": "1h"}]
October 22, 2019, 11:00 PM
[{"name": "NO", "units": "ppm", "concentration": 0.002209, "temporal_resolution": "1h"},
{"name": "NO2", "units": "ppm", "concentration": 0.004, "temporal_resolution": "1h"},
{"name": "TEMP", "units": "celsius", "concentration": 28, "temporal_resolution": "1h"},
{"name": "HUM", "units": "percent", "concentration": 38, "temporal_resolution": "1h"},
{"name": "PM10", "units": "µg/m3", "concentration": 8, "temporal_resolution": "1h"},
{"name": "PM25", "units": "µg/m3", "concentration": 7, "temporal_resolution": "1h"}]
October 23, 2019, 12:00 AM
[{"name": "NO", "units": "ppm", "concentration": 0.002125, "temporal_resolution": "1h"},
{"name": "NO2", "units": "ppm", "concentration": 0.004, "temporal_resolution": "1h"},
{"name": "TEMP", "units": "celsius", "concentration": 28, "temporal_resolution": "1h"},
{"name": "HUM", "units": "percent", "concentration": 39, "temporal_resolution": "1h"}]
October 23, 2019, 4:00 PM
[{"name": "NO", "units": "ppm", "concentration": 0.004563, "temporal_resolution": "1h"},
{"name": "TEMP", "units": "celsius", "concentration": 34, "temporal_resolution": "1h"},
{"name": "HUM", "units": "percent", "concentration": 28, "temporal_resolution": "1h"}]
I want to extract the timestamp, the pollutant name, and its value (the concentration).
Ideally, I want to create three columns (timestamp, pollutant, and value) so that I can download the result as CSV.
The database is PostgreSQL (accessed through metabase.com).

I did this in Postgres.
First, create a composite type that represents one element of your JSON array:
-- Composite type mirroring the keys of one element of the pollutants array.
CREATE TYPE x AS (
    "name" VARCHAR,
    "units" VARCHAR,
    "concentration" FLOAT,
    "temporal_resolution" VARCHAR
);
Next, expand the JSON array into rows and join it to the table with a lateral join:
-- Expand each pollutants array into one row per (timestamp, pollutant).
-- LEFT JOIN ... ON true keeps measurements whose array yields no elements;
-- those rows come back with NULL name and concentration.
SELECT
    sm.measured_at,
    p.name,
    p.concentration
FROM sensor_measurements AS sm
LEFT JOIN LATERAL json_populate_recordset(NULL::x, sm.pollutants::json) AS p
    ON true;

Related

How to apply multiple colors in legend for Vega stacked bar?

I have the following Vega bar chart.
Vega Bar online editor
How to apply multiple colors in legend? When I apply
symbols: {
update: {
fill: { field: 'color' },
},
},
I don't see any symbols, only labels.
I need to apply four different colors to the legend symbols. When I write fill: { value : "red"}, all of the symbols become red. I need each of the four symbols to have its own color. How can I fix this?
{
"width": 500,
"height": 200,
"title": "STD: cashflow cleaning",
"data": [
{
"name": "table",
"values": [
{"yearIndex": 1, "c": "red", "y": 100000, "y0": 10000, "y1": 110000},
{"yearIndex": 1, "c": "green", "y": 10000, "y0": 0, "y1": 10000},
{"yearIndex": 1, "c": "blue", "y": -12000, "y0": 0, "y1": -12000},
{
"yearIndex": 1,
"c": "orange",
"y": -110000,
"y0": -12000,
"y1": -122000
},
{"yearIndex": 2, "c": "red", "y": 980000, "y0": 98000, "y1": 1078000},
{"yearIndex": 2, "c": "green", "y": 98000, "y0": 0, "y1": 98000},
{"yearIndex": 2, "c": "blue", "y": -10000, "y0": 0, "y1": -10000},
{"yearIndex": 2, "c": "orange", "y": 0, "y0": 98000, "y1": 98000},
{"yearIndex": 3, "c": "red", "y": 960000, "y0": 96000, "y1": 1056000},
{"yearIndex": 3, "c": "green", "y": 96000, "y0": 0, "y1": 96000},
{"yearIndex": 3, "c": "blue", "y": -12000, "y0": 0, "y1": -12000},
{"yearIndex": 3, "c": "orange", "y": 0, "y0": 96000, "y1": 96000},
{"yearIndex": 4, "c": "red", "y": 940000, "y0": 94000, "y1": 1034000},
{"yearIndex": 4, "c": "green", "y": 94000, "y0": 0, "y1": 94000},
{"yearIndex": 4, "c": "blue", "y": -10000, "y0": 0, "y1": -10000},
{"yearIndex": 4, "c": "orange", "y": 0, "y0": 94000, "y1": 94000},
{"yearIndex": 5, "c": "red", "y": 920000, "y0": 92000, "y1": 1012000},
{"yearIndex": 5, "c": "green", "y": 92000, "y0": 0, "y1": 92000},
{"yearIndex": 5, "c": "blue", "y": -12000, "y0": 0, "y1": -12000},
{"yearIndex": 5, "c": "orange", "y": 0, "y0": 92000, "y1": 92000},
{"yearIndex": 6, "c": "red", "y": 900000, "y0": 90000, "y1": 990000},
{"yearIndex": 6, "c": "green", "y": 90000, "y0": 0, "y1": 90000},
{"yearIndex": 6, "c": "blue", "y": -10000, "y0": 0, "y1": -10000},
{
"yearIndex": 6,
"c": "orange",
"y": -91000,
"y0": -10000,
"y1": -101000
},
{"yearIndex": 7, "c": "red", "y": 880000, "y0": 88000, "y1": 968000},
{"yearIndex": 7, "c": "green", "y": 88000, "y0": 0, "y1": 88000},
{"yearIndex": 7, "c": "blue", "y": -12000, "y0": 0, "y1": -12000},
{"yearIndex": 7, "c": "orange", "y": 0, "y0": 88000, "y1": 88000},
{"yearIndex": 8, "c": "red", "y": 860000, "y0": 86000, "y1": 946000},
{"yearIndex": 8, "c": "green", "y": 86000, "y0": 0, "y1": 86000},
{"yearIndex": 8, "c": "blue", "y": -10000, "y0": 0, "y1": -10000},
{"yearIndex": 8, "c": "orange", "y": 0, "y0": 86000, "y1": 86000},
{"yearIndex": 9, "c": "red", "y": 840000, "y0": 84000, "y1": 924000},
{"yearIndex": 9, "c": "green", "y": 84000, "y0": 0, "y1": 84000},
{"yearIndex": 9, "c": "blue", "y": -12000, "y0": 0, "y1": -12000},
{"yearIndex": 9, "c": "orange", "y": 0, "y0": 84000, "y1": 84000},
{"yearIndex": 10, "c": "red", "y": 820000, "y0": 82000, "y1": 902000},
{"yearIndex": 10, "c": "green", "y": 82000, "y0": 0, "y1": 82000},
{"yearIndex": 10, "c": "blue", "y": -10000, "y0": 0, "y1": -10000},
{"yearIndex": 10, "c": "orange", "y": 0, "y0": 82000, "y1": 82000}
],
"transform": [
{
"type": "stack",
"groupby": ["yearIndex"],
"sort": {"field": "c"},
"field": "y"
}
]
}
],
"scales": [
{
"name": "x",
"type": "band",
"range": "width",
"domain": {"data": "table", "field": "yearIndex"}
},
{
"name": "y",
"type": "linear",
"range": "height",
"nice": true,
"zero": true,
"domain": {"data": "table", "field": "y1"}
},
{
"name": "color",
"type": "ordinal",
"range": {"data": "table", "field": "c"},
"domain": [
"basicYieldIncome",
"avoidedSoilingIncomeLoss",
"opex",
"capex"
]
}
],
"axes": [
{"orient": "bottom", "scale": "x", "zindex": 1, "title": "yearIndex"},
{
"orient": "left",
"scale": "y",
"zindex": 1,
"title": "EUR",
"formatType": "number",
"format": ".2s"
}
],
"marks": [
{
"type": "rect",
"from": {"data": "table"},
"encode": {
"enter": {
"x": {"scale": "x", "field": "yearIndex"},
"width": {"scale": "x", "band": 1, "offset": -1},
"y": {"scale": "y", "field": "y0"},
"y2": {"scale": "y", "field": "y1"},
"fill": {"field": "c"}
}
}
}
],
"legends": [
{
"orient": "top",
"direction": "horizontal",
"fill": "color",
"encode": {
"labels": {
"interactive": true,
"update": {"fontSize": {"value": 12}, "fill": {"value": "black"}}
}
}
}
]
}

vega grouping difference for vertical and horizontal bar chart?

Thanks in advance! I am trying to make a grouped bar chart with Vega. I took the "stacked bar chart example" data (with the stacking transform removed) and made a vertical version and a horizontal version. The strange thing is that the horizontal one works as I expected, but the vertical one has duplicated, overlapping bars in each individual group. I built them in exactly the same way, only swapping the x and y properties. Both JSON specs are posted below.
Vertical bar chart:
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "A basic stacked bar chart example.",
"width": 500,
"height": 200,
"padding": 5,
"data": [
{
"name": "table",
"values": [
{"x": 0, "y": 28, "c": 0}, {"x": 0, "y": 55, "c": 1},
{"x": 1, "y": 43, "c": 0}, {"x": 1, "y": 91, "c": 1},
{"x": 2, "y": 81, "c": 0}, {"x": 2, "y": 53, "c": 1},
{"x": 3, "y": 19, "c": 0}, {"x": 3, "y": 87, "c": 1},
{"x": 4, "y": 52, "c": 0}, {"x": 4, "y": 48, "c": 1},
{"x": 5, "y": 24, "c": 0}, {"x": 5, "y": 49, "c": 1},
{"x": 6, "y": 87, "c": 0}, {"x": 6, "y": 66, "c": 1},
{"x": 7, "y": 17, "c": 0}, {"x": 7, "y": 27, "c": 1},
{"x": 8, "y": 68, "c": 0}, {"x": 8, "y": 16, "c": 1},
{"x": 9, "y": 49, "c": 0}, {"x": 9, "y": 15, "c": 1}
]
}
],
"scales": [
{
"name": "xscale",
"type": "band",
"range": "width",
"domain": {"data": "table", "field": "c"}
},
{
"name": "yscale",
"type": "linear",
"range": "height",
"domain": {"data": "table", "field": "y"}
},
{
"name": "color",
"type": "ordinal",
"range": "category",
"domain": {"data": "table", "field": "c"}
}
],
"axes": [
{"orient": "bottom", "scale": "xscale", "zindex": 1},
{"orient": "left", "scale": "yscale", "zindex": 1}
],
"marks": [
{
"type": "group",
"from": {
"facet": {
"name": "facet",
"data": "table",
"groupby": "c"
}
},
"encode": {
"enter": {
"x": {"scale": "xscale", "field": "c"}
}
},
"signals": [
{"name": "width", "update": "bandwidth('xscale')"}
],
"scales": [
{
"name": "inner",
"type": "band",
"range": "width",
"domain": {"data": "facet", "field": "x"}
}
],
"axes": [
{"orient": "top", "scale": "inner", "tickSize": 0, "labelPadding": 10, "zindex": 2, "title": "x"}
],
"marks": [
{
"type": "rect",
"from": {"data": "table"},
"encode": {
"enter": {
"x": {"scale": "inner", "field": "x"},
"width": {"scale": "inner", "band": 1, "offset": -1},
"y": {"scale": "yscale", "field": "y"},
"y2": {"scale": "yscale", "value": 0},
"fill": {"scale": "color", "field": "c"}
},
"update": {
"fillOpacity": {"value": 1}
},
"hover": {
"fillOpacity": {"value": 0.5}
}
}
}
]
}]
}
Horizontal bar chart:
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "A basic stacked bar chart example.",
"width": 500,
"height": 200,
"padding": 5,
"data": [
{
"name": "table",
"values": [
{"x": 0, "y": 28, "c": 0}, {"x": 0, "y": 55, "c": 1},
{"x": 1, "y": 43, "c": 0}, {"x": 1, "y": 91, "c": 1},
{"x": 2, "y": 81, "c": 0}, {"x": 2, "y": 53, "c": 1},
{"x": 3, "y": 19, "c": 0}, {"x": 3, "y": 87, "c": 1},
{"x": 4, "y": 52, "c": 0}, {"x": 4, "y": 48, "c": 1},
{"x": 5, "y": 24, "c": 0}, {"x": 5, "y": 49, "c": 1},
{"x": 6, "y": 87, "c": 0}, {"x": 6, "y": 66, "c": 1},
{"x": 7, "y": 17, "c": 0}, {"x": 7, "y": 27, "c": 1},
{"x": 8, "y": 68, "c": 0}, {"x": 8, "y": 16, "c": 1},
{"x": 9, "y": 49, "c": 0}, {"x": 9, "y": 15, "c": 1}
]
}
],
"scales": [
{
"name": "y",
"type": "band",
"range": "height",
"domain": {"data": "table", "field": "c"}
},
{
"name": "x",
"type": "linear",
"range": "width",
"domain": {"data": "table", "field": "y"}
},
{
"name": "color",
"type": "ordinal",
"range": "category",
"domain": {"data": "table", "field": "c"}
}
],
"axes": [
{"orient": "bottom", "scale": "x", "zindex": 1},
{"orient": "left", "scale": "y", "zindex": 1}
],
"marks": [
{"type": "group",
"from": {
"facet": {
"name": "facet",
"data": "table",
"groupby": "c"
}
},
"encode": {
"enter": {
"y": {"scale": "y", "field": "c"}
}
},
"signals": [
{"name": "height", "update": "bandwidth('y')"}
],
"scales": [
{
"name": "pos",
"type": "band",
"range": "height",
"domain": {"data": "facet", "field": "x"}
}
],
"axes": [
{"orient": "right", "scale": "pos", "tickSize": 0, "labelPadding": 10, "zindex": 3}
],
"marks": [{
"type": "rect",
"from": {"data": "facet"},
"encode": {
"enter": {
"y": {"scale": "pos", "field": "x"},
"height": {"scale": "pos", "band": 1, "offset":-1},
"x": {"scale": "x", "field": "y"},
"x2": {"scale": "x", "value": 0},
"fill": {"scale": "color", "field": "c"}
}
}
}]
}]
}
Images:
Is this something I didn't setup correctly in the specs? Thanks.
I figured it out. The inner chart's marks need to be sourced from the facet dataset instead of the original table:
"from": {"data": "facet"}
That fixed it.

Filled area graph vertical orientation

I am attempting to get a vertical filled area graph, where the area on the left between the y axis and the data line is filled. Essentially, take the normal area graph and rotate it 90 degrees clockwise.
I've basically just taken the example from the vega examples and tried to convert everything to vertical, changed the names of the scales to something more related to the data, and added a line width and colour.
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"width": 500,
"height": 500,
"padding": 5,
"signals": [
{
"name": "interpolate",
"value": "linear",
"bind": {
"input": "select",
"options": [
"basis",
"cardinal",
"catmull-rom",
"linear",
"monotone",
"natural",
"step",
"step-after",
"step-before"
]
}
}
],
"data": [
{
"name": "table",
"values": [
{"u": 1, "v": 28}, {"u": 2, "v": 55},
{"u": 3, "v": 43}, {"u": 4, "v": 91},
{"u": 5, "v": 81}, {"u": 6, "v": 53},
{"u": 7, "v": 19}, {"u": 8, "v": 87},
{"u": 9, "v": 52}, {"u": 10, "v": 48},
{"u": 11, "v": 24}, {"u": 12, "v": 49},
{"u": 13, "v": 87}, {"u": 14, "v": 66},
{"u": 15, "v": 17}, {"u": 16, "v": 27},
{"u": 17, "v": 68}, {"u": 18, "v": 16},
{"u": 19, "v": 49}, {"u": 20, "v": 15}
]
}
],
"scales": [
{
"name": "uscale",
"type": "linear",
"range": "height",
"zero": false,
"domain": {"data": "table", "field": "u"}
},
{
"name": "vscale",
"type": "linear",
"range": "width",
"nice": true,
"zero": true,
"domain": {"data": "table", "field": "v"}
}
],
"axes": [
{"orient": "bottom", "scale": "vscale", "tickCount": 25},
{"orient": "left", "scale": "uscale"}
],
"marks": [
{
"type": "area",
"orient": "horizontal",
"from": {"data": "table"},
"encode": {
"enter": {
"x": {"scale": "vscale", "field": "v"},
"y": {"scale": "uscale", "field": "u"},
"x2": {"scale": "vscale", "value": 0},
"stroke": {"value": "#000000"},
"fill": {"value": "steelblue"}
},
"update": {
"interpolate": {"signal": "interpolate"},
"fillOpacity": {"value": 1}
},
"hover": {
"fillOpacity": {"value": 0.5}
}
}
}
]
}
Pretty sure I'm doing something wrong here. According to the docs "vertical" orientation is the default, and I've tried this with x2 and y2, and orient "vertical" and "horizontal" for both - I've also tried to set the scale as vscale and uscale for x2 and y2.
I get no errors in the vega online editor - The line is correct but I would expect the graph to be filled to the left of the line between the y axis and the graph line. The actual output right now is just a solid line.
The orient property has to go into the encode block.
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"width": 500,
"height": 500,
"padding": 5,
"signals": [
{
"name": "interpolate",
"value": "linear",
"bind": {
"input": "select",
"options": [
"basis",
"cardinal",
"catmull-rom",
"linear",
"monotone",
"natural",
"step",
"step-after",
"step-before"
]
}
}
],
"data": [
{
"name": "table",
"values": [
{"u": 1, "v": 28}, {"u": 2, "v": 55},
{"u": 3, "v": 43}, {"u": 4, "v": 91},
{"u": 5, "v": 81}, {"u": 6, "v": 53},
{"u": 7, "v": 19}, {"u": 8, "v": 87},
{"u": 9, "v": 52}, {"u": 10, "v": 48},
{"u": 11, "v": 24}, {"u": 12, "v": 49},
{"u": 13, "v": 87}, {"u": 14, "v": 66},
{"u": 15, "v": 17}, {"u": 16, "v": 27},
{"u": 17, "v": 68}, {"u": 18, "v": 16},
{"u": 19, "v": 49}, {"u": 20, "v": 15}
]
}
],
"scales": [
{
"name": "uscale",
"type": "linear",
"range": "height",
"zero": false,
"domain": {"data": "table", "field": "u"}
},
{
"name": "vscale",
"type": "linear",
"range": "width",
"nice": true,
"zero": true,
"domain": {"data": "table", "field": "v"}
}
],
"axes": [
{"orient": "bottom", "scale": "vscale", "tickCount": 25},
{"orient": "left", "scale": "uscale"}
],
"marks": [
{
"type": "area",
"from": {"data": "table"},
"encode": {
"enter": {
"orient": {"value": "horizontal"},
"x": {"scale": "vscale", "field": "v"},
"y": {"scale": "uscale", "field": "u"},
"x2": {"scale": "vscale", "value": 0},
"stroke": {"value": "#000000"},
"fill": {"value": "steelblue"}
},
"update": {
"interpolate": {"signal": "interpolate"},
"fillOpacity": {"value": 1}
},
"hover": {
"fillOpacity": {"value": 0.5}
}
}
}
]
}

Stacked bar with JUnit style records

I have a dataset exported from a JUnit XML file and I would like to create a simple Vega-Lite visualisation with one stacked bar per test suite. The problem is that JUnit-style records do not give you a value for "success"; instead you get a "tests" field that is the total number of tests of all types.
So far I've just naively exported as tidy data and then mapped to x, y and color.
Current visualisation
Vega-editor
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {"name": "results"},
"mark": "bar",
"encoding": {
"y": {"field": "name", "type": "nominal"},
"x": {"aggregate": "sum", "field": "count", "type": "quantitative", "stack": "normalize"},
"color": {"field": "type"}
},
"datasets": {
"results": [
{"name": "test_suite_0", "type": "tests", "count": 10},
{"name": "test_suite_1", "type": "tests", "count": 10},
{"name": "test_suite_2", "type": "tests", "count": 10},
{"name": "test_suite_3", "type": "tests", "count": 10},
{"name": "test_suite_4", "type": "tests", "count": 10},
{"name": "general-test-suite", "type": "tests", "count": 10},
{"name": "test_suite_0", "type": "skipped", "count": 2},
{"name": "test_suite_1", "type": "skipped", "count": 2},
{"name": "test_suite_2", "type": "skipped", "count": 3},
{"name": "test_suite_3", "type": "skipped", "count": 4},
{"name": "test_suite_4", "type": "skipped", "count": 3},
{"name": "general-test-suite", "type": "skipped", "count": 1},
{"name": "test_suite_0", "type": "failures", "count": 3},
{"name": "test_suite_1", "type": "failures", "count": 2},
{"name": "test_suite_2", "type": "failures", "count": 0},
{"name": "test_suite_3", "type": "failures", "count": 2},
{"name": "test_suite_4", "type": "failures", "count": 3},
{"name": "general-test-suite", "type": "failures", "count": 3},
{"name": "test_suite_0", "type": "errors", "count": 4},
{"name": "test_suite_1", "type": "errors", "count": 4},
{"name": "test_suite_2", "type": "errors", "count": 4},
{"name": "test_suite_3", "type": "errors", "count": 1},
{"name": "test_suite_4", "type": "errors", "count": 4},
{"name": "general-test-suite", "type": "errors", "count": 2}
]
}
}
I would like to change the tests values for success, which should be the difference of tests-(sum of count of other fields grouped by name) but I have no clue about how to do it.
You could use an aggregate transform followed by a calculate transform; something like this (vega editor):
{
"data": {"name": "results"},
"transform": [
{
"aggregate": [
{"op": "sum", "field": "count", "as": "total"},
{"op": "max", "field": "count", "as": "tests"}
],
"groupby": ["name"]
},
{"calculate": "2 * datum.tests - datum.total", "as": "passed"}
],
"mark": "bar",
"encoding": {
"x": {"type": "quantitative", "field": "passed"},
"y": {"type": "nominal", "field": "name"}
},
"datasets": {
"results": [
{"count": 10, "name": "test_suite_0", "type": "tests"},
{"count": 10, "name": "test_suite_1", "type": "tests"},
{"count": 10, "name": "test_suite_2", "type": "tests"},
{"count": 10, "name": "test_suite_3", "type": "tests"},
{"count": 10, "name": "test_suite_4", "type": "tests"},
{"count": 10, "name": "general-test-suite", "type": "tests"},
{"count": 2, "name": "test_suite_0", "type": "skipped"},
{"count": 2, "name": "test_suite_1", "type": "skipped"},
{"count": 3, "name": "test_suite_2", "type": "skipped"},
{"count": 4, "name": "test_suite_3", "type": "skipped"},
{"count": 3, "name": "test_suite_4", "type": "skipped"},
{"count": 1, "name": "general-test-suite", "type": "skipped"},
{"count": 3, "name": "test_suite_0", "type": "failures"},
{"count": 2, "name": "test_suite_1", "type": "failures"},
{"count": 0, "name": "test_suite_2", "type": "failures"},
{"count": 2, "name": "test_suite_3", "type": "failures"},
{"count": 3, "name": "test_suite_4", "type": "failures"},
{"count": 3, "name": "general-test-suite", "type": "failures"},
{"count": 4, "name": "test_suite_0", "type": "errors"},
{"count": 4, "name": "test_suite_1", "type": "errors"},
{"count": 4, "name": "test_suite_2", "type": "errors"},
{"count": 1, "name": "test_suite_3", "type": "errors"},
{"count": 4, "name": "test_suite_4", "type": "errors"},
{"count": 2, "name": "general-test-suite", "type": "errors"}
]
}
}
I finally got the result I was after (and realised that it's not as pretty as I imagined it). I decided to pivot the columns, calculate the amount of "passed" per test suite and then fold again for easy plotting:
Vega-editor
{
"data": {"name": "results"},
"transform": [
{"pivot": "type", "groupby": ["name"], "value": "count"},
{
"calculate": "datum.tests - (datum.failures + datum.skipped + datum.errors)",
"as": "passed"
},
{
"fold": ["passed", "failures", "skipped", "errors"],
"as": ["type", "count"]
}
],
"mark": "bar",
"encoding": {
"x": {"aggregate": "sum", "type": "quantitative", "field": "count"},
"y": {"type": "nominal", "field": "name"},
"color": {
"type": "nominal",
"field": "type",
"sort": ["passed", "failures", "skipped", "errors"],
"scale": {"range": ["green", "red", "grey", "yellow"]}
},
"order": {"range": ["passed", "failures", "skipped", "errors"]}
},
"datasets": {
"results": [
{"count": 10, "name": "test_suite_0", "type": "tests"},
{"count": 10, "name": "test_suite_1", "type": "tests"},
{"count": 10, "name": "test_suite_2", "type": "tests"},
{"count": 10, "name": "test_suite_3", "type": "tests"},
{"count": 10, "name": "test_suite_4", "type": "tests"},
{"count": 10, "name": "general-test-suite", "type": "tests"},
{"count": 2, "name": "test_suite_0", "type": "skipped"},
{"count": 2, "name": "test_suite_1", "type": "skipped"},
{"count": 3, "name": "test_suite_2", "type": "skipped"},
{"count": 4, "name": "test_suite_3", "type": "skipped"},
{"count": 3, "name": "test_suite_4", "type": "skipped"},
{"count": 1, "name": "general-test-suite", "type": "skipped"},
{"count": 3, "name": "test_suite_0", "type": "failures"},
{"count": 2, "name": "test_suite_1", "type": "failures"},
{"count": 0, "name": "test_suite_2", "type": "failures"},
{"count": 2, "name": "test_suite_3", "type": "failures"},
{"count": 3, "name": "test_suite_4", "type": "failures"},
{"count": 3, "name": "general-test-suite", "type": "failures"},
{"count": 4, "name": "test_suite_0", "type": "errors"},
{"count": 4, "name": "test_suite_1", "type": "errors"},
{"count": 4, "name": "test_suite_2", "type": "errors"},
{"count": 1, "name": "test_suite_3", "type": "errors"},
{"count": 4, "name": "test_suite_4", "type": "errors"},
{"count": 2, "name": "general-test-suite", "type": "errors"}
]
}
}

Repetitive entries in SQL JOIN query

I have three tables posts, comments, and users. posts table includes two types of posts, question and answer. Comments are made on questions and answers. My purpose is to get a single question with comments on it, answers on it and comments on that answers. Also I need username from the users table as author on each question, answer and comment I fetch. I am using Postgres 9.5, and making use of json_agg() function.
While example output I need should be something similar to the first following, I get repeated entries.
What am I missing here? Perhaps the GROUP BY clauses are wrong, or perhaps the subquery that gathers the answers with their comments is not the right way to do it. When I comment out the LEFT JOIN from posts to the comments table, I get the desired results, but without the comments on the question. Likewise, when I remove the LEFT JOIN containing the subquery, I get non-repetitive results as I expect, but again that is not the complete dataset I want. This is what I have found so far while trying to solve the issue.
What I need:
[
{
"post_id": "10",
"created_at": "2016-05-10T00:16:54.469Z",
"post_type": "question",
"post_title": "qwerty",
"post_text": "asdasd asda sdasd",
"post_author_id": 1,
"author": "isikfsc",
"parent_post_id": null,
"is_accepted": null,
"acceptor_id": null,
"answers": [
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"author": "isikfsc",
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 17
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"author": "isikfsc",
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 14
}
]
}
],
"comments": [
{
"id": 1,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 10
}
]
}
]
My query is:
-- Attempt to fetch question 10 together with its answers (each carrying its
-- own comments) and the question's own comments in one grouped query.
-- NOTE: both LEFT JOINs below attach to q.post_id, so answer rows and
-- question-comment rows are combined with each other before GROUP BY runs.
-- Each json_agg then sees its input rows several times, which is the source
-- of the repeated entries.
SELECT
q.*,
json_agg(ac.*) AS answers,
json_agg(c.*) AS comments -- comments made directly on the question
FROM posts q
LEFT JOIN
(
-- One row per post with that post's comments aggregated into a JSON array.
SELECT
a.*,
json_agg(c.*) AS comments -- comments made on this answer
FROM posts a
LEFT JOIN comments c
ON a.post_id = c.parent_post_id
GROUP BY a.post_id
) ac
ON q.post_id = ac.parent_post_id
LEFT JOIN comments c
ON q.post_id = c.parent_post_id
WHERE q.post_id = 10
GROUP BY q.post_id
What I get:
[
{
"post_id": "10",
"created_at": "2016-05-10T00:16:54.469Z",
"post_type": "question",
"post_title": "qwerty",
"post_text": "asdasd asda sdasd",
"post_author_id": 1,
"parent_post_id": null,
"is_accepted": null,
"acceptor_id": null,
"answers": [
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
}
],
"comments": [
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
}
]
}
]
Grouping happens only after all of the tables have been joined, so the aggregates depend on the cardinality of the joined result. Joining the question with both its answers AND its comments in the same FROM clause produces a cross product of answers and comments for that question, so every value is duplicated. The two aggregations need to be separated and performed individually; one way to do it is the following:
-- Fetch question 10 with its answers (each with nested comments) and the
-- question's own comments.
-- Answers are built in a correlated scalar subquery so they are never joined
-- against the question's comments; joining both to the question in a single
-- FROM clause would multiply the rows seen by each aggregate.
-- Assumes posts.post_id is the primary key (required for selecting q.* / a.*
-- while grouping only by post_id) -- confirm against the schema.
SELECT
    q.*,
    (
        -- json_agg over zero answers yields NULL; return an empty array instead.
        SELECT COALESCE(json_agg(ans.*), '[]'::json)
        FROM (
            SELECT
                a.*,
                -- FILTER drops the all-NULL row that the LEFT JOIN emits for an
                -- answer without comments, which would otherwise give [null].
                COALESCE(
                    json_agg(ac.*) FILTER (WHERE ac.parent_post_id IS NOT NULL),
                    '[]'::json
                ) AS comments
            FROM posts AS a
            LEFT JOIN comments AS ac
                ON ac.parent_post_id = a.post_id
            WHERE a.parent_post_id = q.post_id
            GROUP BY a.post_id
        ) AS ans
    ) AS answers,
    -- Comments made directly on the question; same [null] guard as above.
    COALESCE(
        json_agg(qc.*) FILTER (WHERE qc.parent_post_id IS NOT NULL),
        '[]'::json
    ) AS comments
FROM posts AS q
LEFT JOIN comments AS qc
    ON qc.parent_post_id = q.post_id
WHERE q.post_id = 10
GROUP BY q.post_id;
Alternatively:
-- Fetch question 10 with its answers (each with nested comments) and the
-- question's own comments, using two independently pre-aggregated derived
-- tables. Each derived table has at most one row per parent_post_id, so the
-- joins to q cannot multiply rows.
-- Assumes posts.post_id is the primary key (required for selecting a.* while
-- grouping only by post_id) -- confirm against the schema.
SELECT
    q.*,
    -- A question with no answers / no comments gets [] rather than NULL.
    COALESCE(qa.answers, '[]'::json) AS answers,
    COALESCE(qc.comments, '[]'::json) AS comments
FROM posts AS q
LEFT JOIN (
    -- One row per parent post: all of its answers as a JSON array.
    SELECT
        ans.parent_post_id,
        json_agg(ans.*) AS answers
    FROM (
        -- One row per answer, with that answer's comments nested inside.
        SELECT
            a.*,
            -- FILTER drops the all-NULL row that the LEFT JOIN emits for an
            -- answer without comments, which would otherwise give [null].
            COALESCE(
                json_agg(ac.*) FILTER (WHERE ac.parent_post_id IS NOT NULL),
                '[]'::json
            ) AS comments
        FROM posts AS a
        LEFT JOIN comments AS ac
            ON ac.parent_post_id = a.post_id
        -- Posts without a parent can never match the join to q below.
        WHERE a.parent_post_id IS NOT NULL
        GROUP BY a.post_id
    ) AS ans
    GROUP BY ans.parent_post_id
) AS qa
    ON qa.parent_post_id = q.post_id
LEFT JOIN (
    -- One row per parent post: all of its direct comments as a JSON array.
    SELECT
        c.parent_post_id,
        json_agg(c.*) AS comments
    FROM comments AS c
    GROUP BY c.parent_post_id
) AS qc
    ON qc.parent_post_id = q.post_id
WHERE q.post_id = 10;