Best way to split data into few lines? - vega

I have data as shown:
2021 43466.822 medium variant
2021 43510.982 high variant
2021 43416.407 low variant
2021 43468.429 constant fertility
2021 43580.45 instant replacement
And need to get chart:
https://image.prntscr.com/image/eBKqmOUsSa_6PBlomh5Erg.png
I have tried the fold transform, but it does not help. Creating a separate layer for each variant would require a lot of code. Is there a smarter way? I will also need a legend like the one shown.
vegalite({
height:300,
autosize: "fit",
width:width,
title: {text:"Ukraine Population Prospects",
subtitle:"Total population, million"
},
data: {
url:"https://gist.githubusercontent.com/turiy/005f2ce11637fefcde8e9d6efdb0c2e6/raw/19e67bb3a6d63e7fd9f49a596e5d24404469bd63/population_prospects.csv"},
transform: [{"calculate": "datum.population/1000", "as": "population"},{fold:["medium variant","high variant", "low variant", "constant fertility","instant replacement", "momentum", "zero migration", "constant mortality", "no change"]}],
layer: [
{ mark: "line",
encoding:{
"x": {
"timeUnit": "utcyear",
"field": "year",
"type": "temporal",
"axis": {
"values":[1950,1991,2020,2100],
"domain": false,
"gridDash": {"value": [1,1]}
}
},
"y": {
"field": "population",
"type": "quantitative",
"scale": {"domain": [15,55]},
"axis": {
"domain": false ,
"gridDash": {"value": [1,1]}
}
}
},
color: {"value":"#0000ff"},
transform:[{filter:{"timeUnit": "utcyear", "field": "year", "range": [1950, 2020]}}]
},
{
mark: "line",legend:{title:"low variant"},
encoding:{
x: {
"timeUnit": "utcyear",
"field": "year",
"type": "temporal",
"axis": {
"values":[1950,1991,2020,2100],
"domain": false,
"gridDash": {"value": [1,1]}}
},
y: {
"field": "population",
"type": "quantitative",
"scale": {"domain": [15,55]},
"axis": {
"domain": false ,
"gridDash": {"value": [1,1]}
}
},
legends:{
"orient": "top-right",
"stroke": "color",
"title": "Origin",
"encode": {
"symbols": {
"update": {
"fill": {"value": ""},
"strokeWidth": {"value": 2},
"size": {"value": 64}
}
}
}
},
color: {"field": "key", "type":"nominal"}
},
transform:[{filter:{"timeUnit": "year", "field": "year", "range": [2020, 2100]}},
{filter:{field:"type", "equal":"low variant"}}]
}
]})
And this is what I am getting: https://image.prntscr.com/image/3Y9WNk4SQzGYWDr2JKWV9A.png

If your variants are listed by name in a column as in your example dataset, you can use a detail encoding to split them into different lines (vega editor link):
{
  "data": {
    "url": "https://gist.githubusercontent.com/turiy/005f2ce11637fefcde8e9d6efdb0c2e6/raw/19e67bb3a6d63e7fd9f49a596e5d24404469bd63/population_prospects.csv"
  },
  "mark": "line",
  "encoding": {
    "x": {
      "field": "year",
      "type": "quantitative"
    },
    "y": {
      "field": "population",
      "type": "quantitative"
    },
    "detail": {
      "field": "type",
      "type": "nominal"
    }
  }
}
If you use color rather than detail, each line will be a different color and a legend will be included.
To add labels at the right side of the chart, you can use a text mark with an aggregate transform; something like this:
{
  "width": 400,
  "data": {
    "url": "https://gist.githubusercontent.com/turiy/005f2ce11637fefcde8e9d6efdb0c2e6/raw/19e67bb3a6d63e7fd9f49a596e5d24404469bd63/population_prospects.csv"
  },
  "layer": [
    {
      "mark": "line",
      "encoding": {
        "x": {"field": "year", "type": "quantitative"},
        "y": {"field": "population", "type": "quantitative"},
        "detail": {"field": "type", "type": "nominal"}
      }
    },
    {
      "mark": {"type": "text", "align": "left"},
      "transform": [
        {"filter": "datum.type != 'estimate'"},
        {
          "groupby": ["type"],
          "aggregate": [
            {"op": "argmax", "field": "year", "as": "rightmost"}
          ]
        }
      ],
      "encoding": {
        "x": {"field": "rightmost.year", "type": "quantitative"},
        "y": {"field": "rightmost.population", "type": "quantitative"},
        "text": {"field": "rightmost.type", "type": "nominal"}
      }
    }
  ]
}

Related

How to display multiple charts with varying x-scales side by side in Vega

I have a set of charts with the same y-scale but varying x-scales. I am using hconcat to display them side by side. In order to conserve space and avoid repetition, I have disabled the y-axis for all but the first chart. However, this is causing the title of the first chart to offset:
This is a link to a Vega Editor.
As the blue circle indicates, the two titles, "Chain" and "Mini Invaders," are not in line. Is there a way to fix this?
I have tried to express these charts using facet but as far as I can tell, facets do not permit varying x-scales. However, please do let me know if this is somehow possible with facets.
You need labelBound: true in your spec.
Editor
{
  "config": {
    "view": {
      "continuousWidth": 400,
      "continuousHeight": 300,
      "stroke": "#000000",
      "strokeOpacity": 1,
      "strokeWidth": 2
    },
    "axis": {"labelFontSize": 24, "titleFontSize": 24, "labelBound": true},
    "legend": {"labelFontSize": 24, "labelLimit": 0, "titleFontSize": 32},
    "title": {"baseline": "bottom", "fontSize": 24}
  },
  "hconcat": [
    {
      "layer": [
        {
          "mark": {"type": "area", "clip": true, "opacity": 0.2},
          "encoding": {
            "color": {
              "field": "Variations",
              "legend": {
                "orient": "top",
                "symbolOpacity": 1,
                "symbolSize": 200,
                "symbolStrokeWidth": 3,
                "symbolType": "stroke"
              },
              "scale": {
                "domain": ["Original Algorithm"],
                "range": [
                  "#e41a1c",
                  "#377eb8",
                  "#4daf4a",
                  "#984ea3",
                  "#a65628",
                  "#646464"
                ]
              },
              "type": "nominal"
            },
            "x": {"field": "step", "type": "quantitative"},
            "y": {
              "axis": {"labels": true, "tickCount": 5, "title": null},
              "field": "lower",
              "type": "quantitative"
            },
            "y2": {"field": "upper"}
          }
        },
        {
          "mark": {"type": "line", "clip": true},
          "encoding": {
            "color": {"field": "Variations", "type": "nominal"},
            "x": {"field": "step", "type": "quantitative"},
            "y": {
              "field": "regret",
              "scale": {"domain": [0, 1]},
              "type": "quantitative"
            }
          }
        }
      ],
      "height": 200,
      "title": "Chain",
      "transform": [{"filter": "(datum.domain === 'Chain')"}],
      "width": 200
    },
    {
      "layer": [
        {
          "mark": {"type": "area", "clip": true, "opacity": 0.2},
          "encoding": {
            "color": {
              "field": "Variations",
              "legend": {
                "orient": "top",
                "symbolOpacity": 1,
                "symbolSize": 200,
                "symbolStrokeWidth": 3,
                "symbolType": "stroke"
              },
              "scale": {
                "domain": ["Original Algorithm"],
                "range": [
                  "#e41a1c",
                  "#377eb8",
                  "#4daf4a",
                  "#984ea3",
                  "#a65628",
                  "#646464"
                ]
              },
              "type": "nominal"
            },
            "x": {"field": "step", "type": "quantitative"},
            "y": {
              "axis": {"labels": false, "tickCount": 5, "title": null},
              "field": "lower",
              "type": "quantitative"
            },
            "y2": {"field": "upper"}
          }
        },
        {
          "mark": {"type": "line", "clip": true},
          "encoding": {
            "color": {"field": "Variations", "type": "nominal"},
            "x": {"field": "step", "type": "quantitative"},
            "y": {
              "field": "regret",
              "scale": {"domain": [0, 1]},
              "type": "quantitative"
            }
          }
        }
      ],
      "height": 200,
      "title": "Mini Invaders",
      "transform": [{"filter": "(datum.domain === 'Mini Invaders')"}],
      "width": 200
    }
  ]
}

Vega How to SUM all descendants value of every node on a treemap

I am trying to access the sum of values for a node in Vega. In other words, I want to display the sum of the "percentage" values of all leaves for each parent node.
I have the following Vega spec (https://gist.github.com/omerakko/655674f9f37e9361fe5378b6d440e411):
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "An example of treemap layout for hierarchical data.",
"width": 960,
"height": 500,
"padding": 2.5,
"autosize": "none",
"data": [
{
"name": "tree",
"url": "https://raw.githubusercontent.com/omerakko/VEGA/main/vegaTreemapData.json",
"transform": [
{
"type": "stratify",
"key": "id",
"parentKey": "parent"
},
{
"type": "treemap",
"field": "percentage",
"sort": {"field": "value", "order":"descending"},
"round": true,
"method": "resquarify",
"ratio": 1,
"size": [{"signal": "width"}, {"signal": "height"}],
"paddingOuter": 2,
"paddingInner":2
}
]
},
{
"name": "nodes",
"source": "tree",
"transform": [{ "type": "filter", "expr": "datum.children" }]
},
{
"name": "leaves",
"source": "tree",
"transform": [{ "type": "filter", "expr": "!datum.children" },
{"type": "filter", "expr": "datum.percentage > 0"}]
}
],
"scales": [
{
"name": "color",
"type": "ordinal",
"domain": {"data": "nodes", "field": "name"},
"range": [
"transparent", "#dd96ba", "#dea84e", "#c83836", "#dfde9b",
"#5eafb9", "#adc35d"]
},
{
"name": "size",
"type": "ordinal",
"domain": [0, 1, 2, 3],
"range": [256, 28, 20, 14]
},
{
"name": "opacity",
"type": "ordinal",
"domain": [0, 1, 2, 3],
"range": [0.15, 0.5, 0.8, 1.0]
}
],
"marks": [
{
"type": "rect",
"from": {"data": "nodes"},
"interactive": false,
"encode": {
"enter": {
"fill": {"value":"#333238"},
"stroke": {"scale": "color", "field": "name"},
"strokeWidth":{"value": 5}
},
"update": {
"x": {"field": "x0"},
"y": {"field": "y0"},
"x2": {"field": "x1"},
"y2": {"field": "y1"},
"stroke": {"scale": "color", "field": "name"}
}
}
},
{
"type": "rect",
"from": {"data": "leaves"},
"encode": {
"enter": {
"stroke": {"value": "#fff"}
},
"update": {
"x": {"field": "x0"},
"y": {"field": "y0"},
"x2": {"field": "x1"},
"y2": {"field": "y1"},
"fill": {"value": "transparent"}
},
"hover": {
"fill": {"value": "red"}
}
}
},
{
"type": "text",
"from": {"data": "nodes"},
"interactive": false,
"encode": {
"enter": {
"font": {"value": "Helvetica Neue, Arial"},
"align": {"value": "center"},
"baseline": {"value": "middle"},
"fill": {"scale": "color", "field": "name"},
"text": {"field": "name"},
"fontSize": {"scale": "size", "field": "depth"}
},
"update": {
"x": {"signal": "0.5 * (datum.x0 + datum.x1)"},
"y": {"signal": "0.5 * (datum.y0 + datum.y1)"}
}
}
}
]
}
The documentation at https://vega.github.io/vega/docs/transforms/treemap/ says that I can access what I want, but I couldn't manage to apply it to my spec.
It seems like it's not currently possible in Vega. There's this PR that hasn't been merged yet.
But you can work around that by manually aggregating the values and then looking them up. Here is a gist in which there's now a total field for each node, the relevant part being:
{
"name": "leaves",
"source": "tree",
"transform": [
{"type": "filter", "expr": "!datum.children"},
{"type": "filter", "expr": "datum.percentage > 0.3"}
]
},
{
"name": "totals",
"source": "leaves",
"transform": [
{
"type": "aggregate",
"groupby": ["parent"],
"fields": ["percentage"],
"as": ["total"],
"ops": ["sum"]
}
]
},
{
"name": "nodes",
"source": "tree",
"transform": [
{"type": "filter", "expr": "datum.children"},
{
"type": "lookup",
"from": "totals",
"key": "parent",
"fields": ["id"],
"values": ["total"],
"as": ["total"]
}
]
},
Basically you just get sums of all the leaves by parent in totals and then, after constructing the base nodes dataset, look the total up in totals.
Note that this will only work for this particular example, where there are exactly two levels in the hierarchy.

How to add vertical rules as new layer and same x-axis?

When I add the strips as a new layer (to the two-layer chart), it stops working: there is no visualization, and I get the warning "WARN Cannot project a selection on encoding channel "y", which has no field".
The first two layer definitions below were working fine when there were only the two lines.
vglSpec.push(['#vis2a',{
$schema: vglVers,
data: {"url":"MyDataset1"},
// old "encoding": { x: {"field": "instant", "type": "temporal"} }
width:680,
layer: [
{
"mark": {"stroke": "#68C", "type": "line", "point": true},
"encoding": { x: {"field": "instant", "type": "temporal"}, "y": {
"field": "n_count",
"type": "quantitative"
}},
"selection": {"grid": {"type":"interval", "bind":"scales"}} //zoom
},
{
"mark": {"stroke": "red", "type": "line", "strokeOpacity": 0.4},
"encoding": { x: {"field": "instant", "type": "temporal"}, "y": {
"field": "instant_totmin",
"type": "quantitative"
}}
},
{
"mark": "rule",
"data": {"url":"MyDataset2"}, // little subset of instant of Dataset1
"encoding": {
"x": { "field": "instant", "type": "temporal"},
"color": {"value": "yellow"},
"size": {"value": 5}
},
//resolve:? x is same axis and the only visualization field
}
],
resolve: {"scale": {"y": "independent"}}
}]);
PS: I only removed names and titles; otherwise this is the real script.
Emulating it with dummy data works fine!
Please open the 3rd example of the rule guide and replace or adapt it with this Vega-Lite script:
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {"url": "data/movies.json"},
"layer": [
{
"mark": "bar",
"encoding": {
"x": {"bin": true, "field": "IMDB_Rating", "type": "quantitative"},
"y": {"aggregate": "count", "type": "quantitative"}
}
},
{
"mark": "rule",
"data": {"values": [{"IMDB_Rating":3.5},{"IMDB_Rating":7.8}]},
"encoding": {
"x": { "field": "IMDB_Rating","type": "quantitative" },
"color": {"value": "yellow"},
"size": {"value": 4}
}
}
]
}
You're using an independent y-scale, and the y-scale of a rule mark with no y encoding is not well defined. The best way to address this is probably to combine the rule mark with one of the other layers, so it can use that y scale:
// Pushes a ['#vis2a', spec] pair onto vglSpec ('#vis2a' is presumably the
// target element selector; vglSpec and vglVers are defined outside this snippet).
// The spec has two top-level layers with independent y scales; the second one is
// itself a nested layer that groups the red line with the rule marks.
vglSpec.push(['#vis2a',{
$schema: vglVers,
data: {"url":"MyDataset1"},
// old "encoding": { x: {"field": "instant", "type": "temporal"} }
width:680,
layer: [
{
// Top-level layer 1: n_count line with point markers.
"mark": {"stroke": "#68C", "type": "line", "point": true},
"encoding": { x: {"field": "instant", "type": "temporal"}, "y": {
"field": "n_count",
"type": "quantitative"
}},
"selection": {"grid": {"type":"interval", "bind":"scales"}} // interval selection bound to the scales (zoom)
},
{
// Top-level layer 2: a nested layer. The rule mark has no y encoding, so it is
// grouped with the instant_totmin line in order to use that line's y scale
// rather than needing an independent y scale of its own.
layer: [
{
"mark": {"stroke": "red", "type": "line", "strokeOpacity": 0.4},
"encoding": { x: {"field": "instant", "type": "temporal"}, "y": {
"field": "instant_totmin",
"type": "quantitative"
}}
},
{
// Vertical rules drawn at each "instant" value of the second dataset.
"mark": "rule",
"data": {"url":"MyDataset2"}, // small subset of the instants in MyDataset1
"encoding": {
"x": { "field": "instant", "type": "temporal"},
"color": {"value": "yellow"},
"size": {"value": 5}
},
// resolve: ? -- x uses the same axis and is the only field visualized by this mark
}
]
}
],
// y scales are independent between the two top-level layers only.
resolve: {"scale": {"y": "independent"}}
}]);
(note, I've not actually tried this solution because you didn't include data in your question, but the approach should work).

Vega Lite Independent Scale with Multiple Layers and Facet

Is it possible to have an independent scale for each facet and each layer? The resolve works great when you have either a facet or an extra layer, but I cannot get it to do both, wondering if it is even possible.
What I want is:
The two scales on each side
mixed with
the faceting here
The way this would be expressed in Vega-Lite is using a layer, with resolve set, within a facet. Something like this:
{
  "data": {"url": "https://vega.github.io/vega-datasets/data/seattle-weather.csv"},
  "facet": {
    "column": {"field": "weather", "type": "nominal"}
  },
  "spec": {
    "resolve": {"scale": {"y": "independent"}},
    "layer": [
      {
        "mark": {"type": "line", "color": "salmon"},
        "encoding": {
          "x": {"field": "date", "type": "temporal", "timeUnit": "month"},
          "y": {"field": "temp_max", "type": "quantitative", "aggregate": "mean"}
        }
      },
      {
        "mark": {"type": "line", "color": "steelblue"},
        "encoding": {
          "x": {"field": "date", "type": "temporal", "timeUnit": "month"},
          "y": {"field": "precipitation", "type": "quantitative", "aggregate": "mean"}
        }
      }
    ]
  }
}
While this spec is valid according to the Vega-Lite schema, there is unfortunately a bug in the vega-lite renderer that makes it unable to render this spec.
As a workaround, you can manually concatenate two layered charts with a filter transform that selects the desired subset of data for each. For example:
{
  "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
  "data": {
    "url": "https://vega.github.io/vega-datasets/data/seattle-weather.csv"
  },
  "hconcat": [
    {
      "transform": [
        {"filter": "(datum.weather === 'sun')"}
      ],
      "resolve": {
        "scale": {"x": "shared", "y": "independent"}
      },
      "layer": [
        {
          "mark": {"type": "line", "color": "salmon"},
          "encoding": {
            "x": {
              "field": "date",
              "timeUnit": "month",
              "type": "temporal"
            },
            "y": {
              "field": "temp_max",
              "aggregate": "mean",
              "type": "quantitative"
            }
          }
        },
        {
          "mark": {"type": "line", "color": "steelblue"},
          "encoding": {
            "x": {
              "field": "date",
              "timeUnit": "month",
              "type": "temporal"
            },
            "y": {
              "field": "precipitation",
              "aggregate": "mean",
              "type": "quantitative"
            }
          }
        }
      ]
    },
    {
      "transform": [
        {"filter": "(datum.weather === 'fog')"}
      ],
      "resolve": {
        "scale": {"x": "shared", "y": "independent"}
      },
      "layer": [
        {
          "mark": {"type": "line", "color": "salmon"},
          "encoding": {
            "x": {
              "field": "date",
              "timeUnit": "month",
              "type": "temporal"
            },
            "y": {
              "field": "temp_max",
              "aggregate": "mean",
              "type": "quantitative"
            }
          }
        },
        {
          "mark": {"type": "line", "color": "steelblue"},
          "encoding": {
            "x": {
              "field": "date",
              "timeUnit": "month",
              "type": "temporal"
            },
            "y": {
              "field": "precipitation",
              "aggregate": "mean",
              "type": "quantitative"
            }
          }
        }
      ]
    }
  ]
}

How to plot several variables on an axis with Vega-Lite?

Following Vega-Lite's Seattle weather tutorial, it was easy to plot avg min temperature by month:
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"data": {
"url": "https://vega.github.io/vega-lite/data/seattle-weather.csv"
},
"mark": "line",
"encoding": {
"x": {
"timeUnit": "month",
"field": "date",
"type": "temporal"
},
"y": {
"aggregate": "mean",
"field": "temp_min",
"type": "quantitative"
}
}
}
This dataset also has temp_max variable. How can I plot both temp_min and temp_max on y-axis?
You can use layering as described at https://vega.github.io/vega-lite/docs/layer.html.
{
  "data": {
    "url": "https://vega.github.io/vega-lite/data/seattle-weather.csv"
  },
  "layer": [
    {
      "mark": "line",
      "encoding": {
        "x": {
          "timeUnit": "month",
          "field": "date",
          "type": "temporal"
        },
        "y": {
          "aggregate": "mean",
          "field": "temp_min",
          "type": "quantitative"
        }
      }
    },
    {
      "mark": "line",
      "encoding": {
        "x": {
          "timeUnit": "month",
          "field": "date",
          "type": "temporal"
        },
        "y": {
          "aggregate": "mean",
          "field": "temp_max",
          "type": "quantitative"
        }
      }
    }
  ]
}