Wikidata sort cities in a given state by population - sparql

I'm looking to recreate this list of cities in Texas by population using wikidata.
I see I can do states by population with this query:
SELECT DISTINCT ?state ?stateLabel ?population
{
?state wdt:P31 wd:Q35657 ;
wdt:P1082 ?population .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
GROUP BY ?state ?population ?stateLabel
ORDER BY DESC(?population)
And the id for Texas is wd:Q1439
So I have tried the following:
SELECT ?country ?countryLabel ?state ?stateLabel ?city ?cityLabel ?population
WHERE
{
# ?state wdt:P31 wd:Q35657 . # Give me an american state
?state wdt:P31 wd:Q1439 . # that state is is Texas
?city wdt:P31 wd:Q515 . # it is an instance of city
?city wdt:P17 ?country. # get the country (for double-checking)
?city wdt:P361 ?state. # get the state it belongs to
?city wdt:P1082 ?population . # get the population of the city
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY DESC(?population) limit 68
And no matches. What's the right query for this?
Update
This returns data, but is incorrect. It misses cities like Houston, San Antonio, etc.
SELECT ?mun ?munLabel ?population WHERE {
{
SELECT distinct ?mun ?population WHERE {
values ?habitation {
wd:Q3957
wd:Q515
wd:Q15284
}
?mun (wdt:P31/(wdt:P279*)) ?habitation;
wdt:P131 wd:Q1439;
# wdt:P625 ?loc;
wdt:P1082 ?population .
}
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
}
}
ORDER BY DESC(?population)

The issue is that Houston and San Antonio's locations are listed as Harris and Bexar county respectively, and the counties are located in Texas. If you try this query it should work:
SELECT ?mun ?munLabel ?population WHERE {
{
SELECT distinct ?mun ?population WHERE {
values ?habitation {
wd:Q3957
wd:Q515
wd:Q15284
}
?mun (wdt:P31/(wdt:P279*)) ?habitation;
wdt:P131+ wd:Q1439; #Add '+' here for transitivity
# wdt:P625 ?loc;
wdt:P1082 ?population .
}
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
}
}
ORDER BY DESC(?population)
The trick is to add + next to wdt:P131, which translates the query from "look for exactly one wdt:P131 edge" to "look for one or more wdt:P131 edges".
This takes care of the issue because Harris and Bexar counties are themselves listed as being located in Texas.

Related

SPARQL query timing out for WIkidata

I am trying to run a SPARQL query. As soon as I try to query schema:about for a query , the query starts timing out. I am trying to write a query to get all cities with population greater than X. Am I doing something wrong or is there a way to optimise it?
Original query which is working
SELECT DISTINCT ?city ?cityLabel ?population ?country ?countryLabel ?loc WHERE {
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P1082 ?population .
?city wdt:P17 ?country .
?city wdt:P625 ?loc .
FILTER (?population >= 1000000) .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}
ORDER BY DESC(?population)
Query to get about article:
SELECT DISTINCT ?city ?cityLabel ?population ?country ?countryLabel ?loc ?article WHERE {
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P1082 ?population .
?city wdt:P17 ?country .
?city wdt:P625 ?loc .
FILTER (?population >= 1000000) .
?article schema:about ?city .
?article schema:isPartOf <https://en.wikipedia.org/>.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}
ORDER BY DESC(?population)
Here is the link where I am trying to run the query: https://query.wikidata.org/

Wikidata SPARQL query to return only most recent population

I have this query that returns each value of a POPULATION (property) for each POINT IN TIME (property), but I only want the most recent population for each city. Sorry for the extraneous query items.
SELECT ?cityLabel ?gps ?population ?date (SAMPLE(?areacode) as ?areacode_sample) (SAMPLE(?image) as ?image_sample)
WITH {
SELECT DISTINCT *
WHERE {
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P17 wd:Q30 .
?city wdt:P625 ?gps .
?city p:P1082 ?statement .
?statement ps:P1082 ?population .
?statement pq:P585 ?date .
OPTIONAL {?city wdt:P473 ?areacode .
}
OPTIONAL {?city wdt:P18 ?image .
}
}
ORDER BY DESC(?population)
} AS %i
WHERE {
INCLUDE %i
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?cityLabel ?gps ?population ?date
ORDER BY DESC(?population)
LIMIT 750

Query for EU universities from Wikidata times out

Asking the Wikidata service for all universities works. But when restricted to only EU universities, this query times out.
WHERE {
?item wdt:P31/wdt:P279* wd:Q3918 ;
wdt:P17 ?country.
?country wdt:P463 wd:Q458 .
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
Any idea?
Strangely, using FILTER EXISTS is returns the result (although quite slow).
SELECT ?item ?itemLabel ?countryLabel ?country
WHERE {
?item wdt:P31/wdt:P279* wd:Q3918 ;
wdt:P17 ?country.
FILTER EXISTS { ?country wdt:P463 wd:Q458 }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
Here's a link to the second query.

SPARQL Query: A sets of breeds cat along with the cats that belong to

I am trying to query all the cats breeds (wd:Q43577) along with the list of cats that belong to. Below, you will find my solution. The result of my solution is not relevant and contains a redundancy. Therefore, I need your help to find the best solution. (to execute this query, you have to use this link).
SELECT ?CatsBreedsID ?CatsBreedsName ?CategoryID ?CategoryName ?CatsID ?CatsName where {
?CatsBreedsID wdt:P31 wd:Q43577.
OPTIONAL{
?CatsBreedsID wdt:P279 ?CategoryID.
?CatsID wdt:P31 ?CategoryID.
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en".
?CatsBreedsID rdfs:label ?CatsBreedsName .
?CategoryID rdfs:label ?CategoryName .
?CatsID rdfs:label ?CatsName.
}
}
You should use the GROUP_CONCAT function if I understand your question correctly.
Try a query like this:
SELECT ?CatsBreedsID ?CatsBreedsName ?CategoryID ?CategoryName
(GROUP_CONCAT(DISTINCT ?CatsID; SEPARATOR=', ') AS ?CatsID_List)
(GROUP_CONCAT(DISTINCT ?CatsName; SEPARATOR=', ') AS ?CatsName_List)
WHERE {
?CatsBreedsID wdt:P31 wd:Q43577.
OPTIONAL{
?CatsBreedsID wdt:P279 ?CategoryID.
?CatsID wdt:P31 ?CategoryID.
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en".
?CatsBreedsID rdfs:label ?CatsBreedsName .
?CategoryID rdfs:label ?CategoryName .
?CatsID rdfs:label ?CatsName.
}
} GROUP BY ?CatsBreedsID ?CatsBreedsName ?CategoryID ?CategoryName

Wikidata Query Names of city in all languages

I query a citiy with its population from wikidata. With the language service i get he label in the given language.
But i would like to get the cityname in all languages or at least in multiple languages at once . I tried to pass * as language but i don't get all citynames returned.
Do I have to make the query for each language once ?
This is my query
SELECT DISTINCT ?city ?cityLabel ?population
WHERE
{
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P1566 "2950157" .
?city wdt:P1082 ?population .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}
I also tried to just set to languages like this
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
bd:serviceparam wikibase:language "de" .
}
but it returns
Unknown error: Expected a variable in the object position to which to
bind the language.
The error is telling you that you have to bind the German tag to a variable (?cityGe) in the selection process.
SELECT DISTINCT ?city ?cityLabel ?cityGe ?population
WHERE
{
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P1566 "2950157" .
?city wdt:P1082 ?population .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
bd:serviceParam wikibase:language "de" .
}
}
However, this does not solve your problem because this is only a fallback mechanism. If it cannot find English then it gives you German label.There are some examples here.
This can be simplified as follows with only one variable:
SERVICE wikibase:label { bd:serviceParam wikibase:language "en,de" }
However, as AKSW points out, you can use rdfs:label for your problem:
SELECT DISTINCT ?city ?label ?population
WHERE
{
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P1566 "2950157" .
?city wdt:P1082 ?population .
?city rdfs:label ?label
filter(lang(?label) = 'de' || lang(?label) = 'en')
}