How do I create a Wikidata SPARQL query to return the top 10 US companies and their directors based on revenue? - sparql

Here is the query I am working with:
# Intended result: US companies with a 2020 revenue figure and a comma-separated list of their current directors.
# NOTE(review): ?rev and ?date are projected as plain variables but are neither aggregated nor named in GROUP BY below.
SELECT DISTINCT ?item ?itemLabel ?rev ?date (group_concat(distinct ?directorLabel ; separator = ", ") as ?directors)
WHERE {
# ?item is an instance (P31) of wd:Q4830453 or of any subclass (P279*) of it, and has P17 = wd:Q30.
?item wdt:P31/wdt:P279* wd:Q4830453 ;
wdt:P17 wd:Q30 .
# Go through the full P2139 statement node so its P585 qualifier can be read next to the value.
?item p:P2139 ?revSt .
?revSt pq:P585 ?date.
# Keep only statements whose P585 qualifier falls in the year 2020.
FILTER ( YEAR(?date) = 2020 ).
?revSt ps:P2139 ?rev .
# Same statement-node pattern for P3320, needed to inspect the P582 qualifier below.
?item p:P3320 ?relationship.
?relationship ps:P3320 ?director.
# Keep only director statements that carry no P582 (end date) qualifier.
FILTER NOT EXISTS {?relationship pq:P582 ?end.}
# Bind the labels explicitly so that ?directorLabel is available to group_concat.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
?director rdfs:label ?directorLabel .
?item rdfs:label ?itemLabel .
}
}
# One group per company; the director labels are concatenated within each group.
GROUP BY ?item ?itemLabel
# Highest revenue first, limited to ten result rows.
ORDER BY DESC(?rev)
LIMIT 10
This query fails with a "bad aggregate" error, and if I add more variables to the GROUP BY clause it times out. Ultimately, I would also like to aggregate the Wikidata IDs of all the directors.

# Top 10 US companies by 2020 revenue, each with a comma-separated list of
# its current directors (P3320 statements without an end date).
#
# Every non-aggregated variable in the SELECT clause is also a GROUP BY key,
# which is what makes the aggregate valid. Because all group keys are
# projected, each result row is already unique and no DISTINCT is needed.
SELECT
  ?item ?itemLabel ?rev ?date
  (GROUP_CONCAT(DISTINCT ?directorLabel; separator = ", ") AS ?directors)
WHERE {
  # Instance of wd:Q4830453 (or of any subclass of it) located in wd:Q30.
  ?item wdt:P31/wdt:P279* wd:Q4830453 ;
        wdt:P17 wd:Q30 .

  # Revenue statement node, so the point-in-time qualifier can be read.
  ?item p:P2139 ?revSt .
  ?revSt pq:P585 ?date .
  # The hint must directly follow the triple that binds ?date. It only helps
  # when the FILTER is a plain range comparison, so the year is expressed as
  # a half-open dateTime range instead of YEAR(?date) = 2020.
  hint:Prior hint:rangeSafe "true" .
  FILTER ( "2020-01-01T00:00:00Z"^^xsd:dateTime <= ?date &&
           ?date < "2021-01-01T00:00:00Z"^^xsd:dateTime )
  ?revSt ps:P2139 ?rev .

  # Director statement node; drop relationships that have an end date.
  ?item p:P3320 ?relationship .
  ?relationship ps:P3320 ?director .
  FILTER NOT EXISTS { ?relationship pq:P582 ?end . }

  # Labels are bound explicitly so ?directorLabel can be aggregated.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?director rdfs:label ?directorLabel .
    ?item rdfs:label ?itemLabel .
  }
}
GROUP BY ?item ?itemLabel ?rev ?date
ORDER BY DESC(?rev)
LIMIT 10

Related

SPARQL query timing out for Wikidata

I am trying to run a SPARQL query. As soon as I add schema:about to the query, it starts timing out. I am trying to write a query to get all cities with a population greater than X. Am I doing something wrong, or is there a way to optimise it?
Original query which is working
# Cities (instances of wd:Q515 or of any of its subclasses) with a population
# of at least one million, together with their country and coordinates,
# largest population first.
SELECT DISTINCT
  ?city ?cityLabel
  ?population
  ?country ?countryLabel
  ?loc
WHERE {
  ?city wdt:P31/wdt:P279* wd:Q515 ;
        wdt:P1082 ?population ;
        wdt:P17 ?country ;
        wdt:P625 ?loc .
  FILTER ( ?population >= 1000000 )

  # Automatic label mode: fills ?cityLabel and ?countryLabel in English.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY DESC(?population)
Query to get about article:
# City query (population >= 1,000,000) extended with ?article, the English Wikipedia page about each city.
SELECT DISTINCT ?city ?cityLabel ?population ?country ?countryLabel ?loc ?article WHERE {
# ?city is an instance of wd:Q515 or of any of its subclasses.
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P1082 ?population .
?city wdt:P17 ?country .
?city wdt:P625 ?loc .
FILTER (?population >= 1000000) .
# Sitelink patterns: ?article is a page about ?city that belongs to en.wikipedia.org.
# NOTE(review): these two patterns are the ones reported to trigger the timeout; the cause is not visible from the query text alone.
?article schema:about ?city .
?article schema:isPartOf <https://en.wikipedia.org/>.
# Automatic label mode: fills ?cityLabel and ?countryLabel in English.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}
ORDER BY DESC(?population)
Here is the link where I am trying to run the query: https://query.wikidata.org/

Wikidata SPARQL query to return only most recent population

I have this query that returns each value of a POPULATION (property) for each POINT IN TIME (property), but I only want the most recent population for each city. Sorry for the extraneous query items.
# US cities with coordinates and every dated population statement, plus one sample area code and image per group.
SELECT ?cityLabel ?gps ?population ?date (SAMPLE(?areacode) as ?areacode_sample) (SAMPLE(?image) as ?image_sample)
# Named subquery: its solutions are stored as %i and pulled into the outer query with INCLUDE.
WITH {
SELECT DISTINCT *
WHERE {
# ?city is an instance of wd:Q515 (or of a subclass) with P17 = wd:Q30.
?city wdt:P31/wdt:P279* wd:Q515 .
?city wdt:P17 wd:Q30 .
?city wdt:P625 ?gps .
# Statement-node pattern: one solution per P1082 statement that has a P585 qualifier.
?city p:P1082 ?statement .
?statement ps:P1082 ?population .
?statement pq:P585 ?date .
# Area code and image are optional, so cities lacking them are still returned.
OPTIONAL {?city wdt:P473 ?areacode .
}
OPTIONAL {?city wdt:P18 ?image .
}
}
# NOTE(review): the outer query groups and re-sorts, so this inner ordering presumably does not affect the final result - confirm.
ORDER BY DESC(?population)
} AS %i
WHERE {
INCLUDE %i
# Automatic label mode: supplies ?cityLabel for ?city.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
# ?population and ?date are group keys, so every distinct (population, date) pair of a city forms its own group;
# this is why each dated value appears as a separate row instead of only the most recent one.
GROUP BY ?cityLabel ?gps ?population ?date
ORDER BY DESC(?population)
LIMIT 750

I used MAX here to pick the maximum net worth of each billionaire, but it does not seem to work in cases such as Bill Gates, Elon Musk, etc.

# Items whose P2218 value exceeds one billion, with that value expressed in billions, a sample image and the English Wikipedia page title.
SELECT DISTINCT ?item ?itemLabel ?itemDescription (MAX(?billion) as ?billions) ?locationLabel (SAMPLE(?image) AS ?picture) ?page_titleEN
WHERE
{
?item wdt:P2218 ?worth.
# P19 is optional, so items without it are kept.
OPTIONAL {?item wdt:P19 ?location .}
FILTER(?worth>1000000000).
# Scale the raw amount down to billions.
BIND(?worth/1000000000 AS ?billion).
# English Wikipedia sitelink and English label; the whole group is optional.
# ?LabelEN is bound here but never projected.
OPTIONAL {
?article schema:about ?item;
schema:isPartOf <https://en.wikipedia.org/>;
schema:name ?page_titleEN.
?item rdfs:label ?LabelEN.
FILTER((LANG(?LabelEN)) = "en")
}
OPTIONAL { ?item wdt:P18 ?image. }
# Automatic label mode: supplies ?itemLabel, ?itemDescription and ?locationLabel.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
# MAX and SAMPLE are evaluated once per group. The keys below include ?billions and ?picture, which are the
# aliases of those aggregates, as well as ?locationLabel and ?page_titleEN.
# NOTE(review): an item with several distinct values for any group key yields several rows, which may be why
# some people still appear more than once - confirm against the actual data.
GROUP BY ?item ?itemLabel ?itemDescription ?billions ?locationLabel ?picture ?page_titleEN
ORDER BY DESC(?billions)

SPARQL query: a set of cat breeds along with the cats that belong to each

I am trying to query all the cat breeds (wd:Q43577) along with the list of cats that belong to each breed. Below, you will find my solution. Its result is not relevant and contains redundancy. Therefore, I need your help to find the best solution. (To execute this query, you have to use this link.)
# One row per (breed, parent class, instance of that parent class) combination.
SELECT
  ?CatsBreedsID ?CatsBreedsName
  ?CategoryID ?CategoryName
  ?CatsID ?CatsName
WHERE {
  # Every item declared as an instance of wd:Q43577.
  ?CatsBreedsID wdt:P31 wd:Q43577 .

  # Both patterns must match for the optional part to bind anything;
  # breeds for which they do not are still returned with these columns empty.
  OPTIONAL {
    ?CatsBreedsID wdt:P279 ?CategoryID .
    ?CatsID wdt:P31 ?CategoryID .
  }

  # Manual label mode: each label variable is bound explicitly.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
    ?CatsBreedsID rdfs:label ?CatsBreedsName .
    ?CategoryID rdfs:label ?CategoryName .
    ?CatsID rdfs:label ?CatsName .
  }
}
You should use the GROUP_CONCAT function if I understand your question correctly.
Try a query like this:
# One row per (breed, parent class) pair; the matching cats are collapsed
# into two comma-separated lists (entity IRIs and English labels).
SELECT
  ?CatsBreedsID ?CatsBreedsName
  ?CategoryID ?CategoryName
  (GROUP_CONCAT(DISTINCT ?CatsID; SEPARATOR=', ') AS ?CatsID_List)
  (GROUP_CONCAT(DISTINCT ?CatsName; SEPARATOR=', ') AS ?CatsName_List)
WHERE {
  # Every item declared as an instance of wd:Q43577.
  ?CatsBreedsID wdt:P31 wd:Q43577 .

  # Parent class of the breed and the items that are instances of that class.
  OPTIONAL {
    ?CatsBreedsID wdt:P279 ?CategoryID .
    ?CatsID wdt:P31 ?CategoryID .
  }

  # Manual label mode, so ?CatsName is available to the aggregate.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
    ?CatsBreedsID rdfs:label ?CatsBreedsName .
    ?CategoryID rdfs:label ?CategoryName .
    ?CatsID rdfs:label ?CatsName .
  }
}
GROUP BY ?CatsBreedsID ?CatsBreedsName ?CategoryID ?CategoryName

Get properties by QID?

I can get an item and its properties by label:
# Humans (P31 = wd:Q5) that have some property with the literal value "Einstein" and an English Wikipedia article.
# NOTE(review): the SAMPLE aliases below reuse the names ?DR and ?article, which are already bound inside WHERE.
SELECT distinct ?item ?itemLabel ?itemDescription
(SAMPLE(?DR) as ?DR) (SAMPLE(?article)as ?article)
WHERE {
?item wdt:P31 wd:Q5.
# ?label is a variable in predicate position, so ANY property whose value is the plain literal "Einstein" matches.
# The literal carries no language tag; the trailing "#en" is only a comment.
?item ?label "Einstein"#en
OPTIONAL{?item wdt:P569 ?DR .}
# Required (non-optional) sitelink patterns: English-language page on en.wikipedia.org about ?item.
?article schema:about ?item .
?article schema:inLanguage "en" .
?article schema:isPartOf <https://en.wikipedia.org/>.
# ?RIP and ?image are bound when present but are not projected.
OPTIONAL{?item wdt:P570 ?RIP .}
OPTIONAL{?item wdt:P18 ?image .}
# Automatic label mode: supplies ?itemLabel and ?itemDescription.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?item ?itemLabel ?itemDescription
See on Wikidata Query Services.
How can I do the same using a QID instead of a label?
Using the URI instead of the variable ?item will get the information based on the entity Albert Einstein:
# Look up a single, already known entity (Q937) together with its English
# Wikipedia article, instead of searching for it by label.
PREFIX schema:   <http://schema.org/>
PREFIX bd:       <http://www.bigdata.com/rdf#>
PREFIX wdt:      <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>

SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (SAMPLE(?DR) AS ?DRSample)
  (SAMPLE(?article) AS ?articleSample)
WHERE {
  # English-language page on en.wikipedia.org about ?item.
  ?article schema:about ?item ;
           schema:inLanguage "en" ;
           schema:isPartOf <https://en.wikipedia.org/>

  # Restrict ?item to the one entity of interest.
  FILTER ( ?item = <http://www.wikidata.org/entity/Q937> )

  # Optional direct claims; only ?DR is projected (through SAMPLE).
  OPTIONAL { ?item wdt:P569 ?DR }
  OPTIONAL { ?item wdt:P570 ?RIP }
  OPTIONAL { ?item wdt:P18 ?image }

  # Automatic label mode: supplies ?itemLabel and ?itemDescription.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
GROUP BY ?item ?itemLabel ?itemDescription
You can utilize the already known QID by using BIND:
# Assign the known entity to ?item; the remaining graph patterns (elided below) can keep referring to ?item.
BIND(wd:Q937 AS ?item).
...
If you already have the QID of the entity you are looking for and simply look for its properties and labels, you're better off using the Wikidata API wbgetentities module
In the case of A. Einstein (Q937), that would give the following API call:
https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q937&format=json