Optimize wikidata query to reduce time. SPARQL - sparql

I am getting a timeout on the query below.
Can anyone please help me optimize it?
# Items matching wdt:P31 wd:Q5 (bound to ?human) that have a wdt:P18 image and
# a birth date, read through the p:P569/psv:P569 value node, whose year is 1981.
# Occupation, nationality, gender, death date and alternative labels are
# attached when present.
SELECT
  ?human ?humanLabel ?alternative ?humanDescription ?birth_date ?image
  ?occupationLabel ?nationalityLabel ?sex_or_genderLabel ?date_of_death
WHERE {
  ?human wdt:P31 wd:Q5 ;
         wdt:P18 ?image ;
         p:P569/psv:P569 ?birthValue .
  ?birthValue wikibase:timeValue ?birth_date .

  # Each OPTIONAL can match more than once, which multiplies the rows per ?human.
  OPTIONAL { ?human wdt:P106 ?occupation }
  OPTIONAL { ?human wdt:P27 ?nationality }
  OPTIONAL { ?human wdt:P21 ?sex_or_gender }
  OPTIONAL { ?human wdt:P570 ?date_of_death }
  # No language restriction is applied to the alternative labels.
  OPTIONAL { ?human skos:altLabel ?alternative }

  # Keep only rows with 1981 <= year(?birth_date) < 1982.
  FILTER (year(?birth_date) >= 1981 && year(?birth_date) < 1982)

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
# ?name is not bound by any pattern in this query.
ORDER BY ?name

Related

Wikidata/SPARQL, get distinct items (object id) in result

When I run the following query I get multiple rows for "Paul Gauguin", since there are multiple pieces of information about his place/time of death; the same could of course happen with all the other parameters as well.
# One row per combination of optional values for every ?item matching
# wdt:P31 wd:Q5 and wdt:P119 wd:Q5024152. DISTINCT only removes rows that are
# identical in every column, so an ?item with several values for one property
# still appears several times.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  ?birthplaceLabel ?birthdate
  ?deathplaceLabel ?deathdate
  ?imageLabel ?article ?articleEn
WHERE {
  ?item wdt:P31 wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18 ?image }
  OPTIONAL { ?item wdt:P19 ?birthplace }
  OPTIONAL { ?item wdt:P569 ?birthdate }
  OPTIONAL { ?item wdt:P20 ?deathplace }
  OPTIONAL { ?item wdt:P570 ?deathdate }

  # Article about the item on sv.wikipedia.org, if any.
  OPTIONAL {
    ?article schema:about ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }
  # Article about the item on en.wikipedia.org, if any.
  OPTIONAL {
    ?articleEn schema:about ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
  }
}
Is there a way to receive only one row per object id, without caring whether there are other "versions" of the object?
I have tried a bit with nested queries but I can't get it to work. Are there some other ways?
This query will work:
# One row per (?item, ?itemLabel, ?itemDescription, ?article, ?articleEn):
# every other column is reduced to a single value with SAMPLE.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (SAMPLE(?birthplaceLabel) AS ?birthplaceLabel)
  (SAMPLE(?birthdate)       AS ?birthdate)
  (SAMPLE(?deathplaceLabel) AS ?deathplaceLabel)
  (SAMPLE(?deathdate)       AS ?deathdate)
  (SAMPLE(STR(?image))      AS ?image)
  ?article ?articleEn
WHERE {
  ?item wdt:P31 wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18 ?image }
  OPTIONAL { ?item wdt:P19 ?birthplace }
  OPTIONAL { ?item wdt:P569 ?birthdate }
  OPTIONAL { ?item wdt:P20 ?deathplace }
  OPTIONAL { ?item wdt:P570 ?deathdate }

  OPTIONAL {
    ?article schema:about ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }
  OPTIONAL {
    ?articleEn schema:about ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  # The label variables are requested explicitly inside the service so that
  # they exist as ordinary variables for SAMPLE and GROUP BY.
  SERVICE wikibase:label {
    ?birthplace rdfs:label ?birthplaceLabel .
    ?deathplace rdfs:label ?deathplaceLabel .
    ?item rdfs:label ?itemLabel .
    ?item schema:description ?itemDescription .
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
  }
}
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn
As you can see, we use the SAMPLE function and group by all the variables that aren't sampled.
You could also replace SAMPLE with GROUP_CONCAT if you wish to see all the possible values taken by a property.
E.g. ... (GROUP_CONCAT(?deathdate; SEPARATOR="; ") AS ?deathdates) ...
Note that GROUP_CONCAT takes strings as arguments.
Found the solution with a bit of modifying of Valerio Cocchi's answer.
# Grouped variant without DISTINCT: one row per
# (?item, ?itemLabel, ?itemDescription, ?article, ?articleEn), with a sampled
# value for each remaining column, including ?imageLabel.
SELECT
  ?item
  ?itemLabel
  ?itemDescription
  (SAMPLE(?birthplaceLabel) AS ?birthplaceLabel)
  (SAMPLE(?birthdate) AS ?birthdate)
  (SAMPLE(?deathplaceLabel) AS ?deathplaceLabel)
  (SAMPLE(?deathdate) AS ?deathdate)
  (SAMPLE(?imageLabel) AS ?imageLabel)
  ?article
  ?articleEn
WHERE {
  ?item wdt:P31 wd:Q5 .
  ?item wdt:P119 wd:Q5024152 .

  # Optional single-property lookups.
  OPTIONAL { ?item wdt:P18 ?image . }
  OPTIONAL { ?item wdt:P19 ?birthplace . }
  OPTIONAL { ?item wdt:P569 ?birthdate . }
  OPTIONAL { ?item wdt:P20 ?deathplace . }
  OPTIONAL { ?item wdt:P570 ?deathdate . }

  # Optional articles on sv.wikipedia.org and en.wikipedia.org.
  OPTIONAL {
    ?article schema:isPartOf <https://sv.wikipedia.org/> ;
             schema:about ?item .
  }
  OPTIONAL {
    ?articleEn schema:isPartOf <https://en.wikipedia.org/> ;
               schema:about ?item .
  }

  # Labels and the description are requested explicitly from the label service.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
    ?item rdfs:label ?itemLabel ;
          schema:description ?itemDescription .
    ?birthplace rdfs:label ?birthplaceLabel .
    ?deathplace rdfs:label ?deathplaceLabel .
    ?image rdfs:label ?imageLabel .
  }
}
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn

I used MAX here to pick the maximum net worth of each billionaire, but it does not seem to work in cases like Bill Gates, Elon Musk, etc.

# Items with a wdt:P2218 value above 1000000000, expressed in billions, with
# an optional location, English Wikipedia page title and image.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (MAX(?billion) AS ?billions)
  ?locationLabel
  (SAMPLE(?image) AS ?picture)
  ?page_titleEN
WHERE {
  ?item wdt:P2218 ?worth .
  OPTIONAL { ?item wdt:P19 ?location }
  FILTER (?worth > 1000000000)
  # Scale the raw value down to billions.
  BIND (?worth / 1000000000 AS ?billion)

  # English Wikipedia page title plus the item's English rdfs:label; the
  # whole group is optional.
  OPTIONAL {
    ?article schema:about ?item .
    ?article schema:isPartOf <https://en.wikipedia.org/> .
    ?article schema:name ?page_titleEN .
    ?item rdfs:label ?LabelEN .
    FILTER (LANG(?LabelEN) = "en")
  }
  OPTIONAL { ?item wdt:P18 ?image }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
# The grouping key lists ?billions and ?picture, which are the aliases of the
# MAX and SAMPLE aggregates above. It also lists ?locationLabel, so every
# distinct location of an item forms its own group.
GROUP BY ?item ?itemLabel ?itemDescription ?billions ?locationLabel ?picture ?page_titleEN
ORDER BY DESC(?billions)

SPARQL query: a set of cat breeds along with the cats that belong to each breed

I am trying to query all the cat breeds (wd:Q43577) along with the list of cats that belong to each breed. Below, you will find my solution. Its result is not relevant and contains redundancy. Therefore, I need your help to find a better solution. (To execute this query, you have to use this link.)
# Every item matching wdt:P31 wd:Q43577, paired (when available) with each of
# its wdt:P279 categories and each item that is a wdt:P31 of that category.
# One row is produced per (breed, category, cat) combination.
SELECT
  ?CatsBreedsID ?CatsBreedsName
  ?CategoryID ?CategoryName
  ?CatsID ?CatsName
WHERE {
  ?CatsBreedsID wdt:P31 wd:Q43577 .

  OPTIONAL {
    ?CatsBreedsID wdt:P279 ?CategoryID .
    ?CatsID wdt:P31 ?CategoryID .
  }

  # English labels for all three entity variables.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
    ?CatsBreedsID rdfs:label ?CatsBreedsName .
    ?CategoryID   rdfs:label ?CategoryName .
    ?CatsID       rdfs:label ?CatsName .
  }
}
You should use the GROUP_CONCAT function if I understand your question correctly.
Try a query like this:
# One row per (breed, category): the matching cats are folded into two
# comma-separated lists, one of identifiers and one of names.
SELECT
  ?CatsBreedsID ?CatsBreedsName
  ?CategoryID ?CategoryName
  (GROUP_CONCAT(DISTINCT ?CatsID;   SEPARATOR = ', ') AS ?CatsID_List)
  (GROUP_CONCAT(DISTINCT ?CatsName; SEPARATOR = ', ') AS ?CatsName_List)
WHERE {
  ?CatsBreedsID wdt:P31 wd:Q43577 .

  OPTIONAL {
    ?CatsBreedsID wdt:P279 ?CategoryID .
    ?CatsID wdt:P31 ?CategoryID .
  }

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
    ?CatsBreedsID rdfs:label ?CatsBreedsName .
    ?CategoryID   rdfs:label ?CategoryName .
    ?CatsID       rdfs:label ?CatsName .
  }
}
GROUP BY ?CatsBreedsID ?CatsBreedsName ?CategoryID ?CategoryName

Get properties by QID?

I can get item and its properties by label:
# Items matching wdt:P31 wd:Q5 that have the literal "Einstein" as the value
# of some predicate and an English Wikipedia article. One row per item, with
# a sampled ?DR and a sampled article.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (SAMPLE(?DR) AS ?DR)
  (SAMPLE(?article) AS ?article)
WHERE {
  ?item wdt:P31 wd:Q5 .
  # ?label is a variable in predicate position, so any predicate matches; the
  # literal carries no language tag.
  ?item ?label "Einstein" .
  OPTIONAL { ?item wdt:P569 ?DR }

  ?article schema:about ?item ;
           schema:inLanguage "en" ;
           schema:isPartOf <https://en.wikipedia.org/> .

  # ?RIP and ?image are matched but never selected.
  OPTIONAL { ?item wdt:P570 ?RIP }
  OPTIONAL { ?item wdt:P18 ?image }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?item ?itemLabel ?itemDescription
See on Wikidata Query Services.
How can I do the same using the QID instead of the label?
Using the URI instead of the variable ?item will get the information based on the entity Albert Einstein:
# English Wikipedia article and optional properties for a single entity:
# the FILTER keeps only ?item = <http://www.wikidata.org/entity/Q937>.
PREFIX wdt:      <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX bd:       <http://www.bigdata.com/rdf#>
PREFIX schema:   <http://schema.org/>

SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (SAMPLE(?DR) AS ?DRSample)
  (SAMPLE(?article) AS ?articleSample)
WHERE {
  ?article schema:about ?item .
  ?article schema:inLanguage "en" .
  ?article schema:isPartOf <https://en.wikipedia.org/> .
  FILTER (?item = <http://www.wikidata.org/entity/Q937>)

  OPTIONAL { ?item wdt:P569 ?DR }
  # ?RIP and ?image are matched but never selected.
  OPTIONAL { ?item wdt:P570 ?RIP }
  OPTIONAL { ?item wdt:P18 ?image }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
GROUP BY ?item ?itemLabel ?itemDescription
You can utilize the already known QID by using BIND:
BIND(wd:Q937 AS ?item).
...
If you already have the QID of the entity you are looking for and simply look for its properties and labels, you're better off using the Wikidata API wbgetentities module
In A. Einstein (Q937) case, that would give the following API call:
https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q937&format=json

How to retrieve aliases from wikidata

I'm trying to retrieve some information from Wikidata and I have found it interesting to collect the aliases of the entries. For example, Francesco Totti is also known as il Capitano or er Pupone:
I'm trying to retrieve all the Serie A football players with this SPARQL query:
SELECT ?subject ?nomeLabel ?cognomeLabel ?subjectLabel WHERE {
# Subjects matching wdt:P31 wd:Q5 that have a P54 statement (bound to ?team).
?subject wdt:P31 wd:Q5.
?subject p:P54 ?team .
# NOTE(review): the wd: identifier below is presumably spliced in by the
# enclosing host-language string concatenation (team_code); confirm against
# the calling code.
?team ps:P54 wd:""" + team_code +""" .
# Discard P54 statements that carry a pq:P582 qualifier.
FILTER NOT EXISTS { ?team pq:P582 ?end
}
# Both triples share one OPTIONAL, so ?nome and ?cognome are bound only when
# the subject has both P735 and P734.
OPTIONAL{
?subject wdt:P735 ?nome .
?subject wdt:P734 ?cognome .
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "it". }
}
ORDER BY (?cognomeLabel)
How can I modify my query to also retrieve the aliases?
Thanks
I have attempted a query with various labels. Here just for Roma:
# Subjects matching wdt:P31 wd:Q5 with a P54 statement for wd:Q2739 that has
# no pq:P582 qualifier, together with several optional name-like values.
SELECT DISTINCT
  ?subject ?subjectLabel
  ?nomeLabel ?cognomeLabel
  ?nickname ?alternative ?subjectAltLabel
WHERE {
  ?subject wdt:P31 wd:Q5 ;
           p:P54 ?membership .
  ?membership ps:P54 wd:Q2739 .
  FILTER NOT EXISTS { ?membership pq:P582 ?endTime }

  # Each value sits in its own OPTIONAL, so a missing one does not hide the others.
  OPTIONAL { ?subject wdt:P735 ?nome }
  OPTIONAL { ?subject wdt:P734 ?cognome }
  OPTIONAL { ?subject wdt:P1449 ?nickname }
  OPTIONAL { ?subject skos:altLabel ?alternative }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "it,en,fr" . }
}
ORDER BY ?cognomeLabel
I believe the P1449 property should be the most appropriate property to store an alias/nickname, but it does not seem to be used that much for football players. I just added "il Capitano" to Francesco Totti. Beyond that one there does not seem to be other nicknames for Roma players.
The "Also known as" label (in the right column) is not necessarily the nickname, but may be a spelling variation.
Something more generic, if someone is interested in all properties; this returns only the English "also known as" labels:
# Up to 5000 Wikibase properties, each with its English label, description
# and a comma-separated list of its distinct English alternative labels.
SELECT
  ?property ?propertyLabel ?propertyDescription
  (GROUP_CONCAT(DISTINCT ?altLabel; SEPARATOR = ", ") AS ?altLabel_list)
WHERE {
  ?property a wikibase:Property .

  # Properties without an English alternative label are still returned.
  OPTIONAL {
    ?property skos:altLabel ?altLabel .
    FILTER (LANG(?altLabel) = "en")
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?property ?propertyLabel ?propertyDescription
LIMIT 5000
Another, simpler example for male actors, using itemAltLabel:
# Male actors: items that satisfy both constraints below.
# ?itemLabel and ?itemAltLabel are not bound by any triple pattern in this
# query; the only other clause is the wikibase:label service.
SELECT ?item ?itemLabel ?itemAltLabel
WHERE {
  ?item wdt:P21 wd:Q6581097 ;
        wdt:P106 wd:Q33999 .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}