Wikidata/SPARQL, get distinct items (object id) in result - sparql

When I run the following query I get multiple rows for "Paul Gauguin", since there are multiple statements about his place/time of death; the same could of course happen with any of the other properties as well.
# Items that are instances of wd:Q5 and have wdt:P119 = wd:Q5024152, together
# with optional details. An OPTIONAL that matches several values produces one
# result row per value, which is why the same ?item can appear more than once.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  ?birthplaceLabel ?birthdate
  ?deathplaceLabel ?deathdate
  ?imageLabel
  ?article ?articleEn
WHERE {
  ?item wdt:P31  wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18  ?image . }
  OPTIONAL { ?item wdt:P19  ?birthplace . }
  OPTIONAL { ?item wdt:P569 ?birthdate . }
  OPTIONAL { ?item wdt:P20  ?deathplace . }
  OPTIONAL { ?item wdt:P570 ?deathdate . }

  # Page about the item on the Swedish Wikipedia, if any.
  OPTIONAL {
    ?article schema:about    ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }

  # Page about the item on the English Wikipedia, if any.
  OPTIONAL {
    ?articleEn schema:about    ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
  }
}
Is there a way to receive only one row per object id, without caring whether there are other "versions" of the object?
I have tried a bit with nested queries but I can't get it to work. Are there some other ways?

This query will work:
# Same selection as the question's query, but multi-valued properties are
# collapsed with SAMPLE so that each group contributes a single row.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (SAMPLE(?birthplaceLabel) AS ?birthplaceLabel)
  (SAMPLE(?birthdate)       AS ?birthdate)
  (SAMPLE(?deathplaceLabel) AS ?deathplaceLabel)
  (SAMPLE(?deathdate)       AS ?deathdate)
  (SAMPLE(STR(?image))      AS ?image)
  ?article ?articleEn
WHERE {
  ?item wdt:P31  wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18  ?image . }
  OPTIONAL { ?item wdt:P19  ?birthplace . }
  OPTIONAL { ?item wdt:P569 ?birthdate . }
  OPTIONAL { ?item wdt:P20  ?deathplace . }
  OPTIONAL { ?item wdt:P570 ?deathdate . }

  OPTIONAL {
    ?article schema:about    ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }
  OPTIONAL {
    ?articleEn schema:about    ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  # The label variables are requested explicitly inside the service block so
  # that they are ordinary bindings the SELECT clause can aggregate over.
  SERVICE wikibase:label {
    ?birthplace rdfs:label ?birthplaceLabel .
    ?deathplace rdfs:label ?deathplaceLabel .
    ?item rdfs:label         ?itemLabel ;
          schema:description ?itemDescription .
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
  }
}
# Every projected variable that is not wrapped in SAMPLE must be grouped.
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn
As you can see, we use the SAMPLE function and group by all the variables that aren't sampled.
You could also replace SAMPLE with GROUP_CONCAT if you wish to see all the possible values taken by a property.
E.g. ... (GROUP_CONCAT(?deathdate; SEPARATOR="; ") AS ?deathdates) ...
Note that GROUP_CONCAT takes strings as arguments.

Found the solution by slightly modifying Valerio Cocchi's answer.
# Returns one row per distinct combination of the GROUP BY variables; the
# remaining columns are reduced to a single value each with SAMPLE.
SELECT ?item ?itemLabel ?itemDescription
(SAMPLE(?birthplaceLabel) AS ?birthplaceLabel)
(SAMPLE(?birthdate) AS ?birthdate)
(SAMPLE(?deathplaceLabel) AS ?deathplaceLabel)
(SAMPLE(?deathdate) AS ?deathdate)
(SAMPLE(?imageLabel) AS ?imageLabel)
?article ?articleEn
WHERE {
# Required patterns: every result item must match both triples.
?item wdt:P31 wd:Q5.
?item wdt:P119 wd:Q5024152.
# Each OPTIONAL below may bind zero, one or several values per item.
OPTIONAL {
?item wdt:P18 ?image.
}
OPTIONAL {
?item wdt:P19 ?birthplace.
}
OPTIONAL {
?item wdt:P569 ?birthdate.
}
OPTIONAL {
?item wdt:P20 ?deathplace.
}
OPTIONAL {
?item wdt:P570 ?deathdate.
}
# Page about the item that is part of sv.wikipedia.org, if any.
OPTIONAL {
?article schema:about ?item.
?article schema:isPartOf <https://sv.wikipedia.org/>.
}
# Page about the item that is part of en.wikipedia.org, if any.
OPTIONAL {
?articleEn schema:about ?item.
?articleEn schema:isPartOf <https://en.wikipedia.org/>.
}
# Labels and the description are requested explicitly so that the label
# variables are available to SAMPLE in the SELECT clause.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]".
?birthplace rdfs:label ?birthplaceLabel .
?deathplace rdfs:label ?deathplaceLabel .
?image rdfs:label ?imageLabel .
?item rdfs:label ?itemLabel .
?item schema:description ?itemDescription .
}
}
# Group by every projected variable that is not aggregated.
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn

Related

Retrieve Wikidata Item Wiki url with SPARQL

I'm using this approach to retrieve the Wikipedia url for a Wikidata item for multiple languages, using Sparql:
# Items that are instances of any of the listed classes, with their English
# and Spanish labels and, when available, the IRI of the page about the item
# on the English and Spanish Wikipedias.
# Both label patterns are mandatory: an item lacking either label is dropped.
SELECT ?item ?en ?url_en ?es ?url_es WHERE {
  { ?item wdt:P31 wd:Q6256. }
  UNION
  { ?item wdt:P31 wd:Q1250464. }
  UNION
  { ?item wdt:P31 wd:Q3624078. }
  UNION
  { ?item wdt:P31 wd:Q619610. }
  UNION
  { ?item wdt:P31 wd:Q179164. }
  UNION
  { ?item wdt:P31 wd:Q7270. }
  ?item rdfs:label ?en filter (lang(?en) = "en").
  ?item rdfs:label ?es filter (lang(?es) = "es").
  OPTIONAL {
    ?url_en schema:about ?item .
    ?url_en schema:inLanguage "en" .
    # Keep only pages whose IRI starts with the 25-character prefix below.
    FILTER (SUBSTR(str(?url_en), 1, 25) = "https://en.wikipedia.org/")
  }
  OPTIONAL {
    ?url_es schema:about ?item .
    ?url_es schema:inLanguage "es" .
    # The filter must test ?url_es, the variable bound in this block.
    # Testing ?url_en here can never accept a Spanish page, which would
    # leave ?url_es unbound on every row.
    FILTER (SUBSTR(str(?url_es), 1, 25) = "https://es.wikipedia.org/")
  }
} LIMIT 1000
I get a limited number of results despite the LIMIT value set, while when retrieving labels only:
# Items that are instances of any of the listed classes, with their English,
# Spanish and Italian labels. All three labels are required.
SELECT ?item ?en ?es ?it
WHERE {
  { ?item wdt:P31 wd:Q6256 . }
  UNION { ?item wdt:P31 wd:Q1250464 . }
  UNION { ?item wdt:P31 wd:Q3624078 . }
  UNION { ?item wdt:P31 wd:Q619610 . }
  UNION { ?item wdt:P31 wd:Q179164 . }
  UNION { ?item wdt:P31 wd:Q7270 . }

  ?item rdfs:label ?en .
  FILTER (LANG(?en) = "en")

  ?item rdfs:label ?es .
  FILTER (LANG(?es) = "es")

  ?item rdfs:label ?it .
  FILTER (LANG(?it) = "it")
}
LIMIT 1000
I get more results, so it seems that the
# Optionally binds ?url_en to a page about ?item whose language is "en" and
# whose IRI starts with "https://en.wikipedia.org/".
OPTIONAL {
?url_en schema:about ?item .
?url_en schema:inLanguage "en" .
FILTER (SUBSTR(str(?url_en), 1, 25) = "https://en.wikipedia.org/")
}
is limiting the results found in some way.

I used MAX here to pick the maximum net worth of each billionaire, but it seems not to work in cases like Bill Gates, Elon Musk, etc.

# Items whose wdt:P2218 value exceeds 1,000,000,000, reported in billions.
# For each group the largest value is kept (MAX) and one image is picked
# (SAMPLE). DISTINCT is omitted because grouping by every non-aggregated
# output variable already makes each row unique.
SELECT ?item ?itemLabel ?itemDescription
       (MAX(?billion) AS ?billions)
       ?locationLabel
       (SAMPLE(?image) AS ?picture)
       ?page_titleEN
WHERE
{
  ?item wdt:P2218 ?worth .
  OPTIONAL { ?item wdt:P19 ?location . }
  FILTER(?worth > 1000000000) .
  # Convert the raw amount to billions before aggregating.
  BIND(?worth / 1000000000 AS ?billion) .
  OPTIONAL {
    ?article schema:about ?item ;
             schema:isPartOf <https://en.wikipedia.org/> ;
             schema:name ?page_titleEN .
    ?item rdfs:label ?LabelEN .
    FILTER((LANG(?LabelEN)) = "en")
  }
  OPTIONAL { ?item wdt:P18 ?image . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
# Group only by the plain (non-aggregated) output variables. ?billions and
# ?picture are produced by the aggregates in the SELECT clause and do not
# exist when grouping happens, so they must not be listed here.
GROUP BY ?item ?itemLabel ?itemDescription ?locationLabel ?page_titleEN
ORDER BY DESC(?billions)

How can I get the list of Wonders of the World using SPARQL from wikidata?

# Instances of wd:Q209287 with, when present, their wdt:P17 and wdt:P18 values.
SELECT
  ?item ?itemLabel ?itemDescription
  ?countryLabel
  ?picture
WHERE {
  ?item wdt:P31 wd:Q209287 .

  OPTIONAL { ?item wdt:P17 ?country . }
  OPTIONAL { ?item wdt:P18 ?picture . }

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}

Optimize wikidata query to reduce time. SPARQL

I am getting timeout on below query.
Can anyone please help me to optimize it?
# Instances of wd:Q5 that have an image and a birth date in 1981, with
# optional occupation, nationality, sex/gender, date of death and alternative
# labels.
SELECT ?human ?humanLabel ?alternative ?humanDescription ?birth_date ?image
       ?occupationLabel ?nationalityLabel ?sex_or_genderLabel ?date_of_death
WHERE {
  ?human wdt:P31 wd:Q5 .
  ?human wdt:P18 ?image .
  ?human p:P569/psv:P569 ?birth_date_node .
  ?birth_date_node wikibase:timeValue ?birth_date .
  # Half-open range [1981-01-01, 1982-01-01): selects the same year as
  # year(?birth_date) >= 1981 && year(?birth_date) < 1982, but compares the
  # stored value directly instead of calling a function on every candidate.
  FILTER (?birth_date >= "1981-01-01T00:00:00Z"^^xsd:dateTime &&
          ?birth_date <  "1982-01-01T00:00:00Z"^^xsd:dateTime)
  OPTIONAL { ?human wdt:P106 ?occupation . }
  OPTIONAL { ?human wdt:P27 ?nationality . }
  OPTIONAL { ?human wdt:P21 ?sex_or_gender . }
  OPTIONAL { ?human wdt:P570 ?date_of_death . }
  OPTIONAL { ?human skos:altLabel ?alternative . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
# No ORDER BY: the former "ORDER BY ?name" referred to a variable that is
# never bound in this query, so it imposed no order while still requiring the
# complete result set to be sorted.

Get properties by QID?

I can get item and its properties by label:
# Looks up instances of wd:Q5 connected to the literal "Einstein" and returns
# one sampled ?DR (wdt:P569 value) and one sampled English Wikipedia page per
# group.
SELECT distinct ?item ?itemLabel ?itemDescription
(SAMPLE(?DR) as ?DR) (SAMPLE(?article)as ?article)
WHERE {
?item wdt:P31 wd:Q5.
# NOTE(review): ?label is a variable, not a property, so this matches ANY
# predicate whose value is the plain literal "Einstein". The trailing "#en"
# starts a comment; it is not a language tag on the literal.
?item ?label "Einstein"#en
OPTIONAL{?item wdt:P569 ?DR .}
# The page patterns are mandatory: items without a matching page are dropped.
?article schema:about ?item .
?article schema:inLanguage "en" .
?article schema:isPartOf <https://en.wikipedia.org/>.
# NOTE(review): ?RIP and ?image are bound below but never projected.
OPTIONAL{?item wdt:P570 ?RIP .}
OPTIONAL{?item wdt:P18 ?image .}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?item ?itemLabel ?itemDescription
See on Wikidata Query Services.
How can I do the same using the QID instead of the label?
Using the URI instead of the variable ?item will get the information based on the entity Albert Einstein:
PREFIX schema:   <http://schema.org/>
PREFIX bd:       <http://www.bigdata.com/rdf#>
PREFIX wdt:      <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>

# Same lookup as the label-based query, but ?item is pinned to a single
# entity IRI by the FILTER instead of being found through a literal.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  (SAMPLE(?DR)      AS ?DRSample)
  (SAMPLE(?article) AS ?articleSample)
WHERE {
  ?article schema:about ?item .
  ?article schema:inLanguage "en" .
  ?article schema:isPartOf <https://en.wikipedia.org/> .

  # Restrict ?item to the entity with QID Q937.
  FILTER ( ?item = <http://www.wikidata.org/entity/Q937> )

  OPTIONAL { ?item wdt:P569 ?DR . }
  OPTIONAL { ?item wdt:P570 ?RIP . }
  OPTIONAL { ?item wdt:P18 ?image . }

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
GROUP BY ?item ?itemLabel ?itemDescription
You can utilize the already known QID by using BIND:
BIND(wd:Q937 AS ?item).
...
If you already have the QID of the entity you are looking for and simply want its properties and labels, you're better off using the wbgetentities module of the Wikidata API.
In the case of A. Einstein (Q937), that would give the following API call:
https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q937&format=json