Retrieve Wikidata Item Wiki url with SPARQL - sparql

I'm using this approach to retrieve the Wikipedia url for a Wikidata item for multiple languages, using Sparql:
SELECT ?item ?en ?url_en ?es WHERE {
{ ?item wdt:P31 wd:Q6256. }
UNION
{ ?item wdt:P31 wd:Q1250464. }
UNION
{ ?item wdt:P31 wd:Q3624078. }
UNION
{ ?item wdt:P31 wd:Q619610. }
UNION
{ ?item wdt:P31 wd:Q179164. }
UNION
{ ?item wdt:P31 wd:Q7270. }
?item rdfs:label ?en filter (lang(?en) = "en").
?item rdfs:label ?es filter (lang(?es) = "es").
OPTIONAL {
?url_en schema:about ?item .
?url_en schema:inLanguage "en" .
FILTER (SUBSTR(str(?url_en), 1, 25) = "https://en.wikipedia.org/")
}
OPTIONAL {
?url_es schema:about ?item .
?url_es schema:inLanguage "es" .
FILTER (SUBSTR(str(?url_en), 1, 25) = "https://es.wikipedia.org/")
}
} LIMIT 1000
I get a limited number of results despite the LIMIT value set, while when retrieving labels only:
SELECT ?item ?en ?es ?it WHERE {
{ ?item wdt:P31 wd:Q6256. }
UNION
{ ?item wdt:P31 wd:Q1250464. }
UNION
{ ?item wdt:P31 wd:Q3624078. }
UNION
{ ?item wdt:P31 wd:Q619610. }
UNION
{ ?item wdt:P31 wd:Q179164. }
UNION
{ ?item wdt:P31 wd:Q7270. }
?item rdfs:label ?en filter (lang(?en) = "en").
?item rdfs:label ?es filter (lang(?es) = "es").
?item rdfs:label ?it filter (lang(?it) = "it").
} LIMIT 1000
I get more results like the
OPTIONAL {
?url_en schema:about ?item .
?url_en schema:inLanguage "en" .
FILTER (SUBSTR(str(?url_en), 1, 25) = "https://en.wikipedia.org/")
}
is limiting the results found in some way.

Related

How do I create a Wikidata SPARQL query to return the top 10 US companies and their directors based on revenue?

Here is the query I am working with:
SELECT DISTINCT ?item ?itemLabel ?rev ?date (group_concat(distinct ?directorLabel ; separator = ", ") as ?directors)
WHERE {
?item wdt:P31/wdt:P279* wd:Q4830453 ;
wdt:P17 wd:Q30 .
?item p:P2139 ?revSt .
?revSt pq:P585 ?date.
FILTER ( YEAR(?date) = 2020 ).
?revSt ps:P2139 ?rev .
?item p:P3320 ?relationship.
?relationship ps:P3320 ?director.
#Filter Director by no end date
FILTER NOT EXISTS {?relationship pq:P582 ?end.}
#Group directors
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
?director rdfs:label ?directorLabel .
?item rdfs:label ?itemLabel .
}
}
GROUP BY ?item ?itemLabel
ORDER BY DESC(?rev)
LIMIT 10
This query is a bad aggregate, if I add more variables to the GROUP BY clause it times out. Ultimately I would like to aggregate the wdt ids for all the directors too.
SELECT DISTINCT
?item ?itemLabel ?rev ?date
(group_concat(distinct ?directorLabel ; separator = ", ") as ?directors)
WHERE {
?item wdt:P31/wdt:P279* wd:Q4830453 ;
wdt:P17 wd:Q30 .
?item p:P2139 ?revSt .
?revSt pq:P585 ?date.
hint:Prior hint:rangeSafe "true" .
FILTER ( YEAR(?date) = 2020 ).
?revSt ps:P2139 ?rev .
?item p:P3320 ?relationship.
?relationship ps:P3320 ?director.
FILTER NOT EXISTS {?relationship pq:P582 ?end.}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
?director rdfs:label ?directorLabel .
?item rdfs:label ?itemLabel .
}
}
GROUP BY ?item ?itemLabel ?rev ?date
ORDER BY DESC(?rev)
LIMIT 10

Wikidata/SPARQL, get distinct items (object id) in result

When I run the following query I get multiple rows of "Paul Gauguin" since there are multiple informations about his place/time of death, the same could of course happen on all other parameters as well.
SELECT DISTINCT ?item ?itemLabel ?itemDescription ?birthplaceLabel ?birthdate ?deathplaceLabel ?deathdate ?imageLabel ?article ?articleEn
{
?item wdt:P31 wd:Q5.
?item wdt:P119 wd:Q5024152.
OPTIONAL {
?item wdt:P18 ?image.
}
OPTIONAL {
?item wdt:P19 ?birthplace.
}
OPTIONAL {
?item wdt:P569 ?birthdate.
}
OPTIONAL {
?item wdt:P20 ?deathplace.
}
OPTIONAL {
?item wdt:P570 ?deathdate.
}
OPTIONAL {
?article schema:about ?item.
?article schema:isPartOf <https://sv.wikipedia.org/>.
}
OPTIONAL {
?articleEn schema:about ?item.
?articleEn schema:isPartOf <https://en.wikipedia.org/>.
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]". }
}
Is there a way of only recive one of the same object id, and don't care about if there are other "versions" of the object.
I have tried a bit with nested queries but I can't get it to work. Are there som other ways?
This query will work:
SELECT DISTINCT ?item ?itemLabel ?itemDescription
(SAMPLE(?birthplaceLabel) AS ?birthplaceLabel)
(SAMPLE(?birthdate) AS ?birthdate)
(SAMPLE(?deathplaceLabel) AS ?deathplaceLabel)
(SAMPLE(?deathdate) AS ?deathdate)
(SAMPLE(STR(?image)) AS ?image)
?article ?articleEn
WHERE {
?item wdt:P31 wd:Q5.
?item wdt:P119 wd:Q5024152.
OPTIONAL {
?item wdt:P18 ?image.
}
OPTIONAL {
?item wdt:P19 ?birthplace.
}
OPTIONAL {
?item wdt:P569 ?birthdate.
}
OPTIONAL {
?item wdt:P20 ?deathplace .
}
OPTIONAL {
?item wdt:P570 ?deathdate.
}
OPTIONAL {
?article schema:about ?item.
?article schema:isPartOf <https://sv.wikipedia.org/>.
}
OPTIONAL {
?articleEn schema:about ?item.
?articleEn schema:isPartOf <https://en.wikipedia.org/>.
}
SERVICE wikibase:label { ?birthplace rdfs:label ?birthplaceLabel .
?deathplace rdfs:label ?deathplaceLabel .
?item rdfs:label ?itemLabel ;
schema:description ?itemDescription .
bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]". }
}
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn
As you can see, we use the SAMPLE function and group by all the variables that aren't sampled.
You could also replace SAMPLE with GROUP_CONCAT if you wish to see all the possible values taken by a property.
E.g. ... (GROUP_CONCAT(?deathdate; SEPARATOR="; ") AS ?deathdates) ...
Note that GROUP_CONCAT takes strings as arguments.
Found the solution with a bit of modyfing of Valerio Cocchis answer.
SELECT ?item ?itemLabel ?itemDescription
(SAMPLE(?birthplaceLabel) AS ?birthplaceLabel)
(SAMPLE(?birthdate) AS ?birthdate)
(SAMPLE(?deathplaceLabel) AS ?deathplaceLabel)
(SAMPLE(?deathdate) AS ?deathdate)
(SAMPLE(?imageLabel) AS ?imageLabel)
?article ?articleEn
WHERE {
?item wdt:P31 wd:Q5.
?item wdt:P119 wd:Q5024152.
OPTIONAL {
?item wdt:P18 ?image.
}
OPTIONAL {
?item wdt:P19 ?birthplace.
}
OPTIONAL {
?item wdt:P569 ?birthdate.
}
OPTIONAL {
?item wdt:P20 ?deathplace.
}
OPTIONAL {
?item wdt:P570 ?deathdate.
}
OPTIONAL {
?article schema:about ?item.
?article schema:isPartOf <https://sv.wikipedia.org/>.
}
OPTIONAL {
?articleEn schema:about ?item.
?articleEn schema:isPartOf <https://en.wikipedia.org/>.
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]".
?birthplace rdfs:label ?birthplaceLabel .
?deathplace rdfs:label ?deathplaceLabel .
?image rdfs:label ?imageLabel .
?item rdfs:label ?itemLabel .
?item schema:description ?itemDescription .
}
}
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn

I used MAX here to pick the maximum of the net worth of the billionaires, but it seems to not work in cases like, Bill Gates, Elon Musk etc

SELECT DISTINCT ?item ?itemLabel ?itemDescription (MAX(?billion) as ?billions) ?locationLabel (SAMPLE(?image) AS ?picture) ?page_titleEN
WHERE
{
?item wdt:P2218 ?worth.
OPTIONAL {?item wdt:P19 ?location .}
FILTER(?worth>1000000000).
BIND(?worth/1000000000 AS ?billion).
OPTIONAL {
?article schema:about ?item;
schema:isPartOf <https://en.wikipedia.org/>;
schema:name ?page_titleEN.
?item rdfs:label ?LabelEN.
FILTER((LANG(?LabelEN)) = "en")
}
OPTIONAL { ?item wdt:P18 ?image. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?item ?itemLabel ?itemDescription ?billions ?locationLabel ?picture ?page_titleEN
ORDER BY DESC(?billions)

How can I get the list of Wonders of the World using SPARQL from wikidata?

SELECT ?item ?itemLabel ?itemDescription ?countryLabel ?picture {
?item wdt:P31 wd:Q209287
OPTIONAL { ?item wdt:P17 ?country }
OPTIONAL { ?item wdt:P18 ?picture }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}

Get properties by QID?

I can get item and its properties by label:
SELECT distinct ?item ?itemLabel ?itemDescription
(SAMPLE(?DR) as ?DR) (SAMPLE(?article)as ?article)
WHERE {
?item wdt:P31 wd:Q5.
?item ?label "Einstein"#en
OPTIONAL{?item wdt:P569 ?DR .}
?article schema:about ?item .
?article schema:inLanguage "en" .
?article schema:isPartOf <https://en.wikipedia.org/>.
OPTIONAL{?item wdt:P570 ?RIP .}
OPTIONAL{?item wdt:P18 ?image .}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?item ?itemLabel ?itemDescription
See on Wikidata Query Services.
How can I do the same using QID instead label?
Using the URI instead of the variable ?item will get the information based on the entity Albert Einstein:
PREFIX schema: <http://schema.org/>
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
SELECT DISTINCT ?item ?itemLabel ?itemDescription (SAMPLE(?DR) AS ?DRSample) (SAMPLE(?article) AS ?articleSample)
WHERE
{ ?article schema:about ?item ;
schema:inLanguage "en" ;
schema:isPartOf <https://en.wikipedia.org/>
FILTER ( ?item = <http://www.wikidata.org/entity/Q937> )
OPTIONAL
{ ?item wdt:P569 ?DR }
OPTIONAL
{ ?item wdt:P570 ?RIP }
OPTIONAL
{ ?item wdt:P18 ?image }
SERVICE wikibase:label
{ bd:serviceParam
wikibase:language "en"
}
}
GROUP BY ?item ?itemLabel ?itemDescription
You can utilize the already known QID by using BIND:
BIND(wd:Q937 AS ?item).
...
If you already have the QID of the entity you are looking for and simply look for its properties and labels, you're better off using the Wikidata API wbgetentities module
In A. Einstein (Q937) case, that would give the following API call:
https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q937&format=json