This seems like a relatively simple query: fetch the date of birth of Barack Obama. Why does it take so long that it times out at https://query.wikidata.org/ ? There are only about 3 entities that have the label "Barack Obama" and about 2 properties labelled "date of birth".
# Look up ?val for every entity labelled "Barack Obama" through every
# property labelled "date of birth" (English label or alias).
# NOTE: this is the single-pattern form reported to time out; only the
# syntax and projection errors are corrected here, not the evaluation cost.
SELECT DISTINCT ?ent ?wdtProperty1 ?val ?valLabel WHERE {
  # A language tag is written "..."@en. With "#" the rest of the line is a
  # comment, which also hides the terminating "." / ";".
  ?ent rdfs:label|skos:altLabel "Barack Obama"@en .
  ?wdProperty1 rdfs:label|skos:altLabel "date of birth"@en ;
               wikibase:directClaim ?wdtProperty1 .
  # The projected predicate must be the variable bound above (?wdtProperty1).
  ?ent ?wdtProperty1 ?val .
  # ?valLabel is not bound by any pattern here, so that column stays empty.
} LIMIT 10
The correct answer to this is
# Same lookup, split into two named subqueries ("WITH { ... } AS %name" plus
# "INCLUDE %name", a Blazegraph extension) so that each label search is a
# separate sub-SELECT whose result is then joined with the final triple.
SELECT DISTINCT ?ent ?wdtProperty1 ?val
WITH
{
  # Direct-claim predicate of every property labelled "date of birth".
  SELECT ?wdtProperty1
  WHERE
  {
    # "@en" is the language tag; "#en" would be parsed as a comment.
    [] rdfs:label|skos:altLabel "date of birth"@en ;
       wikibase:directClaim ?wdtProperty1 .
  }
} AS %get_predicate
WITH
{
  # Every entity whose English label or alias is "Barack Obama".
  SELECT ?ent
  WHERE
  {
    ?ent rdfs:label|skos:altLabel "Barack Obama"@en .
  }
} AS %get_subject
WHERE
{
  INCLUDE %get_predicate
  INCLUDE %get_subject
  ?ent ?wdtProperty1 ?val .
}
Thanks to https://www.wikidata.org/wiki/Wikidata:Request_a_query#Slow_query_on_label
Related
I am trying to query Wikidata to retrieve artworks, their material characteristics, and the artistic movement they are associated with. Each resulting record can have a number of movements/materials associated with it (an artwork can be classified as belonging to two movements at the same time, or as made of several materials).
I would like to retrieve, for each artwork, only one of the movements/materials associated with it, so as not to have duplicate lines in the results to manually remove afterwards.
How can I achieve such result using only SPARQL?
Here's my current query:
# Artworks (instances of wd:Q3305213) with a creation time after 1870 and an
# image, plus creator, movement and material where available.
# Every combination of optional values produces its own row.
SELECT DISTINCT
  ?artwork ?image ?time ?creatorLabel ?movementLabel ?materialLabel
WHERE {
  # Mandatory properties.
  ?artwork wdt:P31 wd:Q3305213 .
  ?artwork wdt:P571 ?time .
  ?artwork wdt:P18 ?image .
  FILTER(?time > "1870-01-01T00:00:00"^^xsd:dateTime)

  # Optional properties; a missing one leaves its variable unbound.
  OPTIONAL { ?artwork wdt:P170 ?creator . }
  OPTIONAL { ?artwork wdt:P135 ?movement . }
  OPTIONAL { ?artwork wdt:P186 ?material . }

  # Labels in the user's language, falling back to English.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
LIMIT 100
I tried to use COUNT and HAVING (HAVING (COUNT(?material) < 2)) to limit the results, but with that approach I get a timeout. Is there any other way?
You can use SAMPLE, which picks an arbitrary value:
# One row per (?artwork, ?image, ?time, ?creatorLabel) group; SAMPLE keeps one
# arbitrary movement label and one arbitrary material label from the group.
SELECT DISTINCT
  ?artwork
  ?image
  ?time
  ?creatorLabel
  (SAMPLE(?movementLabel) AS ?movementLabel_sample)
  (SAMPLE(?materialLabel) AS ?materialLabel_sample)
WHERE {
  {
    # Inner query: resolve the labels first so the outer query can aggregate them.
    SELECT ?artwork ?image ?time ?creatorLabel ?movementLabel ?materialLabel
    WHERE {
      VALUES ?artwork { wd:Q728373 wd:Q720602 } # remove this line to query all artworks

      ?artwork wdt:P31 wd:Q3305213 .
      ?artwork wdt:P571 ?time .
      ?artwork wdt:P18 ?image .
      FILTER(?time > "1870-01-01T00:00:00"^^xsd:dateTime)

      OPTIONAL { ?artwork wdt:P170 ?creator . }
      OPTIONAL { ?artwork wdt:P135 ?movement . }
      OPTIONAL { ?artwork wdt:P186 ?material . }

      # Labels in the user's language, falling back to English.
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
      }
    }
  }
}
GROUP BY ?artwork ?image ?time ?creatorLabel
LIMIT 100
But if your only concern is
not to have duplicate lines in the results to manually remove afterwards
you could use GROUP_CONCAT to get one line per artwork, with multiple values per cell:
# One row per (?artwork, ?image, ?time, ?creatorLabel) group; all distinct
# movement labels and all distinct material labels of the group are joined
# into a single comma-separated cell each.
SELECT DISTINCT ?artwork ?image ?time ?creatorLabel
  (GROUP_CONCAT(DISTINCT ?movementLabel; separator=", ") AS ?movementLabels)
  (GROUP_CONCAT(DISTINCT ?materialLabel; separator=", ") AS ?materialLabels)
WHERE {
  {
    # Inner query: resolve the labels first so the outer query can aggregate them.
    SELECT ?artwork ?image ?time ?creatorLabel ?movementLabel ?materialLabel
    WHERE {
      VALUES ?artwork { wd:Q728373 wd:Q720602 } # remove this line to query all artworks
      ?artwork wdt:P31 wd:Q3305213 ;
               wdt:P571 ?time ;
               wdt:P18 ?image .
      OPTIONAL { ?artwork wdt:P170 ?creator . }
      OPTIONAL { ?artwork wdt:P135 ?movement . }
      OPTIONAL { ?artwork wdt:P186 ?material . }
      FILTER(?time > "1870-01-01T00:00:00"^^xsd:dateTime)
      # Language list matches the question's query and the SAMPLE variant:
      # the user's language first, English as fallback (was hard-coded "en",
      # which contradicted the comment on this line).
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
  }
}
GROUP BY ?artwork ?image ?time ?creatorLabel
LIMIT 100
(If there can be multiple images, times, or creators, you could do the same for these properties, too.)
I'm trying to get info (e.g. for Belgium) about aerodromes or stations, including the labels of all "instance of" values for each item (e.g. Brussels South Charleroi Airport can be an airport, a business, etc.).
I've written two separate queries but am struggling to join them:
# Items whose country (P17) is wd:Q31 or a subclass of it and whose
# "instance of" (P31) is wd:Q62447 or wd:Q12819564 or a subclass of either
# (presumably aerodrome / station, per the question text), with the item's
# label in nl, fr, en, wa and vls where one exists.
SELECT DISTINCT
  ?poi
  ?itemLabel_nl
  ?itemLabel_fr
  ?itemLabel_en
  ?itemLabel_wa
  ?itemLabel_vls
WHERE {
  # Country constraint, via the full statement node.
  ?poi p:P17 ?countryStmt .
  ?countryStmt (ps:P17/(wdt:P279*)) wd:Q31 .

  # Class constraint: either of the two target classes.
  {
    ?poi p:P31 ?classStmtA .
    ?classStmtA (ps:P31/(wdt:P279*)) wd:Q62447 .
  }
  UNION
  {
    ?poi p:P31 ?classStmtB .
    ?classStmtB (ps:P31/(wdt:P279*)) wd:Q12819564 .
  }

  # One optional label per language; a missing label leaves the column empty.
  OPTIONAL { ?poi rdfs:label ?itemLabel_nl  . FILTER(lang(?itemLabel_nl)  = "nl")  }
  OPTIONAL { ?poi rdfs:label ?itemLabel_fr  . FILTER(lang(?itemLabel_fr)  = "fr")  }
  OPTIONAL { ?poi rdfs:label ?itemLabel_en  . FILTER(lang(?itemLabel_en)  = "en")  }
  OPTIONAL { ?poi rdfs:label ?itemLabel_wa  . FILTER(lang(?itemLabel_wa)  = "wa")  }
  OPTIONAL { ?poi rdfs:label ?itemLabel_vls . FILTER(lang(?itemLabel_vls) = "vls") }
}
and the second part, to get the "instance of" labels for one specific item (e.g. Brussels South Charleroi Airport):
# All "instance of" (P31) values of wd:Q1431012 with their English labels,
# sorted by label.
SELECT ?instance ?instanceLabel
WHERE {
  # Go through the statement node, then read its P31 value.
  wd:Q1431012 p:P31 ?typeStatement .
  ?typeStatement ps:P31 ?instance .

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
ORDER BY ?instanceLabel
Any thoughts here? :)
I'm trying to get some data from Wikidata. I've got a simple query which fetches information about universities:
# Items that are instances of wd:Q38723 (or of any subclass) and that have
# both a country (P17) and a website (P856); labels in Russian, then English.
SELECT ?item ?itemLabel ?site
WHERE {
  ?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723 .
  ?item wdt:P17 ?country .
  ?item wdt:P856 ?site .

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "ru,en" .
  }
}
And another query, which gets list of members of the CIS:
# States (instances of wd:Q3624078 or a subclass) with a best-rank
# "member of" (P463) statement for wd:Q7779, excluding memberships that have
# an end-time qualifier and states that themselves have a P576/P582 value.
SELECT DISTINCT ?state
WHERE {
  ?state (wdt:P31/(wdt:P279*)) wd:Q3624078 .
  ?state p:P463 ?memberOfStatement .

  ?memberOfStatement a wikibase:BestRank .
  ?memberOfStatement ps:P463 wd:Q7779 .

  # Drop memberships that carry an end time (P582 qualifier).
  MINUS { ?memberOfStatement pq:P582 ?endTime . }
  # Drop states that have a P576 or P582 value of their own.
  MINUS { ?state (wdt:P576|wdt:P582) ?end . }
}
Both work fine. But now I want to combine them to get list of universities which are located in the CIS. I try to do it like shown in the answer to this question:
# Universities (instances of wd:Q38723 or a subclass) that have a website and
# whose country (P17) is a current member of wd:Q7779.
#
# The member-state check is a sub-SELECT that projects ?country, so it joins
# with the university pattern on that variable. Wrapping it as
#   FILTER(EXISTS { SELECT DISTINCT ?state WHERE { ... FILTER(?country = ?state) } })
# returns no rows: ?country is not visible inside that inner SELECT, so the
# comparison is made against an unbound variable and never evaluates to true.
SELECT ?item ?itemLabel ?site WHERE {
  ?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723 ;
        wdt:P17 ?country ;
        wdt:P856 ?site .
  {
    # DISTINCT keeps one row per member state, so the join cannot multiply
    # the university rows.
    SELECT DISTINCT ?country WHERE {
      ?country (wdt:P31/(wdt:P279*)) wd:Q3624078 ;
               p:P463 ?memberOfStatement .
      ?memberOfStatement rdf:type wikibase:BestRank ;
                         ps:P463 wd:Q7779 .
      # Exclude memberships that have ended and states that no longer exist.
      MINUS { ?memberOfStatement pq:P582 ?endTime . }
      MINUS { ?country (wdt:P576|wdt:P582) ?end . }
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en". }
}
But, for some reason, I get zero results. What am I doing wrong here?
On Wikidata,
I am trying to filter events of certain time periods, but I get errors, or count of 0, or I get the same count no matter how I change the time period.
What am I doing wrong?
# Count the events (instances of wd:Q171558 or a subclass) dated between
# 2000-01-01 and 2018-12-31 inclusive.
#
# Fixes over the earlier form:
#  * ?date is now bound by a triple pattern; before, no pattern bound it, so
#    the FILTER could never select anything by date.
#  * The FILTER is a required part of the query; inside an OPTIONAL it only
#    affected the optional label match, so the count never changed.
#  * The bounds are xsd:dateTime, the datatype the example query below checks
#    for, and the range is half-open so all of 2018-12-31 is included.
SELECT (COUNT(DISTINCT ?event) AS ?cnt)
WHERE
{
  ?event wdt:P31/wdt:P279* wd:Q171558 .
  # Date properties taken from the query-page example (P585 or P580);
  # NOTE(review): adjust if a different date property is intended.
  ?event wdt:P585|wdt:P580 ?date .
  FILTER ( ?date >= "2000-01-01T00:00:00Z"^^xsd:dateTime
        && ?date <  "2019-01-01T00:00:00Z"^^xsd:dateTime )
}
I also see an example from the query page, but how do I count? What should I do to the first line of the query to convert it into Count without causing an error? :
# Events (instances of wd:Q1190554 or a subclass) whose date lies less than 31
# units before now, with the English label where one exists.
# NOTE(review): the unit of NOW() - ?date is engine-specific (presumably days
# on the Wikidata endpoint) - confirm.
SELECT ?event ?eventLabel ?date
WHERE {
  ?event wdt:P31/wdt:P279* wd:Q1190554 .

  # Take the date from P585, otherwise from P580.
  OPTIONAL { ?event wdt:P585 ?date . }
  OPTIONAL { ?event wdt:P580 ?date . }

  # Keep only events that ended up with a proper xsd:dateTime value.
  FILTER(BOUND(?date) && DATATYPE(?date) = xsd:dateTime)

  # Keep only dates in the window [0, 31) before the current time.
  BIND(NOW() - ?date AS ?distance)
  FILTER(?distance >= 0 && ?distance < 31)

  OPTIONAL {
    ?event rdfs:label ?eventLabel .
    FILTER(LANG(?eventLabel) = "en")
  }
}
I want to find the number of times that each subject returned by this query is the object of another triple.
I can use the following example to select everything I want without the count that I want as well.
# Up to 50 subjects that are instances of one of five SBOL2 classes, each with
# every rdf type it has and, where present, its displayId, version, title and
# description.
SELECT DISTINCT ?subject ?displayId ?version ?name ?description ?type
WHERE {
  # Class membership: any one of the five alternatives qualifies.
  { ?subject a sbol2:ComponentDefinition } UNION
  { ?subject a sbol2:ModuleDefinition } UNION
  { ?subject a sbol2:Collection } UNION
  { ?subject a sbol2:Sequence } UNION
  { ?subject a sbol2:Model } .

  # ?type ranges over all types of the subject, not only the five above.
  ?subject a ?type .

  # Descriptive properties; each one is optional.
  OPTIONAL { ?subject sbol2:displayId ?displayId . }
  OPTIONAL { ?subject sbol2:version ?version . }
  OPTIONAL { ?subject dcterms:title ?name . }
  OPTIONAL { ?subject dcterms:description ?description . }
}
LIMIT 50
I can add in the following criteria to limit my query to things that are only related to an example object, in this case, the object with URI <http://localhost:7777/public/test/U49845/1>.
# Pattern fragment: restrict ?subject to things related to the example object,
# either indirectly (through a ?use whose sbol2:definition is the object, linked
# by module / component / functionalComponent) or directly (as model or sequence).
{ ?use sbol2:definition <http://localhost:7777/public/test/U49845/1> .
{ ?subject sbol2:module ?use } UNION
{ ?subject sbol2:component ?use } UNION
{ ?subject sbol2:functionalComponent ?use }
} UNION
{ ?subject sbol2:model <http://localhost:7777/public/test/U49845/1> } UNION
{ ?subject sbol2:sequence <http://localhost:7777/public/test/U49845/1> } .
Is there a way to get the number of items that the second query would return for every ?subject in the first query?
General Cleanup
Alternative objects or subjects
First, note that this union block
# Five alternatives that differ only in the class ?subject must be an instance of.
{ ?subject a sbol2:ComponentDefinition } UNION
{ ?subject a sbol2:ModuleDefinition } UNION
{ ?subject a sbol2:Collection } UNION
{ ?subject a sbol2:Sequence } UNION
{ ?subject a sbol2:Model } .
can be much shorter using values:
# The five allowed classes as inline data. A dedicated variable (?stype) is
# used so that the query's separate "?subject a ?type" pattern can still
# return every type of the subject, not just these five.
values ?stype { sbol2:ComponentDefinition sbol2:ModuleDefinition
                sbol2:Collection sbol2:Sequence sbol2:Model }
?subject a ?stype
Alternative properties
Similarly, if you've got a bunch of alternative properties, just use a property path. This union:
# Three alternatives that differ only in the predicate linking ?subject to ?use.
{ ?subject sbol2:module ?use } UNION
{ ?subject sbol2:component ?use } UNION
{ ?subject sbol2:functionalComponent ?use }
is equivalent to
# Alternative property path: any one of the three predicates may link the pair.
?subject (sbol2:module | sbol2:component | sbol2:functionalComponent) ?use
The specific issue
So, turning the query you came up with into legal SPARQL by adding the required group by, you end up with:
# For each subject of one of five SBOL2 classes that has at least one user:
# its descriptive properties plus the number of distinct users, most-used first.
SELECT DISTINCT
  ?subject
  ?displayId
  ?version
  ?name
  ?description
  ?type
  (COUNT(DISTINCT ?user) AS ?uses)
WHERE {
  # Subjects: instances of one of the five classes.
  VALUES ?stype {
    sbol2:ComponentDefinition
    sbol2:ModuleDefinition
    sbol2:Collection
    sbol2:Sequence
    sbol2:Model
  }
  ?subject a ?stype .

  # Users: reach the subject either through module / component /
  # functionalComponent followed by sbol2:definition, or directly through
  # sbol2:model or sbol2:sequence.
  ?user ( (sbol2:module | sbol2:component | sbol2:functionalComponent) / sbol2:definition
        | sbol2:model
        | sbol2:sequence ) ?subject .

  # All types of the subject, plus its optional descriptive properties.
  ?subject a ?type .
  OPTIONAL { ?subject sbol2:displayId ?displayId . }
  OPTIONAL { ?subject sbol2:version ?version . }
  OPTIONAL { ?subject dcterms:title ?name . }
  OPTIONAL { ?subject dcterms:description ?description . }
}
GROUP BY ?subject ?displayId ?version ?name ?description ?type
ORDER BY DESC(?uses)
All right, figured this out as I asked the question. For anyone curious, this was how I modified the query:
# Top 50 subjects (of five SBOL2 classes) by number of distinct users.
# A subject is "used" when a ?user points at it directly via sbol2:model or
# sbol2:sequence, or indirectly via a ?use whose sbol2:definition is the subject.
SELECT DISTINCT
  ?subject
  ?displayId
  ?version
  ?name
  ?description
  ?type
  (COUNT(DISTINCT ?user) as ?uses)
WHERE {
  { ?use sbol2:definition ?subject .
    { ?user sbol2:module ?use } UNION
    { ?user sbol2:component ?use } UNION
    { ?user sbol2:functionalComponent ?use }
  } UNION
  { ?user sbol2:model ?subject } UNION
  { ?user sbol2:sequence ?subject } .
  { ?subject a sbol2:ComponentDefinition } UNION
  { ?subject a sbol2:ModuleDefinition } UNION
  { ?subject a sbol2:Collection } UNION
  { ?subject a sbol2:Sequence } UNION
  { ?subject a sbol2:Model } .
  ?subject a ?type .
  OPTIONAL { ?subject sbol2:displayId ?displayId . }
  OPTIONAL { ?subject sbol2:version ?version . }
  OPTIONAL { ?subject dcterms:title ?name . }
  OPTIONAL { ?subject dcterms:description ?description . }
}
# Required by SPARQL 1.1: every plain variable projected next to an aggregate
# must be a grouping key. Without it the query is not legal SPARQL.
GROUP BY ?subject ?displayId ?version ?name ?description ?type
ORDER BY DESC(?uses)
LIMIT 50