SPARQL limit result of only one variable - sparql

I am trying to query Wikidata to retrieve artworks, their materials, and the artistic movements they are associated with. Each resulting record can have a number of movements/materials associated with it (an artwork can be classified as belonging to two movements at the same time, or be made of different materials).
I would like to retrieve, for each artwork, only one of the movements/materials associated with it, so as not to have duplicate lines in the results to manually remove afterwards.
How can I achieve such result using only SPARQL?
Here's my current query:
# Artworks dated after 1870 that have an image, with optional creator,
# movement and material labels. An artwork with several movements or
# materials yields one row per combination.
SELECT DISTINCT
  ?artwork ?image ?time ?creatorLabel ?movementLabel ?materialLabel
WHERE {
  ?artwork wdt:P31  wd:Q3305213 ;
           wdt:P571 ?time ;
           wdt:P18  ?image .

  # Separate OPTIONALs: a missing value leaves only that one variable unbound.
  OPTIONAL { ?artwork wdt:P170 ?creator }
  OPTIONAL { ?artwork wdt:P135 ?movement . }
  OPTIONAL { ?artwork wdt:P186 ?material . }

  FILTER(?time > "1870-01-01T00:00:00"^^xsd:dateTime)

  # Helps get the label in your language, if not, then en language
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 100
I tried to use COUNT and HAVING (HAVING (COUNT(?material) < 2)) to limit the result, but with such method I get a timeout. Is there any other way?

You can use SAMPLE, which picks an arbitrary value:
# One row per artwork/image/time/creator: SAMPLE keeps an arbitrary movement
# label and an arbitrary material label from each group.
SELECT DISTINCT
  ?artwork ?image ?time ?creatorLabel
  (SAMPLE(?movementLabel) AS ?movementLabel_sample)
  (SAMPLE(?materialLabel) AS ?materialLabel_sample)
WHERE {
  {
    # Inner query produces the label variables that the outer query aggregates.
    SELECT ?artwork ?image ?time ?creatorLabel ?movementLabel ?materialLabel
    WHERE {
      VALUES ?artwork { wd:Q728373 wd:Q720602 } # remove this line to query all artworks
      ?artwork wdt:P31  wd:Q3305213 ;
               wdt:P571 ?time ;
               wdt:P18  ?image .
      OPTIONAL { ?artwork wdt:P170 ?creator . }
      OPTIONAL { ?artwork wdt:P135 ?movement . }
      OPTIONAL { ?artwork wdt:P186 ?material . }
      FILTER(?time > "1870-01-01T00:00:00"^^xsd:dateTime)
      # Helps get the label in your language, if not, then en language
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
  }
}
GROUP BY ?artwork ?image ?time ?creatorLabel
LIMIT 100
But if your only concern is
not to have duplicate lines in the results to manually remove afterwards
you could use GROUP_CONCAT to get one line per artwork, with multiple values per cell:
# One row per artwork/image/time/creator: all distinct movement labels and all
# distinct material labels are joined into a single ", "-separated cell each.
SELECT DISTINCT
  ?artwork ?image ?time ?creatorLabel
  (GROUP_CONCAT(DISTINCT ?movementLabel; separator=", ") AS ?movementLabels)
  (GROUP_CONCAT(DISTINCT ?materialLabel; separator=", ") AS ?materialLabels)
WHERE {
  {
    # Inner query produces the label variables that the outer query aggregates.
    SELECT ?artwork ?image ?time ?creatorLabel ?movementLabel ?materialLabel
    WHERE {
      VALUES ?artwork { wd:Q728373 wd:Q720602 } # remove this line to query all artworks
      ?artwork wdt:P31  wd:Q3305213 ;
               wdt:P571 ?time ;
               wdt:P18  ?image .
      OPTIONAL { ?artwork wdt:P170 ?creator . }
      OPTIONAL { ?artwork wdt:P135 ?movement . }
      OPTIONAL { ?artwork wdt:P186 ?material . }
      FILTER(?time > "1870-01-01T00:00:00"^^xsd:dateTime)
      # Helps get the label in your language, if not, then en language.
      # Uses the same language list as the question's query and the SAMPLE
      # variant, so the results match this comment.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
  }
}
GROUP BY ?artwork ?image ?time ?creatorLabel
LIMIT 100
(If there can be multiple images, times, or creators, you could do the same for these properties, too.)

Related

Grouping qualifiers in the output of a Wikidata SPARQL query

I am building a Wikidata SPARQL query to retrieve details about specific people. When asking for their educational details (P69 - educatedAt)- I am not sure how to craft the query such that it will collate their degrees and majors in the case that in one statement about being educated at an institution they have multiple degrees or majors (I used this other query to find people with multiple degrees from Harvard).
This is the query:
# Details for one item (wd:Q5402996): English aliases joined with ";", a few
# optional facts, and one "|"-separated text entry per distinct combination of
# education statement, degree and major.
SELECT
  ?itemLabel
  (GROUP_CONCAT(DISTINCT ?altNames; SEPARATOR = ";") AS ?aliases)
  ?itemDesc
  ?genderLabel
  ?birthday
  ?placeOfBirthLabel
  ?image
  (GROUP_CONCAT(DISTINCT ?ed; SEPARATOR = "|") AS ?education)
WHERE {
  VALUES ?item {
    wd:Q5402996
  }

  # English alternative labels only.
  OPTIONAL {
    ?item skos:altLabel ?altNames.
    FILTER((LANG(?altNames)) = "en")
  }

  # Optional facts, each in its own OPTIONAL so they bind independently.
  {
    OPTIONAL { ?item wdt:P21 ?gender. }
    OPTIONAL { ?item wdt:P569 ?birthday. }
    OPTIONAL { ?item wdt:P19 ?placeOfBirth. }
    OPTIONAL { ?item wdt:P18 ?image. }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  }

  # Education: one solution per P69 statement and per degree/major qualifier
  # value, flattened into the string ?ed.
  {
    OPTIONAL {
      ?item p:P69 ?statement.
      ?statement (ps:P69/rdfs:label) ?eduLabel.
      FILTER((LANG(?eduLabel)) = "en")

      OPTIONAL { ?statement pq:P580 ?edStart. }
      OPTIONAL { ?statement pq:P582 ?edEnd. }
      OPTIONAL {
        ?statement (pq:P512/rdfs:label) ?edDegrees.
        FILTER((LANG(?edDegrees)) = "en")
      }
      OPTIONAL {
        ?statement (pq:P812/rdfs:label) ?edMajors.
        FILTER((LANG(?edMajors)) = "en")
      }

      # Each part contributes "::<key>:<value>" when bound, otherwise "".
      BIND(IF(BOUND(?edStart), CONCAT("::start:", STR(YEAR(?edStart))), "") AS ?edStartText)
      BIND(IF(BOUND(?edEnd), CONCAT("::end:", STR(YEAR(?edEnd))), "") AS ?edEndText)
      BIND(IF(BOUND(?edDegrees), CONCAT("::degrees:", STR(?edDegrees)), "") AS ?edDegreeText)
      BIND(IF(BOUND(?edMajors), CONCAT("::majors:", STR(?edMajors)), "") AS ?edMajorText)
      BIND(CONCAT(?eduLabel, ?edStartText, ?edEndText, ?edDegreeText, ?edMajorText) AS ?ed)
    }
  }

  # Manual label-service mode: maps the item's description to ?itemDesc.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en".
    ?item schema:description ?itemDesc.
  }
}
GROUP BY ?itemLabel ?itemDesc ?genderLabel ?birthday ?image ?placeOfBirthLabel
For the education result I get:
Harvard University::end:1980::degrees:Master of Arts::majors:astronomy|
Harvard University::end:1980::degrees:Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts|
University of Rochester::end:1976::degrees:Bachelor of Science
I would like to get:
Harvard University::end:1980::degrees:Master of Arts;Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts;Bachelor of Science|
How can I group the degrees in line like this in my query?
Or even better have them be nested in the JSON output rather than using delimiters?

Missing data fields in Wikidata Query Service results despite items having these data

I am trying to retrieve some municipalities from Wikidata using SPARQL but several items returned have much of their fields empty despite these items having these data. I do not understand what is wrong with the query below (link to WQS). For example, the municipality Almelo has its coordinates (P625), and parent place (P131) erroneously missing in the results:
# Municipalities (instances of wd:Q2039348 or of one of its subclasses) with
# their coordinates, parent place and area.
SELECT ?mun ?munLabel ?coords ?parentPlace ?area WHERE {
  ?mun p:P31 ?instanceOf .   # Get statement because we need this later
  # The property ID needs its "P" prefix: wdt:P279* follows the class
  # hierarchy, whereas wdt:279 is a different IRI that matches nothing.
  ?instanceOf ps:P31/wdt:P279* wd:Q2039348 .
  OPTIONAL {
    ?mun wdt:P625  ?coords ;
         wdt:P131  ?parentPlace ;
         wdt:P2046 ?area .
  }
  MINUS { ?instanceOf pq:P582 ?endTime . }   # Don't show municipalities that have an end time
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?munLabel
This is because you are using one OPTIONAL statement instead of 3 separately.
In this case, Almelo doesn't have an 'area', wdt:P2046, so the whole OPTIONAL statement evaluates as false, and so it binds no variables.
The following query works:
Notice that we have 3 distinct optional statements, so that they may fail to bind variables independently of each other.
# Municipalities (instances of wd:Q2039348 or of one of its subclasses) with
# their coordinates, parent place and area, each of which may be missing.
SELECT ?mun ?munLabel ?coords ?parentPlace ?area WHERE {
  ?mun p:P31 ?instanceOf .   # Get statement because we need this later
  # The property ID needs its "P" prefix: wdt:P279* follows the class
  # hierarchy, whereas wdt:279 is a different IRI that matches nothing.
  ?instanceOf ps:P31/wdt:P279* wd:Q2039348 .

  # Three separate OPTIONALs so each variable can stay unbound on its own.
  OPTIONAL { ?mun wdt:P625  ?coords }
  OPTIONAL { ?mun wdt:P131  ?parentPlace }
  OPTIONAL { ?mun wdt:P2046 ?area }

  MINUS { ?instanceOf pq:P582 ?endTime . }   # Don't show municipalities that have an end time
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?munLabel
You have to declare OPTIONAL each statement independently:
# One OPTIONAL per property, so a missing value leaves only that variable unbound.
OPTIONAL { ?mun wdt:P625 ?coords . }
OPTIONAL { ?mun wdt:P131 ?parentPlace . }
OPTIONAL { ?mun wdt:P2046 ?area . }
Otherwise, if one of them is missing, then the whole OPTIONAL block is ignored.
See also Multiple Optional Graph Patterns.

Filter data with subquery in SPARQL

I'm trying to get some data from Wikidata. I've got a simple query which fetches information about universities:
# Items whose class is wd:Q38723 (or a subclass of it) and that have both a
# country (P17) and a site (P856) value; the country is required but not returned.
SELECT ?item ?itemLabel ?site
WHERE {
  ?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723 ;
        wdt:P17  ?country ;
        wdt:P856 ?site .
  # Labels in Russian, falling back to English.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en". }
}
And another query, which gets list of members of the CIS:
# States that have a best-rank "member of" (P463) statement for wd:Q7779,
# skipping memberships with an end time and states that have an end date.
SELECT DISTINCT ?state
WHERE {
  ?state wdt:P31/wdt:P279* wd:Q3624078 ;
         p:P463 ?memberOfStatement .
  ?memberOfStatement a wikibase:BestRank ;
                     ps:P463 wd:Q7779

  MINUS { ?memberOfStatement pq:P582 ?endTime . }   # membership has ended
  MINUS { ?state wdt:P576|wdt:P582 ?end . }         # state itself has an end date
}
Both work fine. But now I want to combine them to get list of universities which are located in the CIS. I try to do it like shown in the answer to this question:
# Attempt to keep only items whose country is one of the states found by the
# nested query.
SELECT ?item ?itemLabel ?site
WHERE {
  ?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723 ;
        wdt:P17  ?country ;
        wdt:P856 ?site .

  FILTER(EXISTS {
    SELECT DISTINCT ?state WHERE {
      {
        ?state (wdt:P31/(wdt:P279*)) wd:Q3624078 ;
               p:P463 ?memberOfStatement .
        ?memberOfStatement rdf:type wikibase:BestRank ;
                           ps:P463 wd:Q7779 .
        MINUS { ?memberOfStatement pq:P582 ?endTime. }
        MINUS { ?state (wdt:P576|wdt:P582) ?end. }
      }
      # NOTE(review): ?country is bound only by the outer pattern; presumably it
      # is not visible inside this subquery, so the comparison never succeeds
      # -- confirm against the endpoint's subquery scoping rules.
      FILTER(?country = ?state)
    }
  })

  SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en". }
}
But, for some reason, I get zero results. What am I doing wrong here?

Path matching inside a VALUES clause

I'm trying to perform path matching inside a VALUES clause in sparql in order to match all instances and subclasses of both battles and sieges in wikidata. The following request repeatedly times out.
# Items that are instances of either listed class (or of a subclass) and whose
# label matches "saratoga", case-insensitively.
SELECT DISTINCT ?battle ?battleLabel
WHERE {
  {
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }

    # Both target classes are supplied through a single VALUES clause.
    VALUES ?type { wd:Q178561 wd:Q188055 }
    ?battle (wdt:P31/wdt:P279*) ?type .

    ?battle rdfs:label ?queryByTitle .
    FILTER(REGEX(?queryByTitle, "saratoga", "i"))
  }
}
It seems that VALUES, esp. in conjunction with /, confuses the Blazegraph's query optimizer in that case.
Use UNION instead of VALUES:
# Same search as the VALUES version, with one UNION branch per target class.
SELECT DISTINCT ?battle ?battleLabel
WHERE {
  { ?battle wdt:P31/wdt:P279* wd:Q178561 }
  UNION
  { ?battle wdt:P31/wdt:P279* wd:Q188055 }

  ?battle rdfs:label ?queryByTitle .
  FILTER(REGEX(?queryByTitle, "saratoga", "i"))

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
Alternatively, disable the optimizer and specify explicit order:
# Same search with the query optimizer switched off, so the patterns are
# written in the explicit order they are meant to run in: classes first, then
# their subclasses, then instances, then the label match. Do not reorder.
SELECT DISTINCT ?battle ?battleLabel
WHERE {
  hint:Query hint:optimizer "None" .

  VALUES ?type { wd:Q178561 wd:Q188055 }
  ?subtype wdt:P279* ?type .
  ?battle  wdt:P31   ?subtype .
  ?battle  rdfs:label ?queryByTitle .
  FILTER(REGEX(?queryByTitle, "saratoga", "i"))

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}

How to get all properties for only a specific category in Wikidata?

Is there RDF data (or another format) that lets me get all the properties that can exist in a category? For example, for Person I should be returned properties like sex and date of birth.
How to query this information at https://query.wikidata.org/ ?
What I want is this https://www.wikidata.org/wiki/Wikidata:List_of_properties/Summary_table
But is there a better format for this? I want to access programmatically.
UPDATE
This query is too heavy, causes timeout.
# Every predicate used on any instance of wd:Q5, with the label of the
# property entity that declares it as its claim predicate.
SELECT ?p ?attName
WHERE {
  ?q wdt:P31 wd:Q5 .
  ?q ?p ?statement .

  # Map the predicate back to its property entity and read that entity's label.
  ?realAtt wikibase:claim ?p ;
           rdfs:label ?attName .

  # Keep English labels and labels without a language tag.
  FILTER(((LANG(?attName)) = "en") || ((LANG(?attName)) = ""))
}
GROUP BY ?p ?attName
I must specify the entity, e.g. Barack Obama, for it to work, but this does not give me all the possible properties.
# Same lookup restricted to a single entity (wd:Q76), which keeps the query
# small enough to finish.
SELECT ?p ?attName
WHERE {
  BIND(wd:Q76 AS ?q)
  ?q wdt:P31 wd:Q5 .
  ?q ?p ?statement .

  # Map the predicate back to its property entity and read that entity's label.
  ?realAtt wikibase:claim ?p ;
           rdfs:label ?attName .

  # Keep English labels and labels without a language tag.
  FILTER(((LANG(?attName)) = "en") || ((LANG(?attName)) = ""))
}
GROUP BY ?p ?attName
1
The page you have linked to is created by a bot. Contact the BetaBot operator, if you need to know how the bot works.
2
Perhaps the bot relies on the wd:P1963 property:
# Properties listed on the class wd:Q5 through P1963, ordered by the numeric
# part of the property ID.
SELECT ?property ?propertyLabel
{
  VALUES (?class) { (wd:Q5) }
  ?class wdt:P1963 ?property
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
# Strip the entity namespace and the leading "P", then sort as an integer.
ORDER BY ASC(xsd:integer(strafter(str(?property), concat(str(wd:), "P"))))
The above query returns 49 results.
3
I'd suggest you rely on type constraints from property pages:
# Properties that carry a P2302 statement with value wd:Q21503250 whose
# qualifiers are P2309 = wd:Q21503252 and P2308 = the class wd:Q5, ordered by
# the numeric part of the property ID.
SELECT ?property ?propertyLabel
{
  VALUES (?class) { (wd:Q5) }
  ?property a wikibase:Property .
  ?property p:P2302 [
    ps:P2302 wd:Q21503250 ;
    pq:P2309 wd:Q21503252 ;
    pq:P2308 ?class
  ] .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
# Strip the entity namespace and the leading "P", then sort as an integer.
ORDER BY ASC(xsd:integer(strafter(str(?property), concat(str(wd:), "P"))))
The above query returns 700 results.
4
The first query from your question works fine for relatively small classes, e. g. wd:Q6256 ('country'). On the public endpoint, it is not possible to make the query work for large classes.
However, you could split the query into small parts. In Python:
# Collect every property that is used on at least one instance of wd:Q5,
# asking the endpoint about `limit` properties per request.
from wdqs import Client
from time import sleep

client = Client()

# Total number of properties; this bounds the paging loop below.
result = client.query("SELECT (count(?p) AS ?c) {?p a wikibase:Property}")
count = int(result[0]["c"])

offset = 0
limit = 50
possible = []
while offset <= count:
    # The inner SELECT picks one page of properties in a stable order; the
    # outer FILTER EXISTS keeps those used by at least one wd:Q5 instance.
    props = client.query("""
        SELECT ?property WHERE {
            hint:Query hint:optimizer "None" .
            {
                SELECT ?property {
                    ?property a wikibase:Property .
                } ORDER BY ?property OFFSET %s LIMIT %s
            }
            ?property wikibase:directClaim ?wdt.
            FILTER EXISTS {
                ?human ?wdt [] ; wdt:P31 wd:Q5 .
                hint:Group hint:maxParallel 501 .
            }
            hint:Query hint:filterExists "SubQueryLimitOne" .
            # SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
        }
    """ % (offset, limit))
    for prop in props:
        possible.append(prop['property'])
    offset += limit
    # Progress: properties found so far, and how many have been examined.
    print(len(possible), min(offset, count))
    # Short pause between requests.
    sleep(0.25)
The last line of the output is:
2156 5154