query in Wikidata: how to retrieve the labels for instances - sparql

I'm trying to get info, e.g. for Belgium, about aerodromes or stations, together with the labels of all the classes each item is an instance of (e.g. Brussels South Charleroi Airport can be an airport, a business, etc.).
I've written two separate queries but I'm struggling to join them:
# Items with a country (P17) statement resolving to wd:Q31 that are an
# instance of wd:Q62447 or wd:Q12819564 (directly or through subclasses),
# with their labels in nl, fr, en, wa and vls where available.
SELECT DISTINCT ?poi ?itemLabel_nl ?itemLabel_fr ?itemLabel_en ?itemLabel_wa ?itemLabel_vls
WHERE {
  # Country statement: the value, followed up any number of P279 hops, is wd:Q31.
  ?poi p:P17 ?countryStmt .
  ?countryStmt ps:P17/wdt:P279* wd:Q31 .

  # Instance-of statement matching either of the two target classes.
  {
    ?poi p:P31 ?typeStmtA .
    ?typeStmtA ps:P31/wdt:P279* wd:Q62447 .
  }
  UNION
  {
    ?poi p:P31 ?typeStmtB .
    ?typeStmtB ps:P31/wdt:P279* wd:Q12819564 .
  }

  # One optional label per language; a missing language leaves its column unbound.
  OPTIONAL {
    ?poi rdfs:label ?itemLabel_nl .
    FILTER(LANG(?itemLabel_nl) = "nl")
  }
  OPTIONAL {
    ?poi rdfs:label ?itemLabel_fr .
    FILTER(LANG(?itemLabel_fr) = "fr")
  }
  OPTIONAL {
    ?poi rdfs:label ?itemLabel_en .
    FILTER(LANG(?itemLabel_en) = "en")
  }
  OPTIONAL {
    ?poi rdfs:label ?itemLabel_wa .
    FILTER(LANG(?itemLabel_wa) = "wa")
  }
  OPTIONAL {
    ?poi rdfs:label ?itemLabel_vls .
    FILTER(LANG(?itemLabel_vls) = "vls")
  }
}
And the second part, to get the instance labels for one exact item (e.g. Brussels South Charleroi Airport):
# Every value of the instance-of (P31) statements on wd:Q1431012, with its
# English label, sorted by that label.
SELECT ?instance ?instanceLabel
WHERE {
  # Item -> P31 statement node -> statement value, written as one sequence path.
  wd:Q1431012 p:P31/ps:P31 ?instance .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY ASC(?instanceLabel)
Any thoughts here? :)

Related

Grouping qualifiers in the output of a Wikidata SPARQL query

I am building a Wikidata SPARQL query to retrieve details about specific people. When asking for their educational details (P69 - educatedAt)- I am not sure how to craft the query such that it will collate their degrees and majors in the case that in one statement about being educated at an institution they have multiple degrees or majors (I used this other query to find people with multiple degrees from Harvard).
This is the query:
# Profile of the single item wd:Q5402996: label, English aliases (";"-joined),
# description, gender, birthday, place of birth, image and a "|"-joined list
# of education entries built from its P69 statements and their qualifiers.
SELECT
  ?itemLabel
  (GROUP_CONCAT(DISTINCT ?alias; SEPARATOR = ";") AS ?aliases)
  ?itemDesc
  ?genderLabel
  ?birthday
  ?placeOfBirthLabel
  ?image
  (GROUP_CONCAT(DISTINCT ?eduEntry; SEPARATOR = "|") AS ?education)
WHERE {
  VALUES ?item { wd:Q5402996 }

  # English alternative labels.
  OPTIONAL {
    ?item skos:altLabel ?alias .
    FILTER(LANG(?alias) = "en")
  }

  # Direct claims; each is optional so a missing one does not drop the item.
  {
    OPTIONAL { ?item wdt:P21 ?gender . }
    OPTIONAL { ?item wdt:P569 ?birthday . }
    OPTIONAL { ?item wdt:P19 ?placeOfBirth . }
    OPTIONAL { ?item wdt:P18 ?image . }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
  }

  # One text entry per (P69 statement, degree, major) combination.
  {
    OPTIONAL {
      ?item p:P69 ?eduStmt .
      ?eduStmt ps:P69/rdfs:label ?eduLabel .
      FILTER(LANG(?eduLabel) = "en")
      OPTIONAL { ?eduStmt pq:P580 ?edStart . }
      OPTIONAL { ?eduStmt pq:P582 ?edEnd . }
      OPTIONAL {
        ?eduStmt pq:P512/rdfs:label ?edDegrees .
        FILTER(LANG(?edDegrees) = "en")
      }
      OPTIONAL {
        ?eduStmt pq:P812/rdfs:label ?edMajors .
        FILTER(LANG(?edMajors) = "en")
      }
      # Each fragment is the empty string when its qualifier is absent.
      BIND(IF(BOUND(?edStart), CONCAT("::start:", STR(YEAR(?edStart))), "") AS ?startPart)
      BIND(IF(BOUND(?edEnd), CONCAT("::end:", STR(YEAR(?edEnd))), "") AS ?endPart)
      BIND(IF(BOUND(?edDegrees), CONCAT("::degrees:", STR(?edDegrees)), "") AS ?degreePart)
      BIND(IF(BOUND(?edMajors), CONCAT("::majors:", STR(?edMajors)), "") AS ?majorPart)
      BIND(CONCAT(?eduLabel, ?startPart, ?endPart, ?degreePart, ?majorPart) AS ?eduEntry)
    }
  }

  # Second label-service call that explicitly maps the description to ?itemDesc.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
    ?item schema:description ?itemDesc .
  }
}
GROUP BY ?itemLabel ?itemDesc ?genderLabel ?birthday ?image ?placeOfBirthLabel
For the education result I get:
Harvard University::end:1980::degrees:Master of Arts::majors:astronomy|
Harvard University::end:1980::degrees:Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts|
University of Rochester::end:1976::degrees:Bachelor of Science
I would like to get:
Harvard University::end:1980::degrees:Master of Arts;Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts;Bachelor of Science|
How can I group the degrees in line like this in my query?
Or even better have them be nested in the JSON output rather than using delimiters?

Filter data with subquery in SPARQL

I'm trying to get some data from Wikidata. I've got a simple query which fetches information about universities:
# Items that are an instance of wd:Q38723 (or of any subclass of it) and that
# have both a country (P17) and a P856 value; labels in Russian, else English.
SELECT ?item ?itemLabel ?site
WHERE {
  ?item p:P31/ps:P31/wdt:P279* wd:Q38723 .
  # ?country is required to exist but is not returned.
  ?item wdt:P17 ?country .
  ?item wdt:P856 ?site .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en" . }
}
And another query, which gets list of members of the CIS:
# Items classed under wd:Q3624078 that have a best-rank member-of (P463)
# statement for wd:Q7779, excluding ended memberships and ended items.
SELECT DISTINCT ?state
WHERE {
  ?state wdt:P31/wdt:P279* wd:Q3624078 .
  ?state p:P463 ?membership .
  ?membership rdf:type wikibase:BestRank .
  ?membership ps:P463 wd:Q7779 .
  # Drop memberships that carry an end-time (P582) qualifier.
  MINUS { ?membership pq:P582 ?endTime . }
  # Drop items that themselves have a P576 or P582 value.
  MINUS { ?state wdt:P576|wdt:P582 ?end . }
}
Both work fine. But now I want to combine them to get the list of universities which are located in the CIS. I tried to do it as shown in the answer to this question:
# Attempted combination: the university pattern, kept only where the EXISTS
# sub-select below finds a matching member state for ?country.
SELECT ?item ?itemLabel ?site WHERE {
?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723;
wdt:P17 ?country;
wdt:P856 ?site.
# NOTE(review): the sub-SELECT projects only ?state, so the ?country used in
# its FILTER is probably not the outer ?country (sub-query variables are
# scoped to the sub-query). This is the likely cause of the empty result;
# confirm against the SPARQL 1.1 sub-query scoping rules.
FILTER(EXISTS {
SELECT DISTINCT ?state WHERE {
{
?state (wdt:P31/(wdt:P279*)) wd:Q3624078;
p:P463 ?memberOfStatement.
?memberOfStatement rdf:type wikibase:BestRank;
ps:P463 wd:Q7779.
# Exclude memberships with an end-time qualifier and items that have ended.
MINUS { ?memberOfStatement pq:P582 ?endTime. }
MINUS { ?state (wdt:P576|wdt:P582) ?end. }
}
# Intended join between the outer country and the member state.
FILTER(?country = ?state)
}
})
SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en". }
}
But, for some reason, I get zero results. What am I doing wrong here?

Path matching inside a VALUES clause

I'm trying to perform path matching inside a VALUES clause in sparql in order to match all instances and subclasses of both battles and sieges in wikidata. The following request repeatedly times out.
# Items whose class (P31, then any chain of P279) is one of the two ?type
# values and that have a label matching "saratoga".
SELECT DISTINCT ?battle ?battleLabel WHERE {
{
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
# The VALUES clause and the path pattern share this line; ?type feeds the
# P31/P279* path as its target.
VALUES ?type {wd:Q178561 wd:Q188055} ?battle (wdt:P31/wdt:P279*) ?type .
?battle rdfs:label ?queryByTitle.
# "i" flag: case-insensitive regular-expression match on the label.
FILTER(REGEX(?queryByTitle, "saratoga", "i"))
}
}
It seems that VALUES, especially in conjunction with /, confuses Blazegraph's query optimizer in that case.
Use UNION instead of VALUES:
# Same search with one UNION branch per root class instead of a VALUES list.
SELECT DISTINCT ?battle ?battleLabel
WHERE {
  {
    ?battle wdt:P31/wdt:P279* wd:Q178561 .
  }
  UNION
  {
    ?battle wdt:P31/wdt:P279* wd:Q188055 .
  }
  # Keep items having any label that matches "saratoga", ignoring case.
  ?battle rdfs:label ?title .
  FILTER(REGEX(?title, "saratoga", "i"))
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
Alternatively, disable the optimizer and specify explicit order:
# Same search with the query optimizer switched off, so the patterns are
# meant to be evaluated in the order they are written.
SELECT DISTINCT ?battle ?battleLabel WHERE {
# Query hint: disable the optimizer for this query.
hint:Query hint:optimizer "None" .
VALUES ?type {wd:Q178561 wd:Q188055}
# First expand the two root classes to themselves plus all their subclasses ...
?subtype wdt:P279* ?type .
# ... then fetch the direct instances of each of those classes.
?battle wdt:P31 ?subtype .
?battle rdfs:label ?queryByTitle.
# "i" flag: case-insensitive regular-expression match on the label.
FILTER(REGEX(?queryByTitle, "saratoga", "i"))
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}

How to get all properties for only a specific category in Wikidata?

Is there RDF data (or some other format) that lets me get all the properties that can exist for a category? For example, for Person I should get back properties like sex and date of birth.
How to query this information at https://query.wikidata.org/ ?
What I want is this https://www.wikidata.org/wiki/Wikidata:List_of_properties/Summary_table
But is there a better format for this? I want to access programmatically.
UPDATE
This query is too heavy, causes timeout.
# Distinct (predicate, label) pairs for every claim predicate used on any
# instance of wd:Q5, with the label in English or without a language tag.
SELECT DISTINCT ?p ?attName
WHERE {
  ?entity wdt:P31 wd:Q5 .
  ?entity ?p ?stmt .
  # Map the claim predicate back to its property entity to read the label.
  ?propEntity wikibase:claim ?p .
  ?propEntity rdfs:label ?attName .
  FILTER(LANG(?attName) IN ("en", ""))
}
I must specify the entity, e.g. Barack Obama, and then it works, but this does not give me all the possible properties.
# Distinct (predicate, label) pairs for the claim predicates used on the
# single item wd:Q76, provided it is an instance of wd:Q5.
SELECT DISTINCT ?p ?attName
WHERE {
  VALUES ?entity { wd:Q76 }
  ?entity wdt:P31 wd:Q5 .
  ?entity ?p ?stmt .
  # Map the claim predicate back to its property entity to read the label.
  ?propEntity wikibase:claim ?p .
  ?propEntity rdfs:label ?attName .
  FILTER(LANG(?attName) IN ("en", ""))
}
1
The page you have linked to is created by a bot. Contact the BetaBot operator, if you need to know how the bot works.
2
Perhaps the bot relies on the wd:P1963 property:
# Properties listed on wd:Q5 through P1963, ordered by numeric property id.
SELECT ?property ?propertyLabel
WHERE {
  VALUES ?class { wd:Q5 }
  ?class wdt:P1963 ?property .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
# Strip the entity namespace and the leading "P", then sort as an integer.
ORDER BY ASC(xsd:integer(STRAFTER(STR(?property), CONCAT(STR(wd:), "P"))))
The above query returns 49 results.
3
I'd suggest you rely on type constraints from property pages:
# Properties carrying a P2302 statement with value wd:Q21503250 whose
# qualifiers are P2309 = wd:Q21503252 and P2308 = wd:Q5, ordered by numeric
# property id.
SELECT ?property ?propertyLabel
WHERE {
  VALUES ?class { wd:Q5 }
  ?property rdf:type wikibase:Property .
  # Named statement node in place of an anonymous blank-node pattern.
  ?property p:P2302 ?constraint .
  ?constraint ps:P2302 wd:Q21503250 ;
              pq:P2309 wd:Q21503252 ;
              pq:P2308 ?class .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
# Strip the entity namespace and the leading "P", then sort as an integer.
ORDER BY ASC(xsd:integer(STRAFTER(STR(?property), CONCAT(STR(wd:), "P"))))
The above query returns 700 results.
4
The first query from your question works fine for relatively small classes, e. g. wd:Q6256 ('country'). On the public endpoint, it is not possible to make the query work for large classes.
However, you could split the query into small parts. In Python:
# Collect every property that is used at least once on an instance of wd:Q5,
# by testing the properties in fixed-size pages so that each individual
# request stays small.
from wdqs import Client
from time import sleep

client = Client()

# Total number of properties; used as the upper bound for paging.
result = client.query("SELECT (count(?p) AS ?c) {?p a wikibase:Property}")
count = int(result[0]["c"])

offset = 0
limit = 50  # properties tested per request
possible = []

while offset <= count:
    # The inner sub-select picks one page of properties in a stable order;
    # the FILTER EXISTS keeps those with at least one use on a wd:Q5 instance.
    # The query text is left flush-left so the string sent is unchanged.
    props = client.query("""
SELECT ?property WHERE {
hint:Query hint:optimizer "None" .
{
SELECT ?property {
?property a wikibase:Property .
} ORDER BY ?property OFFSET %s LIMIT %s
}
?property wikibase:directClaim ?wdt.
FILTER EXISTS {
?human ?wdt [] ; wdt:P31 wd:Q5 .
hint:Group hint:maxParallel 501 .
}
hint:Query hint:filterExists "SubQueryLimitOne" .
# SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
""" % (offset, limit))
    for prop in props:
        possible.append(prop['property'])
    offset += limit
    # Progress: properties found so far, and how many have been examined.
    print(len(possible), min(offset, count))
    # Short pause between requests.
    sleep(0.25)
The last line of the output is:
2156 5154

How to retrieve aliases from wikidata

I'm trying to retrieve some information from Wikidata, and I have found it interesting to collect the aliases of the entries. For example, Francesco Totti is also known as il Capitano or er Pupone:
I'm trying to retrieve all the Serie A football players with this SPARQL query:
SELECT ?subject ?nomeLabel ?cognomeLabel ?subjectLabel WHERE {
?subject wdt:P31 wd:Q5.
?subject p:P54 ?team .
?team ps:P54 wd:""" + team_code +""" .
FILTER NOT EXISTS { ?team pq:P582 ?end
}
OPTIONAL{
?subject wdt:P735 ?nome .
?subject wdt:P734 ?cognome .
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "it". }
}
ORDER BY (?cognomeLabel)
How can I modify my query to also retrieve the aliases?
Thanks
I have attempted a query with various labels. Here just for Roma:
# Instances of wd:Q5 with a P54 statement for wd:Q2739 that has no end-time
# qualifier, with optional P735, P734, P1449 and alternative-label values.
SELECT DISTINCT ?subject ?subjectLabel ?nomeLabel ?cognomeLabel ?nickname ?alternative ?subjectAltLabel
WHERE {
  ?subject wdt:P31 wd:Q5 ;
           p:P54 ?membership .
  ?membership ps:P54 wd:Q2739 .
  # Keep only P54 statements without an end-time (P582) qualifier.
  FILTER NOT EXISTS { ?membership pq:P582 ?endTime . }

  # Each extra is optional on its own, so one missing value does not hide the others.
  OPTIONAL { ?subject wdt:P735 ?nome . }
  OPTIONAL { ?subject wdt:P734 ?cognome . }
  OPTIONAL { ?subject wdt:P1449 ?nickname . }
  OPTIONAL { ?subject skos:altLabel ?alternative . }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "it,en,fr" . }
}
ORDER BY ASC(?cognomeLabel)
I believe the P1449 property should be the most appropriate property to store an alias/nickname, but it does not seem to be used that much for football players. I just added "il Capitano" to Francesco Totti. Beyond that one there does not seem to be other nicknames for Roma players.
The "Also known as" label (in the right column) is not necessarily the nickname, but may be a spelling variation.
Something more generic, in case someone is interested in all properties; it returns only the English "also known as" labels:
# Properties with their English label, description and a ", "-joined list of
# English alternative labels; at most 5000 rows are returned.
SELECT
  ?property
  ?propertyLabel
  ?propertyDescription
  (GROUP_CONCAT(DISTINCT ?altLabel; SEPARATOR = ", ") AS ?altLabel_list)
WHERE {
  ?property rdf:type wikibase:Property .
  # Optional, so properties without an English alias are still listed.
  OPTIONAL {
    ?property skos:altLabel ?altLabel .
    FILTER(LANG(?altLabel) = "en")
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?property ?propertyLabel ?propertyDescription
LIMIT 5000
Another, simpler example for male actors with itemAltLabel:
# Male actors: items with P21 = wd:Q6581097 and P106 = wd:Q33999, returned
# with the English label and alternative labels.
SELECT ?item ?itemLabel ?itemAltLabel
WHERE {
  ?item wdt:P21 wd:Q6581097 ;
        wdt:P106 wd:Q33999 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}