Grouping qualifiers in the output of a Wikidata SPARQL query - sparql

I am building a Wikidata SPARQL query to retrieve details about specific people. When asking for their educational details (P69, "educated at"), I am not sure how to craft the query so that it collates their degrees and majors when a single "educated at" statement for an institution carries multiple degrees or majors (I used this other query to find people with multiple degrees from Harvard).
This is the query:
# Fetches label, English aliases, description, gender, birthday, place of
# birth, image and education details for the item(s) listed in VALUES.
# Aliases are joined with ";" and education entries with "|".
SELECT ?itemLabel (GROUP_CONCAT(DISTINCT ?altNames; SEPARATOR = ";") AS ?aliases) ?itemDesc ?genderLabel ?birthday ?placeOfBirthLabel ?image (GROUP_CONCAT(DISTINCT ?ed; SEPARATOR = "|") AS ?education) WHERE {
VALUES ?item {
wd:Q5402996
}
# Alternative labels, restricted to English.
OPTIONAL {
?item skos:altLabel ?altNames.
FILTER((LANG(?altNames)) = "en")
}
# Optional details; each is wrapped in OPTIONAL so a missing value does not drop the row.
{
OPTIONAL { ?item wdt:P21 ?gender. }
OPTIONAL { ?item wdt:P569 ?birthday. }
OPTIONAL { ?item wdt:P19 ?placeOfBirth. }
OPTIONAL { ?item wdt:P18 ?image. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
# Education: each P69 (educated at) statement with the English label of its
# value and its optional qualifiers.
{
OPTIONAL {
?item p:P69 ?statement.
?statement (ps:P69/rdfs:label) ?eduLabel.
FILTER((LANG(?eduLabel)) = "en")
OPTIONAL { ?statement pq:P580 ?edStart. }
OPTIONAL { ?statement pq:P582 ?edEnd. }
# Each degree/major label is bound in its own solution row, so a statement
# with several degrees or majors produces several distinct ?ed strings.
OPTIONAL {
?statement (pq:P512/rdfs:label) ?edDegrees.
FILTER((LANG(?edDegrees)) = "en")
}
OPTIONAL {
?statement (pq:P812/rdfs:label) ?edMajors.
FILTER((LANG(?edMajors)) = "en")
}
# Every qualifier becomes a "::key:value" fragment, or "" when it is unbound.
BIND(IF(BOUND(?edStart), CONCAT("::start:", STR(YEAR(?edStart))), "") AS ?edStartText)
BIND(IF(BOUND(?edEnd), CONCAT("::end:", STR(YEAR(?edEnd))), "") AS ?edEndText)
BIND(IF(BOUND(?edDegrees), CONCAT("::degrees:", STR(?edDegrees)), "") AS ?edDegreeText)
BIND(IF(BOUND(?edMajors), CONCAT("::majors:", STR(?edMajors)), "") AS ?edMajorText)
BIND(CONCAT(?eduLabel, ?edStartText, ?edEndText, ?edDegreeText, ?edMajorText) AS ?ed)
}
}
# NOTE(review): explicit triple inside the label service; this appears to use
# the service's manual mode to bind ?itemDesc. Confirm against the service docs.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en".
?item schema:description ?itemDesc.
}
}
# ?aliases and ?education are aggregated; every other projected variable is a grouping key.
GROUP BY ?itemLabel ?itemDesc ?genderLabel ?birthday ?image ?placeOfBirthLabel
For the education result I get:
Harvard University::end:1980::degrees:Master of Arts::majors:astronomy|
Harvard University::end:1980::degrees:Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts|
University of Rochester::end:1976::degrees:Bachelor of Science
I would like to get:
Harvard University::end:1980::degrees:Master of Arts;Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts;Bachelor of Science|
How can I group the degrees in line like this in my query?
Or, even better, can they be nested in the JSON output rather than joined with delimiters?

Related

query in Wikidata: how to retrieve the labels for instances

I'm trying to get information, e.g. for Belgium, about aerodromes or stations, together with the labels of all the "instance of" values for each item (e.g. Brussels South Charleroi Airport can be an airport, a business, etc.).
I've written two separate queries but am struggling to join them:
# Distinct items (?poi) whose P17 statement value is wd:Q31 (or reaches it via
# zero or more wdt:P279 steps) and whose P31 statement value is, or is a
# subclass of, wd:Q62447 or wd:Q12819564. Labels are fetched per language.
SELECT DISTINCT ?poi ?itemLabel_nl ?itemLabel_fr ?itemLabel_en ?itemLabel_wa ?itemLabel_vls
WHERE {
?poi p:P17 ?statement0.
?statement0 (ps:P17/(wdt:P279*)) wd:Q31.
# Either of the two classes qualifies the item.
{
?poi p:P31 ?statement1.
?statement1 (ps:P31/(wdt:P279*)) wd:Q62447.
}
UNION
{
?poi p:P31 ?statement2.
?statement2 (ps:P31/(wdt:P279*)) wd:Q12819564.
}
# One OPTIONAL per language so a missing translation leaves that column unbound.
OPTIONAL { ?poi rdfs:label ?itemLabel_nl filter (lang(?itemLabel_nl) = "nl") . }
OPTIONAL { ?poi rdfs:label ?itemLabel_fr filter (lang(?itemLabel_fr) = "fr") . }
OPTIONAL { ?poi rdfs:label ?itemLabel_en filter (lang(?itemLabel_en) = "en") . }
OPTIONAL { ?poi rdfs:label ?itemLabel_wa filter (lang(?itemLabel_wa) = "wa") . }
OPTIONAL { ?poi rdfs:label ?itemLabel_vls filter (lang(?itemLabel_vls) = "vls") . }
}
and the second query, which gets the instance labels for one specific item (e.g. Brussels South Charleroi Airport):
# Lists the ps:P31 values of the P31 statements on wd:Q1431012, with English
# labels from the label service, sorted by label.
SELECT ?instance ?instanceLabel
WHERE
{
wd:Q1431012 p:P31 ?statement.
?statement ps:P31 ?instance.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?instanceLabel
Any thoughts here? :)

Filter data with subquery in SPARQL

I'm trying to get some data from Wikidata. I've got a simple query which fetches information about universities:
# Items whose P31 statement value is wd:Q38723, or reaches it via zero or more
# wdt:P279 steps, and that have both a P17 value (?country) and a P856 value
# (?site). The label service is asked for "ru,en".
SELECT ?item ?itemLabel ?site WHERE {
?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723;
wdt:P17 ?country;
wdt:P856 ?site.
SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en". }
}
And another query, which gets the list of members of the CIS:
# Distinct ?state values that are instances of (a subclass of) wd:Q3624078 and
# have a best-rank P463 statement whose value is wd:Q7779.
SELECT DISTINCT ?state WHERE {
?state wdt:P31/wdt:P279* wd:Q3624078;
p:P463 ?memberOfStatement.
?memberOfStatement a wikibase:BestRank;
ps:P463 wd:Q7779
# Drop membership statements that carry a P582 qualifier.
MINUS { ?memberOfStatement pq:P582 ?endTime. }
# Drop states that themselves have a P576 or P582 value.
MINUS { ?state wdt:P576|wdt:P582 ?end. }
}
Both work fine. But now I want to combine them to get the list of universities located in the CIS. I tried to do it as shown in the answer to this question:
# Attempts to keep only universities whose ?country appears in the list of
# member states, by nesting the member-state query inside FILTER(EXISTS ...).
SELECT ?item ?itemLabel ?site WHERE {
?item (p:P31/ps:P31/(wdt:P279*)) wd:Q38723;
wdt:P17 ?country;
wdt:P856 ?site.
# NOTE(review): the sub-SELECT projects only ?state, yet its inner FILTER
# compares against ?country, which is bound only in the outer pattern.
# Sub-selects are evaluated on their own, so ?country is presumably unbound
# there; that would explain the empty result. Confirm on the endpoint.
FILTER(EXISTS {
SELECT DISTINCT ?state WHERE {
{
?state (wdt:P31/(wdt:P279*)) wd:Q3624078;
p:P463 ?memberOfStatement.
?memberOfStatement rdf:type wikibase:BestRank;
ps:P463 wd:Q7779.
MINUS { ?memberOfStatement pq:P582 ?endTime. }
MINUS { ?state (wdt:P576|wdt:P582) ?end. }
}
FILTER(?country = ?state)
}
})
SERVICE wikibase:label { bd:serviceParam wikibase:language "ru,en". }
}
But, for some reason, I get zero results. What am I doing wrong here?

Optimizing a SPARQL query

I'm trying to get the details of an organization from the company's official website using the query below. It consistently times out.
I require all of the fields below. Is there a way to optimize it? I understand that OPTIONAL behaves like a LEFT JOIN and that the many OPTIONAL clauses are the cause of the timeouts, but is there any other way of getting these fields?
I'm using the Python API, and setting a timeout of 5 minutes doesn't help either: the timeout value doesn't get applied.
# Looks up the item whose P856 statement value is <https://www.amazon.com> and
# aggregates its names, identifiers and related organisations, one row per
# (item, stock exchange) pair. Multi-valued fields are joined with ";".
# NOTE(review): several aliases reuse the name of the variable they aggregate
# (e.g. AS ?official_name); SPARQL 1.1 does not allow an AS variable that is
# already in scope, so other engines may reject this. Confirm.
SELECT distinct
(GROUP_CONCAT( DISTINCT ?official_name; separator=";") AS ?official_name)
(GROUP_CONCAT( DISTINCT ?isin; separator=";") AS ?isin)
?item
?itemLabel
?stock_exchange
?stock_exchangeLabel
(GROUP_CONCAT( DISTINCT ?ticker; separator=";") AS ?ticker)
(GROUP_CONCAT( DISTINCT ?other_name; separator=";") AS ?other_name)
(GROUP_CONCAT(DISTINCT ?parent_orgLabel; SEPARATOR = ";") AS ?parent_orgLabel)
(GROUP_CONCAT(DISTINCT ?owned_byLabel; SEPARATOR = ";") AS ?owned_byLabel)
(GROUP_CONCAT(DISTINCT ?instance_of; SEPARATOR = ";") AS ?instance_of)
(GROUP_CONCAT(DISTINCT ?instance_ofLabel; SEPARATOR = ";") AS ?instance_ofLabel)
(GROUP_CONCAT(DISTINCT ?domains; SEPARATOR = ";") AS ?domains)
(GROUP_CONCAT(DISTINCT ?subsidiaryLabel; SEPARATOR = ";") AS ?subsidiaryLabel)
(GROUP_CONCAT(DISTINCT ?owner_ofLabel; SEPARATOR = ";") AS ?owner_ofLabel)
(GROUP_CONCAT(DISTINCT ?part_ofLabel; SEPARATOR = ";") AS ?part_ofLabel)
WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
# Anchor: the item is identified by its official-website statement value.
{
{ ?item p:P856 [ ps:P856 <https://www.amazon.com> ]}}
# NOTE(review): the OPTIONAL blocks below are independent of each other, so
# their matches presumably combine as a cross product per item before
# GROUP_CONCAT(DISTINCT ...) collapses them. Confirm with the query plan.
# All website values recorded on the item.
OPTIONAL {
?item p:P856 ?web_domains.
?web_domains ps:P856 ?domains .
}
OPTIONAL { ?item wdt:P1448 ?official_name. }
OPTIONAL { ?item wdt:P946 ?isin. }
# Stock exchange and ticker come from the same statement; the ticker is
# required inside this block, so exchanges without one are not matched.
OPTIONAL {
?item p:P414 ?SE .
?SE ps:P414 ?stock_exchange .
?SE pq:P249 ?ticker .
}
OPTIONAL { ?item skos:altLabel ?other_name. FILTER (LANG (?other_name) = "en") }
# Related entities: each block fetches the English rdfs:label directly.
OPTIONAL {
?item wdt:P361 ?part_of.
?part_of rdfs:label ?part_ofLabel.
filter(lang(?part_ofLabel)="en")
}
OPTIONAL {
?item wdt:P749 ?parent_org.
?parent_org rdfs:label ?parent_orgLabel.
filter(lang(?parent_orgLabel)="en")
}
OPTIONAL {
?item wdt:P127 ?owned_by.
?owned_by rdfs:label ?owned_byLabel.
filter(lang(?owned_byLabel)="en")
}
OPTIONAL {
?item wdt:P31 ?instance_of.
?instance_of rdfs:label ?instance_ofLabel.
filter(lang(?instance_ofLabel)="en")
}
OPTIONAL {
?item wdt:P355 ?subsidiary.
?subsidiary rdfs:label ?subsidiaryLabel.
filter(lang(?subsidiaryLabel)="en")
}
OPTIONAL {
?item wdt:P1830 ?owner_of.
?owner_of rdfs:label ?owner_ofLabel.
filter(lang(?owner_ofLabel)="en")
}
}
GROUP BY ?item ?itemLabel ?stock_exchange ?stock_exchangeLabel

Path matching inside a VALUES clause

I'm trying to perform path matching together with a VALUES clause in SPARQL in order to match all instances of battles and sieges (and of their subclasses) in Wikidata. The following request repeatedly times out.
# Distinct items that are instances of wd:Q178561 or wd:Q188055 (or of any of
# their subclasses) and have an rdfs:label matching "saratoga",
# case-insensitively. The two classes are supplied through VALUES.
SELECT DISTINCT ?battle ?battleLabel WHERE {
{
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
VALUES ?type {wd:Q178561 wd:Q188055} ?battle (wdt:P31/wdt:P279*) ?type .
# The regex runs against labels in every language; no LANG filter is applied.
?battle rdfs:label ?queryByTitle.
FILTER(REGEX(?queryByTitle, "saratoga", "i"))
}
}
It seems that VALUES, especially in conjunction with the `/` path operator, confuses Blazegraph's query optimizer in this case.
Use UNION instead of VALUES:
# Same search as the VALUES version, but the two classes are spelled out as
# two UNION branches instead of being bound through VALUES.
SELECT DISTINCT ?battle ?battleLabel WHERE {
{ ?battle wdt:P31/wdt:P279* wd:Q178561 }
UNION
{ ?battle wdt:P31/wdt:P279* wd:Q188055 }
# Case-insensitive match on any label of the item.
?battle rdfs:label ?queryByTitle.
FILTER(REGEX(?queryByTitle, "saratoga", "i"))
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
Alternatively, disable the optimizer and specify explicit order:
# Keeps VALUES but sets the optimizer hint to "None"; the patterns are written
# in the intended evaluation order, so do not reorder them.
SELECT DISTINCT ?battle ?battleLabel WHERE {
hint:Query hint:optimizer "None" .
VALUES ?type {wd:Q178561 wd:Q188055}
# First collect each class and its subclasses, then the instances of those.
?subtype wdt:P279* ?type .
?battle wdt:P31 ?subtype .
?battle rdfs:label ?queryByTitle.
FILTER(REGEX(?queryByTitle, "saratoga", "i"))
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}

How to retrieve aliases from wikidata

I'm trying to retrieve some information from Wikidata, and I have found it interesting to collect the aliases of the entries. For example, Francesco Totti is also known as il Capitano or er Pupone:
I'm trying to retrieve all the Serie A football players with this SPARQL query:
# Humans (P31 = wd:Q5) with a P54 statement for the given team that has no
# P582 qualifier, with Italian labels, ordered by ?cognomeLabel.
SELECT ?subject ?nomeLabel ?cognomeLabel ?subjectLabel WHERE {
?subject wdt:P31 wd:Q5.
?subject p:P54 ?team .
# NOTE(review): the team id is spliced in by the surrounding host-language
# string concatenation (team_code); make sure it is a validated Q-id.
?team ps:P54 wd:""" + team_code +""" .
FILTER NOT EXISTS { ?team pq:P582 ?end
}
# Both values sit in one OPTIONAL, so they are bound only when both exist.
OPTIONAL{
?subject wdt:P735 ?nome .
?subject wdt:P734 ?cognome .
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "it". }
}
ORDER BY (?cognomeLabel)
How can I modify my query to also retrieve the aliases?
Thanks
I have attempted a query with various labels. Here just for Roma:
# Players with a P54 statement for wd:Q2739 that has no P582 qualifier,
# returned with ?nome, ?cognome, P1449 nickname, raw skos:altLabel values and
# the label service's ?subjectAltLabel.
SELECT distinct ?subject ?subjectLabel ?nomeLabel ?cognomeLabel ?nickname ?alternative ?subjectAltLabel WHERE {
?subject wdt:P31 wd:Q5.
?subject p:P54 ?team .
?team ps:P54 wd:Q2739 .
FILTER NOT EXISTS { ?team pq:P582 ?end . }
# Separate OPTIONALs so that each value can be missing independently.
OPTIONAL { ?subject wdt:P735 ?nome . }
OPTIONAL { ?subject wdt:P734 ?cognome . }
OPTIONAL { ?subject wdt:P1449 ?nickname . }
# No language filter here: ?alternative yields one row per alias in any language.
OPTIONAL { ?subject skos:altLabel ?alternative . }
SERVICE wikibase:label { bd:serviceParam wikibase:language "it,en,fr". }
}
ORDER BY (?cognomeLabel)
I believe P1449 is the most appropriate property for storing an alias or nickname, but it does not seem to be used much for football players. I just added "il Capitano" to Francesco Totti. Beyond that one, there do not seem to be any other nicknames for Roma players.
The "Also known as" label (in the right column) is not necessarily the nickname, but may be a spelling variation.
Here is something more generic, in case someone is interested in listing all properties together with only their English "also known as" labels:
# Lists Wikibase properties with their English label, description and a
# comma-separated list of their English alternative labels (at most 5000 rows).
SELECT ?property ?propertyLabel ?propertyDescription (GROUP_CONCAT(DISTINCT(?altLabel); separator = ", ") AS ?altLabel_list) WHERE {
?property a wikibase:Property .
# OPTIONAL keeps properties that have no English alias.
OPTIONAL { ?property skos:altLabel ?altLabel . FILTER (lang(?altLabel) = "en") }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" .}
}
GROUP BY ?property ?propertyLabel ?propertyDescription
LIMIT 5000
Another, simpler example for male actors, using itemAltLabel:
# Male actors: items with P21 = wd:Q6581097 and P106 = wd:Q33999.
# ?itemLabel and ?itemAltLabel are requested from the label service in English.
SELECT ?item ?itemLabel ?itemAltLabel
WHERE
{
?item wdt:P21 wd:Q6581097.
?item wdt:P106 wd:Q33999.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}