I'm trying to get the details of an organization based on the company's official website using the query below. It constantly times out.
I require all of the fields below. Is there a way to optimize it? I understand that each OPTIONAL behaves like a LEFT JOIN (not an INNER JOIN) and that the many OPTIONAL clauses are the likely cause of the timeouts, but is there any other way of getting these fields?
I'm using the Python API, and setting a timeout of 5 minutes doesn't help either; the timeout value doesn't seem to be applied.
# Organization details looked up by official website (P856).
#
# Reworked to avoid the timeout: in a flat query every multi-valued OPTIONAL
# multiplies the intermediate rows (aliases x subsidiaries x owned items x ...)
# before GROUP BY collapses them again. Here each property is aggregated in
# its own subquery that is anchored on the website URL, so the outer join only
# handles one row per ?item (one per ?item/?stock_exchange pair when listings
# with a ticker exist).
#
# Aggregation inputs carry a trailing underscore because SPARQL 1.1 does not
# allow "(expr AS ?x)" when ?x is already in scope in the WHERE clause.
# The output variable names and their order are unchanged.
SELECT
  ?official_name
  ?isin
  ?item
  ?itemLabel
  ?stock_exchange
  ?stock_exchangeLabel
  ?ticker
  ?other_name
  ?parent_orgLabel
  ?owned_byLabel
  ?instance_of
  ?instance_ofLabel
  ?domains
  ?subsidiaryLabel
  ?owner_ofLabel
  ?part_ofLabel
WHERE {
  # All website values of the item (always contains at least the anchor URL).
  { SELECT ?item (GROUP_CONCAT(DISTINCT ?domain_; SEPARATOR = ";") AS ?domains)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> ;
            p:P856/ps:P856 ?domain_ .
    }
    GROUP BY ?item }

  # The OPTIONAL inside each subquery keeps the item row (with an empty
  # string) when the property has no value.
  { SELECT ?item (GROUP_CONCAT(DISTINCT ?official_name_; SEPARATOR = ";") AS ?official_name)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL { ?item wdt:P1448 ?official_name_ . }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?isin_; SEPARATOR = ";") AS ?isin)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL { ?item wdt:P946 ?isin_ . }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?other_name_; SEPARATOR = ";") AS ?other_name)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item skos:altLabel ?other_name_ .
        FILTER (LANG(?other_name_) = "en")
      }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?part_of_label_; SEPARATOR = ";") AS ?part_ofLabel)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item wdt:P361/rdfs:label ?part_of_label_ .
        FILTER (LANG(?part_of_label_) = "en")
      }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?parent_org_label_; SEPARATOR = ";") AS ?parent_orgLabel)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item wdt:P749/rdfs:label ?parent_org_label_ .
        FILTER (LANG(?parent_org_label_) = "en")
      }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?owned_by_label_; SEPARATOR = ";") AS ?owned_byLabel)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item wdt:P127/rdfs:label ?owned_by_label_ .
        FILTER (LANG(?owned_by_label_) = "en")
      }
    }
    GROUP BY ?item }

  # IRIs and labels are aggregated together so that, as before, only classes
  # that have an English label are listed.
  { SELECT ?item
           (GROUP_CONCAT(DISTINCT ?instance_of_; SEPARATOR = ";") AS ?instance_of)
           (GROUP_CONCAT(DISTINCT ?instance_of_label_; SEPARATOR = ";") AS ?instance_ofLabel)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item wdt:P31 ?instance_of_ .
        ?instance_of_ rdfs:label ?instance_of_label_ .
        FILTER (LANG(?instance_of_label_) = "en")
      }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?subsidiary_label_; SEPARATOR = ";") AS ?subsidiaryLabel)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item wdt:P355/rdfs:label ?subsidiary_label_ .
        FILTER (LANG(?subsidiary_label_) = "en")
      }
    }
    GROUP BY ?item }

  { SELECT ?item (GROUP_CONCAT(DISTINCT ?owner_of_label_; SEPARATOR = ";") AS ?owner_ofLabel)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> .
      OPTIONAL {
        ?item wdt:P1830/rdfs:label ?owner_of_label_ .
        FILTER (LANG(?owner_of_label_) = "en")
      }
    }
    GROUP BY ?item }

  # Listings: one row per stock exchange that carries a ticker qualifier.
  # Items without such a listing keep a single row with ?stock_exchange unbound.
  OPTIONAL {
    SELECT ?item ?stock_exchange
           (GROUP_CONCAT(DISTINCT ?ticker_; SEPARATOR = ";") AS ?ticker_list_)
    WHERE {
      ?item p:P856/ps:P856 <https://www.amazon.com> ;
            p:P414 ?listing_ .
      ?listing_ ps:P414 ?stock_exchange ;
                pq:P249 ?ticker_ .
    }
    GROUP BY ?item ?stock_exchange
  }
  # Preserve the empty-string result the flat GROUP_CONCAT gave for no ticker.
  BIND (COALESCE(?ticker_list_, "") AS ?ticker)

  # Runs last: fills ?itemLabel and ?stock_exchangeLabel for the final rows only.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
Related
I am building a Wikidata SPARQL query to retrieve details about specific people. When asking for their educational details (P69, "educated at"), I am not sure how to craft the query so that it collates their degrees and majors when a single "educated at" statement for an institution carries multiple degrees or majors (I used another query to find people with multiple degrees from Harvard).
This is the query:
# Details for one person (the item listed in VALUES): label, English aliases,
# description, gender, birthday, place of birth, image, and one "|"-separated
# text entry per education (P69) statement combination.
SELECT
  ?itemLabel
  (GROUP_CONCAT(DISTINCT ?alias_en; SEPARATOR = ";") AS ?aliases)
  ?itemDesc
  ?genderLabel
  ?birthday
  ?placeOfBirthLabel
  ?image
  (GROUP_CONCAT(DISTINCT ?edu_entry; SEPARATOR = "|") AS ?education)
WHERE {
  VALUES ?item { wd:Q5402996 }

  # English aliases only.
  OPTIONAL {
    ?item skos:altLabel ?alias_en .
    FILTER (LANG(?alias_en) = "en")
  }

  # Simple properties plus the automatic label lookup, kept in their own group.
  {
    OPTIONAL { ?item wdt:P21 ?gender . }
    OPTIONAL { ?item wdt:P569 ?birthday . }
    OPTIONAL { ?item wdt:P19 ?placeOfBirth . }
    OPTIONAL { ?item wdt:P18 ?image . }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
  }

  # Education statements, kept in their own group. Each combination of
  # degree and major on a statement produces a separate ?edu_entry string.
  {
    OPTIONAL {
      ?item p:P69 ?edu_stmt .
      ?edu_stmt ps:P69 ?edu_school .
      ?edu_school rdfs:label ?eduLabel .
      FILTER (LANG(?eduLabel) = "en")

      OPTIONAL { ?edu_stmt pq:P580 ?start_time . }
      OPTIONAL { ?edu_stmt pq:P582 ?end_time . }
      OPTIONAL {
        ?edu_stmt pq:P512 ?degree .
        ?degree rdfs:label ?degree_name .
        FILTER (LANG(?degree_name) = "en")
      }
      OPTIONAL {
        ?edu_stmt pq:P812 ?major .
        ?major rdfs:label ?major_name .
        FILTER (LANG(?major_name) = "en")
      }

      # Each fragment is empty when the corresponding qualifier is missing.
      BIND (IF(BOUND(?start_time), CONCAT("::start:", STR(YEAR(?start_time))), "") AS ?start_part)
      BIND (IF(BOUND(?end_time), CONCAT("::end:", STR(YEAR(?end_time))), "") AS ?end_part)
      BIND (IF(BOUND(?degree_name), CONCAT("::degrees:", STR(?degree_name)), "") AS ?degree_part)
      BIND (IF(BOUND(?major_name), CONCAT("::majors:", STR(?major_name)), "") AS ?major_part)
      BIND (CONCAT(?eduLabel, ?start_part, ?end_part, ?degree_part, ?major_part) AS ?edu_entry)
    }
  }

  # Second label-service call: explicitly maps the description to ?itemDesc.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
    ?item schema:description ?itemDesc .
  }
}
GROUP BY ?itemLabel ?itemDesc ?genderLabel ?birthday ?image ?placeOfBirthLabel
For the education result I get:
Harvard University::end:1980::degrees:Master of Arts::majors:astronomy|
Harvard University::end:1980::degrees:Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts|
University of Rochester::end:1976::degrees:Bachelor of Science
I would like to get:
Harvard University::end:1980::degrees:Master of Arts;Doctor of Philosophy::majors:astronomy|
University of Rochester::end:1976::degrees:Bachelor of Arts;Bachelor of Science|
How can I group the degrees in line like this in my query?
Or even better have them be nested in the JSON output rather than using delimiters?
I'm using the following approach to retrieve the Wikipedia URL of a Wikidata item in multiple languages with SPARQL:
# Items that are an instance of any of the six listed classes, with their
# English and Spanish labels and, when available, the English and Spanish
# Wikipedia article URLs.
SELECT ?item ?en ?url_en ?es ?url_es WHERE {
  { ?item wdt:P31 wd:Q6256. }
  UNION
  { ?item wdt:P31 wd:Q1250464. }
  UNION
  { ?item wdt:P31 wd:Q3624078. }
  UNION
  { ?item wdt:P31 wd:Q619610. }
  UNION
  { ?item wdt:P31 wd:Q179164. }
  UNION
  { ?item wdt:P31 wd:Q7270. }

  # Both labels are required: items lacking either one are dropped.
  ?item rdfs:label ?en FILTER (LANG(?en) = "en") .
  ?item rdfs:label ?es FILTER (LANG(?es) = "es") .

  # schema:inLanguage "en" alone also matches other English-language sites,
  # so the URL prefix (25 characters) restricts the match to Wikipedia.
  OPTIONAL {
    ?url_en schema:about ?item .
    ?url_en schema:inLanguage "en" .
    FILTER (SUBSTR(STR(?url_en), 1, 25) = "https://en.wikipedia.org/")
  }
  # The filter must test ?url_es: testing ?url_en here can never succeed for
  # the Spanish prefix, which left ?url_es permanently unbound.
  OPTIONAL {
    ?url_es schema:about ?item .
    ?url_es schema:inLanguage "es" .
    FILTER (SUBSTR(STR(?url_es), 1, 25) = "https://es.wikipedia.org/")
  }
} LIMIT 1000
I get far fewer results than the LIMIT value allows, whereas when retrieving labels only:
# Items that are an instance of any of the six listed classes, together with
# their English, Spanish and Italian labels (all three are required).
SELECT ?item ?en ?es ?it WHERE {
  VALUES ?class {
    wd:Q6256
    wd:Q1250464
    wd:Q3624078
    wd:Q619610
    wd:Q179164
    wd:Q7270
  }
  ?item wdt:P31 ?class ;
        rdfs:label ?en, ?es, ?it .
  FILTER (LANG(?en) = "en")
  FILTER (LANG(?es) = "es")
  FILTER (LANG(?it) = "it")
}
LIMIT 1000
I get more results, as if the
# Optional page about ?item that is in English and whose URL starts with the
# English Wikipedia prefix.
OPTIONAL {
  ?url_en schema:about ?item ;
          schema:inLanguage "en" .
  FILTER (SUBSTR(STR(?url_en), 1, 25) = "https://en.wikipedia.org/")
}
is limiting the results found in some way.
When I run the following query I get multiple rows for "Paul Gauguin", since there are multiple statements about his place and time of death; the same could of course happen for any of the other properties as well.
# People (P31 = Q5) whose P119 value is Q5024152, with optional image, birth
# and death details and the Swedish and English Wikipedia articles.
# Every property with several values yields one result row per combination.
SELECT DISTINCT
  ?item ?itemLabel ?itemDescription
  ?birthplaceLabel ?birthdate
  ?deathplaceLabel ?deathdate
  ?imageLabel
  ?article ?articleEn
WHERE {
  ?item wdt:P31 wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18 ?image . }
  OPTIONAL { ?item wdt:P19 ?birthplace . }
  OPTIONAL { ?item wdt:P569 ?birthdate . }
  OPTIONAL { ?item wdt:P20 ?deathplace . }
  OPTIONAL { ?item wdt:P570 ?deathdate . }

  OPTIONAL {
    ?article schema:about ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }
  OPTIONAL {
    ?articleEn schema:about ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" . }
}
Is there a way to receive only one row per object ID, ignoring any other "versions" of the object?
I have tried a bit with nested queries but I can't get it to work. Are there some other ways?
This query will work:
# One row per person: SAMPLE picks a single value for every property that may
# have several statements, and all non-aggregated variables are in GROUP BY
# (which already makes the rows unique, so no DISTINCT is needed).
#
# Aggregation inputs carry a trailing underscore because SPARQL 1.1 does not
# allow "(expr AS ?x)" when ?x is already in scope in the WHERE clause.
# The output variable names are unchanged.
SELECT ?item ?itemLabel ?itemDescription
       (SAMPLE(?birthplace_label_) AS ?birthplaceLabel)
       (SAMPLE(?birthdate_) AS ?birthdate)
       (SAMPLE(?deathplace_label_) AS ?deathplaceLabel)
       (SAMPLE(?deathdate_) AS ?deathdate)
       (SAMPLE(STR(?image_)) AS ?image)
       ?article ?articleEn
WHERE {
  ?item wdt:P31 wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18 ?image_ . }
  OPTIONAL { ?item wdt:P19 ?birthplace . }
  OPTIONAL { ?item wdt:P569 ?birthdate_ . }
  OPTIONAL { ?item wdt:P20 ?deathplace . }
  OPTIONAL { ?item wdt:P570 ?deathdate_ . }

  OPTIONAL {
    ?article schema:about ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }
  OPTIONAL {
    ?articleEn schema:about ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  # Manual label-service mode: every label/description variable is mapped
  # explicitly so the place labels can be aggregated under a separate name.
  SERVICE wikibase:label {
    ?birthplace rdfs:label ?birthplace_label_ .
    ?deathplace rdfs:label ?deathplace_label_ .
    ?item rdfs:label ?itemLabel ;
          schema:description ?itemDescription .
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
  }
}
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn
As you can see, we use the SAMPLE function and group by all the variables that aren't sampled.
You could also replace SAMPLE with GROUP_CONCAT if you wish to see all the possible values taken by a property.
E.g. ... (GROUP_CONCAT(?deathdate; SEPARATOR="; ") AS ?deathdates) ...
Note that GROUP_CONCAT takes strings as arguments.
I found the solution by slightly modifying Valerio Cocchi's answer.
# One row per person: SAMPLE picks a single value for every property that may
# have several statements; all non-aggregated variables are in GROUP BY.
#
# Aggregation inputs carry a trailing underscore because SPARQL 1.1 does not
# allow "(expr AS ?x)" when ?x is already in scope in the WHERE clause.
# The output variable names are unchanged.
SELECT ?item ?itemLabel ?itemDescription
       (SAMPLE(?birthplace_label_) AS ?birthplaceLabel)
       (SAMPLE(?birthdate_) AS ?birthdate)
       (SAMPLE(?deathplace_label_) AS ?deathplaceLabel)
       (SAMPLE(?deathdate_) AS ?deathdate)
       (SAMPLE(?image_label_) AS ?imageLabel)
       ?article ?articleEn
WHERE {
  ?item wdt:P31 wd:Q5 ;
        wdt:P119 wd:Q5024152 .

  OPTIONAL { ?item wdt:P18 ?image . }
  OPTIONAL { ?item wdt:P19 ?birthplace . }
  OPTIONAL { ?item wdt:P569 ?birthdate_ . }
  OPTIONAL { ?item wdt:P20 ?deathplace . }
  OPTIONAL { ?item wdt:P570 ?deathdate_ . }

  OPTIONAL {
    ?article schema:about ?item ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }
  OPTIONAL {
    ?articleEn schema:about ?item ;
               schema:isPartOf <https://en.wikipedia.org/> .
  }

  # Manual label-service mode: every label/description variable is mapped
  # explicitly so the sampled labels can be aggregated under a separate name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "sv,en, [AUTO_LANGUAGE]" .
    ?birthplace rdfs:label ?birthplace_label_ .
    ?deathplace rdfs:label ?deathplace_label_ .
    ?image rdfs:label ?image_label_ .
    ?item rdfs:label ?itemLabel .
    ?item schema:description ?itemDescription .
  }
}
GROUP BY ?item ?itemLabel ?itemDescription ?article ?articleEn
Before, my query worked perfectly. However, something changed on their backend, and now I'm getting an error when running this query.
# People (P31 = Q5) among the EntitySearch hits for "william", with every
# distinct date of birth, gender and image joined by " | ".
#
# Aggregation inputs carry a trailing underscore because SPARQL 1.1 does not
# allow "(expr AS ?x)" when ?x is already in scope in the WHERE clause; the
# output variable names are unchanged. STR() is applied because GROUP_CONCAT
# is defined on strings while these values are dates and IRIs.
SELECT ?person ?personLabel ?personDescription
       (GROUP_CONCAT(DISTINCT STR(?dob_); SEPARATOR = ' | ') AS ?dob)
       (GROUP_CONCAT(DISTINCT STR(?gender_); SEPARATOR = ' | ') AS ?gender)
       (GROUP_CONCAT(DISTINCT STR(?image_); SEPARATOR = ' | ') AS ?image)
WHERE {
  # MediaWiki API bridge: binds ?person to each item returned by the search.
  SERVICE wikibase:mwapi {
    bd:serviceParam wikibase:api "EntitySearch" .
    bd:serviceParam wikibase:endpoint "www.wikidata.org" .
    bd:serviceParam mwapi:search "william" .
    bd:serviceParam mwapi:language "en" .
    ?person wikibase:apiOutputItem mwapi:item .
  }
  ?person wdt:P31 wd:Q5 .
  OPTIONAL { ?person wdt:P569 ?dob_ }
  OPTIONAL { ?person wdt:P21 ?gender_ }
  OPTIONAL { ?person wdt:P18 ?image_ }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?person ?personLabel ?personDescription
I'm trying to retrieve some information from Wikidata, and I would find it useful to also collect the aliases of the entries. For example, Francesco Totti is also known as "il Capitano" or "er Pupone".
I'm retrieving all the Serie A football players with this SPARQL query:
# People (P31 = Q5) with a P54 statement for the given team that has no P582
# qualifier, ordered by the label of ?cognome.
# NOTE(review): the team id is spliced in by the host program; it should be
# validated there (e.g. against a Q-id pattern) before being concatenated.
SELECT ?subject ?nomeLabel ?cognomeLabel ?subjectLabel WHERE {
  ?subject wdt:P31 wd:Q5.
  ?subject p:P54 ?team .
  ?team ps:P54 wd:""" + team_code +""" .
  FILTER NOT EXISTS { ?team pq:P582 ?end }
  # Separate OPTIONALs: in a single OPTIONAL a subject that has only one of
  # the two properties would lose both values.
  OPTIONAL { ?subject wdt:P735 ?nome . }
  OPTIONAL { ?subject wdt:P734 ?cognome . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "it". }
}
ORDER BY (?cognomeLabel)
How can I modify my query to also retrieve the aliases?
Thanks
I have attempted a query with various labels. Here just for Roma:
# People (P31 = Q5) with a P54 statement for wd:Q2739 that has no P582
# qualifier, with optional name parts, nickname (P1449) and every
# skos:altLabel, ordered by the label of ?cognome.
SELECT DISTINCT
  ?subject ?subjectLabel
  ?nomeLabel ?cognomeLabel
  ?nickname ?alternative ?subjectAltLabel
WHERE {
  ?subject wdt:P31 wd:Q5 ;
           p:P54 ?membership .
  ?membership ps:P54 wd:Q2739 .
  FILTER NOT EXISTS { ?membership pq:P582 ?end_time . }

  OPTIONAL { ?subject wdt:P735 ?nome . }
  OPTIONAL { ?subject wdt:P734 ?cognome . }
  OPTIONAL { ?subject wdt:P1449 ?nickname . }
  OPTIONAL { ?subject skos:altLabel ?alternative . }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "it,en,fr" . }
}
ORDER BY ?cognomeLabel
I believe the P1449 property should be the most appropriate property to store an alias/nickname, but it does not seem to be used that much for football players. I just added "il Capitano" to Francesco Totti. Beyond that one there does not seem to be other nicknames for Roma players.
The "Also known as" label (in the right column) is not necessarily the nickname, but may be a spelling variation.
Here is something more generic, in case someone is interested: it lists all properties together with only their English "also known as" labels:
# Every wikibase:Property with its English label and description and a
# comma-separated list of its English skos:altLabel values (capped at 5000 rows).
SELECT
  ?property
  ?propertyLabel
  ?propertyDescription
  (GROUP_CONCAT(DISTINCT ?alias_en; SEPARATOR = ", ") AS ?altLabel_list)
WHERE {
  ?property rdf:type wikibase:Property .
  OPTIONAL {
    ?property skos:altLabel ?alias_en .
    FILTER (LANG(?alias_en) = "en")
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
GROUP BY ?property ?propertyLabel ?propertyDescription
LIMIT 5000
Another, simpler example for male actors, using itemAltLabel:
# Male actors: items with P21 = Q6581097 and P106 = Q33999.
# ?itemLabel and ?itemAltLabel are filled in by the label service (English).
SELECT ?item ?itemLabel ?itemAltLabel
WHERE {
  ?item wdt:P21 wd:Q6581097 ;
        wdt:P106 wd:Q33999 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}