Wikidata query for items with one Wikipedia page not in English - sparql

I want to find Wikidata items that each link to exactly one Wikipedia page, where that page is not on the English Wikipedia (en.wikipedia.org).
I came up with this query:
# Outer query: for the items returned by the sub-select, keep those having an
# article whose URL does not carry "en" right after the scheme.
SELECT ?item
WHERE {
  ?article schema:about ?item .
  # SUBSTR is 1-based; assuming an "https://" URL, characters 9-10 are the
  # first two letters of the host name.
  FILTER (SUBSTR(str(?article), 9, 2) != "en") .
  {
    # Sub-select: items with a 'Your paintings' identifier, counting the
    # distinct languages of their "xx.wikipedia.org/" articles.
    SELECT ?item (COUNT(DISTINCT ?lang) AS ?count)
    WHERE {
      ?item wdt:P1367 ?yp_id . # BBC 'Your paintings' artist identifier
      ?article schema:about ?item .
      # Characters 11-25 must be ".wikipedia.org/", i.e. a two-letter subdomain.
      FILTER (SUBSTR(str(?article), 11, 15) = ".wikipedia.org/") .
      ?article schema:inLanguage ?lang .
    }
    GROUP BY ?item
    HAVING (?count=1)
    ORDER BY DESC (?count)
  }
}
It executes. However, I always get a timeout.
Is there a better query to achieve what I am looking for?

Here are some tips:
Since you take only ?count=1, there is no reason to order by ?count.
Since for each article you can have only one ?lang, you can count by ?article without considering a redundant variable.
Instead of working on (sub)strings, just use the schema:isPartOf property for selecting the specific domain that you want to exclude.
Use FILTER NOT EXISTS instead of FILTER (... != ...)
The fourth optimization is the most important and is sufficient by itself.
# Items with exactly one "xx.wikipedia.org/" article and no article that is
# part of the English Wikipedia.
SELECT ?item
WHERE {
  # Drop every item that has an English Wikipedia sitelink.
  FILTER NOT EXISTS {
    ?article schema:about ?item .
    ?article schema:isPartOf <https://en.wikipedia.org/> .
  }
  {
    # Sub-select: count the distinct Wikipedia articles per item.
    SELECT ?item (COUNT(DISTINCT ?article) AS ?count)
    WHERE {
      ?item wdt:P1367 ?yp_id . # BBC 'Your paintings' artist identifier
      ?article schema:about ?item .
      # Characters 11-25 must be ".wikipedia.org/", i.e. a two-letter subdomain.
      FILTER (SUBSTR(str(?article), 11, 15) = ".wikipedia.org/") .
    }
    GROUP BY ?item
    HAVING (?count=1)
  }
}

Related

SPARQL query that returns Wikipedia labels from Wikidata itemLabel

I am new to SPARQL,
Is it possible to write a query that returns Wikipedia box information for a corresponding item label from the Wikipedia box for the Arabic Language that appears at the bottom of the Wikidata item page?
see the picture:
Instead of the Wikipedia URL in the following Query, I need to return the Wikipedia Label, in our case (الرامة (جنين))
Try Query on Wikidata Query Service
# Look up items by an Arabic label and return, for each match, its Arabic
# Wikipedia article, type, main category and Arabic aliases.
SELECT DISTINCT ?article ?item ?itemLabel ?itemDescription ?entity_type ?main_category
       (GROUP_CONCAT(DISTINCT(?altLabel); separator = ", ") AS ?altLabel_list)
WHERE {
  # The language tag is introduced by "@"; "#" would start a comment and
  # swallow both the tag and the terminating dot.
  ?item ?label "الرامة"@ar .
  ?item wdt:P31 ?entity_type .
  # Exclude items that are instances of wd:Q4167410.
  MINUS { ?item wdt:P31 wd:Q4167410 }
  OPTIONAL { ?item wdt:P910 ?main_category }
  # Sitelink on the Arabic Wikipedia.
  ?article schema:about ?item ;
           schema:isPartOf <https://ar.wikipedia.org/> .
  OPTIONAL { ?item skos:altLabel ?altLabel . FILTER (lang(?altLabel) = "ar") }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "ar" . }
}
GROUP BY ?article ?item ?itemLabel ?itemDescription ?entity_type ?main_category
This is the answer by UninformedUser:
> # Title (schema:name) of the Arabic Wikipedia article about wd:Q12187640.
> SELECT ?article ?wikipediaLabel WHERE {
>   ?article schema:about wd:Q12187640 ;
>            schema:isPartOf <https://ar.wikipedia.org/> ;
>            schema:name ?wikipediaLabel .
> }

How to query wikidata using SPARQL to find a place that matches certain criteria and is geographically near another city

I wrote the following SPARQL query to find the wikidata item with the label "San Leucio" in Italy.
# Items in Italy (wd:Q38) that carry the English label 'San Leucio' on any
# predicate, with their English name, an image and, when present, an English
# Wikivoyage article.
SELECT DISTINCT *
WHERE {
  # The language tag is introduced by "@"; "#" would start a comment and
  # swallow both the tag and the terminating dot.
  ?location ?label 'San Leucio'@en .
  ?location wdt:P17 wd:Q38 .
  ?location rdfs:label ?locationName .
  OPTIONAL {
    ?article schema:about ?location .
    ?article schema:isPartOf <https://en.wikivoyage.org/> .
  }
  ?location wdt:P18 ?image .
  FILTER(lang(?locationName) = "en")
}
The query returns these 3 results:
wd:Q55179410
wd:Q20009063
wd:Q846499
The result I want is wd:Q846499, which is outside of Naples, Italy. Is there any way I could further filter this query to return the result that is nearest to Naples? I know that I can get the geoCoordinates for each of these with ?location wdt:P625 ?coordinates, but I'm not sure how I could use that to compare to the geo-coordinates of Naples to get what I want.
# The Italian item labelled 'San Leucio' that lies closest to Naples.
SELECT DISTINCT * {
  VALUES ?naples { wd:Q2634 }
  # The subject must be ?naples: any other variable name is unbound and would
  # match the coordinates of every item instead of those of Naples.
  ?naples wdt:P625 ?naples_coordinates .
  # The language tag is introduced by "@"; "#" would start a comment.
  ?location rdfs:label 'San Leucio'@en .
  ?location wdt:P17 wd:Q38 .
  ?location wdt:P18 ?image .
  ?location wdt:P625 ?location_coordinates .
  OPTIONAL {
    ?article schema:about ?location .
    ?article schema:isPartOf <https://en.wikivoyage.org/> .
  }
  BIND (geof:distance(?location_coordinates, ?naples_coordinates) AS ?distance)
}
ORDER BY ?distance
LIMIT 1

Identify entity of a Wikipedia page

My question is related to a similar question/comment which unfortunately never received an answer.
Given a list of multiple Wikipedia pages, e.g.:
https://en.wikipedia.org/wiki/Donald_Trump
https://en.wikipedia.org/wiki/The_Matrix
https://en.wikipedia.org/wiki/Tiger
...
how can I find out what type of entity these articles refer to. i.e. ideally I would want something on a higher level e.g. person, movie, animal etc.
My best guess so far was the Wikidata API using SPARQL to move back the instance_of or subclass tree. However, this did not lead to meaningful results.
# For each English Wikipedia page title, find the Wikidata item and walk its
# instance-of (P31) and subclass-of (P279) chains.
SELECT ?lemma ?item ?itemLabel ?itemDescription ?instance ?instanceLabel ?subclassLabel
WHERE {
  # Page titles as language-tagged literals; the tag is introduced by "@"
  # ("#" would start a comment and leave plain, untagged strings).
  VALUES ?lemma {
    "Donald Trump"@en
    "The Matrix"@en
    "Tiger"@en
  }
  # Sitelink on the English Wikipedia whose title equals ?lemma.
  ?sitelink schema:about ?item ;
            schema:isPartOf <https://en.wikipedia.org/> ;
            schema:name ?lemma .
  ?item wdt:P31* ?instance .
  ?item wdt:P279* ?subclass .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en,da,sv" .
  }
}
The result can be seen here: https://w.wiki/ZmQ
One option would of course also be to look at the itemDescription, but I'm afraid that this is too granular to build meaningful groups from larger lists and count frequencies later on.
Does anyone have a hint/idea on how to get more general entity categories? Maybe also from the mediawiki API?
Any input would be highly appreciated!
Here are three possibilities, side-by-side:
# Three ways of typing the item behind each English Wikipedia page title:
#   ?a - labels of classes reached via P31 followed by any number of P279 steps
#   ?b - labels of direct P279 values
#   ?c - labels of direct P31 values
SELECT ?lemma ?item
       (GROUP_CONCAT(DISTINCT ?instanceLabel; SEPARATOR = " ") AS ?a)
       (GROUP_CONCAT(DISTINCT ?subclassLabel; SEPARATOR = " ") AS ?b)
       (GROUP_CONCAT(DISTINCT ?isaLabel; SEPARATOR = " ") AS ?c)
WHERE {
  # Page titles as language-tagged literals; the tag is introduced by "@"
  # ("#" would start a comment and leave plain, untagged strings).
  VALUES ?lemma {
    "Donald Trump"@en
    "The Matrix"@en
    "Tiger"@en
  }
  # Sitelink on the English Wikipedia whose title equals ?lemma.
  ?sitelink schema:about ?item ;
            schema:isPartOf <https://en.wikipedia.org/> ;
            schema:name ?lemma .
  OPTIONAL { ?item (wdt:P31/(wdt:P279*)) ?instance . }
  OPTIONAL { ?item wdt:P279 ?subclass . }
  OPTIONAL { ?item wdt:P31 ?isa . }
  # Labels are bound explicitly so that they can be used in the aggregates.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en,da,sv" .
    ?instance rdfs:label ?instanceLabel .
    ?subclass rdfs:label ?subclassLabel .
    ?isa rdfs:label ?isaLabel .
  }
  # Here, you could add: FILTER(?instanceLabel in ("mammal"@en, "movie"@en, "musical"@en (and so on...)))
}
GROUP BY ?lemma ?item
Live here.
If you're looking at labels such as "film" and "mammal", i. e. a couple dozen at most, you could explicitly list them in order of preference, then use the first one that occurs.
Note that you may be running into this bug: https://www.wikidata.org/wiki/Wikidata:SPARQL_tutorial#wikibase:Label_and_aggregations_bug

How to SPARQL select all living politicians from EU political parties on Wikidata?

Can somebody please help me write a SPARQL query that selects all currently living politicians whose political institutions are from a country within the EU?
Currently I have 3 queries, but I don't know how to combine them
The first one checks for people who are living, whose occupation is politician and/or who are a member of a political party
# People whose occupation (P106) is wd:Q82955 and who have no death-related
# statement, with their party membership (P102) when one is recorded.
SELECT DISTINCT ?politician ?politicianLabel
WHERE {
  ?politician wdt:P106 wd:Q82955 .
  OPTIONAL {
    ?politician wdt:P102 ?membership .
  }
  # A single filter is enough: it applies to the whole group regardless of
  # where it is written, so repeating it changes nothing.
  FILTER NOT EXISTS { ?politician wdt:P570|wdt:P509|wdt:P20 ?o }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
The second one shows all EU countries
# Every item stated to be a member (P463) of wd:Q458, with its English label.
SELECT ?country ?countryLabel
WHERE {
  ?country wdt:P463 wd:Q458 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
And the third one finds parties from a specific country
# Instances of wd:Q7278 together with their country (P17).
SELECT ?party
WHERE {
  ?party wdt:P31 wd:Q7278 .
  # ?country still has to be restricted to the EU countries.
  ?party wdt:P17 ?country .
}
But how can I combine these queries? Can somebody help?
Thank you
Thank you @AKSW
This works:
# Living politicians who are members of a party located in an EU member state.
SELECT DISTINCT ?politician
WHERE {
  ?country wdt:P463 wd:Q458 .
  ?party wdt:P31 wd:Q7278 .
  ?party wdt:P17 ?country .
  # The hint refers to the pattern written immediately before it, so the two
  # must stay together.
  ?politician wdt:P106 wd:Q82955 . hint:Prior hint:runFirst true .
  ?politician wdt:P102 ?party .
  # Keep only people without any death-related statement.
  FILTER NOT EXISTS { ?politician wdt:P570|wdt:P509|wdt:P20 ?o }
}
If you start with the country and add the
hint:Prior hint:runFirst true.
line, it is optimized enough to return a result.

Search Wikidata SPARQL for movies or tv series by title

I want to search "Star Wars" across both TV series (http://www.wikidata.org/entity/Q5398426) and films (http://www.wikidata.org/entity/Q11424). The following SPARQL is for searching movies:
# search movies by name
# Search films (instances of wd:Q11424) whose label contains "star wars",
# compared case-insensitively; also return the title (P1476) when present.
SELECT DISTINCT ?item ?name
WHERE {
  ?item wdt:P31 wd:Q11424 .
  ?item rdfs:label ?queryByTitle .
  OPTIONAL { ?item wdt:P1476 ?name . }
  # Alternative, regex-based match:
  #FILTER(REGEX(?queryByTitle, "star wars", "i"))
  FILTER(contains(lcase(?queryByTitle), "star wars"))
}
LIMIT 100
Any idea how I can search by TV Series as well?
@AKSW answered the question. Here's the full gist for anyone searching in the future.
# Search TV series (wd:Q5398426) and films (wd:Q11424) whose label matches
# "star wars" case-insensitively; also return the title (P1476) when present.
SELECT DISTINCT ?item ?name
WHERE {
  # Accept either type.
  VALUES ?type { wd:Q5398426 wd:Q11424 }
  ?item wdt:P31 ?type .
  ?item rdfs:label ?queryByTitle .
  OPTIONAL { ?item wdt:P1476 ?name . }
  FILTER(REGEX(?queryByTitle, "star wars", "i"))
}
LIMIT 100