SPARQL Wikidata Query to retrieve url for multiple languages - sparql

I wrote this SPARQL query to retrieve item labels in multiple languages:
SELECT ?item ?en ?es ?it WHERE {
{ ?item wdt:P31 wd:Q6256. }
UNION
{ ?item wdt:P31 wd:Q1250464. }
UNION
{ ?item wdt:P31 wd:Q3624078. }
UNION
{ ?item wdt:P31 wd:Q619610. }
UNION
{ ?item wdt:P31 wd:Q179164. }
UNION
{ ?item wdt:P31 wd:Q7270. }
?item rdfs:label ?en filter (lang(?en) = "en").
?item rdfs:label ?es filter (lang(?es) = "es").
?item rdfs:label ?it filter (lang(?it) = "it").
} LIMIT 2
that gives
{
"item": {
"type": "uri",
"value": "http://www.wikidata.org/entity/Q43"
},
"en": {
"xml:lang": "en",
"type": "literal",
"value": "Turkey"
},
"es": {
"xml:lang": "es",
"type": "literal",
"value": "Turquía"
},
"it": {
"xml:lang": "it",
"type": "literal",
"value": "Turchia"
}
}
I would now like to retrieve the item URL for each language. One approach could be the following:
SELECT ?cid ?country ?article_en ?article_de WHERE {
?cid wdt:P31 wd:Q3624078 .
OPTIONAL {
?cid rdfs:label ?country filter (lang(?country) = "en") .
}
OPTIONAL {
?cid rdfs:label ?country filter (lang(?country) = "de") .
}
OPTIONAL {
?article_en schema:about ?cid .
?article_en schema:inLanguage "en" .
FILTER (SUBSTR(str(?article_en), 1, 25) = "https://en.wikipedia.org/")
}
OPTIONAL {
?article_de schema:about ?cid .
?article_de schema:inLanguage "de" .
FILTER (SUBSTR(str(?article_de), 1, 25) = "https://de.wikipedia.org/")
}
}
that gives
{
"cid": {
"type": "uri",
"value": "http://www.wikidata.org/entity/Q236"
},
"article_de": {
"type": "uri",
"value": "https://de.wikipedia.org/wiki/Montenegro"
},
"article_en": {
"type": "uri",
"value": "https://en.wikipedia.org/wiki/Montenegro"
},
"country": {
"xml:lang": "en",
"type": "literal",
"value": "Montenegro"
}
}
Try it here.
This requires a separate article_* variable for each language.
Putting all together:
SELECT ?item ?en ?es ?url_en ?url_es WHERE {
{ ?item wdt:P31 wd:Q43229. }
UNION { ?item wdt:P31 wd:Q4830453. }
OPTIONAL { ?item rdfs:label ?en filter (lang(?en) = "en"). }
OPTIONAL { ?item rdfs:label ?es filter (lang(?es) = "es"). }
OPTIONAL {
?url_en schema:about ?item .
?url_en schema:inLanguage "en" .
FILTER (SUBSTR(str(?url_en), 1, 25) = "https://en.wikipedia.org/")
}
OPTIONAL {
?url_es schema:about ?item .
?url_es schema:inLanguage "es" .
FILTER (SUBSTR(str(?url_es), 1, 25) = "https://es.wikipedia.org/")
}
} LIMIT 10
Try it here.
that gives
{
"item": {
"type": "uri",
"value": "http://www.wikidata.org/entity/Q130178"
},
"en": {
"xml:lang": "en",
"type": "literal",
"value": "Arcadie"
},
"es": {
"xml:lang": "es",
"type": "literal",
"value": "Arcadie"
},
"url_en": {
"type": "uri",
"value": "https://en.wikipedia.org/wiki/Arcadie"
},
"url_es": {
"type": "uri",
"value": "https://es.wikipedia.org/wiki/Arcadie_(grupo)"
}
}
Is there a way to group the URL and the label results by language, like this:
{
"es": {
"url": {
"type": "uri",
"value": "https://es.wikipedia.org/wiki/Arcadie_(grupo)"
},
"label": {
"xml:lang": "es",
"type": "literal",
"value": "Arcadie"
}
}
}

Related

How to write distinct query in Elasticsearch 7.6.2

I am new to ES. My requirement is to fetch the n most recent records (by timestamp) with distinct trace_id values, along with their documents, as in the SQL query "Select distinct trace_id, job_name from stpjoblogs where status="SUCCESS"". Please let me know how to achieve the same in Elasticsearch 7.6.2.
My response would be:
"_source" : {
"port" : 57376,
"job_name" : "stbl-executive-dashboard",
"timestamp" : "2020-04-28T10:55:45.640267+00:00",
"trace_id" : "180600fd27ef8108",
"PCF_Space" : "Development",
"PCF_Org" : "EPSOrg",
"status" : "SUCCESS"
}
and I tried the query below:
GET /stpjoblogs/_search
{
"query": {
"bool": {
"must": [
{"match":{"status":"SUCCESS"}}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": { "field": "trace_id"}
}
}
}
But I am getting the error below:
{
"error" : {
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [trace_id] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
},
"status" : 400
}
You need to aggregate on trace_id.keyword. If the index was created with dynamic mapping, this sub-field is already present; otherwise you need to add it to the mapping:
"trace_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
}
GET /stpjoblogs/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": {
"field": "trace_id.keyword"
},
"aggs": {
"job_names": {
"terms": {
"field": "job_name.keyword",
"size": 10
},
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [{"timestamp":"desc"}]
}
}
}
}
}
}
}
}
To get the latest document for each trace_id, you can use field collapsing. It works like GROUP BY and returns the top document per distinct field value:
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"collapse": {
"field": "trace_id.keyword",
"inner_hits" : {
"name": "space",
"collapse" : {"field" : "job_name.keyword"},
"size": 3
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

Elasticsearch filter after aggregation

I want to convert an SQL query to an ES query.
This is my SQL query:
SELECT * FROM
(SELECT order_number, MIN(log_datetime) as log_datetime
FROM t_log
WHERE mall_id='amazon' AND action_name='order_register' AND log_level='3'
GROUP BY order_number) as temp
WHERE log_datetime BETWEEN '2018-11-16 00:00:00' AND '2018-11-16 23:59:59';
and this is my ES query:
{
"size": 0,
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"term": {
"mall_id": "devsdkwms1001"
}
},
{
"term": {
"action_name": "order_register"
}
},
{
"term": {
"log_level": 3
}
}
]
}
}
}
},
"aggs": {
"temp": {
"range": {
"field": "log_datetime",
"ranges": [
{
"from": "2018-11-16 00:00:00",
"to": "2018-11-16 23:59:59"
}
]
},
"aggs": {
"result": {
"terms": {
"field": "order_number",
"size": 0
}
}
}
}
}
}
My ES query doesn't work properly.
I can't find a way to filter the aggregated results in Elasticsearch;
I can only aggregate after filtering.
Is there any way to do this? Thank you.
If you want to filter aggregation results, take a look at the bucket selector aggregation:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-bucket-selector-aggregation.html

RDFazer show no results ( syntax error in the query )

This is my first time working with ontologies.
I downloaded Virtuoso and also RDFazer, which helps me find terms that match the ESCO ontology.
This is the default settings JSON file (including the query statement); I did not change it at all:
{
"sparql": "http://localhost:8890/sparql",
"fileURI": "",
"profile": "ESCO (virtuoso)",
"profiles": {
"ESCO (virtuoso)": {
"query": "select ?target ?label (group_concat(distinct(?labels); separator=\"| \") as ?altLabels) (group_concat(distinct(?types); separator=\"| \") as ?types)\n where { \n{ ?target a <http://ec.europa.eu/esco/model#Occupation> . } \nUNION\n { ?target a <http://ec.europa.eu/esco/model#Skill> . } \n?target <http://www.w3.org/2008/05/skos-xl#prefLabel> ?thing3. ?thing3 <http://www.w3.org/2008/05/skos-xl#literalForm> ?label .\n ?target <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?types .\n{ ?target <http://www.w3.org/2008/05/skos-xl#prefLabel> ?thing1. \n?thing1 <http://www.w3.org/2008/05/skos-xl#literalForm> ?plabels . \nFILTER (bif:contains(?plabels,\"'$searchTerm*'\")) . \nFILTER (lang(?plabels) = \"en\") . } \nUNION\n { ?target <http://www.w3.org/2008/05/skos-xl#altLabel> ?thing2.\n ?thing2 <http://www.w3.org/2008/05/skos-xl#literalForm> ?plabels .\n FILTER (bif:contains(?plabels,\"'$searchTerm*'\")) . \nFILTER (lang(?plabels)= \"en\") . \n} \nOPTIONAL {?target <http://www.w3.org/2008/05/skos-xl#altLabel> ?thing4\n. ?thing4 <http://www.w3.org/2008/05/skos-xl#literalForm> ?labels\n. FILTER (lang (?labels) = \"en\") \n}\nFILTER (lang (?label) = \"en\") \n} GROUP BY ?target ?label",
"uriToUrl": "'https://ec.europa.eu/esco/web/guest/concept/-/concept/thing/en/' +uri",
"labelProperty": "label",
"labelPredicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"storedInfo": {
"label": {
"predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"type": "property",
"decorate": {
"xml:lang": "en"
}
},
"altLabels": {
"predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"type": "property",
"csv": "|",
"decorate": {
"xml:lang": "en"
}
},
"types": {
"predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
"type": "relation",
"csv": "|"
}
}
},
"default (skos)": {
"query": "select ?target ?label (group_concat(distinct(?labels); separator=\"| \") as ?altLabels)\n (group_concat(distinct(?types); separator=\"| \") as ?types) where {\n ?target <http://www.w3.org/2004/02/skos/core##prefLabel> ?label .\n ?target <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?types .\n{ ?target <http://www.w3.org/2004/02/skos/core#prefLabel> ?plabels .\n FILTER (bif:contains(?plabels,\"'$searchTerm*'\")) . }\n UNION {\n ?target <http://www.w3.org/2004/02/skos/core#altLabel> ?plabels .\n FILTER (bif:contains(?plabels,\"'$searchTerm*'\")) .\n } OPTIONAL {\n?target <http://www.w3.org/2004/02/skos/core#altLabel> ?labels.\n FILTER (lang (?labels) = \"en\") }\nFILTER (lang (?label) = \"en\") \n} GROUP BY ?target ?label",
"uriToUrl": "uri",
"labelProperty": "label",
"labelPredicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"storedInfo": {
"label": {
"predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"type": "property",
"decorate": {
"xml:lang": "en"
}
},
"altLabels": {
"predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"type": "property",
"csv": "|",
"decorate": {
"xml:lang": "en"
}
},
"types": {
"predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
"type": "relation",
"csv": "|"
}
}
},
"ESCO (pure SPARQL 1.1)": {
"query": "select ?target ?label (group_concat(distinct(?labels); separator=\"| \") as ?altLabels) (group_concat(distinct(?ttypes); separator=\"| \") as ?types)\nwhere { \n{ ?target a <http://ec.europa.eu/esco/model#Occupation> . } \nUNION\n{ ?target a <http://ec.europa.eu/esco/model#Skill> . } \n?target <http://www.w3.org/2008/05/skos-xl#prefLabel> ?thing3. ?thing3 <http://www.w3.org/2008/05/skos-xl#literalForm> ?label .\n?target <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?ttypes .\n{ ?target <http://www.w3.org/2008/05/skos-xl#prefLabel> ?thing1. \n?thing1 <http://www.w3.org/2008/05/skos-xl#literalForm> ?plabels . \n} UNION\n{ ?target <http://www.w3.org/2008/05/skos-xl#altLabel> ?thing2.\n?thing2 <http://www.w3.org/2008/05/skos-xl#literalForm> ?plabels .\n} \nFILTER (regex(?plabels,\".*$searchTerm.*\",\"i\")) . \nFILTER (lang(?plabels) = \"en\") . \nOPTIONAL {?target <http://www.w3.org/2008/05/skos-xl#altLabel> ?thing4\n. ?thing4 <http://www.w3.org/2008/05/skos-xl#literalForm> ?labels\n. FILTER (lang (?labels) = \"en\") \n}\nFILTER (lang (?label) = \"en\") \n} GROUP BY ?target ?label",
"uriToUrl": "'https://ec.europa.eu/esco/web/guest/concept/-/concept/thing/en/' +uri",
"labelProperty": "label",
"labelPredicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"storedInfo": {
"label": {
"predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"type": "property",
"decorate": {
"xml:lang": "en"
}
},
"altLabels": {
"predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"type": "property",
"csv": "|",
"decorate": {
"xml:lang": "en"
}
},
"types": {
"predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
"type": "relation",
"csv": "|"
}
}
},
"ESCO (fuseki, text index)": {
"query": "select ?target ?label (group_concat(distinct(?labels); separator=\"| \") as ?altLabels) (group_concat(distinct(?ttypes); separator=\"| \") as ?types)\nwhere { \n{ ?target a <http://ec.europa.eu/esco/model#Occupation> . } \nUNION\n{ ?target a <http://ec.europa.eu/esco/model#Skill> . } \n?target <http://www.w3.org/2008/05/skos-xl#prefLabel> ?thing3. ?thing3 <http://www.w3.org/2008/05/skos-xl#literalForm> ?label .\n?target <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?ttypes .\n?target <http://jena.apache.org/text#query> \"$searchTerm\". \nOPTIONAL {?target <http://www.w3.org/2008/05/skos-xl#altLabel> ?thing4\n. ?thing4 <http://www.w3.org/2008/05/skos-xl#literalForm> ?labels\n. FILTER (lang (?labels) = \"en\") \n}\nFILTER (lang (?label) = \"en\") \n} GROUP BY ?target ?label",
"uriToUrl": "'https://ec.europa.eu/esco/web/guest/concept/-/concept/thing/en/' +uri",
"labelProperty": "label",
"labelPredicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"storedInfo": {
"label": {
"predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"type": "property",
"decorate": {
"xml:lang": "en"
}
},
"altLabels": {
"predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"type": "property",
"csv": "|",
"decorate": {
"xml:lang": "en"
}
},
"types": {
"predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
"type": "relation",
"csv": "|"
}
}
}
}
}
I tried the query many times. I do not know why it reports a syntax error, as shown in this image (sorry, I could not copy the error; I was only able to take a screenshot).
Is there any way I can fix this error?
Many thanks in advance.
The solution was to update the apparently outdated Virtuoso installation to a current version.

Elasticsearch match combos of two fields

How can I get this simple SQL query running on Elasticsearch?
SELECT * FROM [mytype] where (id=123 and cid = classroomIdA) or
(id=234 and cid = classroomIdB) or (id=345 and cid = classroomIdC)
I'm really having trouble with the syntax; multi-match queries don't work in my case. What type of query should I use?
The right way to do it is to combine bool/should (for the outer OR conditions) and bool/filter (for the inner AND conditions) together.
POST mytype/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"filter": [
{
"term": {
"id": 123
}
},
{
"term": {
"cid": "classroomIdA"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"id": 234
}
},
{
"term": {
"cid": "classroomIdB"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"id": 345
}
},
{
"term": {
"cid": "classroomIdC"
}
}
]
}
}
]
}
}
}
UPDATE
The equivalent ES 1.7 query would be (just replace bool/filter by bool/must):
POST mytype/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"must": [
{
"term": {
"id": 123
}
},
{
"term": {
"cid": "classroomIdA"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"id": 234
}
},
{
"term": {
"cid": "classroomIdB"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"id": 345
}
},
{
"term": {
"cid": "classroomIdC"
}
}
]
}
}
]
}
}
}

elastic search query parsing exception when adding term filter

I'm not quite sure why the term filter "term": {"language": "Austrian"} is causing an elastic search parse exception.
The surprising thing is it works if I remove the query_string query.
Where would I put "term": {"language": "Austrian"} filter if it doesn't go there?
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
}
]
}
},
"filter": {
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
},
"term": { <-- Causes parse exception
"language": "Austrian"
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}
Inside your filter, you need a bool filter if you have more than one constraint, which is your case, since you have a query filter and a term filter. So the correct way of doing it is like this:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
}
]
}
},
"filter": {
"bool": { <---- add this
"must": [ <---- and this
{
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
}
},
{
"term": {
"language": "Austrian"
}
}
]
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}
However, if I may add something, I would rewrite your query a bit differently: move the query_string over to the query part and the status_type terms constraint over to the filter part, which feels more "natural". Also, in your query part you don't need a bool/must if you have only one constraint.
{
"query": {
"filtered": {
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
},
"filter": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
},
{
"term": {
"language": "Austrian"
}
}
]
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}