select value that occurs most frequently by group

select value that occurs most frequently by group - sparql

I have RDF data about hospital patients, including their date of birth. There are frequently multiple triples about their date of birth, and some of those triples may be wrong. My group has decided to use this rule: whatever date occurs most frequently will provisionally be considered correct. It's clear how to do this in any programming language of our choice, external to SPARQL.
Is an aggregation of aggregations possible in SPARQL?
I have read the similar question SPARQL selecting MAX value of a counter, but I'm not there yet.
Given these triples:
# Sample data: two study participants, each linked to three birth-date records.
@prefix turbo: <http://example.org/ontologies/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# Participant b6be...: all three records carry the same date.
<http://example.org/ontologies/b6be95364ec943af2ef4ab161c11c855>
    a <http://example.org/ontologies/StudyPartWithBBDonation> ;
    turbo:hasBirthDateO turbo:3950b2b6-f575-4074-b0e8-f9fa3378f3be, turbo:4250aafa-4b0c-4f73-92b6-7639f427b61d, turbo:a3e6676e-a214-4af4-b8ef-34a8e20170bf .
turbo:3950b2b6-f575-4074-b0e8-f9fa3378f3be turbo:hasDateValue "1971-12-30"^^xsd:date .
turbo:4250aafa-4b0c-4f73-92b6-7639f427b61d turbo:hasDateValue "1971-12-30"^^xsd:date .
turbo:a3e6676e-a214-4af4-b8ef-34a8e20170bf turbo:hasDateValue "1971-12-30"^^xsd:date .

# Participant 6e20...: two records agree on 2000-04-04, one says 2000-04-05.
turbo:6e200ca0d5150282787464a2bda55814
    a turbo:StudyPartWithBBDonation ;
    turbo:hasBirthDateO turbo:b09519f5-b123-40d5-bb4a-737ec9f8b9a8, turbo:06c56881-a6c7-4d1d-993b-add8862dffd7, turbo:12ef184d-c8d6-4d93-a558-a3ba47bb56ca .
turbo:b09519f5-b123-40d5-bb4a-737ec9f8b9a8 turbo:hasDateValue "2000-04-04"^^xsd:date .
turbo:06c56881-a6c7-4d1d-993b-add8862dffd7 turbo:hasDateValue "2000-04-04"^^xsd:date .
turbo:12ef184d-c8d6-4d93-a558-a3ba47bb56ca turbo:hasDateValue "2000-04-05"^^xsd:date .
This query
# Count how many times each birth date is asserted for each study participant.
# The rdf: prefix is declared explicitly because the pattern uses rdf:type.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX turbo: <http://example.org/ontologies/>
SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount)
WHERE {
  ?part rdf:type turbo:StudyPartWithBBDonation ;
        turbo:hasBirthDateO ?dob .
  ?dob turbo:hasDateValue ?xsddate
}
GROUP BY ?part ?xsddate
gives the following:
+----------------------------------------+------------------------+------------------+
| part | xsddate | datecount |
+----------------------------------------+------------------------+------------------+
| turbo:6e200ca0d5150282787464a2bda55814 | "2000-04-05"^^xsd:date | "1"^^xsd:integer |
| turbo:b6be95364ec943af2ef4ab161c11c855 | "1971-12-30"^^xsd:date | "3"^^xsd:integer |
| turbo:6e200ca0d5150282787464a2bda55814 | "2000-04-04"^^xsd:date | "2"^^xsd:integer |
+----------------------------------------+------------------------+------------------+
I only want to see the date with the highest count for each patient who is participating in a study:
+----------------------------------------+------------------------+------------------+
| part | xsddate | datecount |
+----------------------------------------+------------------------+------------------+
| turbo:b6be95364ec943af2ef4ab161c11c855 | "1971-12-30"^^xsd:date | "3"^^xsd:integer |
| turbo:6e200ca0d5150282787464a2bda55814 | "2000-04-04"^^xsd:date | "2"^^xsd:integer |
+----------------------------------------+------------------------+------------------+
I think I'm getting close here. Now I need to get the counts and max counts on the same row!
# Attempt to show every (participant, date) count next to that participant's
# maximum count. UNION appends the two result sets rather than joining them,
# so ?datecount and ?countmax never end up bound on the same row.
PREFIX rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX turbo: <http://example.org/ontologies/>

SELECT ?part ?xsddate ?datecount ?countmax
WHERE {
  # Branch 1: occurrences of each date per participant.
  {
    SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount)
    WHERE {
      ?part rdf:type turbo:StudyPartWithBBDonation ;
            turbo:hasBirthDateO ?dob .
      ?dob turbo:hasDateValue ?xsddate
    }
    GROUP BY ?part ?xsddate
  }
  UNION
  # Branch 2: the largest of those per-date counts for each participant.
  {
    SELECT ?part (MAX(?datecount) AS ?countmax)
    WHERE {
      SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount)
      WHERE {
        ?part rdf:type turbo:StudyPartWithBBDonation ;
              turbo:hasBirthDateO ?dob .
        ?dob turbo:hasDateValue ?xsddate
      }
      GROUP BY ?part ?xsddate
    }
    GROUP BY ?part
  }
}
giving
+----------------------------------------+------------------------+------------------+------------------+
| part | xsddate | datecount | countmax |
+----------------------------------------+------------------------+------------------+------------------+
| turbo:6e200ca0d5150282787464a2bda55814 | "2000-04-05"^^xsd:date | "1"^^xsd:integer | |
| turbo:b6be95364ec943af2ef4ab161c11c855 | "1971-12-30"^^xsd:date | "3"^^xsd:integer | |
| turbo:6e200ca0d5150282787464a2bda55814 | "2000-04-04"^^xsd:date | "2"^^xsd:integer | |
| turbo:6e200ca0d5150282787464a2bda55814 | | | "2"^^xsd:integer |
| turbo:b6be95364ec943af2ef4ab161c11c855 | | | "3"^^xsd:integer |
+----------------------------------------+------------------------+------------------+------------------+

Essentially, you just need to replace UNION with . in your query (or you could simply remove the UNION, as @AKSW has pointed out in the comment below).
In GraphDB, however, you will receive an error:
Variable ?datecount is already used in a previous projection. Bindings
are not propagated through projections since Sesame 2.8, so this may
lead to logical errors in the query.
Thus, change your query in this way:
# Join each (participant, date) count with the participant's maximum count.
# The outer counter is called ?datecount_ so that it does not reuse the name
# ?datecount, which is projected inside the second subquery.
PREFIX rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX turbo: <http://example.org/ontologies/>

SELECT ?part ?xsddate ?datecount_ ?countmax
WHERE {
  # Occurrences of each date per participant.
  {
    SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount_)
    WHERE {
      ?part rdf:type turbo:StudyPartWithBBDonation ;
            turbo:hasBirthDateO ?dob .
      ?dob turbo:hasDateValue ?xsddate
    }
    GROUP BY ?part ?xsddate
  }
  .
  # Largest per-date count for each participant; joined to the above on ?part.
  {
    SELECT ?part (MAX(?datecount) AS ?countmax)
    WHERE {
      SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount)
      WHERE {
        ?part rdf:type turbo:StudyPartWithBBDonation ;
              turbo:hasBirthDateO ?dob .
        ?dob turbo:hasDateValue ?xsddate
      }
      GROUP BY ?part ?xsddate
    }
    GROUP BY ?part
  }
}
In Blazegraph, you could use named subqueries:
# Same join as above, written with a Blazegraph named subquery so the counting
# pattern is spelled out once (as %sub) and included twice.
PREFIX rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX turbo: <http://example.org/ontologies/>

SELECT ?part ?xsddate ?datecount ?countmax
WITH {
  # %sub: occurrences of each date per participant.
  SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount)
  WHERE {
    ?part rdf:type turbo:StudyPartWithBBDonation ;
          turbo:hasBirthDateO ?dob .
    ?dob turbo:hasDateValue ?xsddate
  }
  GROUP BY ?part ?xsddate
} AS %sub
WHERE {
  # Largest per-date count for each participant.
  {
    SELECT ?part (MAX(?datecount) AS ?countmax)
    WHERE { INCLUDE %sub }
    GROUP BY ?part
  }
  # The per-date rows themselves, joined to the maximum on ?part.
  INCLUDE %sub
}

My elaboration on Stanislav's awesome answer
renamed the ?datecount in one of the {} patterns
added a filter
inserting the consensus DOB into a named graph within the triplestore
.
# Write the most frequently asserted birth date of every study participant
# into the named graph turbo:DOB_conclusions, attached to a freshly minted node.
PREFIX rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX turbo: <http://example.org/ontologies/>

INSERT {
  GRAPH turbo:DOB_conclusions {
    ?part    turbo:hasBirthDateO   ?DOBconc .
    ?DOBconc turbo:hasDateValue    ?xsddate .
    ?DOBconc turbo:conclusionated  true .
    ?DOBconc rdf:type              <http://www.ebi.ac.uk/efo/EFO_0004950> .
  }
}
WHERE {
  # Occurrences of each date per participant.
  {
    SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount)
    WHERE {
      ?part rdf:type turbo:StudyPartWithBBDonation ;
            turbo:hasBirthDateO ?dob .
      ?dob turbo:hasDateValue ?xsddate
    }
    GROUP BY ?part ?xsddate
  }
  .
  # Largest per-date count for each participant (inner counter renamed to
  # ?datecount2 so it does not clash with ?datecount above).
  {
    SELECT ?part (MAX(?datecount2) AS ?countmax)
    WHERE {
      SELECT ?part ?xsddate (COUNT(?xsddate) AS ?datecount2)
      WHERE {
        ?part rdf:type turbo:StudyPartWithBBDonation ;
              turbo:hasBirthDateO ?dob .
        ?dob turbo:hasDateValue ?xsddate
      }
      GROUP BY ?part ?xsddate
    }
    GROUP BY ?part
  }
  # Keep only the date(s) whose count equals the maximum. When two dates tie,
  # both rows pass and each one receives its own conclusion node.
  FILTER ( ?datecount = ?countmax )
  # Mint a new IRI for the conclusion node of every surviving row.
  BIND(uri(concat("http://transformunify.org/ontologies/", struuid())) AS ?DOBconc)
}

Related

trying to display data using SPARQL

I am trying to display the top 10 richest tennis players. I want to write a query that lists tennis players together with their name, net worth, and nationality.
# Top 10 tennis players by net worth, with one nationality each.
# The previous version projected COUNT() of the matching statements, so the
# "net worth" and "nationality" columns held statement counts, not values.
# NOTE(review): presumably P106 = occupation, Q10833314 = tennis player,
# P27 = country of citizenship and P2218 = net worth — confirm on Wikidata.
# NOTE(review): wikibase:quantityAmount is compared as a bare number, so
# amounts recorded in different currencies are ranked together — confirm
# whether that is acceptable.
SELECT ?item ?itemLabel ?networths ?nationality ?nationalityLabel WHERE {
  {
    # One row per player: the highest recorded amount and one citizenship.
    SELECT ?item (MAX(?numericQuantity) AS ?networths) (SAMPLE(?country) AS ?nationality) WHERE {
      ?item p:P106 ?statement0.
      ?statement0 (ps:P106/(wdt:P279*)) wd:Q10833314.
      ?item p:P27 ?statement1.
      ?statement1 ps:P27 ?country.
      ?item p:P2218 ?statement2.
      ?statement2 (psv:P2218/wikibase:quantityAmount) ?numericQuantity.
    }
    GROUP BY ?item
    ORDER BY DESC(?networths)
    LIMIT 10
  }
  # Placed after the subquery so ?item and ?nationality are bound when the
  # label service fills in ?itemLabel and ?nationalityLabel.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }
}
ORDER BY DESC(?networths)

SPARQL degrees of separation

Using SPARQL, how can I find entities linked to an entity by x degrees of separation, for example 4 or 5 degrees? For example, with test data like this, how do I find which of my relatives connects to which one? The aim is to form a graph from the triples. (Using only “raised” as predicates in this example, but any connection should be considered.)
<http://www.example.com/great-grandma> <:raised1> <http://www.example.com/grandma> .
<http://www.example.com/grandma> <:raised2> <http://www.example.com/ma> .
<http://www.example.com/ma> <:raised3> <http://www.example.com/me> .
<http://www.example.com/ma> <:raised4> <http://www.example.com/sis> .
<http://www.example.com/me> <:raised5> <http://www.example.com/kid> .
<http://www.example.com/spouse> <:raised6> <http://www.example.com/kid> .
So from the above, the wanted first-degree relationships would be:
<http://www.example.com/ma> <:raised3> <http://www.example.com/me> .
<http://www.example.com/me> <:raised5> <http://www.example.com/kid> .
With the whole s-p-o triple as the result.
I can get to two degrees relatively easily:
# Triples within two degrees of <http://www.example.com/me>: every triple
# whose subject or object is directly linked to "me" in either direction.
SELECT *
WHERE {
  # ?s points at me; return all triples with that ?s as subject.
  {
    SELECT DISTINCT ?s ?p ?o
    WHERE {
      ?s ?p1 <http://www.example.com/me> .
      ?s ?p ?o
    }
  }
  UNION
  # me points at ?o; return all triples with that ?o as object.
  {
    SELECT DISTINCT ?s ?p ?o
    WHERE {
      <http://www.example.com/me> ?p1 ?o .
      ?s ?p ?o
    }
  }
  UNION
  # ?o points at me; return all triples with that ?o as object.
  {
    SELECT DISTINCT ?s ?p ?o
    WHERE {
      ?o ?p1 <http://www.example.com/me> .
      ?s ?p ?o
    }
  }
  UNION
  # me points at ?s; return all triples with that ?s as subject.
  {
    SELECT DISTINCT ?s ?p ?o
    WHERE {
      <http://www.example.com/me> ?p1 ?s .
      ?s ?p ?o
    }
  }
}
This finds my grandmother and sister, for example. But this does not seem practical with more degrees of separation, as the number of sub-queries would double with each new degree. Is there a better way to do this? It would need to work with larger amounts of data too, so can't just run a new query on every linked entity.
EDIT: Highlighting that different predicates should be expected, I clarified and changed the example a bit.

Property paths are your friend.
# Resources reachable from :me by one, two or three :raised links, each link
# followed in either direction. The empty prefix ":" must be declared by the
# caller; it is not declared in this snippet.
SELECT DISTINCT ?rel
WHERE {
  # one hop
  { :me (:raised|^:raised) ?rel }
  UNION
  # two hops
  { :me (:raised|^:raised)/(:raised|^:raised) ?rel }
  UNION
  # three hops
  { :me (:raised|^:raised)/(:raised|^:raised)/(:raised|^:raised) ?rel }
}
and so on...

Can somebody tell me how to list all the users from the ontology in eclipse?

/**
 * Reads an ontology file into a Jena model, runs a fixed SPARQL SELECT query
 * against it and prints the result table to standard output.
 */
public class Main {
    /**
     * Prompts for a line of input, loads {@code IOTOntology.owl} from the
     * working directory and prints the query result.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String args[]) {
        String filename = "IOTOntology.owl";
        Model model = ModelFactory.createDefaultModel();

        // The line read here is stored in devID but is not used by the query below.
        System.out.print("Enter name of man: ");
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String devID = null;
        try {
            devID = br.readLine();
        } catch (IOException ioe) {
            System.out.println("IO error trying to read device ID!");
            System.exit(1);
        }

        File file = new File(filename);
        // try-with-resources closes the input stream even when parsing or querying fails.
        try (FileInputStream reader = new FileInputStream(file)) {
            System.out.println("The absolute path of the file is:"+ file.getAbsolutePath());
            // NOTE(review): the two-argument read(InputStream, String) may treat the
            // second argument as a base URI rather than a syntax name — confirm
            // against the Jena Model.read documentation.
            model.read(reader, "RDF/XML");

            // Create a SPARQL query from the given string.
            String queryString = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "+
                    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+
                    "PREFIX owl: <http://www.w3.org/2002/07/owl#>" +
                    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>" +
                    //"PREFIX act: <http://www.semanticweb.org/project/ontologies/2016/0/Actor#>" +
                    "select ?name "+
                    "where { "+
                    " ?User a rdf:name"+
                    "} \n ";
            Query query = QueryFactory.create(queryString);

            // The QueryExecution is closed by try-with-resources; no explicit close() is needed.
            try (QueryExecution qe = QueryExecutionFactory.create(query, model)) {
                ResultSet results = qe.execSelect();
                // Output query results
                ResultSetFormatter.out(System.out, results, query);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            // Release the model on both the success and the failure path.
            model.close();
        }
    }
}
I have some problem because my table is empty, it writes in command line only this:
--------
| name |
========
--------
and this is the link to my ontology:
https://github.com/isidoramalkata/IOTOntology.git

I think you need to read some more about RDF and SPARQL first...
Your query is
# The query from the question, restated for discussion.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX act: <http://www.semanticweb.org/project/ontologies/2016/0/Actor#>
# ?name is projected here but is never bound by the pattern below.
select ?name where {
# "a" abbreviates rdf:type, so this asks for resources whose type is rdf:name.
?User a rdf:name
}
That doesn't make any sense:
You select a variable ?name that doesn't occur in the query
You're using a as the predicate, which is a shortcut for the rdf:type property; that property assigns resources to classes, so you would be asking for resources that are of type rdf:name
which moreover doesn't exist in your ontology
Have a look at the triple pattern:
?User a rdf:name
Your data is
<owl:NamedIndividual rdf:about="file://SHOnt.owl#UserIsidora">
<rdf:type rdf:resource="file://SHOnt.owl#User"/>
<SHOnt:phoneNumber rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0605222024</SHOnt:phoneNumber>
<SHOnt:name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Isidora</SHOnt:name>
<SHOnt:email rdf:datatype="http://www.w3.org/2001/XMLSchema#string">isibojovic#gmail.com</SHOnt:email>
</owl:NamedIndividual>
There is no triple in your data that matches this pattern.
RDF triple means
subject predicate object
i.e.
# The name statement from the data above, written as a single Turtle triple.
@prefix shont: <file://SHOnt.owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
shont:UserIsidora shont:name "Isidora"^^xsd:string .
This is Turtle syntax which is what SPARQL is based on, thus, it's always good to have a look at your data in this syntax.
SPARQL query:
# Return the shont:name value of every resource that has one.
PREFIX shont: <file://SHOnt.owl#>
PREFIX xsd:   <http://www.w3.org/2001/XMLSchema#>

SELECT ?name
WHERE {
  ?User shont:name ?name
}
As I said, you really should read an RDF tutorial first.

SPARQL Jena iterative to get results about more subjects

I'm trying to get information about some concepts on DBpedia. I found out how to get one, but for more than one it fails. I've been told filtering should help, but the processing time is too long and I get timeouts.
The thing I can't do is VALUES ?s { dbpedia:Facebook dbpedia:Google }
So I've looked for an alternative way, but it still isn't working. Here's where I am now:
public static String concepts[] = { "Facebook", "Google" };
/**
 * Builds the SPARQL SELECT query that fetches the triples in which the given
 * DBpedia resource is the subject or the object, restricted to a fixed list
 * of predicates.
 *
 * @param concept local name of the resource, appended verbatim after the
 *                {@code dbpedia:} prefix (for example {@code "Facebook"});
 *                it must therefore be a valid prefixed-name local part
 * @return the complete query text
 */
public static String getQuery(String concept) {
    return "prefix dbpediaowl: <http://dbpedia.org/ontology/>"
            + " prefix dbpedia: <http://dbpedia.org/resource/>"
            + " prefix owl: <http://www.w3.org/2002/07/owl#>"
            + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
            + " PREFIX dbpprop: <http://dbpedia.org/property/>"
            + " prefix dbpedia-owl: <http://dbpedia.org/ontology/>"
            + " select ?s ?p ?o where { "
            // Use the requested concept rather than a hard-coded resource.
            + " values ?web { dbpedia:"
            + concept
            + " } "
            + " { ?web ?p ?o bind( ?web as ?s ) } " + " union "
            + " { ?s ?p ?web bind( ?web as ?o ) } " + " filter( ?p in ( "
            + "dbpprop:available, " + "dbpprop:company, "
            + "dbpprop:inventor, " + "dbpedia-owl:foundedBy, "
            + "dbpedia-owl:subsidiary, " + "dbpprop:foundation, "
            + "dbpprop:founder, " + "dbpprop:industry, "
            + "dbpprop:programmingLanguage, " + "dbpedia-owl:successor )) "
            // Close the WHERE group opened after "where".
            + "}";
}
/**
 * Runs the query built by {@code getQuery} for every entry of {@code concepts}
 * against the remote SPARQL endpoint and writes each solution to a text file.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    // try-with-resources flushes and closes both streams even if a query fails.
    try (OutputStream os = new FileOutputStream("C:/Users/alex/Desktop/data.txt");
         PrintStream printStream = new PrintStream(os)) {
        printStream.println("am scris");
        for (int i = 0; i < concepts.length; i++) {
            printStream.println(i + " concept");
            Query query = QueryFactory.create(getQuery(concepts[i]));
            QueryExecution qExe = QueryExecutionFactory.sparqlService(
                    "http://lod.openlinksw.com/sparql", query);
            try {
                ResultSet results = qExe.execSelect();
                while (results.hasNext()) {
                    printStream.println(results.nextSolution().toString());
                }
            } finally {
                // Close the execution after every concept so it is released
                // whether or not the query succeeded.
                qExe.close();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getCause());
    }
}
For one concept at a time, in SPARQL, it works fine, but I need to query about 50 of them, so I need to know how I can do it programmatically. Also, it would be great if you could help me select the predicates as well (without the filtering), because I also need to allow about 30-40 of them.
Thanks a lot! Hope you can help.

I thought we'd solved the problem with VALUES ?s { dbpedia:Facebook dbpedia:Google } in your previous question, Sparql about dbpedia:World_Wide_Web. Instead of doing values ?s { dbpedia:Facebook dbpedia:Google }, you can use filter( ?s in (dbpedia:Facebook, dbpedia:Google) ). This works just fine for subjects and properties. E.g., if you wanted to get the English abstracts and labels for Google and Facebook, you could use a query like this:
# Labels and abstracts of Google and Facebook, selected with FILTER ... IN.
# The prefixes are not declared in this snippet.
SELECT ?s ?p ?o
WHERE {
  ?s ?p ?o
  # restrict the subjects
  FILTER( ?s IN (dbpedia:Google, dbpedia:Facebook) )
  # restrict the predicates
  FILTER( ?p IN (rdfs:label, dbpedia-owl:abstract) )
  # keep non-literals, and literals whose language tag matches "en"
  FILTER( !isLiteral(?o) || langMatches(lang(?o),"en") )
}
SPARQL results
Now, if you can get rid of that union (which you might be able to do, if you're really just looking for the values of certain properties from certain subjects), then you actually can use the values blocks in the way that you'd like to. (Really, I think the problematic behavior seen in your other question is due to a DBpedia bug; I think you should be able to use values and union together.) That is, to select non-literals, and literals with an English language tag for some specified properties, you can do:
# Same selection as the FILTER ... IN form, expressed with VALUES blocks.
# The prefixes are not declared in this snippet.
SELECT ?s ?p ?o
WHERE {
  VALUES ?s { dbpedia:Google dbpedia:Facebook }
  VALUES ?p { rdfs:label dbpedia-owl:abstract }
  ?s ?p ?o
  # keep non-literals, and literals whose language tag matches "en"
  FILTER( !isLiteral(?o) || langMatches(lang(?o),"en") )
}
SPARQL results

Generate SPARQL query for DBpedia giving distinct result

I am using SPARQL for querying the DBpedia database. My first query was the following:
# Full-text search for objects containing both TENDULKAR and SACHIN.
# The inner select orders matches by (?sc * 3e-1) plus a rank value and keeps
# the first 20; the outer select returns each subject with a search excerpt.
# NOTE(review): "as ? rank" below contains a space between "?" and "rank" —
# confirm whether the intended variable is ?rank.
select ?s1 ( bif:search_excerpt ( bif:vector ( 'TENDULKAR', 'SACHIN' ) , ?o1 ) ) where
{
{
{
select ?s1, ( ?sc * 3e-1 ) as ?sc, ?o1, ( sql:rnk_scale ( <LONG::IRI_RANK> ( ?s1 ) ) ) as ? rank, ?g where
{
quad map virtrdf:DefaultQuadMap
{
graph ?g
{
?s1 ?s1textp ?o1 .
# bif:contains binds the match score to ?sc via the "option ( score ?sc )" clause.
?o1 bif:contains ' ( TENDULKAR AND SACHIN ) ' option ( score ?sc ) .
}
}
}
order by desc ( ?sc * 3e-1 + sql:rnk_scale ( <LONG::IRI_RANK> ( ?s1 ) ) ) limit 20 offset 0
}
}
}
Now even if I apply the DISTINCT keyword to the query it gives the same result as before.
I want distinct results from this query.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

select value that occurs most frequently by group - sparql

Related

trying to display data using SPARQL

SPARQL degrees of separation

Can somebody tell me how to list all the users from the ontology in eclipse?

SPARQL Jena iterative to get results about more subjects

Generate SPARQL query for DBpedia giving distinct result

Categories

Resources