Sparql - Count of multiple categories of connected nodes - sparql

In my graph, a node of category 'File' 'Owns' many nodes of varying categories. I'm trying to get a list of Files, including a count of connected nodes of specific categories.
My current best query looks like this:
# Asker's attempt: list each File with its name plus a count of owned
# 'BaseTable' nodes and a count of owned 'TableOccurrence' nodes.
SELECT ?uuid ?fileName ?tableCount ?toCount
WHERE {
?uuid 'Category' 'File' .
?uuid 'Name' ?fileName .
{
# This subquery projects only ?tableCount. The ?uuid used inside it is
# therefore local to the subquery and is not joined with the outer ?uuid,
# so the count covers all files rather than one file.
SELECT (COUNT(*) as ?tableCount)
WHERE
{
VALUES (?category ) { ('BaseTable') }
?uuid 'Owns' ?elemUUID .
?elemUUID 'Category' ?category .
}
}
{
# Same problem here: ?uuid is not projected and there is no GROUP BY,
# so ?toCount is a single total shared by every result row.
SELECT (COUNT(*) as ?toCount)
WHERE
{
VALUES (?category ) { ('TableOccurrence') }
?uuid 'Owns' ?elemUUID .
?elemUUID 'Category' ?category .
}
}
}
The output is a distinct list of files, but the counts are the count for that category across ALL files (i.e. every file shares the same value for ?toCount, and every file shares the same value for ?tableCount).
There's obviously something I'm not understanding about the way subqueries work. Any assistance would be greatly appreciated.

Variables in inner queries are not scoped outside, unless they appear in the selection.
What you should do is a query like this instead (forgive my editing, but this will make it much easier to follow for other users too):
# Lists every File with its name and the number of BaseTable and
# TableOccurrence nodes it owns.
#
# Each subquery projects ?uuid and groups by it, which is what makes the
# count per file and lets it join with the outer ?uuid. The subqueries are
# wrapped in OPTIONAL so that a File owning no node of a category is still
# listed; COALESCE turns the missing count into 0.
SELECT ?uuid ?fileName
       (COALESCE(?baseTables, 0) AS ?tableCount)
       (COALESCE(?occurrences, 0) AS ?toCount)
WHERE {
  ?uuid :category 'File' .
  ?uuid :name ?fileName .
  OPTIONAL {
    SELECT ?uuid (COUNT(*) AS ?baseTables)
    WHERE {
      ?uuid :owns ?elemUUID .
      ?elemUUID :category 'BaseTable' .
    }
    GROUP BY ?uuid  # the missing link: one count per owning file
  }
  OPTIONAL {
    SELECT ?uuid (COUNT(*) AS ?occurrences)
    WHERE {
      ?uuid :owns ?elemUUID .
      ?elemUUID :category 'TableOccurrence' .
    }
    GROUP BY ?uuid  # and here again
  }
}

Related

combining SPARQL queries

I want to fetch the population of the most and least populated state. I know how to use ORDER BY (ASC and DESC). How can I combine these two (ASC and DESC) in a single query?
# Query 1: population of the most populated state (descending, first row).
SELECT ?population
{
# NOTE(review): a '.' is needed after ':State' to separate the two triple patterns.
?state rdf:type :State
?state :hasPopulation ?population.
} ORDER BY DESC(?population) LIMIT 1
# 'AND' is not SPARQL; it only marks the two queries the asker wants to combine.
AND
# Query 2: population of the least populated state (ascending, first row).
SELECT ?population
{
# NOTE(review): same missing '.' after ':State' as in the first query.
?state rdf:type :State
?state :hasPopulation ?population.
} ORDER BY ASC(?population) LIMIT 1
# Pairs every state with every state, sorts the pairs by largest first
# population and then smallest second population, and keeps the top pair.
SELECT ?population_max ?population_min
WHERE {
  ?s_max a :State ;
         :hasPopulation ?population_max .
  ?s_min a :State ;
         :hasPopulation ?population_min .
}
ORDER BY DESC(?population_max) ASC(?population_min)
LIMIT 1
Perhaps more efficient:
# Two independent one-row subqueries, joined into a single result row.
SELECT *
WHERE {
  # Largest population: sort descending, keep the first row.
  {
    SELECT (?pop AS ?population_max)
    WHERE { ?st a :State ; :hasPopulation ?pop . }
    ORDER BY DESC(?pop)
    LIMIT 1
  }
  # Smallest population: sort ascending, keep the first row.
  {
    SELECT (?pop AS ?population_min)
    WHERE { ?st a :State ; :hasPopulation ?pop . }
    ORDER BY ASC(?pop)
    LIMIT 1
  }
}
With AnzoGraph or Blazegraph, one could use named subqueries in such a case:
# Named-subquery variant (vendor extension, not standard SPARQL 1.1):
# the population list is defined once as %unsorted and reused twice.
SELECT *
WITH {
# All state populations, in no particular order.
SELECT ?pop { [] a :State ; :hasPopulation ?pop }
} AS %unsorted
WHERE {
# Each subquery re-reads %unsorted, sorts it one way and keeps the first row.
{ SELECT (?pop AS ?max) { INCLUDE %unsorted } ORDER BY DESC(?pop) LIMIT 1 }
{ SELECT (?pop AS ?min) { INCLUDE %unsorted } ORDER BY ASC(?pop) LIMIT 1 }
}
Finally, rewriting rather than combining:
# No GROUP BY: all states form one implicit group, so this returns a
# single row holding the largest and the smallest population.
SELECT (MAX(?pop) AS ?population_max) (MIN(?pop) AS ?population_min)
WHERE {
  ?st a :State ;
      :hasPopulation ?pop .
}
If aggregates are used... but the GROUP BY term is not used, then this is taken to be a single implicit group, to which all solutions belong.
You could perhaps use UNION to combine 'max' and 'min' population. Here is AnzoGraph syntax:
# AnzoGraph-specific syntax (per the surrounding text), not standard SPARQL:
# defines the population list once under the name <unsorted_pop>.
WITH (
SELECT ?pop
WHERE {
?state a :State ;
:hasPopulation ?pop
}
as <unsorted_pop>
)
SELECT *
WHERE {
# Both UNION branches bind the same variable, ?population, so the result
# is two rows (one max, one min) rather than one row with two columns.
{ SELECT (max(?pop) as ?population)
WHERE { QUERY <unsorted_pop> }
}
UNION
{ SELECT (min(?pop) as ?population)
WHERE { QUERY <unsorted_pop> }
}
}

Blank node skolemization in SPARQL without iteration

Is it possible to implement blank node skolemization in SPARQL without iteration? It seems to me that iteration is required to skolemize chains of blank nodes, such as:
# Example data: a chain of two blank nodes (an :A whose :p1 value is a :B).
@prefix : <http://example.com/> .
[ a :A ;
  :p1 [
    a :B
  ]
] .
A SPARQL Update operation for skolemization can start from the blank nodes that appear as subjects only in triples without blank node objects:
# One skolemization pass: replaces every "leaf" blank node (a blank-node
# subject none of whose objects is a blank node) with a fresh IRI, in both
# its outgoing and its incoming triples.
DELETE {
?b1 ?outP ?outO .
?inS ?inP ?b1 .
}
INSERT {
?iri ?outP ?outO .
?inS ?inP ?iri .
}
WHERE {
{
# Outer subquery: mint one uuid() IRI per distinct leaf blank node.
SELECT ?b1 (uuid() AS ?iri)
WHERE {
{
# Inner subquery: DISTINCT so each blank node yields a single row
# (and therefore a single IRI) however many triples it has.
SELECT DISTINCT ?b1
WHERE {
?b1 ?p1 [] .
FILTER isBlank(?b1)
# Exclude blank nodes that still point at another blank node.
FILTER NOT EXISTS {
?b1 ?p2 ?b2 .
FILTER isBlank(?b2)
}
}
}
}
}
# Outgoing triples of the selected blank node.
?b1 ?outP ?outO .
# Incoming triples are optional: a top-level blank node has none, in which
# case the ?inS/?inP templates are left unbound and produce no triple.
OPTIONAL {
?inS ?inP ?b1 .
}
}
This operation can be repeated until no blank nodes are found in the data:
# True while at least one triple still has a blank node as its subject.
ASK
WHERE {
  ?node ?pred [] .
  FILTER (isBlank(?node))
}
Is it possible to avoid the iteration and implement the blank node skolemization in a single SPARQL Update operation?
(Also, this approach assumes there are no "orphan" blank nodes (i.e. blank nodes that appear only as objects).)
I found a two-step solution skolemising subjects and objects separately and storing the blank node aliases (links between blank nodes and IRIs via owl:sameAs) as intermediate data:
PREFIX : <http://example.com/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
####################
# Rewrite subjects #
####################
# Step 1: every blank node used as a subject gets a fresh IRI. Its outgoing
# triples are re-inserted under that IRI, and the blank-node-to-IRI link is
# recorded in the :aliases graph for the next step.
DELETE {
?bnode ?p ?o .
}
INSERT {
?iri ?p ?o .
GRAPH :aliases {
?bnode owl:sameAs ?iri .
}
}
WHERE {
{
# One uuid() per distinct blank-node subject.
SELECT ?bnode (uuid() AS ?iri)
WHERE {
{
SELECT DISTINCT ?bnode
WHERE {
?bnode ?p [] .
FILTER isBlank(?bnode)
}
}
}
}
# All outgoing triples of that blank node; ?o may itself still be a blank
# node here, since objects are only rewritten in the following step.
?bnode ?p ?o .
}
;
###################
# Rewrite objects #
###################
# Step 2: every blank node used as an object is replaced by the IRI recorded
# for it in :aliases, or by a fresh uuid() when no alias exists.
DELETE {
?s ?p ?bnode .
}
INSERT {
?s ?p ?iri .
}
WHERE {
{
SELECT ?bnode ?iri
WHERE {
{
SELECT DISTINCT ?bnode
WHERE {
[] ?p ?bnode .
FILTER isBlank(?bnode)
}
}
# Look up the IRI assigned to this blank node by the subject rewrite.
OPTIONAL {
GRAPH :aliases {
?bnode owl:sameAs ?_iri .
}
}
# No alias means the blank node was never a subject: mint a new IRI.
BIND (coalesce(?_iri, uuid()) AS ?iri)
}
}
?s ?p ?bnode .
}
;
############################
# Clear blank node aliases #
############################
# The alias links were intermediate data only; remove them.
CLEAR GRAPH :aliases

Blazegraph full text search with total count

In SPARQL I can perform the following query in order to retrieve the fields and the total count in one query result:
# Returns up to three matching subjects; every row also carries the total
# number of matches, because the one-row count subquery joins with each row.
SELECT ?total ?s
WHERE {
  # Total number of matches (no LIMIT).
  {
    SELECT (COUNT(?s) AS ?total)
    WHERE {
      ?s <https://some/predicate/for/var1> ?var1 , ?var2 .
      FILTER (?var1 = "something")
      FILTER (?var2 = "something2")
    }
  }
  # The page of matches itself.
  {
    SELECT ?s
    WHERE {
      ?s <https://some/predicate/for/var1> ?var1 , ?var2 .
      FILTER (?var1 = "something")
      FILTER (?var2 = "something2")
    }
    LIMIT 3
  }
}
Which returns something like this (s fields matched the specific predicate and filter I provided on the query):
+-------+----------------------------------------+
| total | s |
+-------+----------------------------------------+
| 150 | http://the/path/to/the/subject |
| 150 | http://the/path/to/another/subject |
| 150 | http://the/path/to/yet/another/subject |
+-------+----------------------------------------+
I want to do the same for full text search queries, which can be used as follows: ?s bds:search "something" .
However, composing a query with the same structure as the previous one, does not work:
# Asker's attempt: same "total + page" shape as the plain query, but using
# Blazegraph full-text search. Per the surrounding text this combined form
# does not work, although each subquery works when run on its own.
PREFIX bds: <http://www.bigdata.com/rdf/search#>
SELECT ?total ?s ?org
WHERE
{
# Subquery 1: total number of non-blank subjects with a matching value.
{ SELECT (COUNT(?s) AS ?total)
WHERE{
?matchedValue
bds:search "something" ;
bds:relevance ?score ;
bds:rank ?rank .
?s ?matchedProperty ?matchedValue
FILTER ( ! isBlank(?s) )
}
}
# Subquery 2: the first ten matches.
{ SELECT ?s ?matchedProperty ?score ?rank
WHERE{
?matchedValue
bds:search "something" ;
bds:relevance ?score ;
bds:rank ?rank .
?s ?matchedProperty ?matchedValue
FILTER ( ! isBlank(?s) )
}
LIMIT 10
}
}
This fails even though each of those subqueries returns the correct result when run separately.
As correctly mentioned by @stanislav-kralin, and as described in the related Jira issue, you have to use the SERVICE clause explicitly:
# Working form: the bds: search triples are wrapped in an explicit SERVICE
# block inside each subquery.
# NOTE(review): ?org is projected but no pattern visible here binds it, and
# ?matchedProperty/?score/?rank are projected by the second subquery but not
# by the outer SELECT - confirm the intended output columns.
PREFIX bds: <http://www.bigdata.com/rdf/search#>
SELECT ?total ?s ?org
WHERE
{
# Subquery 1: total number of non-blank subjects with a matching value.
{ SELECT (COUNT(?s) AS ?total)
WHERE {
SERVICE <http://www.bigdata.com/rdf/search#search> {
?matchedValue
bds:search "something" ;
bds:relevance ?score ;
bds:rank ?rank .
}
?s ?matchedProperty ?matchedValue
FILTER ( ! isBlank(?s) )
}
}
# Subquery 2: the first ten matches.
{ SELECT ?s ?matchedProperty ?score ?rank
WHERE {
SERVICE <http://www.bigdata.com/rdf/search#search> {
?matchedValue
bds:search "something" ;
bds:relevance ?score ;
bds:rank ?rank .
}
?s ?matchedProperty ?matchedValue
FILTER ( ! isBlank(?s) )
}
LIMIT 10
}
}

Using SPARQL "from" with both a graph and a graph group in Virtuoso 7.20.3217

I have a graph group <group> of m graphs <group_1>...<group_m> with n total triples. When I do a count together with a graph <graph> with k total triples outside of the graph group, I only get the number of triples n in the graph group:
# Counts triples over a dataset built from the graph group and one extra graph.
# Per the question, this reports only the n triples of the graph group.
select count(*)
from <group>
from <graph>
{?s ?p ?o}
Result: n
When I list the graphs in the graph group explicitly, however, I get the correct result:
# Same count with the group's member graphs listed one by one; the '...'
# line stands for the graphs between <group_2> and <group_m>.
# Per the question, this reports the expected n + k.
select count(*)
from <group_1>
from <group_2>
...
from <group_m>
from <graph>
{?s ?p ?o}
Result: n + k
How can I obtain the correct result with the graph group and what is the reason for this behaviour?
You can either use two subqueries, for example:
# Counts the graph group and the extra graph in separate subqueries and adds
# the two one-row results. <group> and <graph> are the placeholder IRIs used
# in the question.
select ?n ?k (?n + ?k as ?totalCount) where {
  # ?n: triples in the graph group
  { select (count(*) as ?n) where {
      graph <group> { ?s ?p ?o } } }
  # ?k: triples in the graph outside the group
  { select (count(*) as ?k) where {
      graph <graph> { ?s ?p ?o } } }
}
Or use a union:
# Union variant: each branch binds its own variables, so count(?s1) only
# sees rows from the graph group and count(?s2) only rows from the extra
# graph. <group> and <graph> are the placeholder IRIs used in the question.
select (count(?s1) as ?n)
       (count(?s2) as ?k)
       (?n + ?k as ?totalCount)
where {
  { graph <group> { ?s1 ?p1 ?o1 } }
  union
  { graph <graph> { ?s2 ?p2 ?o2 } }
}

SPARQL DBpedia by taxonomic term

Have the following working SPARQL query that selects items from DBpedia that include the string "fish" in their name.
# Asker's query: DBpedia resources whose English label contains "fish",
# with their taxonomic ranks, picture and abstract.
# NOTE: the commas between projected variables and the projection of
# non-grouped variables alongside GROUP BY are not legal SPARQL 1.1, even
# though the DBpedia endpoint accepts them (see the answer's notes).
SELECT ?name, ?kingdom, ?phylum, ?class, ?order, ?family, ?genus, ?species, ?subspecies, ?img, ?abstract
WHERE {
?s dbpedia2:regnum ?hasValue;
rdfs:label ?name
# Case-insensitive substring match on the English label.
FILTER regex( ?name, "fish", "i" )
FILTER ( langMatches( lang( ?name ), "EN" ))
?animal dbpedia2:name ?name;
foaf:depiction ?img;
dbpedia2:regnum ?kingdom
# Each rank is optional because not every resource carries all of them.
OPTIONAL { ?animal dbpedia2:ordo ?order . }
OPTIONAL { ?animal dbpedia2:phylum ?phylum . }
OPTIONAL { ?animal dbpedia2:classis ?class . }
OPTIONAL { ?animal dbpedia2:familia ?family . }
OPTIONAL { ?animal dbpedia2:genus ?genus . }
OPTIONAL { ?animal dbpedia2:species ?species . }
OPTIONAL { ?animal dbpedia2:subspecies ?subspecies . }
# NOTE(review): no triple pattern in this query binds ?abstract, so this
# OPTIONAL contains only a filter on an unbound variable.
OPTIONAL {
FILTER ( langMatches( lang( ?abstract ), "EN" ))
}
}
GROUP BY ?name
LIMIT 500
Here is the result on SNORQL.
This approach finds animals with the word "fish" in their name (example: "starfish" which is not a fish but member of the phylum Echinoderm).
Would like a more precise query that selects DBpedia items by phylum, or by class, or by order, etc.
How to change the query to search only on dbpedia2:phylum (Chordata); on dbpedia2:classis (Actinopterygii); on dbpedia2:familia; etc. ?
Looking at Tuna, I see that there is a rdf:type assertion for the class
http://umbel.org/umbel/rc/Fish
that looks useful. E.g.,
# Every resource typed as the UMBEL Fish class.
SELECT ?fish
WHERE {
  ?fish a <http://umbel.org/umbel/rc/Fish> .
}
SPARQL results (10,000)
There's also the dbpedia-owl:Fish class, which gets more results:
# Number of resources typed as dbpedia-owl:Fish.
SELECT (COUNT(*) AS ?nFish)
WHERE { ?f a dbpedia-owl:Fish }
SPARQL results (17,420)
While Wikipedia has lots of scientific classification information, I don't see much of it reflected in DBpedia. E.g., while the Wikipedia article for Tuna has kingdom, phylum, class, order, etc., I don't see that data in the corresponding DBpedia resource.
Notes
Note that your query, as written, isn't actually legal SPARQL (even if Virtuoso, the SPARQL endpoint that DBpedia uses, accepts it). You can't have commas between the projection variables. Also, once you group by one variable, the non-group variables can't appear in the variable list. You could sample the other values though. E.g., you should end up with something like:
# Legal SPARQL 1.1 form of the "fish" label search: no commas in the
# projection, and every non-grouped variable is wrapped in SAMPLE() so that
# one value per ?name group is returned.
SELECT
?name
(sample(?kingdom) as ?kingdom_)
(sample(?phylum) as ?phylum_)
#-- ... (the remaining ranks follow the same pattern)
(sample(?img) as ?img_)
(sample(?abstract) as ?abstract_)
WHERE {
?s dbpedia2:regnum ?hasValue;
rdfs:label ?name
# Case-insensitive substring match on the English label.
FILTER regex( ?name, "fish", "i" )
FILTER ( langMatches( lang( ?name ), "EN" ))
?animal dbpedia2:name ?name;
foaf:depiction ?img;
dbpedia2:regnum ?kingdom
# Each rank is optional because not every resource carries all of them.
OPTIONAL { ?animal dbpedia2:ordo ?order . }
OPTIONAL { ?animal dbpedia2:phylum ?phylum . }
OPTIONAL { ?animal dbpedia2:classis ?class . }
OPTIONAL { ?animal dbpedia2:familia ?family . }
OPTIONAL { ?animal dbpedia2:genus ?genus . }
OPTIONAL { ?animal dbpedia2:species ?species . }
OPTIONAL { ?animal dbpedia2:subspecies ?subspecies . }
# ?abstract must be bound before its language can be tested; with only the
# FILTER in this group the variable stays unbound and the sample is empty.
OPTIONAL {
?animal dbpedia-owl:abstract ?abstract .
FILTER ( langMatches( lang( ?abstract ), "EN" ))
}
}
GROUP BY ?name
LIMIT 500