The Linked Data Benchmark Council (LDBC) is an effort to establish benchmarking practices for evaluating graph data management systems. The LDBC Social Network Benchmark (SNB) is a result of this effort. It models a social network graph and introduces different workloads on this common graph. The Interactive Workload specifies a set of read-only traversals that touch a small portion of the graph and is further divided into interactive short (IS) and interactive complex (IC) queries.
In this post, I will focus on writing some of the interactive complex (IC) queries that highlight the different techniques available in DQL.
Schema
Schema in DQL
We add a few reverse edges and indexing to optimize query perfomance.
type tag{
fqid
id
name
url
hasType
}
type tagclass{
fqid
id
name
url
isSubclassOf
}
type comment{
fqid
locationIP
browserUsed
content
length
id
creationDate
hasTag
hasCreator
isLocatedIn
replyOf
}
type forum{
fqid
id
title
creationDate
containerOf
hasMember
hasTag
hasModerator
}
type post{
fqid
locationIP
browserUsed
language
content
length
hasTag
id
hasCreator
isLocatedIn
imageFile
}
type person{
fqid
id
firstName
lastName
gender
birthday
creationDate
browserUsed
containerOf
locationIP
hasInterest
isLocatedIn
likes
workAt
email
language
studyAt
knows
}
type organization{
fqid
id
type
name
url
isLocatedIn
}
type place{
fqid
id
type
name
url
isPartOf
}
fqid : string @index(hash) .
id : int .
name : string @index(hash) .
url : string .
hasType : uid @reverse .
locationIP : string .
browserUsed : string @index(exact) .
content : string .
length : int .
creationDate : dateTime @index(day) .
hasTag : [uid] @reverse .
hasCreator : [uid] @reverse .
isLocatedIn : [uid] .
replyOf : uid @reverse .
title : string @index(exact) .
containerOf : [uid] @reverse .
hasMember : [uid] @reverse .
hasModerator : [uid] @reverse .
language : string .
firstName : string @index(hash) .
lastName : string @index(hash) .
gender : string .
birthday : dateTime @index(year) .
hasInterest : [uid] .
likes : [uid] @reverse .
workAt : [uid] .
studyAt : [uid] .
knows : [uid] .
email : string @index(exact) .
language : string @lang .
type : string .
imageFile : string .
isPartOf : [uid] .
isSubclassOf : [uid] @reverse .
IC-1
Given a start Person, find Persons with a given first name (firstName) that the start Person is con-nected to (excluding startPerson) by at most 3 steps via the knows relationships. Return Persons, including the distance (1…3), summaries of the Persons workplaces and places of study.
DQL query
query all($ID: string, $name: string){
pid as var(func: eq(fqid, $ID))
nid as var(func: eq(firstName, $name))
# fi --> friend at ith level
var(func: uid(pid)){
f1 as knows{
dist1 as math(1)
f2 as knows @filter(NOT uid(pid)){
dist2 as math(2)
f3 as knows @filter(NOT uid(pid)){
dist3 as math(3)
}
}
}
}
ppl as var(func: uid(nid)) @filter(uid(f1, f2, f3))
q(func: uid(ppl), orderasc: lastName, orderasc: fqid, first: 20) {
distance1: val(dist1)
distance2: val(dist2)
distance3: val(dist3)
firstName
lastName
birthday
isLocatedIn{
name
}
studyAt{
name
isLocatedIn{
name
}
}
}
}
Comments:
- The trick to capturing the number of hops while querying is to create dummy predicates (
math(1)) when traversing overknowsedges. This works because we know at which level we are in the query block. - To filter out nodes, it is recommended to capture the
uidin a separate block and use that inside a filter, instead of writing the filter again. This saves processing the filter and identifying the correctuidevery time.
IC-2
Given a start Person (person), find the most recent Messages from all of that Person’s friends (friendnodes). Only consider Messages created before the given maxDate (excluding that day).
DQL Query
query all($ID: string, $date: string){
var(func: eq(fqid, $ID)){
knows{
msgs as ~hasCreator @filter(le(creationDate, $date))
}
}
q(func: uid(msgs), orderdesc: creationDate, orderasc: fqid, first:20){
fqid
content
creationDate
hasCreator{
fqid
firstName
lastName
}
}
}
Comments
- Variables in the query blocks can store filtered nodes.
IC-4
Given a start Person (personId), find Tags that are attached to posts that were created by that Person’s friends. Only include Tags that were attached to friends’ Posts created within a given time interval, and that were never attached to friends’ Posts created before this interval.
DQL Query
query all($ID: string, $startDate: string, $endDate: string){
var(func: eq(fqid, $ID)){
v1 as knows
}
var(func: uid(v1)){
~hasCreator @filter(le(creationDate, $startDate)){
oldTag as hasTag
}
}
var(func: uid(v1)){
~hasCreator @filter(between(creationDate, $startDate, $endDate) ){
id
newTag as hasTag{
name
}
}
}
var(func: uid(newTag)) @filter(NOT uid_in(hasTag, uid(oldTag))){
postCount as count(~hasTag) @filter(ge(creationDate, $startDate) AND le(creationDate, $endDate))
}
q(func: uid(newTag), orderdesc: val(postCount), first: 10){
name
id
dgraph.type
val(postCount)
}
}
IC-5
Given a startPerson, find the Forums which that Person’s friends and friends of friends (excluding startPerson) became Members of after a given date. For each Forum find the number of Posts that were created by any of these Persons. For each Forum and consider only those Persons which joined that particular Forum after the given date (minDate)
DQL Query
query all($ID: string, $minDate: string){
pid as var(func: eq(fqid, $ID))
var(func: uid(pid)){
f1 as knows{
f2 as knows @filter(NOT uid(pid)){
}
}
}
# All friends of friends.
f as var(func: uid(f1,f2))
relevantMembers as var(func: uid(f) ) @cascade {
forums as ~hasMember @facets(ge(joinDate, $minDate))
}
relevantForums as var(func: uid(forums)) @cascade{
containerOf @filter(uid_in(hasCreator, uid(relevantMembers)))
}
var(func: uid(relevantForums)){
postCount as count(containerOf) @filter(uid_in(hasCreator, uid(relevantMembers)))
}
q( func: uid(relevantForums), orderdesc: val(postCount) ){
fqid
val(postCount)
}
}
IC-6
Given a startPerson and some Tag, find the other Tags that occur together with this Tag on Posts that were created by startPerson’s friends and friends of friends (excluding startPerson). Return top 10 Tags, and the count of Posts that were created by these Persons, which contain both this Tag and the given Tag.
DQL Query
query all($ID: string, $tagName: string){
pid as var(func: eq(fqid, $ID))
var(func: uid(pid)){
f1 as knows{
f2 as knows @filter(NOT uid(pid))
}
}
requiredTag as q1(func: eq(fqid, $tagName))
# Collect posts by friends and the tags of those posts.
# Restrict posts to which have the requiredTag
var(func: uid(f1,f2)){
posts as ~hasCreator @filter(uid_in(hasTag, uid(requiredTag))){
otherTag as hasTag
}
}
# Calculate the post count for the tags - only include posts which have the requried tag
# Eliminate the original tag from this tag list
tags as var(func: uid(otherTag)) @filter(uid_in(~hasTag, uid(posts)) AND NOT uid(requiredTag)){
postCount as count(~hasTag) @filter(uid(posts))
}
q(func: uid(tags), orderdesc: val(postCount), first:10){
fqid
postCount: val(postCount)
}
}
IC-8
Given a startPerson, find the most recent Comments that are replies to Messages of the startPerson.Only consider direct (single-hop) replies, not the transitive (multi-hop) ones. Return the reply Comments, and the Person that created each reply Comment.
DQL Query
query all($ID: string){
var(func: eq(fqid, $ID))@cascade{
~hasCreator{
replies as ~replyOf{
date as creationDate
}
}
}
q(func: uid(replies), orderdesc: val(date), first:20 ){
id
content
creationDate
hasCreator{
id
firstName
lastName
}
}
}
IC-9
Given a startPerson, find the most recent Messages created by that Person’s friends or friends of friends (excluding startPerson). Only consider Messages created before the given maxDate (excluding that day).
DQL Query
query all($ID: string, $maxDate: string){
pid as var(func: eq(fqid, $ID))
var(func: uid(pid)){
f1 as knows{
f2 as knows @filter(NOT uid(pid))
}
}
q(func: uid(f1,f2), orderasc: id, first:20)@cascade{
id
firstName
lastName
~hasCreator @filter(le(creationDate, $maxDate )) (orderdesc: creationDate){
id
content
creationDate
}
}
}
IC-11
Given a startPerson, find that Person’s friends and friends of friends (excluding startPerson) who started working in some Company in a given Country, before a given date (year).
DQL Query
query all($ID: string, $name: string, $workFrom: string){
person as var(func: eq(fqid, $ID)){
v1 as knows{
v2 as knows
}
}
country as var(func: eq(fqid, $name))
organisation as var(func: type(organisation)) @filter(uid_in(isLocatedIn, uid(country)))
relevantFriends as var(func: uid(v1, v2))@filter(NOT uid(person) AND uid_in(workAt, uid(organisation)))
q(func: uid(relevantFriends), orderasc: fqid, first:10) @cascade{
id
firstName
workAt @facets(le(workFrom, $workFrom)){
id
name
}
}
}
IC-12
Given a startPerson, find the Comments that this Person’s friends made in reply to Posts, considering only those Comments that are direct (single-hop) replies to Posts, not the transitive (multi-hop) ones. Only consider Posts with a Tag in a given TagClass or in a descendent of that TagClass. Count the number of these reply Comments, and collect the Tags that were attached to the Posts they replied to, but only collect Tags with the given TagClass or with a descendant of that TagClass. Return Persons with at least one reply, the reply count, and the collection of Tags.
DQL Query
query all($ID: string, $tagClass: string){
mainTagClass as var(func: eq(fqid, $tagClass)) @recurse{
subClasses as ~isSubclassOf
}
var(func: uid(mainTagClass, subClasses)){
tags as ~hasType
}
var(func: eq(fqid, $ID)){
friends as knows{
}
}
relevantPosts as var(func: type(post)) @filter(uid_in(hasTag, uid(tags)))
var(func: uid(relevantPosts)){
~replyOf @filter(uid_in(hasCreator, uid(friends))){
relevantFriends as hasCreator
}
}
var(func: uid(relevantFriends)){
replyCount as count(~hasCreator) @filter(uid_in(replyOf, uid(relevantPosts) ))
}
q(func: uid(relevantFriends), orderdesc: val(replyCount), first:20){
firstName
lastName
replyCount : val(replyCount)
}
}
IC-13
Given two Persons, find the shortest path between these two Persons in the subgraph induced by the knows relationships.
DQL Query
query all($ID1: string, $ID2: string){
A as var(func: eq(fqid, $ID1))
M as var(func: eq(fqid, $ID2))
path as shortest(from: uid(A), to: uid(M)) {
knows
}
path(func: uid(path)) {
firstName
}
}
