Examples from the lectures: Difference between revisions

From info216
 
(23 intermediate revisions by the same user not shown)
Line 1: Line 1:
This page contains code examples from the lectures.


==S02==
''This page currently shows the examples used in the Spring of 2023.
It will be updated with examples from 2024 as the course progresses.''


===Getting started with RDFlib===
==Lecture 1: Introduction to KGs==
Turtle example:
<syntaxhighlight>
<syntaxhighlight>
from inspect import formatargspec
@prefix ex: <http://example.org/> .
from rdflib import Graph, Literal, RDF, FOAF, Namespace
ex:Roger_Stone
    ex:name "Roger Stone" ;
    ex:occupation ex:lobbyist ;
    ex:significant_person ex:Donald_Trump .
ex:Donald_Trump
    ex:name "Donald Trump" .
</syntaxhighlight>
 
==Lecture 2: RDF==
Blank nodes for anonymity, or when we have not decided on a URI:
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
 
EX = Namespace('http://example.org/')


g = Graph()
g = Graph()
g.bind('ex', EX)  # this is why the line '@prefix ex: <http://example.org/> .'
                  # and the 'ex.' prefix are used when we print out Turtle later
robertMueller = BNode()
g.add((robertMueller, RDF.type, EX.Human))
g.add((robertMueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((robertMueller, EX.position_held, Literal('Director of the Federal Bureau of Investigation', lang='en')))


EX = Namespace('http://ex.org/')
print(g.serialize(format='turtle'))
</syntaxhighlight>


g.add((EX.Bob, RDF.type, FOAF.Person))
Blank nodes used to group related properties:
g.add((EX.Alice, RDF.type, FOAF.Person))
<syntaxhighlight>
g.add((EX.Carol, RDF.type, FOAF.Person))
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
g.add((EX.Bob, FOAF.knows, EX.Alice))
g.add((EX.Bob, FOAF.knows, EX.Carol))


g.add((EX.Bob, FOAF.name, Literal('Bob')))
EX = Namespace('http://example.org/')
g.add((EX.Alice, FOAF.name, Literal('Alice')))
g.add((EX.Carol, FOAF.name, Literal('Carol')))


g.namespace_manager.bind('ex', EX)
g = Graph()
g.bind('ex', EX)


print(g.serialize(format='json-ld'))
# This is a task in Exercise 2


for p, o in g[ EX.Bob : : ]:
print(g.serialize(format='turtle'))
    print(p, o)
</syntaxhighlight>
 
Literals:
<syntaxhighlight>
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
 
EX = Namespace('http://example.org/')
 
g = Graph()
g.bind('ex', EX)


g.add((EX.Robert_Mueller, RDF.type, EX.Human))
g.add((EX.Robert_Mueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((EX.Robert_Mueller, FOAF.name, Literal('رابرت مولر', lang='fa')))
g.add((EX.Robert_Mueller, DC.description, Literal('sixth director of the FBI', datatype=XSD.string)))
g.add((EX.Robert_Mueller, EX.start_time, Literal(2001, datatype=XSD.integer)))


g2 = Graph()
print(g.serialize(format='turtle'))
g2.parse('https://www.wikidata.org/entity/Q935079.ttl')
print(g2.serialize())
</syntaxhighlight>
</syntaxhighlight>


==S03==
Alternative container (open):
 
===Local query with RDFlib===
<syntaxhighlight>
<syntaxhighlight>
from rdflib import Graph, Namespace
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD


REX = Namespace('http://example.org/royal#')
EX = Namespace('http://example.org/')


g = Graph()
g = Graph()
g.parse('family.ttl', format='ttl')
g.bind('ex', EX)
res = g.query("""
    CONSTRUCT {
        ?child fam:aunt ?aunt .
    } WHERE {
        ?child fam:hasParent / fam:hasSister ?aunt .
    }
""",
    initNs={
        'fam': 'http://example.org/family#'
    },
    initBindings={
        'child': REX.SverreMagnus
    })
</syntaxhighlight>


==S04==
muellerReportArchives = BNode()
g.add((muellerReportArchives, RDF.type, RDF.Alt))


===Remote query with SPARQLWrapper===
archive1 = 'https://archive.org/details/MuellerReportVolume1Searchable/' \
<syntaxhighlight>
                    'Mueller%20Report%20Volume%201%20Searchable/'
import SPARQLWrapper
archive2 = 'https://edition.cnn.com/2019/04/18/politics/full-mueller-report-pdf/index.html'
archive3 = 'https://www.politico.com/story/2019/04/18/mueller-report-pdf-download-text-file-1280891'


endpoint = 'http://sandbox.i2s.uib.no/bigdata/namespace/kb/sparql'
g.add((muellerReportArchives, RDFS.member, Literal(archive1, datatype=XSD.anyURI)))
g.add((muellerReportArchives, RDFS.member, Literal(archive2, datatype=XSD.anyURI)))
g.add((muellerReportArchives, RDFS.member, Literal(archive3, datatype=XSD.anyURI)))


paper_uri = 'http://semanticscholar.org/paper/c93a37e6922e09f34fc093f7e4f9675775d4557d'
g.add((EX.Mueller_Report, RDF.type, FOAF.Document))
g.add((EX.Mueller_Report, DC.contributor, EX.Robert_Mueller))
g.add((EX.Mueller_Report, SCHEMA.archivedAt, muellerReportArchives))


client = SPARQLWrapper.SPARQLWrapper(endpoint=endpoint)
print(g.serialize(format='turtle'))
client.setReturnFormat('json')
client.setQuery(f'''
    DESCRIBE <{paper_uri}>
''')
res = client.queryAndConvert()
</syntaxhighlight>
</syntaxhighlight>


===Remote update with SPARQLWrapper===
Sequence container (open):
<syntaxhighlight>
<syntaxhighlight>
client.setReturnFormat('xml')
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
client.setMethod('POST')
 
client.setQuery('''
EX = Namespace('http://example.org/')
    DELETE DATA {
 
        <http://example.org/Abra> <http://example.org/ka> <http://example.org/Dabra> .
g = Graph()
    }
g.bind('ex', EX)
''')
 
res = client.queryAndConvert()
donaldTrumpSpouses = BNode()
client.setMethod('GET')
g.add((donaldTrumpSpouses, RDF.type, RDF.Seq))
g.add((donaldTrumpSpouses, RDF._1, EX.IvanaTrump))
g.add((donaldTrumpSpouses, RDF._2, EX.MarlaMaples))
g.add((donaldTrumpSpouses, RDF._3, EX.MelaniaTrump))
 
g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))
 
print(g.serialize(format='turtle'))
</syntaxhighlight>
</syntaxhighlight>


===Local update with RDFlib===
Collection (closed list):
<syntaxhighlight>
<syntaxhighlight>
from rdflib import Graph, Namespace
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
 
EX = Namespace('http://example.org/')


g = Graph()
g = Graph()
g.parse('family.ttl', format='turtle')
g.bind('ex', EX)


FAM = Namespace('http://example.org/family#')
from rdflib.collection import Collection
g.update('''
 
    DELETE {
g = Graph()
        ?child fam:hasAunt ?parent .
g.bind('ex', EX)
     } INSERT {
 
        ?child fam:hasAunt ?sister .
donaldTrumpSpouses = BNode()
    } WHERE {
Collection(g, donaldTrumpSpouses, [
        ?child fam:hasParent ?parent .
     EX.IvanaTrump, EX.MarlaMaples, EX.MelaniaTrump
        ?parent fam:hasSister ?sister .
])
    }
g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))
    ''',
 
    initNs={
print(g.serialize(format='turtle'))
        'fam': FAM
g.serialize(destination='s02_Donald_Trump_spouses_list.ttl', format='turtle')
    })


print(g.serialize(format='turtle'))
print(g.serialize(format='turtle'))
</syntaxhighlight>
</syntaxhighlight>


== S07 ==
==Lecture 3: SPARQL==


RDFS programming with owlrl .
The KG4News knowledge graph can be accessed online [http://bg.newsangler.uib.no here (namespace ''kb'')] (read-only), or you can load the Turtle file into your own GraphDB repository.
(Remember to save the file with ''.ttl'' extension. You can use ''http://i2s.uib.no/kg4news/'' as base URI.)


=== Syllogism (rule rdfs9) ===
===Limit===
<syntaxhighlight lang="SPARQL">
SELECT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10</syntaxhighlight>


<syntaxhighlight>
===List distinct properties only (with limit)===
from rdflib import Graph, RDF, RDFS, OWL, Namespace
<syntaxhighlight lang="SPARQL">
import owlrl
SELECT DISTINCT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10
</syntaxhighlight>
 
===Limit with offset===
<syntaxhighlight lang="SPARQL">
SELECT DISTINCT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10 OFFSET 9
</syntaxhighlight>


EX = Namespace('http://example.org#')
===List rdf:types===
<syntaxhighlight lang="SPARQL">
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>


g = Graph()
SELECT DISTINCT ?t WHERE {
g.bind('', EX)
     ?s rdf:type ?t .
NS = {
     '': EX,
    'rdf': RDF,
    'rdfs': RDFS,
}
}
g.update("""
LIMIT 50
INSERT DATA {
</syntaxhighlight>
     :Socrates rdf:type :Man .
 
     :Man rdfs:subClassOf :Mortal .
===URI for Tim Berners-Lee===
<syntaxhighlight lang="SPARQL">
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
 
SELECT DISTINCT ?person WHERE {
     ?person foaf:name ?name .
     FILTER(CONTAINS(?name, "Berners-Lee"))
}
}
""", initNs=NS)
LIMIT 10  # best to use limit if something goes wrong
</syntaxhighlight>


rdfs_engine = owlrl.RDFSClosure.RDFS_Semantics(g, True, False, False)
===Describe Tim Berners-Lee===
rdfs_engine.closure()
<syntaxhighlight lang="SPARQL">
rdfs_engine.flush_stored_triples()
DESCRIBE <http://i2s.uib.no/kg4news/author/1432678629>
</syntaxhighlight>
DESCRIBE returns a new RDF graph, whereas SELECT returns a table of rows.
 
===Papers that mention "Semantic Web" in the title===
<syntaxhighlight lang="SPARQL">
PREFIX ss: <http://semanticscholar.org/>
 
SELECT DISTINCT ?paper ?title WHERE {
    ?paper ss:title ?title .
    FILTER(CONTAINS(STR(?title), "Semantic Web"))
}
LIMIT 50
</syntaxhighlight>


res = g.query("""
===Alternative filter that ignores capitalisation (lower/upper case)===
ASK { :Socrates rdf:type :Mortal . }
<syntaxhighlight lang="SPARQL">
""", initNs=NS)
    FILTER(REGEX(STR(?title), "Semantic Web", "i"))
print(res.askAnswer)
</syntaxhighlight>
</syntaxhighlight>


=== Domain and range (rules rdfs2-3) ===
===Authors sorted by name===
<syntaxhighlight lang="SPARQL">
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
 
SELECT DISTINCT * WHERE {
    ?author foaf:name ?name . 
}
ORDER BY ?name
LIMIT 10
</syntaxhighlight>


<syntaxhighlight>
===Sorted by descending name instead===
from rdflib import Graph, RDF, RDFS, OWL, Namespace
<syntaxhighlight lang="SPARQL">
import owlrl
ORDER BY DESC(?name)
</syntaxhighlight>


EX = Namespace('http://example.org#')
===Count papers by author===
<syntaxhighlight lang="SPARQL">
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX ss: <http://semanticscholar.org/>


g = Graph()
SELECT DISTINCT ?author (COUNT(?paper) AS ?count) WHERE {
g.bind('', EX)
     ?author rdf:type ss:Author . 
NS = {
     ?paper  rdf:type ss:Paper ;
     '': EX,
            dct:contributor ?author .
     'rdf': RDF,
    'rdfs': RDFS,
}
}
g.update("""
GROUP BY ?author
INSERT DATA {
LIMIT 10
    :Socrates :husbandOf :Xantippe .
</syntaxhighlight>
    :husbandOf rdfs:domain :Man .
    :husbandOf rdfs:range :Woman .
}
""", initNs=NS)


rdfs_engine = owlrl.RDFSClosure.RDFS_Semantics(g, True, False, False)
===Only list the most prolific authors===
rdfs_engine.closure()
<syntaxhighlight lang="SPARQL">
rdfs_engine.flush_stored_triples()
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX ss: <http://semanticscholar.org/>


res = g.query("""
SELECT DISTINCT ?author (COUNT(?paper) AS ?count) WHERE {
ASK { :Xantippe rdf:type :Woman . }
    ?author rdf:type ss:Author . 
""", initNs=NS)
    ?paper  rdf:type ss:Paper ;
print(res.askAnswer)
            dct:contributor ?author .
}
GROUP BY ?author
HAVING (?count >= 10) # similar to a filter expression
LIMIT 10              # include limit when you test
</syntaxhighlight>
</syntaxhighlight>


==S08==
===Order by descending paper count===
<syntaxhighlight lang="SPARQL">
SELECT ... {
    ...
}
GROUP BY ?person
HAVING (?count > 10)
ORDER BY DESC(?count)
LIMIT 10
</syntaxhighlight>


===owl:inverseOf===
===Order by descending paper count and then by author name===
<syntaxhighlight lang="SPARQL">
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ss: <http://semanticscholar.org/>


<syntaxhighlight>
SELECT DISTINCT ?person (SAMPLE(?name) AS ?name) (COUNT(?paper) AS ?count) WHERE {
from rdflib import Graph, Namespace, RDF, RDFS, FOAF
    ?person rdf:type ss:Author ;
import owlrl
            foaf:name ?name . 
    ?paper  rdf:type ss:Paper ;
            ss:title ?title ;
            dct:contributor ?person .
}
GROUP BY ?person
HAVING (?count > 10)
ORDER BY DESC(?count)
LIMIT 10
</syntaxhighlight>


===Embedded Wikidata query===
This is a toy example only. Embedded queries like these are better suited for situations where the same URIs are used in more than one triple store and you want to combine data. But Wikidata and KG4News do not use the same URIs. So instead, the example searches for similar labels, and this is something graph databases may not be optimised for. Moreover, Wikidata uses language-tagged strings whereas KG4News uses plain strings, so the labels cannot even be directly compared.
<syntaxhighlight lang="SPARQL">
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>


EX = Namespace('http://ex.org/')
SELECT DISTINCT ?enname ?person ?wdperson WHERE {
 
    BIND("T. Berners-Lee"@en AS ?enname) # language-tagged label
g = Graph()
g.bind('', EX)
g.add((EX.Andreas, RDF.type, EX.Teacher))
g.add((EX.Martin, RDF.type, EX.Teacher))
g.add((EX.Tobias, RDF.type, EX.Teacher))
g.add((EX.Martin, RDF.type, EX.Student))
g.add((EX.Tobias, RDF.type, EX.Student))
g.add((EX.Mariah, RDF.type, EX.Student))
g.add((EX.Bahareh, RDF.type, EX.Student))
# g.add((EX.Teacher, RDFS.subClassOf, FOAF.Person))
# g.add((EX.Student, RDFS.subClassOf, FOAF.Person))


    SERVICE <https://query.wikidata.org/bigdata/namespace/wdq/sparql> {
        # return a Wikidata identifier (URI) with this label as alternative
        SELECT ?wdperson ?enname WHERE {
            ?wdperson skos:altLabel ?enname .
        }
        LIMIT 10  # we use limit in case the label does not match
    }
           
    BIND(STR(?enname) AS ?name)  # the same label, but with language tag removed
    # return a KG4News identifier (URI) with this label as name
    ?person foaf:name ?name . 


g.update("""
INSERT DATA {
    :Martin :hasSupervisor :Andreas .
    :hasSupervisor owl:inverseOf :supervisorOf .
}
}
""")
LIMIT 10
</syntaxhighlight>
</syntaxhighlight>


This gives no response:
===Add one or more triples===
<syntaxhighlight>
From now on you need a Blazegraph that allows writing, for example the [http://sandbox.i2s.uib.no I2S sandbox]. Remember to ''create a new namespace'' first and make sure you ''use'' it afterwards.
res = g.query("""
<syntaxhighlight lang="SPARQL">
SELECT ?person WHERE {
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    :Andreas :supervisorOf ?person .
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX ss: <http://semanticscholar.org/>
 
INSERT DATA {  # note the Turtle-like syntax
    kg:paper_123 rdf:type ss:Paper ;
            ss:title "Semantic Knowledge Graphs for the News: A Review"@en ;
            kg:year 2022 ;
            dct:contributor kg:auth_456, kg:auth_789 .  
}
}
""")
print(res.serialize(format='txt').decode())
</syntaxhighlight>
</syntaxhighlight>


Not this either:
===Remove one or more triples===
<syntaxhighlight>
<syntaxhighlight lang="SPARQL">
engine = owlrl.CombinedClosure.RDFS_OWLRL_Semantics(g, False, False, False)
PREFIX kg: <http://i2s.uib.no/kg4news/>
engine.closure()
engine.flush_stored_triples()


res = g.query("""
DELETE DATA
SELECT ?person WHERE {
{
     :Andreas :supervisorOf ?person .
     kg:paper_123 kg:year 2022  .
}
}
""")
print(res.serialize(format='txt').decode())
</syntaxhighlight>
</syntaxhighlight>


But this prints out a response:
===Pattern-based addition and or removal of triples===
<syntaxhighlight>
<syntaxhighlight lang="SPARQL">
engine = owlrl.CombinedClosure.RDFS_OWLRL_Semantics(g, False, False, False)
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
engine.closure()
PREFIX dct: <http://purl.org/dc/terms/>
engine.flush_stored_triples()
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX ss: <http://semanticscholar.org/>


res = g.query("""
DELETE DATA {   
SELECT ?person WHERE {
    ?paper dct:contributor kg:auth_456
     :Andreas :supervisorOf ?person .
}
INSERT DATA {   
    ?paper dct:contributor kg:auth_654
}
WHERE { # the patterns are similar to SELECT patterns
     ?paper dct:contributor kg:auth_456
}
}
""")
print(res.serialize(format='txt').decode())
</syntaxhighlight>
</syntaxhighlight>


==S11==
==Lecture 8: SHACL==
The examples are for use with the [https://shacl.org/playground/ interactive SHACL Playground].
 
===Minimal example===
First shape graph:
<syntaxhighlight lang="ttl">
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .
 


===Simple DL reasoning in Protégé===
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape  .


Download the [[:file:ex-prop-res.owl.txt | OWL file]], and remove the ".txt" suffix (required by the wiki platform).  
kg:MainPaperYearShape
        sh:path kg:year .
</syntaxhighlight>


Load the renamed file into Protege-OWL. From the "Reasoner" menu, choose "HermiT ..." and then "Start reasoner" to see the results of reasoning. Whenever you change the ontology, use "Synchronize reasoner" to update the reasoning results.
First data graph:
<syntaxhighlight lang="ttl">
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


===Simple DL reasoning in Python's owlready2===


First you need to
kg:LOD_Paper
    a kg:MainPaper ;
    dcterm:title "Linked Data - The Story so Far" .
</syntaxhighlight>
''This should not give a validation error.''


     pip install owlready2
===Alternative shape graph===
Keep the prefixes from the first examples. You can also write the property constraint as an anonymous node like this:
<syntaxhighlight lang="ttl">
kg:MainPaperShape
     a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year
    ] .
</syntaxhighlight>
''This is equivalent to the previous example (no validation error).''


then you can run this:
===Cardinality constraints on properties===
Add a minimum constraint to the ''kg:year'' property:
<syntaxhighlight lang="ttl">
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1
    ] .
</syntaxhighlight>
''Now you should get a validation error.''


<syntaxhighlight>
* In the data graph, add two ''kg:year'' properties (e.g., 2008 and 2009) to ''kg:LOD_Paper'' to get rid of the error.
import os
* In the shapes graph, add a ''sh:maxCount 1'' constraint to get another validation error.
* In the data graph, remove one ''kg:year'' property value from ''kg:LOD_Paper'' to get rid of the error (2009 is the right year).


from owlready2 import *
===Datatype constraint on literal property values===
Add the following property constraint to the previous example:
<syntaxhighlight lang="ttl">
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:datatype xsd:integer
    ] .
</syntaxhighlight>
''This should not give a validation error.''


* In the shapes graph, change the datatype constraint to ''sh:datatype xsd:year''. Now you should get an error.
* In the data graph, change the integer ''2009'' to the typed value ''"2009"^^xsd:year'' to get rid of the error.


# on Windows, the HermiT reasoner needs to find Java
===Class and node kind constraints===
TEMP_DIR = 'temp_owlready2/'
Add the following property constraints to the shape graph, either with a URI like this:
if os.name=='nt':
<syntaxhighlight lang="ttl">
     JAVA_EXE = 'C:\\Program Files\\Java\\jre1.8.0_251'
kg:MainPaperShape
     # JAVA_EXE = os.getenv('JRE_HOME')
    a sh:NodeShape ;
     sh:targetClass kg:MainPaper ;
     sh:property kg:MainPaperYearShape, kg:MainPaperContributorShape


# load the ontology we have saved from Protege-OWL (save in RDF-XML or OWL-XML format)
...
EX = 'file://ex-prop-res.owl'
onto = get_ontology(EX)
onto.load('file://ex-prop-res.owl')


# pick out all the triples inside owlready2 as an rdflib Graph
kg:MainPaperContributorShape
g = default_world.as_rdflib_graph()
        sh:path dcterm:contributor ;
print(g.serialize(format='ttl'))
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI .
</syntaxhighlight>


# use an owlready2 method to loop through all the individuals in the ontology
Or like this:
for ind in onto.individuals():
<syntaxhighlight lang="ttl">
     print(ind, ind.is_a)
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year
     ], [
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI
    ] .
</syntaxhighlight>
''Either way should give you a validation error.''


# run the built-in HermiT reasoner
* In the data graph, add ''dcterm:contributor "T. Berners-Lee"'' to get rid of the cardinality error. The error goes away, but you get two new ones instead.
sync_reasoner()
* In the data graph, add ''dcterm:contributor [ a kg:MainAuthor ; foaf:name "T. Berners-Lee" ]'' to get rid of the error. The class error goes away, but the IRI error remains.
* In the data graph, create an IRI for Tim Berners-Lee to resolve the error:
<syntaxhighlight lang="ttl">
    ...
    dcterm:contributor kg:TBL .
 
kg:TBL
    a kg:MainAuthor;
    foaf:name "T. Berners-Lee" .


# loop through the individuals again to see the new types added by HermiT
for ind in onto.individuals():
    print(ind, ind.is_a)
</syntaxhighlight>
</syntaxhighlight>


===Simple ontology creation in Python's owlready2===
The code below does the same thing, but instead of reading the ontology from a file, it defines it using owlready2:
<syntaxhighlight>
import os


from owlready2 import *
===Full example===
Final shape graph from the lecture:
<syntaxhighlight lang="ttl">
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .
 
 
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape, kg:MainPaperContributorShape .
 
kg:MainPaperYearShape
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year .


kg:MainPaperContributorShape
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI .
</syntaxhighlight>


TEMP_DIR = 'temp_owlready2/'
Final data graph from the lecture:
if os.name=='nt':
<syntaxhighlight lang="ttl">
    JAVA_EXE = 'C:\\Program Files\\Java\\jre1.8.0_251'
@prefix sh: <http://www.w3.org/ns/shacl#> .
    # JAVA_EXE = os.getenv('JRE_HOME')
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


EX = 'http://info216.uib.no/ex-prop-res/'
onto = get_ontology(EX)


# get_ontology(EX) will reload the ontology if it already exists in your workspace,
kg:LOD_Paper
# so we need to empty it
    a kg:MainPaper ;
for ind in onto.individuals():
    dcterm:title "Linked Data - The Story so Far" ;
     destroy_entity(ind)
     kg:year "2006"^^xsd:year ;
for cls in onto.classes():
     dcterm:contributor kg:TBL, kg:CB .
     destroy_entity(cls)


with onto:
kg:TBL
     # add classes, properties and individuals to the 'onto' ontology
     a kg:MainAuthor;
     class Person(Thing): pass
     foaf:name "T. Berners-Lee" .
    class Woman(Person): pass
    class hasSibling(SymmetricProperty):  
kg:CB
        domain = [Person]
    a kg:MainAuthor;
        range = [Person]
    foaf:name "C. Bizer" .
</syntaxhighlight>


    marthaLouise = Woman('MarthaLouise')
==Lecture 8: RDFS==
    ingridAlexandra = Woman('IngridAlexandra')
Create two new GraphDB Repositories, one with RDFS inference and one with No inference. Try the SPARQL statements on both, so you can compare them.
    haakonMagnus = Person('HaakonMagnus')
    sverreMagnus = Person('SverreMagnus')


    haakonMagnus.hasSibling = [marthaLouise]
To test ''rdfs:subClassOf'':
    ingridAlexandra.hasSibling = [sverreMagnus]
<syntaxhighlight lang="ttl">
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>


# have a look at the triples in Turtle
INSERT DATA {
g = default_world.as_rdflib_graph()
kg:LOD_paper rdf:type kg:MainPaper .
print(g.serialize(format='ttl'))
    kg:MainPaper rdfs:subClassOf kg:Paper .
}
</syntaxhighlight>
 
<syntaxhighlight lang="ttl">
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 
ASK {
kg:LOD_paper rdf:type kg:Paper .
}
</syntaxhighlight>
</syntaxhighlight>


Add the restriction classes too:
<syntaxhighlight>
with onto:
    class Sister(Person):
        equivalent_to = [Woman & hasSibling.some(Thing)]


    class Parent(Person): pass
To test ''rdfs:domain'':
    class Aunt(Person):
<syntaxhighlight lang="ttl">
        equivalent_to = [Sister & hasSibling.some(Parent)]
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>


    marthaLouise.is_a.append(Parent)
INSERT DATA {
     haakonMagnus.is_a.append(Parent)
kg:TBL kg:author kg:LOD_paper .
     kg:author rdfs:domain kg:Author .
}
</syntaxhighlight>
</syntaxhighlight>


Now you can look at the turtle again, and list all the individuals and their types (classes):
<syntaxhighlight lang="ttl">
<syntaxhighlight>
PREFIX kg: <http://i2s.uib.no/kg4news/>
g = default_world.as_rdflib_graph()
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
print(g.serialize(format='ttl'))
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>


for ind in onto.individuals():
ASK {
    print(ind, ind.is_a)
kg:TBL rdf:type kg:Author .
}
</syntaxhighlight>
</syntaxhighlight>


Run the reasoner:
 
<syntaxhighlight>
To see all the triples in the KG:
sync_reasoner()
<syntaxhighlight lang="ttl">
SELECT * WHERE {
  ?s ?p ?o
}
</syntaxhighlight>
</syntaxhighlight>


List the individuals and their types again to see (some of) the results of reasoning.
==Lecture 11: Graph embeddings==
Here is the example that used graph embeddings from ConceptNet. The ''numberbatch-en-19.08.txt.gz'' file can be [https://github.com/commonsense/conceptnet-numberbatch downloaded from GitHub].
 
Precomputed pickle files are [https://universityofbergen-my.sharepoint.com/:f:/g/personal/andreas_opdahl_uib_no/ElJv6Kl6zfJNmQrXMiz5eDoBogFpktjx8xoWgxOgAI70tw?e=99cClh available here (requires UiB login)].
<syntaxhighlight lang="python">
"""Nearest-neighbour demo over ConceptNet Numberbatch word embeddings.

Loads the Numberbatch text dump (term -> vector) into a dict, caches it as a
pickle, fits a k-NN index over the vectors (also cached), and prints the
nearest terms for a few example queries, ending with a vector-arithmetic
analogy (oslo - norway + france).
"""
import pickle
 
import pandas as pd  # NOTE(review): imported but never used below
import numpy as np
from sklearn.neighbors import NearestNeighbors
 
 
# path to the raw embedding dump and to the two pickle caches derived from it
NUMBERBATCH_FILE= './conceptnet/numberbatch-en-19.08.txt'
EMBEDDINGS_FILE = f'./conceptnet/numberbatch-embeddings.pickle'
N_NEIGHBOURS = 5
NUMBERBATCH_NEIGHBOURS_FILE = f'./conceptnet/numberbatch-neighbours-{N_NEIGHBOURS}.pickle'
 
 
# load numberbatch
reload = False  # set False the first time you run; set True once the pickle file exists
if not reload:
    embeddings = {}
    with open(NUMBERBATCH_FILE) as file:
        # the first line of the dump is a header: "<term count> <dimension count>"
        n_terms, n_dims = map(int, file.readline().strip().split(' '))
        for line in file:
            # each remaining line is "<term> <float> <float> ..."
            splits = line.strip().split(' ')
            embeddings[splits[0]] = np.array(list(map(float, splits[1:])))
    with open(EMBEDDINGS_FILE, 'wb') as file:
        pickle.dump(embeddings, file)
else:
    with open(EMBEDDINGS_FILE, 'rb') as file:
        embeddings = pickle.load(file)
 
# find nearest neighbours
reload = False  # set False the first time you run; set True once the pickle file exists
if not reload:
    knn = NearestNeighbors(n_neighbors=N_NEIGHBOURS, algorithm='ball_tree')
    # row i of the index corresponds to the i-th key of 'embeddings'
    # (dict insertion order), which is how results are mapped back below
    np_embeddings = np.array(list(embeddings.values()))
    knn.fit(np_embeddings)
    with open(NUMBERBATCH_NEIGHBOURS_FILE, 'wb') as file:
        pickle.dump(knn, file)
else:
    with open(NUMBERBATCH_NEIGHBOURS_FILE, 'rb') as file:
        knn = pickle.load(file)
   
 
# test: print the nearest terms (and their distances) to 'bergen'
distances, indexes = knn.kneighbors([embeddings['bergen']])
for dist, idx in zip(distances[0], indexes[0]):
    print(f'{dist}:\t{list(embeddings.keys())[idx]}')
 
 
def vector_neighbours(vector):
    # Print the N_NEIGHBOURS terms whose embeddings are closest to 'vector'.
    distances, indexes = knn.kneighbors([vector])
    for dist, idx in zip(distances[0], indexes[0]):
        print(f'{dist}:\t{list(embeddings.keys())[idx]}')
 
 
# vector arithmetic in embedding space (word-analogy style query)
vector_neighbours(embeddings['oslo'] - embeddings['norway'] + embeddings['france'])
</syntaxhighlight>

Latest revision as of 11:24, 17 April 2024

This page currently shows the examples used in the Spring of 2023. It will be updated with examples from 2024 as the course progresses.

Lecture 1: Introduction to KGs

Turtle example:

@prefix ex: <http://example.org/> .
ex:Roger_Stone
    ex:name "Roger Stone" ;
    ex:occupation ex:lobbyist ;
    ex:significant_person ex:Donald_Trump .
ex:Donald_Trump
    ex:name "Donald Trump" .

Lecture 2: RDF

Blank nodes for anonymity, or when we have not decided on a URI:

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
# binding the prefix is what makes the serializer emit
# '@prefix ex: <http://example.org/> .' and use 'ex:'-prefixed
# names when we print out the Turtle below
g.bind('ex', EX)

# a blank node stands in for Robert Mueller until we decide on a URI
mueller = BNode()
for predicate, value in [
    (RDF.type, EX.Human),
    (FOAF.name, Literal('Robert Mueller', lang='en')),
    (EX.position_held, Literal('Director of the Federal Bureau of Investigation', lang='en')),
]:
    g.add((mueller, predicate, value))

print(g.serialize(format='turtle'))

Blank nodes used to group related properties:

# Stub for the "blank nodes used to group related properties" example:
# building the actual triples is left as a task in Exercise 2, so this
# script only sets up an empty graph with the 'ex' prefix and prints it.
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)

# This is a task in Exercise 2

print(g.serialize(format='turtle'))

Literals:

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)

# Literal values: language-tagged strings (lang=...) and typed
# literals (datatype=...) on the same subject.
mueller = EX.Robert_Mueller
g.add((mueller, RDF.type, EX.Human))
g.add((mueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((mueller, FOAF.name, Literal('رابرت مولر', lang='fa')))
g.add((mueller, DC.description, Literal('sixth director of the FBI', datatype=XSD.string)))
g.add((mueller, EX.start_time, Literal(2001, datatype=XSD.integer)))

print(g.serialize(format='turtle'))

Alternative container (open):

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')
# Fix: SCHEMA was used below but never defined in the original example,
# which raised a NameError when run.
SCHEMA = Namespace('http://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# rdf:Alt: an *open* container of alternatives (here: mirrors of a document).
muellerReportArchives = BNode()
g.add((muellerReportArchives, RDF.type, RDF.Alt))

archive1 = 'https://archive.org/details/MuellerReportVolume1Searchable/' \
                    'Mueller%20Report%20Volume%201%20Searchable/'
archive2 = 'https://edition.cnn.com/2019/04/18/politics/full-mueller-report-pdf/index.html'
archive3 = 'https://www.politico.com/story/2019/04/18/mueller-report-pdf-download-text-file-1280891'

# rdfs:member is the generic membership property for containers.
g.add((muellerReportArchives, RDFS.member, Literal(archive1, datatype=XSD.anyURI)))
g.add((muellerReportArchives, RDFS.member, Literal(archive2, datatype=XSD.anyURI)))
g.add((muellerReportArchives, RDFS.member, Literal(archive3, datatype=XSD.anyURI)))

g.add((EX.Mueller_Report, RDF.type, FOAF.Document))
g.add((EX.Mueller_Report, DC.contributor, EX.Robert_Mueller))
g.add((EX.Mueller_Report, SCHEMA.archivedAt, muellerReportArchives))

print(g.serialize(format='turtle'))

Sequence container (open):

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')
# Fix: SCHEMA was used below but never defined in the original example,
# which raised a NameError when run.
SCHEMA = Namespace('http://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# rdf:Seq: an *open*, ordered container; rdf:_1, rdf:_2, ... give the order.
donaldTrumpSpouses = BNode()
g.add((donaldTrumpSpouses, RDF.type, RDF.Seq))
g.add((donaldTrumpSpouses, RDF._1, EX.IvanaTrump))
g.add((donaldTrumpSpouses, RDF._2, EX.MarlaMaples))
g.add((donaldTrumpSpouses, RDF._3, EX.MelaniaTrump))

g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))

print(g.serialize(format='turtle'))

Collection (closed list):

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
from rdflib.collection import Collection

EX = Namespace('http://example.org/')
# Fix: SCHEMA was used below but never defined in the original example,
# which raised a NameError when run.
SCHEMA = Namespace('http://schema.org/')

# Fix: the original created and bound the Graph twice (the first instance
# was immediately discarded) and printed the serialization twice.
g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# A collection (RDF list) is *closed*: it states these are ALL the members.
donaldTrumpSpouses = BNode()
Collection(g, donaldTrumpSpouses, [
    EX.IvanaTrump, EX.MarlaMaples, EX.MelaniaTrump
])
g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))

print(g.serialize(format='turtle'))
g.serialize(destination='s02_Donald_Trump_spouses_list.ttl', format='turtle')

Lecture 3: SPARQL

The KG4News knowledge graph can be accessed online here (namespace kb) (read-only), or you can load the Turtle file into your own GraphDB repository. (Remember to save the file with .ttl extension. You can use http://i2s.uib.no/kg4news/ as base URI.)

Limit

# Return (up to) 10 predicates; without DISTINCT, rows may repeat.
SELECT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10

List distinct properties only (with limit)

# DISTINCT removes duplicate rows from the result table.
SELECT DISTINCT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10

Limit with offset

# OFFSET skips the first rows; combined with LIMIT it pages through results.
SELECT DISTINCT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10 OFFSET 9

List rdf:types

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

# List the distinct classes that occur as rdf:type objects.
SELECT DISTINCT ?t WHERE {
    ?s rdf:type ?t .
}
LIMIT 50

URI for Tim Berners-Lee

PREFIX foaf: <http://xmlns.com/foaf/0.1/>

# CONTAINS does case-sensitive substring matching on the name.
SELECT DISTINCT ?person WHERE {
    ?person foaf:name ?name .  
 
    FILTER(CONTAINS(?name, "Berners-Lee"))
}
LIMIT 10  # best to use limit if something goes wrong

Describe Tim Berners-Lee

DESCRIBE <http://i2s.uib.no/kg4news/author/1432678629>

DESCRIBE returns a new RDF graph, whereas SELECT returns a table of rows.

Papers that mention "Semantic Web" in the title

PREFIX ss: <http://semanticscholar.org/>

# STR() strips any language tag / datatype so CONTAINS can match the text.
SELECT DISTINCT ?paper ?title WHERE {
    ?paper ss:title ?title .  
 
    FILTER(CONTAINS(STR(?title), "Semantic Web"))
}
LIMIT 50

Alternative filter that ignores capitalisation (lower/upper case)

    FILTER(REGEX(STR(?title), "Semantic Web", "i"))

Authors sorted by name

PREFIX foaf: <http://xmlns.com/foaf/0.1/>

# SELECT * projects every variable used in the WHERE clause.
SELECT DISTINCT * WHERE {
    ?author foaf:name ?name .  
}
ORDER BY ?name
LIMIT 10

Sorted by descending name instead

ORDER BY DESC(?name)

Count papers by author

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX ss: <http://semanticscholar.org/>

# GROUP BY collapses rows per author; COUNT aggregates the papers per group.
SELECT DISTINCT ?author (COUNT(?paper) AS ?count) WHERE {
    ?author rdf:type ss:Author .  
    ?paper  rdf:type ss:Paper ;
            dct:contributor ?author .
}
GROUP BY ?author
LIMIT 10

Only list the most prolific authors

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX ss: <http://semanticscholar.org/>

# HAVING filters whole groups after aggregation (FILTER works on rows).
SELECT DISTINCT ?author (COUNT(?paper) AS ?count) WHERE {
    ?author rdf:type ss:Author .  
    ?paper  rdf:type ss:Paper ;
            dct:contributor ?author .
}
GROUP BY ?author
HAVING (?count >= 10)  # similar to a filter expression
LIMIT 10               # include limit when you test

Order by descending paper count

SELECT ... {
    ...
}
GROUP BY ?person
HAVING (?count > 10)
ORDER BY DESC(?count)
LIMIT 10

Order by descending paper count and then by author name

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ss: <http://semanticscholar.org/>

# Fix 1: SPARQL 1.1 forbids (expr AS ?var) when ?var is already bound in
#        the WHERE clause, so (SAMPLE(?name) AS ?name) is illegal; project
#        onto a fresh variable instead.
# Fix 2: the heading promises ordering by count and THEN by author name,
#        so the sampled name is added as a secondary sort key.
SELECT DISTINCT ?person (SAMPLE(?name) AS ?anyName) (COUNT(?paper) AS ?count) WHERE {
    ?person rdf:type ss:Author ;
            foaf:name ?name .
    ?paper  rdf:type ss:Paper ;
            ss:title ?title ;
            dct:contributor ?person .
}
GROUP BY ?person
HAVING (?count > 10)
ORDER BY DESC(?count) ?anyName
LIMIT 10

Embedded Wikidata query

This is a toy example only. Embedded queries like these are better suited for situations where the same URIs are used in more than one triple store and you want to combine data. But Wikidata and KG4News do not use the same URIs. So instead, the example searches for similar labels, and this is something graph databases may not be optimised for. Moreover, Wikidata uses language-tagged strings whereas KG4News uses plain strings, so the labels cannot even be directly compared.

PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?enname ?person ?wdperson WHERE {
    BIND("T. Berners-Lee"@en AS ?enname)  # language-tagged label

    # The SERVICE block is evaluated by the remote Wikidata endpoint.
    SERVICE <https://query.wikidata.org/bigdata/namespace/wdq/sparql> {
        # return a Wikidata identifier (URI) with this label as alternative
        SELECT ?wdperson ?enname WHERE {
             ?wdperson skos:altLabel ?enname .
        }
        LIMIT 10  # we use limit in case the label does not match
    }
            
    BIND(STR(?enname) AS ?name)  # the same label, but with language tag removed
    # return a KG4News identifier (URI) with this label as name
    ?person foaf:name ?name .  

}
LIMIT 10

Add one or more triples

From now on you need a Blazegraph that allows writing, for example the I2S sandbox. Remember to create a new namespace first and make sure you use it afterwards.

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX ss: <http://semanticscholar.org/>

# INSERT DATA takes ground triples only (no variables allowed).
INSERT DATA {  # note the Turtle-like syntax
    kg:paper_123 rdf:type ss:Paper ;
            ss:title "Semantic Knowledge Graphs for the News: A Review"@en ;
            kg:year 2022 ;
            dct:contributor kg:auth_456, kg:auth_789 . 
}

Remove one or more triples

PREFIX kg: <http://i2s.uib.no/kg4news/>

# DELETE DATA removes exactly these ground triples, if present.
DELETE DATA
{
    kg:paper_123 kg:year 2022  .
}

Pattern-based addition and/or removal of triples

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX ss: <http://semanticscholar.org/>

# Fix: the DATA keyword is only for ground (variable-free) triples and
# cannot be combined with WHERE. A pattern-based update with variables
# must use the DELETE/INSERT/WHERE form.
DELETE {
    ?paper dct:contributor kg:auth_456
}
INSERT {
    ?paper dct:contributor kg:auth_654
}
WHERE {  # the patterns are similar to SELECT patterns
    ?paper dct:contributor kg:auth_456
}

Lecture 8: SHACL

The examples are for use with the interactive SHACL Playground.

Minimal example

First shape graph:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


# A node shape: validates every instance of kg:MainPaper.
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape  .

# A property shape with a path but no constraints yet, so nothing can fail.
kg:MainPaperYearShape
        sh:path kg:year .

First data graph:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


# One instance of the target class. It has no kg:year, but the shape graph
# above imposes no cardinality constraint, so validation succeeds.
kg:LOD_Paper
    a kg:MainPaper ;
    dcterm:title "Linked Data - The Story so Far" .

This should not give a validation error.

Alternative shape graph

Keep the prefixes from the first examples. You can also write the property constraint as an anonymous node like this:

# Same constraint as before, but written as an anonymous (blank-node)
# property shape instead of a named one.
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year 
    ] .

This is equivalent to the previous example (no validation error).

Cardinality constraints on properties

Add a minimum constraint to the kg:year property:

# sh:minCount 1 makes kg:year mandatory for every kg:MainPaper.
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1
    ] .

Now you should get a validation error.

  • In the data graph, add two kg:year properties (e.g., 2008 and 2009) to kg:LOD_Paper to get rid of the error.
  • In the shapes graph, add a sh:maxCount 1 constraint to get another validation error.
  • In the data graph, remove one kg:year property value from kg:LOD_Paper to get rid of the error (2009 is the right year).

Datatype constraint on literal property values

Add the following property constraint to the previous example:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1 ;
        # Fix: the original repeated "sh:minCount 1" here; per the preceding
        # exercise steps and the later examples on this page, sh:maxCount 1
        # was intended.
        sh:maxCount 1 ;
        sh:datatype xsd:integer
    ] .

This should not give a validation error.

  • In the shapes graph, change the datatype constraint to sh:datatype xsd:year. Now you should get an error.
  • In the data graph, change the integer 2009 to the typed value "2009"^^xsd:year to get rid of the error.

Class and node kind constraints

Add the following property constraints to the shape graph, either with a URI like this:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    # Fix: the second shape reference was missing its kg: prefix, and the
    # statement was missing its terminating period.
    sh:property kg:MainPaperYearShape, kg:MainPaperContributorShape .

...

kg:MainPaperContributorShape
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI .

Or like this:

# The same constraints written with two anonymous property shapes
# instead of named ones.
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year 
    ], [
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI 
    ] .

Either way should give you a validation error.

  • In the data graph, add dcterm:contributor "T. Berners-Lee" to get rid of the cardinality error. The error goes away, but you get two new ones instead.
  • In the data graph, add dcterm:contributor [ a kg:MainAuthor ; foaf:name "T. Berners-Lee" ] to get rid of the error. The class error goes away, but the IRI error remains.
  • In the data graph, create an IRI for Tim-Berners Lee to resolve the error:
    ...
    dcterm:contributor kg:TBL .

kg:TBL
    a kg:MainAuthor;
    foaf:name "T. Berners-Lee" .


Full example

Final shape graph from the lecture:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


# Every kg:MainPaper must satisfy both property shapes below.
kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape, kg:MainPaperContributorShape .

# Exactly one kg:year value, typed xsd:year.
kg:MainPaperYearShape
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year .

# At least one contributor, which must be an IRI typed kg:MainAuthor.
kg:MainPaperContributorShape
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI .

Final data graph from the lecture:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


kg:LOD_Paper
    a kg:MainPaper ;
    dcterm:title "Linked Data - The Story so Far" ;
    # Fix: an earlier exercise on this page states that 2009 is the right
    # year for this paper; the original data graph said "2006".
    kg:year "2009"^^xsd:year ;
    dcterm:contributor kg:TBL, kg:CB .

kg:TBL
    a kg:MainAuthor;
    foaf:name "T. Berners-Lee" .
 
kg:CB
     a kg:MainAuthor;
     foaf:name "C. Bizer" .

Lecture 8: RDFS

Create two new GraphDB Repositories, one with RDFS inference and one with No inference. Try the SPARQL statements on both, so you can compare them.

To test rdfs:subClassOf:

PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# Run the INSERT first, then the ASK below as a separate query.
INSERT DATA { 
	kg:LOD_paper rdf:type kg:MainPaper .
    kg:MainPaper rdfs:subClassOf kg:Paper .
}
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# With RDFS inference this returns true (membership in the superclass is
# inferred); with no inference it returns false.
ASK {
	kg:LOD_paper rdf:type kg:Paper .
}


To test rdfs:domain:

PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# Run the INSERT first, then the ASK below as a separate query.
INSERT DATA { 
	kg:TBL kg:author kg:LOD_paper .
    kg:author rdfs:domain kg:Author .
}
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# With RDFS inference the domain axiom lets the engine infer the subject's
# type; with no inference this returns false.
ASK {
	kg:TBL rdf:type kg:Author .
}


To see all the triples in the KG:

# Every triple in the repository (including inferred ones, if inference is on).
SELECT * WHERE {
  ?s ?p ?o
}

Lecture 11: Graph embeddings

Here is the example that used graph embeddings from ConceptNet. The numberbatch-en-19.08.txt.gz file can be downloaded from GitHub.

Precomputed pickle files are available here (requires UiB login).

import pickle

import pandas as pd  # NOTE(review): unused in this snippet; kept as-is
import numpy as np
from sklearn.neighbors import NearestNeighbors


NUMBERBATCH_FILE = './conceptnet/numberbatch-en-19.08.txt'
EMBEDDINGS_FILE = './conceptnet/numberbatch-embeddings.pickle'
N_NEIGHBOURS = 5
NUMBERBATCH_NEIGHBOURS_FILE = f'./conceptnet/numberbatch-neighbours-{N_NEIGHBOURS}.pickle'


# Load the Numberbatch embeddings into {term: vector}.
# Fix: the original comment said "set False ... or if you already have the
# pickle file", contradicting the code: False parses the text file and
# WRITES the pickle; True READS the existing pickle.
reload = False  # set True if you already have the pickle file
if not reload:
    embeddings = {}
    with open(NUMBERBATCH_FILE, encoding='utf-8') as file:
        # The first line is a header: "<number of terms> <number of dimensions>".
        _n_terms, _n_dims = map(int, file.readline().strip().split(' '))
        for line in file:
            splits = line.strip().split(' ')
            embeddings[splits[0]] = np.array(list(map(float, splits[1:])))
    with open(EMBEDDINGS_FILE, 'wb') as file:
        pickle.dump(embeddings, file)
else:
    with open(EMBEDDINGS_FILE, 'rb') as file:
        embeddings = pickle.load(file)

# Fit (or load) a nearest-neighbour index over the embedding vectors.
reload = False  # set True if you already have the pickle file
if not reload:
    knn = NearestNeighbors(n_neighbors=N_NEIGHBOURS, algorithm='ball_tree')
    np_embeddings = np.array(list(embeddings.values()))
    knn.fit(np_embeddings)
    with open(NUMBERBATCH_NEIGHBOURS_FILE, 'wb') as file:
        pickle.dump(knn, file)
else:
    with open(NUMBERBATCH_NEIGHBOURS_FILE, 'rb') as file:
        knn = pickle.load(file)

# Hoisted: the original rebuilt list(embeddings.keys()) for every printed
# neighbour, which is O(vocabulary) work inside the print loop.
terms = list(embeddings)


def vector_neighbours(vector):
    """Print distance and term for the nearest neighbours of *vector*."""
    distances, indexes = knn.kneighbors([vector])
    for dist, idx in zip(distances[0], indexes[0]):
        print(f'{dist}:\t{terms[idx]}')


# test (the original duplicated this loop inline before defining the helper)
vector_neighbours(embeddings['bergen'])

# vector arithmetic: oslo - norway + france should land near 'paris'
vector_neighbours(embeddings['oslo'] - embeddings['norway'] + embeddings['france'])