[TASK] Initial commit. Working tool.

This commit is contained in:
Jan Philipp Timme 2014-02-14 15:01:34 +01:00
commit bfc2ad5373
1 changed files with 145 additions and 0 deletions

145
triplify.py Normal file
View File

@ -0,0 +1,145 @@
#SQL
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
"""
class HsHMembership(Base):
__tablename__ = 'membership'
id = Column(Integer, primary_key=True)
person = Column(Integer, ForeignKey('person.id'))
organizational_unit = Column(Integer)
start_date = Column(DateTime)
end_date = Column(DateTime)
active = Column(String)
business_role = Column(Integer)
mPerson = relationship("HsHPerson")
"""
class HsHPerson(Base):
__tablename__ = 'person'
id = Column(Integer, primary_key=True)
name = Column(String)
firstname = Column(String)
title_prefix = Column(String)
title_suffix = Column(String)
account = Column(Integer)
gender = Column(String)
birthday = Column(DateTime) #TODO: without timezone!
#memberships = relationship("HsHMembership", backref='person')
class HsHTelephone(Base):
__tablename__ = 'telephone'
id = Column(Integer, primary_key=True)
call_number = Column(String)
description = Column(String)
#RDF
from rdflib import Namespace
from rdfalchemy import rdfSingle
from rdfalchemy.rdfSubject import rdfSubject
from rdflib import Literal, BNode, Namespace, URIRef
from rdflib import RDF, RDFS, Graph, OWL
from rdflib.namespace import XSD
foaf = Namespace('http://xmlns.com/foaf/0.1/')
core = Namespace('http://vivoweb.org/ontology/core#')
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7')
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
local = Namespace('http://vivo.bib.hs-hannover.de/person')
hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#')
def get_graph():
"""Small little helper to create graph with namespaces ;-)"""
g = rdfSubject.db
g.bind('foaf', foaf)
g.bind('core', core)
g.bind('vitro', vitro)
g.bind('vcard', vcard)
g.bind('obo', obo)
g.bind('local', local)
g.bind('hsh', hsh)
return g
class IdSequence:
"""Small Helper for easy sequences"""
def __init__(self, start):
self.num = start - 1
def getNext(self):
self.num += 1
return self.num
class Thing(rdfSubject):
rdf_type = OWL.Thing
label = rdfSingle(RDFS.label)
class hshThing(Thing):
rdf_type = hsh.hshLocal
class Person(hshThing):
rdf_type = foaf.Person
firstname = rdfSingle(vcard.givenName)
name = rdfSingle(vcard.familyName)
hasContactInfo = rdfSingle(obo.ARG_2000028)
label = rdfSingle(RDFS.label)
class ContactInfo(Thing):
rdf_type = vcard.Individual
hasTitle = rdfSingle(vcard.hasTitle)
contactInformationFor = rdfSingle(obo.ARG_2000029)
class Title(Thing):
rdf_type = vcard.Title
title = rdfSingle(vcard.title)
def createRDFFromSQL():
"""Fetch data from postgres and map them into RDF"""
engine = create_engine("postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo")
session = sessionmaker(bind=engine)()
persons = session.query(HsHPerson)
g = get_graph()
additionalIdSeq = IdSequence(2000000000) #don't care sequence
for count, sqlP in enumerate(persons):
combined_title = ''
if sqlP.title_prefix != None:
combined_title = sqlP.title_prefix
if sqlP.title_suffix != None:
if combined_title == '':
combined_title = sqlP.title_suffix
else:
combined_title += ' ' + sqlP.title_suffix
person_uri = URIRef("%s/%s" % (local, sqlP.id))
rdfP = hshThing(person_uri)
rdfP = Person(person_uri) #WTF?!
rdfP.firstname = sqlP.firstname;
rdfP.name = sqlP.name;
rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)
# If there is a title to add, do it.
if(combined_title != ''):
title_uri = URIRef("%s/%s" % (local, additionalIdSeq.getNext()))
rdfTitle = Title(title_uri)
rdfTitle.title = combined_title
contact_info_uri = URIRef("%s/%s" % (local, additionalIdSeq.getNext()))
rdfCi = ContactInfo(contact_info_uri)
rdfCi.hasTitle = rdfTitle
rdfCi.contactInformationFor = rdfP
rdfP.hasContactInfo = rdfCi
break
triples = g.serialize(format='n3')
g.close()
print(triples)
f = open('data.n3', 'wb')
f.write(triples)
f.close()
if __name__ == '__main__':
createRDFFromSQL()