[TASK] Now supports OE-Tree and OE<->Person relationships.

Also supports different types of persons.
This commit is contained in:
Jan Philipp Timme 2014-02-17 17:42:40 +01:00
parent bfc2ad5373
commit e7e9ab3d23
1 changed file with 155 additions and 55 deletions

View File

@ -2,9 +2,25 @@
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import relationship, backref
Base = declarative_base()
"""
class HsHOrganizationalUnitType(Base):
    """ORM mapping for the ``organizational_unit_type`` table."""

    __tablename__ = 'organizational_unit_type'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Name of the unit type.
    name = Column(String)
class HsHOrganizationalUnit(Base):
    """ORM mapping for the ``organizational_unit`` table.

    Rows reference each other through ``parent``, so the units form a
    hierarchy; ``oParent`` exposes the referenced parent row as an object.
    """

    __tablename__ = 'organizational_unit'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Self-referential foreign key to the parent unit's id.
    parent = Column(Integer, ForeignKey('organizational_unit.id'))
    acronym = Column(String)
    # Plain integer column; no foreign key is declared for it here.
    post_address = Column(Integer)
    # Foreign key to organizational_unit_type.id.
    organizational_unit_type = Column(
        Integer, ForeignKey('organizational_unit_type.id'))

    # remote_side=[id] tells SQLAlchemy that ``id`` is the "remote" column of
    # this self-referential join, which makes oParent the many-to-one
    # (child -> parent) direction of the relationship.
    oParent = relationship("HsHOrganizationalUnit", remote_side=[id])
class HsHMembership(Base):
__tablename__ = 'membership'
id = Column(Integer, primary_key=True)
@ -13,9 +29,16 @@ class HsHMembership(Base):
start_date = Column(DateTime)
end_date = Column(DateTime)
active = Column(String)
business_role = Column(Integer)
business_role = Column(Integer, ForeignKey('business_role.id'))
mPerson = relationship("HsHPerson")
"""
mBusinessRole = relationship("HsHBusinessRole")
class HsHBusinessRole(Base):
    """ORM mapping for the ``business_role`` table."""

    __tablename__ = 'business_role'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Role name.
    name = Column(String)
    # Free-text description of the role.
    description = Column(String)
class HsHPerson(Base):
__tablename__ = 'person'
id = Column(Integer, primary_key=True)
@ -25,8 +48,8 @@ class HsHPerson(Base):
title_suffix = Column(String)
account = Column(Integer)
gender = Column(String)
birthday = Column(DateTime) #TODO: without timezone!
#memberships = relationship("HsHMembership", backref='person')
#birthday = Column(DateTime) #TODO: without timezone!
memberships = relationship("HsHMembership")
class HsHTelephone(Base):
__tablename__ = 'telephone'
@ -46,31 +69,28 @@ from rdflib.namespace import XSD
# RDF namespaces used by the mapped classes and bound as prefixes on the graph.
foaf = Namespace('http://xmlns.com/foaf/0.1/')
# `core` and `vivo` (below) are two names for the same VIVO core ontology URI.
core = Namespace('http://vivoweb.org/ontology/core#')
# NOTE(review): unlike the other hash namespaces here, this URI does not end
# in '#' -- confirm that terms built from it get the intended form.
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7')
vivo = Namespace('http://vivoweb.org/ontology/core#')
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
# Base URIs for locally minted resources. They carry no trailing separator;
# the code that builds resource URIs formats them as "<base>/<id>".
# `local` and `localPerson` are the same base URI.
local = Namespace('http://vivo.bib.hs-hannover.de/person')
hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#')
localPerson = Namespace('http://vivo.bib.hs-hannover.de/person')
localOrg = Namespace('http://vivo.bib.hs-hannover.de/org')
def get_graph():
    """Return the graph behind ``rdfSubject`` with all prefixes bound.

    The prefixes are bound in the order listed below so the serialized
    output uses readable prefix names instead of full URIs.
    """
    graph = rdfSubject.db
    # (prefix, namespace) pairs, in binding order.
    prefix_bindings = (
        ('foaf', foaf),
        ('core', core),
        ('vitro', vitro),
        ('vivo', vivo),
        ('vcard', vcard),
        ('obo', obo),
        ('local', local),
        ('localperson', localPerson),
        ('localorg', localOrg),
        ('hsh', hsh),
    )
    for prefix, namespace in prefix_bindings:
        graph.bind(prefix, namespace)
    return graph
class IdSequence(object):
    """Simple counter that hands out consecutive integers.

    The first call to :meth:`getNext` returns ``start``; every further call
    returns the previous value plus one.
    """

    def __init__(self, start):
        """Prepare the sequence so that ``start`` is the first value issued."""
        # Stored one below ``start`` because getNext() increments before
        # returning.
        self.num = start - 1

    def getNext(self):
        """Advance the counter and return the new value."""
        self.num += 1
        return self.num
class Thing(rdfSubject):
rdf_type = OWL.Thing
label = rdfSingle(RDFS.label)
@ -84,62 +104,142 @@ class Person(hshThing):
name = rdfSingle(vcard.familyName)
hasContactInfo = rdfSingle(obo.ARG_2000028)
label = rdfSingle(RDFS.label)
associatedOe = rdfSingle(vivo.relatedBy)
class FacultyMember(Person):
    """Person whose RDF type is ``vivo:FacultyMember``."""

    rdf_type = vivo.FacultyMember
class NonFacultyAcademic(Person):
    """Person whose RDF type is ``vivo:NonFacultyAcademic``."""

    rdf_type = vivo.NonFacultyAcademic
class NonAcademic(Person):
    """Person whose RDF type is ``vivo:NonAcademic``."""

    rdf_type = vivo.NonAcademic
class ContactInfo(Thing):
    """vCard individual that ties contact details (e.g. a title) to a person."""

    rdf_type = vcard.Individual

    # Each property is declared once, with its range type. The earlier
    # declarations without range_type were overwritten by these and had no
    # effect.
    hasTitle = rdfSingle(vcard.hasTitle, range_type=vcard.Title)
    contactInformationFor = rdfSingle(obo.ARG_2000029,
                                      range_type=foaf.Person)
class Title(Thing):
    """vCard title node carrying the literal title text."""

    rdf_type = vcard.Title

    # Declared once (the earlier declaration without range_type was
    # overwritten). The XML Schema datatype is spelled ``string`` in lower
    # case; ``XSD.String`` is not a term of that vocabulary.
    title = rdfSingle(vcard.title, range_type=XSD.string)
def createRDFFromSQL():
"""Fetch data from postgres and map them into RDF"""
class Organization(hshThing):
    """Organizational unit mapped to ``foaf:Organization``."""

    rdf_type = foaf.Organization

    # Human-readable name of the unit.
    label = rdfSingle(RDFS.label)
    # Link to the parent unit, expressed with obo:BFO_0000050
    # (presumably the "part of" relation -- TODO confirm).
    parentOe = rdfSingle(obo.BFO_0000050)
class IdSequence(object):
    """Simple counter that hands out consecutive integers.

    The first call to :meth:`getNext` returns ``start``; every further call
    returns the previous value plus one.
    """

    def __init__(self, start):
        """Prepare the sequence so that ``start`` is the first value issued."""
        # Stored one below ``start`` because getNext() increments before
        # returning.
        self.num = start - 1

    def getNext(self):
        """Advance the counter and return the new value."""
        self.num += 1
        return self.num
def addPersonToGraph(sqlP, additionalIdSeq):
    """Add a given HsHPerson to the graph.

    Creates the person resource with first name, family name and a
    "name, firstname" label. When the person has a title prefix and/or
    suffix, a Title node and a ContactInfo node are created as well and
    linked to the person.

    Args:
        sqlP: HsHPerson row; ``id``, ``firstname``, ``name``,
            ``title_prefix`` and ``title_suffix`` are read.
        additionalIdSeq: IdSequence supplying ids for the auxiliary Title
            and ContactInfo nodes (two ids are consumed per titled person).

    Returns:
        The URIRef of the person resource.
    """
    # Join whichever title parts are present with a single space. Missing
    # (None) and empty parts are skipped, so no stray leading or trailing
    # space ends up in the title.
    title_parts = (sqlP.title_prefix, sqlP.title_suffix)
    combined_title = ' '.join(part for part in title_parts if part)

    person_uri = URIRef("%s/%s" % (localPerson, sqlP.id))
    # The resource is instantiated through both mapped classes, as before;
    # presumably so that it carries both rdf:type values -- TODO confirm.
    hshThing(person_uri)
    rdfP = Person(person_uri)
    rdfP.firstname = sqlP.firstname
    rdfP.name = sqlP.name
    rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)

    # Only persons with a title get Title/ContactInfo nodes.
    if combined_title:
        # Id order matters for reproducible URIs: title first, then the
        # contact info node.
        title_uri = URIRef("%s/%s" % (localPerson, additionalIdSeq.getNext()))
        rdfTitle = Title(title_uri)
        rdfTitle.title = combined_title

        contact_info_uri = URIRef(
            "%s/%s" % (localPerson, additionalIdSeq.getNext()))
        rdfCi = ContactInfo(contact_info_uri)
        rdfCi.hasTitle = rdfTitle
        rdfCi.contactInformationFor = rdfP
        rdfP.hasContactInfo = rdfCi

    return person_uri
def processPersons(session, additionalIdSeq):
    """Fetch persons, create triples for them.

    Persons without any membership are skipped. Every other person is added
    to the graph and typed from their active memberships (``active == 'Y'``):
    business role 'ProfessorIn' yields a FacultyMember, 'WiMi' a
    NonFacultyAcademic, and in both cases the person is linked to the
    membership's organizational unit. A person for whom no active membership
    has one of these roles is typed as NonAcademic.

    Args:
        session: SQLAlchemy session used to query HsHPerson.
        additionalIdSeq: IdSequence passed on to addPersonToGraph.
    """
    # Business-role name -> RDF class for the academic person types.
    academic_classes = {
        'ProfessorIn': FacultyMember,
        'WiMi': NonFacultyAcademic,
    }

    for sqlP in session.query(HsHPerson):
        if not sqlP.memberships:
            continue  # skip people without any memberships at all

        personUri = addPersonToGraph(sqlP, additionalIdSeq)

        # Stays True unless an active academic membership is found.
        isNonAcademic = True
        for membership in sqlP.memberships:
            if membership.active != 'Y':
                continue  # skip inactive memberships

            person_class = academic_classes.get(membership.mBusinessRole.name)
            if person_class is None:
                continue  # role does not imply an academic person type

            isNonAcademic = False
            organization_uri = URIRef(
                "%s/%s" % (localOrg, membership.organizational_unit))
            rdfP = person_class(personUri)
            # NOTE(review): associatedOe is declared as a single-valued
            # property, so a later membership presumably replaces the unit
            # set by an earlier one -- confirm this is intended.
            rdfP.associatedOe = organization_uri

        # Assign the non-academic person type if necessary.
        if isNonAcademic:
            NonAcademic(personUri)
def processOrganizations(session, additionalIdSeq):
    """Generate triples for organizational units.

    Each HsHOrganizationalUnit row becomes an Organization resource labelled
    with the unit's name. Units that have a parent are linked to the parent's
    URI through ``parentOe``.

    Args:
        session: SQLAlchemy session used to query HsHOrganizationalUnit.
        additionalIdSeq: Unused here; accepted so the signature matches
            processPersons.
    """
    for sqlO in session.query(HsHOrganizationalUnit):
        organization_uri = URIRef("%s/%s" % (localOrg, sqlO.id))
        # The resource is instantiated through both mapped classes, as
        # before; presumably so that it carries both rdf:type values --
        # TODO confirm.
        hshThing(organization_uri)
        rdfO = Organization(organization_uri)
        rdfO.label = sqlO.name

        if sqlO.oParent is not None:
            rdfO.parentOe = URIRef("%s/%s" % (localOrg, sqlO.oParent.id))
def createTriples(db_url=None):
    """Fetch organizational units and persons and map them to RDF.

    Args:
        db_url: Optional SQLAlchemy connection URL. When omitted, the
            built-in default connection below is used.

    Returns:
        The graph serialized in N3 format, as returned by
        ``Graph.serialize``.
    """
    # NOTE(review): the default URL embeds credentials in source code;
    # consider supplying db_url from configuration instead.
    if db_url is None:
        db_url = "postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo"
    engine = create_engine(db_url)
    session = sessionmaker(bind=engine)()

    g = get_graph()
    # Ids for auxiliary nodes (titles, contact info) are drawn from a high
    # range; presumably so they cannot collide with person ids, which share
    # the same URI base -- TODO confirm.
    additionalIdSeq = IdSequence(2000000000)

    try:
        processOrganizations(session, additionalIdSeq)
        processPersons(session, additionalIdSeq)
        # We're done: render the graph.
        triples = g.serialize(format='n3')
    finally:
        # Release the graph and the database session even if mapping fails.
        g.close()
        session.close()
    return triples
if __name__ == '__main__':
    # Script entry point: build the triples, print them, and optionally
    # store them in data.n3 in the working directory.
    writeToFile = True
    triples = createTriples()
    print(triples)
    if writeToFile:
        # Binary mode is kept from the original; this assumes serialize()
        # returns a byte string on the interpreter in use -- TODO confirm
        # when running under Python 3.
        with open('data.n3', 'wb') as f:
            f.write(triples)