2014-02-18 17:24:44 +01:00
|
|
|
# coding=utf-8
|
2014-02-14 15:01:34 +01:00
|
|
|
#SQL
|
|
|
|
from sqlalchemy import *
|
|
|
|
from sqlalchemy.ext.declarative import declarative_base
|
|
|
|
from sqlalchemy.orm import sessionmaker
|
2014-02-17 17:42:40 +01:00
|
|
|
from sqlalchemy.orm import relationship, backref
|
2014-02-14 15:01:34 +01:00
|
|
|
|
|
|
|
Base = declarative_base()
|
2014-02-17 17:42:40 +01:00
|
|
|
|
|
|
|
class HsHOrganizationalUnitType(Base):
    """ORM mapping for the ``organizational_unit_type`` table."""

    __tablename__ = 'organizational_unit_type'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # String column holding the type's name.
    name = Column(String)
|
|
|
|
|
|
|
|
class HsHOrganizationalUnit(Base):
    """ORM mapping for the ``organizational_unit`` table.

    The table is self-referential: ``parent`` points at another row of the
    same table and ``oParent`` exposes that row as an object.
    """

    __tablename__ = 'organizational_unit';

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Self-referential foreign key to organizational_unit.id.
    parent = Column(Integer, ForeignKey('organizational_unit.id'))
    acronym = Column(String)
    # Plain integer column; no foreign key is declared for it here.
    post_address = Column(Integer)
    # Foreign key to organizational_unit_type.id.
    organizational_unit_type = Column(Integer, ForeignKey('organizational_unit_type.id'))

    # Relationship to the parent unit; remote_side=[id] names ``id`` as the
    # remote column of the self-referential join.
    oParent = relationship("HsHOrganizationalUnit", remote_side=[id])
|
|
|
|
|
2014-02-14 15:01:34 +01:00
|
|
|
class HsHMembership(Base):
    """ORM mapping for the ``membership`` table.

    A row references one person, one organizational unit and one business
    role through the foreign keys declared below.
    """

    __tablename__ = 'membership'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Foreign key to person.id.
    person = Column(Integer, ForeignKey('person.id'))
    # Foreign key to organizational_unit.id.
    organizational_unit = Column(Integer, ForeignKey('organizational_unit.id'))
    start_date = Column(DateTime)
    end_date = Column(DateTime)
    # String flag; processPersons() skips rows where this is not 'Y'.
    active = Column(String)
    # Foreign key to business_role.id.
    business_role = Column(Integer, ForeignKey('business_role.id'))

    # Object-level access to the rows referenced by the foreign keys above.
    mPerson = relationship("HsHPerson")
    mBusinessRole = relationship("HsHBusinessRole")
    mOrg = relationship("HsHOrganizationalUnit")
|
2014-02-17 17:42:40 +01:00
|
|
|
|
|
|
|
class HsHBusinessRole(Base):
    """ORM mapping for the ``business_role`` table."""

    __tablename__ = 'business_role'

    # Integer primary key; getBusinessRoleName() uses it as the lookup key
    # for the displayed role name.
    id = Column(Integer, primary_key=True)
    # Role name; the export compares it against 'ProfessorIn' and 'WiMi'.
    name = Column(String)
    description = Column(String)
|
|
|
|
|
2014-02-14 15:01:34 +01:00
|
|
|
class HsHPerson(Base):
    """ORM mapping for the ``person`` table."""

    __tablename__ = 'person'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Exported as vcard:familyName by addPersonToGraph().
    name = Column(String)
    # Exported as vcard:givenName by addPersonToGraph().
    firstname = Column(String)
    # Title parts; addPersonToGraph() joins them into one title string.
    title_prefix = Column(String)
    title_suffix = Column(String)
    # getBusinessRoleName() has name tables for "M" and "F" and maps None
    # to its neutral "?" table.
    gender = Column(String)
    #birthday = Column(DateTime) #TODO: without timezone!

    # Membership rows referencing this person.
    memberships = relationship("HsHMembership")
|
2014-02-14 15:01:34 +01:00
|
|
|
|
|
|
|
class HsHTelephone(Base):
    """ORM mapping for the ``telephone`` table.

    No foreign keys or relationships are declared on this mapping.
    """

    __tablename__ = 'telephone'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    call_number = Column(String)
    description = Column(String)
|
|
|
|
|
|
|
|
#RDF
|
|
|
|
from rdflib import Namespace
|
2014-02-18 17:48:28 +01:00
|
|
|
from rdfalchemy import rdfSingle, rdfMultiple
|
2014-02-14 15:01:34 +01:00
|
|
|
from rdfalchemy.rdfSubject import rdfSubject
|
|
|
|
|
|
|
|
from rdflib import Literal, BNode, Namespace, URIRef
|
|
|
|
from rdflib import RDF, RDFS, Graph, OWL
|
|
|
|
from rdflib.namespace import XSD
|
|
|
|
|
|
|
|
# Namespaces of the external vocabularies used by the RDF mappings.
foaf = Namespace('http://xmlns.com/foaf/0.1/')
# The Vitro namespace IRI ends in '#'. Without it, any term built from this
# namespace would be glued directly onto "0.7".
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7#')
vivo = Namespace('http://vivoweb.org/ontology/core#')
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
hsh = Namespace('http://data.hs-hannover.de/ontology/hshOntologie#')

# Prefixes for locally minted individuals. The trailing letter is part of
# the prefix: the export code appends a numeric id directly to it, giving
# URIs such as .../individual/p<id>.
localPerson = Namespace('http://data.hs-hannover.de/individual/p')
localOrg = Namespace('http://data.hs-hannover.de/individual/o')
localMembership = Namespace('http://data.hs-hannover.de/individual/m')
|
2014-02-17 17:42:40 +01:00
|
|
|
|
2014-02-14 15:01:34 +01:00
|
|
|
def get_graph():
    """Return the graph behind ``rdfSubject.db`` with all prefixes bound.

    The same graph object that the rdfSubject mappings write to is
    returned; it is not copied.
    """
    graph = rdfSubject.db

    # (prefix, namespace) pairs, bound in exactly this order.
    prefix_bindings = (
        ('foaf', foaf),
        ('vitro', vitro),
        ('vivo', vivo),
        ('vcard', vcard),
        ('obo', obo),
        ('localperson', localPerson),
        ('localorg', localOrg),
        ('localMembership', localMembership),
        ('hsh', hsh),
    )
    for prefix, namespace in prefix_bindings:
        graph.bind(prefix, namespace)

    return graph
|
|
|
|
|
|
|
|
class Thing(rdfSubject):
    """Base RDF mapping typed as owl:Thing."""

    rdf_type = OWL.Thing

    # Single-valued rdfs:label.
    label = rdfSingle(RDFS.label)
|
|
|
|
|
|
|
|
class hshThing(Thing):
    """RDF mapping typed as hsh:hshLocal.

    The export code instantiates this class on a URI before instantiating
    a more specific subclass on the same URI.
    NOTE(review): presumably each instantiation records one rdf:type triple
    for that class's ``rdf_type`` -- confirm against rdfalchemy.
    """

    rdf_type = hsh.hshLocal
|
|
|
|
|
|
|
|
class Person(hshThing):
    """RDF mapping typed as foaf:Person."""

    rdf_type = foaf.Person

    # Single-valued vcard:givenName.
    firstname = rdfSingle(vcard.givenName)
    # Single-valued vcard:familyName.
    name = rdfSingle(vcard.familyName)
    # Link to a ContactInfo resource via obo:ARG_2000028.
    hasContactInfo = rdfSingle(obo.ARG_2000028)
    # Re-declares the rdfs:label descriptor already present on Thing.
    label = rdfSingle(RDFS.label)
    # Single-valued vivo:relatedBy; processPersons() stores an
    # organization URI here.
    associatedOe = rdfSingle(vivo.relatedBy)
|
|
|
|
|
|
|
|
class FacultyMember(Person):
    """Person typed as vivo:FacultyMember.

    processPersons() uses it for memberships whose role name is
    'ProfessorIn'.
    """

    rdf_type = vivo.FacultyMember
|
|
|
|
|
|
|
|
class NonFacultyAcademic(Person):
    """Person typed as vivo:NonFacultyAcademic.

    processPersons() uses it for memberships whose role name is 'WiMi'.
    """

    rdf_type = vivo.NonFacultyAcademic
|
|
|
|
|
|
|
|
class NonAcademic(Person):
    """Person typed as vivo:NonAcademic.

    processPersons() uses it for persons without any active 'ProfessorIn'
    or 'WiMi' membership.
    """

    rdf_type = vivo.NonAcademic
|
2014-02-14 15:01:34 +01:00
|
|
|
|
|
|
|
class ContactInfo(Thing):
    """RDF mapping typed as vcard:Individual.

    addPersonToGraph() creates one of these to attach a title to a person.
    """

    rdf_type = vcard.Individual

    # Single-valued vcard:hasTitle, declared with range vcard:Title.
    hasTitle = rdfSingle(vcard.hasTitle,range_type=vcard.Title)
    # Back-link to the person via obo:ARG_2000029, declared with range
    # foaf:Person.
    contactInformationFor = rdfSingle(obo.ARG_2000029,range_type=foaf.Person)
|
2014-02-14 15:01:34 +01:00
|
|
|
|
|
|
|
class Title(Thing):
    """RDF mapping typed as vcard:Title, holding a person's title text."""

    rdf_type = vcard.Title

    # Single-valued vcard:title. The XSD datatype is named "string" in
    # lower case; "String" is not a term of the XSD vocabulary.
    title = rdfSingle(vcard.title, range_type=XSD.string)
|
|
|
|
|
|
|
|
class Organization(hshThing):
    """RDF mapping typed as foaf:Organization."""

    rdf_type = foaf.Organization

    # Re-declares the rdfs:label descriptor already present on Thing.
    label = rdfSingle(RDFS.label)
    # Link to the parent organization via obo:BFO_0000050.
    parentOe = rdfSingle(obo.BFO_0000050)
    # Single-valued vivo:abbreviation.
    acronym = rdfSingle(vivo.abbreviation)
|
|
|
|
|
|
|
|
class OboMembership(Thing):
    """RDF mapping typed as obo:BFO_0000020; base class of Membership."""

    rdf_type = obo.BFO_0000020
|
|
|
|
|
|
|
|
class Membership(OboMembership):
    """RDF mapping typed as vivo:Position."""

    rdf_type = vivo.Position

    # Re-declares the rdfs:label descriptor already present on Thing.
    label = rdfSingle(RDFS.label)
    # Multi-valued vivo:relates; processMembership() assigns the person
    # and organization URIs here.
    relates = rdfMultiple(vivo.relates)
    # Single-valued vivo:dateTimeInterval; the only assignment to it in
    # processMembership() is commented out.
    dateTimeInterval = rdfSingle(vivo.dateTimeInterval)
|
|
|
|
|
|
|
|
class FacultyPosition(Membership):
    """Membership typed as vivo:FacultyPosition (role name 'ProfessorIn')."""

    rdf_type = vivo.FacultyPosition
|
|
|
|
|
|
|
|
class NonFacultyPosition(Membership):
    """Membership typed as vivo:NonFacultyPosition (role name 'WiMi')."""

    rdf_type = vivo.NonFacultyPosition
|
|
|
|
|
|
|
|
class NonAcademicPosition(Membership):
    """Membership typed as vivo:NonAcademicPosition.

    processMembership() uses it for every role name other than
    'ProfessorIn' and 'WiMi'.
    """

    rdf_type = vivo.NonAcademicPosition
|
|
|
|
|
|
|
|
class OboDateTimeInterval(Thing):
    """RDF mapping typed as obo:BFO_0000038; base of DateTimeInterval."""

    rdf_type = obo.BFO_0000038
|
|
|
|
|
|
|
|
class DateTimeInterval(OboDateTimeInterval):
    """RDF mapping typed as vivo:DateTimeInterval.

    Only referenced from commented-out code in processMembership().
    """

    rdf_type = vivo.DateTimeInterval

    # Single-valued vivo:start.
    start = rdfSingle(vivo.start)
    # Single-valued vivo:end.
    end = rdfSingle(vivo.end)
|
|
|
|
|
|
|
|
class OboDateTimeValue(Thing):
    """RDF mapping typed as obo:BFO_0000148; base of DateTimeValue."""

    rdf_type = obo.BFO_0000148
|
|
|
|
|
|
|
|
class DateTimeValue(OboDateTimeValue):
    """RDF mapping typed as vivo:DateTimeValue.

    Only referenced from commented-out code in processMembership().
    """

    rdf_type = vivo.DateTimeValue

    # Single-valued vivo:dateTime.
    dateTime = rdfSingle(vivo.dateTime)
    # Single-valued vivo:dateTimePrecision.
    dateTimePrecision = rdfSingle(vivo.dateTimePrecision)
|
2014-02-14 15:01:34 +01:00
|
|
|
|
2014-02-17 17:42:40 +01:00
|
|
|
class IdSequence:
    """Hand out consecutive integers, beginning with ``start``."""

    def __init__(self, start):
        # Stored one below ``start`` because getNext() increments before
        # it returns.
        self.num = start - 1

    def getNext(self):
        """Advance the counter by one and return the new value."""
        following = self.num + 1
        self.num = following
        return following
|
|
|
|
|
2014-02-18 17:24:44 +01:00
|
|
|
# Display names per gender code and business-role id. "?" is the neutral
# table used when the gender is unknown. Built once at import time instead
# of on every call.
_BUSINESS_ROLE_NAMES = {
    "M": {
        1000000000: "Beschäftigter",
        1000000001: "Professor",
        1000000002: "Wissenschaftlicher Mitarbeiter",
        1000000003: "Auszubildender",
        1000000004: "Lehrkraft für besondere Aufgaben",
        1000000005: "Lehrbeauftragter",
        1000000006: "Praktikant",
        1000000007: "Wissenschaftliche Hilfskraft",
        1000000008: "Studentische Hilfskraft",
        1000000009: "Lehrkraft",
        1000000010: "Leiter",
    },
    "F": {
        1000000000: "Beschäftigte",
        1000000001: "Professorin",
        1000000002: "Wissenschaftliche Mitarbeiterin",
        1000000003: "Auszubildende",
        1000000004: "Lehrkraft für besondere Aufgaben",
        1000000005: "Lehrbeauftragte",
        1000000006: "Praktikantin",
        1000000007: "Wissenschaftliche Hilfskraft",
        1000000008: "Studentische Hilfskraft",
        1000000009: "Lehrkraft",
        1000000010: "Leiterin",
    },
    "?": {
        1000000000: "Beschäftigte(r)",
        1000000001: "ProfessorIn",
        1000000002: "Wissenschaftliche(r) MitarbeiterIn",
        1000000003: "Auszubildende(r)",
        1000000004: "Lehrkraft für besondere Aufgaben",
        1000000005: "Lehrbeauftragte(r)",
        1000000006: "PraktikantIn",
        1000000007: "Wissenschaftliche Hilfskraft",
        1000000008: "Studentische Hilfskraft",
        1000000009: "Lehrkraft",
        1000000010: "LeiterIn",
    },
}


def getBusinessRoleName(membership):
    """Return the gender-specific display name of a membership's role.

    Args:
        membership: object exposing ``mPerson.gender`` and
            ``mBusinessRole.id``.

    Returns:
        The role name from the table matching the person's gender. A
        gender of None, or any code other than "M" and "F", selects the
        neutral "?" table.

    Raises:
        KeyError: if the business-role id is not in the name table.
    """
    gender = membership.mPerson.gender
    role_id = membership.mBusinessRole.id

    # Fall back to the neutral names rather than failing on a gender code
    # that has no table of its own.
    names_by_role = _BUSINESS_ROLE_NAMES.get(gender, _BUSINESS_ROLE_NAMES["?"])
    return names_by_role[role_id]
|
|
|
|
|
2014-02-17 17:42:40 +01:00
|
|
|
def addPersonToGraph(sqlP, additionalIdSeq):
    """Add a given HsHPerson to the graph.

    Args:
        sqlP: person row exposing ``id``, ``name``, ``firstname``,
            ``title_prefix`` and ``title_suffix``.
        additionalIdSeq: id source whose ``getNext()`` supplies ids for the
            auxiliary title and contact-info resources.

    Returns:
        The URIRef of the person resource.
    """
    # Join whichever title parts are present. Missing (None) and empty
    # parts are skipped, so the result never carries a stray space.
    title_parts = [part for part in (sqlP.title_prefix, sqlP.title_suffix) if part]
    combined_title = ' '.join(title_parts)

    person_uri = URIRef("%s%s" % (localPerson, sqlP.id))

    # The same URI is instantiated as hshThing first and as Person second.
    # NOTE(review): presumably each instantiation records the rdf:type of
    # that class, so the resource ends up with both -- confirm against
    # rdfalchemy.
    hshThing(person_uri)
    rdfP = Person(person_uri)
    rdfP.firstname = sqlP.firstname
    rdfP.name = sqlP.name
    rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)

    # If there is a title to add, attach it through a contact-info node.
    if combined_title:
        # Two ids are drawn, in this order: title first, contact info second.
        title_uri = URIRef("%s%s" % (localPerson, additionalIdSeq.getNext()))
        rdfTitle = Title(title_uri)
        rdfTitle.title = combined_title

        contact_info_uri = URIRef("%s%s" % (localPerson, additionalIdSeq.getNext()))
        rdfCi = ContactInfo(contact_info_uri)
        rdfCi.hasTitle = rdfTitle
        rdfCi.contactInformationFor = rdfP

        rdfP.hasContactInfo = rdfCi

    return person_uri
|
|
|
|
|
2014-02-18 14:54:47 +01:00
|
|
|
def processMembership(membership, additionalIdSeq):
    """Create the position resource for one membership row.

    Args:
        membership: row exposing ``id``, ``mPerson``, ``mOrg`` and
            ``mBusinessRole``.
        additionalIdSeq: id source; currently unused because the
            date-interval export below is disabled.
    """
    membership_uri = URIRef("%s%s" % (localMembership, membership.id))
    person_uri = URIRef("%s%s" % (localPerson, membership.mPerson.id))
    org_uri = URIRef("%s%s" % (localOrg, membership.mOrg.id))

    # Instantiate the base mapping first, then the vivo:Position mapping on
    # the same URI, and fill in the position's properties.
    OboMembership(membership_uri)
    position = Membership(membership_uri)
    position.relates = [person_uri, org_uri]
    position.label = getBusinessRoleName(membership)

    # Finally instantiate the role-specific position class on the URI.
    role_name = membership.mBusinessRole.name
    if role_name == "ProfessorIn":
        position_class = FacultyPosition
    elif role_name == "WiMi":
        position_class = NonFacultyPosition
    else:
        position_class = NonAcademicPosition
    position_class(membership_uri)

    # Disabled: export of the membership's start/end dates as a
    # vivo:DateTimeInterval. Kept for reference.
    #dtiUri = URIRef("%s%s" % (localMembership, additionalIdSeq.getNext()))
    #dtsUri = URIRef("%s%s" % (localMembership, additionalIdSeq.getNext()))
    #dteUri = URIRef("%s%s" % (localMembership, additionalIdSeq.getNext()))
    #rdfM.dateTimeInterval = dtiUri
    #
    #rdfDts = OboDateTimeValue(dtsUri)
    #rdfDts = DateTimeValue(dtsUri)
    #rdfDts.dateTime = membership.start_date
    #rdfDts.dateTimePrecision = vivo.yearPrecision
    #
    #rdfDte = OboDateTimeValue(dteUri)
    #rdfDte = DateTimeValue(dteUri)
    #rdfDte.dateTime = membership.end_date
    #rdfDte.dateTimePrecision = vivo.yearPrecision
    #
    #rdfDti = OboDateTimeInterval(dtiUri)
    #rdfDti = DateTimeInterval(dtiUri)
    #rdfDti.start = dtsUri
    #rdfDti.end = dteUri
|
2014-02-18 14:54:47 +01:00
|
|
|
|
2014-02-17 17:42:40 +01:00
|
|
|
def processPersons(session, additionalIdSeq):
    """Fetch all persons and create the triples for them.

    Persons without any membership rows are skipped entirely. For the
    others, every active membership (``active == 'Y'``) is exported, and the
    person's type is derived from the role names of those memberships.

    Args:
        session: SQLAlchemy session used to query HsHPerson.
        additionalIdSeq: id source passed on to the helper functions.
    """
    # Role names that make a person academic, with the class to instantiate.
    academic_classes = {
        'ProfessorIn': FacultyMember,
        'WiMi': NonFacultyAcademic,
    }

    for sqlP in session.query(HsHPerson):
        if not sqlP.memberships:
            continue  # no memberships at all: person is not exported

        personUri = addPersonToGraph(sqlP, additionalIdSeq)

        # Becomes True once an active academic membership is seen.
        has_academic_role = False

        for membership in sqlP.memberships:
            if membership.active != 'Y':
                continue  # inactive memberships are ignored

            processMembership(membership, additionalIdSeq)

            person_class = academic_classes.get(membership.mBusinessRole.name)
            if person_class is None:
                continue  # this role does not affect the person's type

            has_academic_role = True
            organization_uri = URIRef("%s%s" % (localOrg, membership.organizational_unit))
            rdfP = person_class(personUri)
            rdfP.associatedOe = organization_uri

        # No academic role among the active memberships: mark the person
        # as non-academic.
        if not has_academic_role:
            NonAcademic(personUri)
|
|
|
|
|
|
|
|
def processOrganizations(session, additionalIdSeq):
    """Generate the triples for all organizational units.

    Args:
        session: SQLAlchemy session used to query HsHOrganizationalUnit.
        additionalIdSeq: accepted for a uniform signature; not used here.
    """
    for unit in session.query(HsHOrganizationalUnit):
        unit_uri = URIRef("%s%s" % (localOrg, unit.id))

        # Instantiate the generic mapping first, then the organization
        # mapping on the same URI.
        hshThing(unit_uri)
        rdf_org = Organization(unit_uri)
        rdf_org.label = unit.name

        # Acronym and parent link are optional.
        if unit.acronym is not None:
            rdf_org.acronym = unit.acronym

        parent_unit = unit.oParent
        if parent_unit is not None:
            rdf_org.parentOe = URIRef("%s%s" % (localOrg, parent_unit.id))
|
|
|
|
|
|
|
|
def createTriples(db_url=None):
    """Export organizations, persons and memberships as N3 triples.

    Args:
        db_url: SQLAlchemy database URL. When omitted, the environment
            variable ``HSHINFO_DB_URL`` is used, and if that is unset the
            original built-in URL.

    Returns:
        The graph serialized in N3 format, as returned by
        ``Graph.serialize``.
    """
    import os  # local import so this function is self-contained

    if db_url is None:
        # SECURITY NOTE(review): the fallback URL embeds credentials in
        # source control. It is kept only so existing invocations keep
        # working; set HSHINFO_DB_URL and remove it.
        db_url = os.environ.get(
            "HSHINFO_DB_URL",
            "postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo",
        )

    engine = create_engine(db_url)
    session = sessionmaker(bind=engine)()

    try:
        g = get_graph()
        additionalIdSeq = IdSequence(2000000000)  # don't care sequence

        processOrganizations(session, additionalIdSeq)
        processPersons(session, additionalIdSeq)

        # we're done.
        triples = g.serialize(format='n3')
        g.close()
    finally:
        # Release the database connection even if the export failed.
        session.close()

    return triples
|
2014-02-14 15:01:34 +01:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: print the export and optionally save it.
    writeToFile = True

    triples = createTriples()
    print(triples)

    if writeToFile:
        # Graph.serialize() returns bytes on older rdflib versions and text
        # on newer ones; the file is opened in binary mode, so encode text
        # before writing.
        if isinstance(triples, bytes):
            data = triples
        else:
            data = triples.encode('utf-8')

        # The context manager closes the file even if the write fails.
        with open('data.n3', 'wb') as f:
            f.write(data)
|