# coding=utf-8
"""Export persons, organizational units and memberships as VIVO RDF.

The module reads rows from a relational database through SQLAlchemy ORM
classes (``HsH*``), mirrors them as RDFAlchemy subjects in the graph held by
``rdfSubject.db`` and serializes that graph as N3.
"""

# ---------------------------------------------------------------------------
# SQL
# ---------------------------------------------------------------------------
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import relationship, backref

Base = declarative_base()


class HsHOrganizationalUnitType(Base):
    """ORM mapping for the ``organizational_unit_type`` table."""
    __tablename__ = 'organizational_unit_type'

    id = Column(Integer, primary_key=True)
    name = Column(String)


class HsHOrganizationalUnit(Base):
    """ORM mapping for the ``organizational_unit`` table (self-referencing)."""
    __tablename__ = 'organizational_unit'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    parent = Column(Integer, ForeignKey('organizational_unit.id'))
    acronym = Column(String)
    post_address = Column(Integer)
    organizational_unit_type = Column(
        Integer, ForeignKey('organizational_unit_type.id'))

    # Many-to-one link to the parent unit; ``remote_side`` marks ``id`` as the
    # remote column of the self-referential join.
    oParent = relationship("HsHOrganizationalUnit", remote_side=[id])


class HsHMembership(Base):
    """ORM mapping for the ``membership`` table."""
    __tablename__ = 'membership'

    id = Column(Integer, primary_key=True)
    person = Column(Integer, ForeignKey('person.id'))
    organizational_unit = Column(Integer, ForeignKey('organizational_unit.id'))
    start_date = Column(DateTime)
    end_date = Column(DateTime)
    active = Column(String)  # compared against 'Y' in processPersons()
    business_role = Column(Integer, ForeignKey('business_role.id'))

    mPerson = relationship("HsHPerson")
    mBusinessRole = relationship("HsHBusinessRole")
    mOrg = relationship("HsHOrganizationalUnit")


class HsHBusinessRole(Base):
    """ORM mapping for the ``business_role`` table."""
    __tablename__ = 'business_role'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    description = Column(String)


class HsHPerson(Base):
    """ORM mapping for the ``person`` table."""
    __tablename__ = 'person'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    firstname = Column(String)
    title_prefix = Column(String)
    title_suffix = Column(String)
    account = Column(Integer)
    gender = Column(String)
    # birthday = Column(DateTime)  # TODO: without timezone!

    memberships = relationship("HsHMembership")


class HsHTelephone(Base):
    """ORM mapping for the ``telephone`` table."""
    __tablename__ = 'telephone'

    id = Column(Integer, primary_key=True)
    call_number = Column(String)
    description = Column(String)


# ---------------------------------------------------------------------------
# RDF
# ---------------------------------------------------------------------------
from rdflib import Namespace
from rdfalchemy import rdfSingle, rdfMultiple
from rdfalchemy.rdfSubject import rdfSubject
from rdflib import Literal, BNode, Namespace, URIRef
from rdflib import RDF, RDFS, Graph, OWL
from rdflib.namespace import XSD

foaf = Namespace('http://xmlns.com/foaf/0.1/')
core = Namespace('http://vivoweb.org/ontology/core#')
# NOTE(review): this URI has no trailing '#' or '/' - confirm it is intended.
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7')
vivo = Namespace('http://vivoweb.org/ontology/core#')  # same URI as ``core``
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#')

# Prefixes used to mint local individual URIs; a numeric id is appended.
localPerson = Namespace('http://vivo.bib.hs-hannover.de/individual/p')
localOrg = Namespace('http://vivo.bib.hs-hannover.de/individual/o')
localMembership = Namespace('http://vivo.bib.hs-hannover.de/individual/m')


def get_graph():
    """Return the graph behind ``rdfSubject.db`` with all prefixes bound."""
    g = rdfSubject.db
    prefixes = (
        ('foaf', foaf),
        ('core', core),
        ('vitro', vitro),
        ('vivo', vivo),
        ('vcard', vcard),
        ('obo', obo),
        ('localperson', localPerson),
        ('localorg', localOrg),
        ('localMembership', localMembership),
        ('hsh', hsh),
    )
    for prefix, namespace in prefixes:
        g.bind(prefix, namespace)
    return g


class Thing(rdfSubject):
    """Base RDF subject typed as ``owl:Thing``."""
    rdf_type = OWL.Thing
    label = rdfSingle(RDFS.label)


class hshThing(Thing):
    """RDF subject typed as ``hsh:hshLocal``."""
    rdf_type = hsh.hshLocal


class Person(hshThing):
    """RDF subject typed as ``foaf:Person``."""
    rdf_type = foaf.Person
    firstname = rdfSingle(vcard.givenName)
    name = rdfSingle(vcard.familyName)
    hasContactInfo = rdfSingle(obo.ARG_2000028)
    label = rdfSingle(RDFS.label)
    # NOTE(review): single-valued, so a later assignment presumably replaces
    # an earlier one when a person has several academic memberships - confirm.
    associatedOe = rdfSingle(vivo.relatedBy)


class FacultyMember(Person):
    """RDF subject typed as ``vivo:FacultyMember``."""
    rdf_type = vivo.FacultyMember


class NonFacultyAcademic(Person):
    """RDF subject typed as ``vivo:NonFacultyAcademic``."""
    rdf_type = vivo.NonFacultyAcademic


class NonAcademic(Person):
    """RDF subject typed as ``vivo:NonAcademic``."""
    rdf_type = vivo.NonAcademic


class ContactInfo(Thing):
    """RDF subject typed as ``vcard:Individual``."""
    rdf_type = vcard.Individual
    hasTitle = rdfSingle(vcard.hasTitle, range_type=vcard.Title)
    contactInformationFor = rdfSingle(obo.ARG_2000029, range_type=foaf.Person)


class Title(Thing):
    """RDF subject typed as ``vcard:Title``."""
    rdf_type = vcard.Title
    # Item access avoids any clash with ``str.title`` on the Namespace object;
    # the XML Schema datatype is spelled ``string`` (lower case).
    title = rdfSingle(vcard['title'], range_type=XSD.string)


class Organization(hshThing):
    """RDF subject typed as ``foaf:Organization``."""
    rdf_type = foaf.Organization
    label = rdfSingle(RDFS.label)
    parentOe = rdfSingle(obo.BFO_0000050)
    acronym = rdfSingle(vivo.abbreviation)


class OboMembership(Thing):
    """RDF subject typed as ``obo:BFO_0000020``."""
    rdf_type = obo.BFO_0000020


class Membership(OboMembership):
    """RDF subject typed as ``vivo:Position``."""
    rdf_type = vivo.Position
    label = rdfSingle(RDFS.label)
    relates = rdfMultiple(vivo.relates)
    dateTimeInterval = rdfSingle(vivo.dateTimeInterval)


class FacultyPosition(Membership):
    """RDF subject typed as ``vivo:FacultyPosition``."""
    rdf_type = vivo.FacultyPosition


class NonFacultyPosition(Membership):
    """RDF subject typed as ``vivo:NonFacultyPosition``."""
    rdf_type = vivo.NonFacultyPosition


class NonAcademicPosition(Membership):
    """RDF subject typed as ``vivo:NonAcademicPosition``."""
    rdf_type = vivo.NonAcademicPosition


class OboDateTimeInterval(Thing):
    """RDF subject typed as ``obo:BFO_0000038``."""
    rdf_type = obo.BFO_0000038


class DateTimeInterval(OboDateTimeInterval):
    """RDF subject typed as ``vivo:DateTimeInterval``."""
    rdf_type = vivo.DateTimeInterval
    start = rdfSingle(vivo.start)
    end = rdfSingle(vivo.end)


class OboDateTimeValue(Thing):
    """RDF subject typed as ``obo:BFO_0000148``."""
    rdf_type = obo.BFO_0000148


class DateTimeValue(OboDateTimeValue):
    """RDF subject typed as ``vivo:DateTimeValue``."""
    rdf_type = vivo.DateTimeValue
    dateTime = rdfSingle(vivo.dateTime)
    dateTimePrecision = rdfSingle(vivo.dateTimePrecision)


class IdSequence(object):
    """Small helper handing out consecutive integers, beginning at ``start``."""

    def __init__(self, start):
        # Stored one below ``start`` so the first getNext() returns ``start``.
        self.num = start - 1

    def getNext(self):
        """Advance the counter and return the new value."""
        self.num += 1
        return self.num


# Gender-specific labels per business-role id. The "?" table holds the
# gender-neutral wording and is used whenever the gender is not "M" or "F".
_BUSINESS_ROLE_NAMES = {
    "M": {
        1000000000: "Beschäftigter",
        1000000001: "Professor",
        1000000002: "Wissenschaftlicher Mitarbeiter",
        1000000003: "Auszubildender",
        1000000004: "Lehrkraft für besondere Aufgaben",
        1000000005: "Lehrbeauftragter",
        1000000006: "Praktikant",
        1000000007: "Wissenschaftliche Hilfskraft",
        1000000008: "Studentische Hilfskraft",
        1000000009: "Lehrkraft",
        1000000010: "Leiter",
    },
    "F": {
        1000000000: "Beschäftigte",
        1000000001: "Professorin",
        1000000002: "Wissenschaftliche Mitarbeiterin",
        1000000003: "Auszubildende",
        1000000004: "Lehrkraft für besondere Aufgaben",
        1000000005: "Lehrbeauftragte",
        1000000006: "Praktikantin",
        1000000007: "Wissenschaftliche Hilfskraft",
        1000000008: "Studentische Hilfskraft",
        1000000009: "Lehrkraft",
        1000000010: "Leiterin",
    },
    "?": {
        1000000000: "Beschäftigte(r)",
        1000000001: "ProfessorIn",
        1000000002: "Wissenschaftliche(r) MitarbeiterIn",
        1000000003: "Auszubildende(r)",
        1000000004: "Lehrkraft für besondere Aufgaben",
        1000000005: "Lehrbeauftragte(r)",
        1000000006: "PraktikantIn",
        1000000007: "Wissenschaftliche Hilfskraft",
        1000000008: "Studentische Hilfskraft",
        1000000009: "Lehrkraft",
        1000000010: "LeiterIn",
    },
}

# Business-role name -> RDF class for the position / the person. Any role not
# listed is treated as non-academic by the callers below.
_POSITION_TYPES = {
    "ProfessorIn": FacultyPosition,
    "WiMi": NonFacultyPosition,
}
_ACADEMIC_PERSON_TYPES = {
    "ProfessorIn": FacultyMember,
    "WiMi": NonFacultyAcademic,
}


def getBusinessRoleName(membership):
    """Return the gender-specific label for the membership's business role.

    The gender is read from ``membership.mPerson.gender``; ``None`` and any
    code other than "M"/"F" select the gender-neutral labels.

    Raises:
        KeyError: if ``membership.mBusinessRole.id`` has no label.
    """
    gender = membership.mPerson.gender
    role_id = membership.mBusinessRole.id
    labels = _BUSINESS_ROLE_NAMES.get(gender, _BUSINESS_ROLE_NAMES["?"])
    try:
        return labels[role_id]
    except KeyError:
        raise KeyError("no label defined for business role id %r" % (role_id,))


def addPersonToGraph(sqlP, additionalIdSeq):
    """Add a given HsHPerson to the graph and return its URI.

    When the person has a title prefix and/or suffix, a ``Title`` and a
    ``ContactInfo`` individual are created as well; their URIs take the next
    two ids from ``additionalIdSeq`` (title first, contact info second).
    """
    # Join only the non-empty parts so an empty prefix/suffix never leaves a
    # stray leading or trailing space in the title.
    title_parts = [part for part in (sqlP.title_prefix, sqlP.title_suffix)
                   if part]
    combined_title = ' '.join(title_parts)

    person_uri = URIRef("%s%s" % (localPerson, sqlP.id))
    # Instantiated twice on purpose: each constructor call presumably asserts
    # that class's rdf:type for the URI (RDFAlchemy behaviour - confirm).
    hshThing(person_uri)
    rdfP = Person(person_uri)
    rdfP.firstname = sqlP.firstname
    rdfP.name = sqlP.name
    rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)

    # If there is a title to add, do it.
    if combined_title:
        title_uri = URIRef("%s%s" % (localPerson, additionalIdSeq.getNext()))
        rdfTitle = Title(title_uri)
        rdfTitle.title = combined_title

        contact_info_uri = URIRef(
            "%s%s" % (localPerson, additionalIdSeq.getNext()))
        rdfCi = ContactInfo(contact_info_uri)
        rdfCi.hasTitle = rdfTitle
        rdfCi.contactInformationFor = rdfP
        rdfP.hasContactInfo = rdfCi

    return person_uri


def processMembership(membership, additionalIdSeq):
    """Create the position individual and its date/time interval.

    Three ids are drawn from ``additionalIdSeq`` in this order: interval,
    start value, end value.
    """
    membershipUri = URIRef("%s%s" % (localMembership, membership.id))
    personUri = URIRef("%s%s" % (localPerson, membership.mPerson.id))
    organizationUri = URIRef("%s%s" % (localOrg, membership.mOrg.id))
    dtiUri = URIRef("%s%s" % (localMembership, additionalIdSeq.getNext()))
    dtsUri = URIRef("%s%s" % (localMembership, additionalIdSeq.getNext()))
    dteUri = URIRef("%s%s" % (localMembership, additionalIdSeq.getNext()))

    # The same URI is instantiated through several classes; each call
    # presumably adds that class's rdf:type (RDFAlchemy behaviour - confirm).
    OboMembership(membershipUri)
    rdfM = Membership(membershipUri)
    rdfM.relates = [personUri, organizationUri]
    rdfM.label = getBusinessRoleName(membership)
    rdfM.dateTimeInterval = dtiUri

    # Most specific position type; unlisted roles are non-academic.
    position_class = _POSITION_TYPES.get(
        membership.mBusinessRole.name, NonAcademicPosition)
    position_class(membershipUri)

    # NOTE(review): start_date / end_date are written as-is, including None.
    for value_uri, moment in ((dtsUri, membership.start_date),
                              (dteUri, membership.end_date)):
        OboDateTimeValue(value_uri)
        rdfValue = DateTimeValue(value_uri)
        rdfValue.dateTime = moment
        rdfValue.dateTimePrecision = vivo.yearPrecision

    OboDateTimeInterval(dtiUri)
    rdfDti = DateTimeInterval(dtiUri)
    rdfDti.start = dtsUri
    rdfDti.end = dteUri


def processPersons(session, additionalIdSeq):
    """Fetch persons, create triples for them."""
    for sqlP in session.query(HsHPerson):
        if not sqlP.memberships:
            continue  # SKIP PEOPLE WITHOUT ANY MEMBERSHIPS AT ALL!

        personUri = addPersonToGraph(sqlP, additionalIdSeq)

        # Stays True unless an active academic membership is found.
        isNonAcademic = True
        for membership in sqlP.memberships:
            if membership.active != 'Y':
                continue  # SKIP INACTIVE MEMBERSHIPS!
            processMembership(membership, additionalIdSeq)

            # This is an active membership with an id and a business_role.
            person_class = _ACADEMIC_PERSON_TYPES.get(
                membership.mBusinessRole.name)
            if person_class is None:
                continue
            isNonAcademic = False
            rdfP = person_class(personUri)
            rdfP.associatedOe = URIRef(
                "%s%s" % (localOrg, membership.organizational_unit))

        # Assign the non-academic person type if necessary.
        if isNonAcademic:
            NonAcademic(personUri)


def processOrganizations(session, additionalIdSeq):
    """Generate triples for organizational units.

    ``additionalIdSeq`` is accepted for symmetry with processPersons() but is
    not used here.
    """
    for sqlO in session.query(HsHOrganizationalUnit):
        organization_uri = URIRef("%s%s" % (localOrg, sqlO.id))
        hshThing(organization_uri)
        rdfO = Organization(organization_uri)
        rdfO.label = sqlO.name
        if sqlO.acronym is not None:
            rdfO.acronym = sqlO.acronym
        if sqlO.oParent is not None:
            rdfO.parentOe = URIRef("%s%s" % (localOrg, sqlO.oParent.id))


# NOTE(security): credentials are embedded in this default URL. Prefer passing
# ``db_url`` explicitly (e.g. from configuration) and rotating this password.
DEFAULT_DB_URL = "postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo"


def createTriples(db_url=DEFAULT_DB_URL):
    """Fetch organizations and persons and return the graph serialized as N3.

    Args:
        db_url: SQLAlchemy database URL; defaults to ``DEFAULT_DB_URL``.

    Returns:
        Whatever ``Graph.serialize(format='n3')`` returns for the installed
        rdflib (bytes in older releases, text in newer ones).
    """
    engine = create_engine(db_url)
    session = sessionmaker(bind=engine)()
    g = get_graph()
    additionalIdSeq = IdSequence(2000000000)  # don't care sequence
    try:
        processOrganizations(session, additionalIdSeq)
        processPersons(session, additionalIdSeq)
        return g.serialize(format='n3')
    finally:
        # Release the DB session and the graph even when the export fails.
        try:
            session.close()
        finally:
            g.close()


def _write_triples(triples, path):
    """Write serialized triples to ``path``, encoding text as UTF-8."""
    data = triples if isinstance(triples, bytes) else triples.encode('utf-8')
    with open(path, 'wb') as f:
        f.write(data)


if __name__ == '__main__':
    writeToFile = True
    triples = createTriples()
    print(triples)
    if writeToFile:
        _write_triples(triples, 'data.n3')