From e7e9ab3d23030fc01b0f717292d437538f0d69ad Mon Sep 17 00:00:00 2001 From: Jan Philipp Timme Date: Mon, 17 Feb 2014 17:42:40 +0100 Subject: [PATCH] [TASK] Now supports OE-Tree and OE<->Person relationships. Also supports different types of persons. --- triplify.py | 210 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 155 insertions(+), 55 deletions(-) diff --git a/triplify.py b/triplify.py index 3286fd4..090640f 100644 --- a/triplify.py +++ b/triplify.py @@ -2,9 +2,25 @@ from sqlalchemy import * from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import relationship, backref Base = declarative_base() -""" + +class HsHOrganizationalUnitType(Base): + __tablename__ = 'organizational_unit_type' + id = Column(Integer, primary_key=True) + name = Column(String) + +class HsHOrganizationalUnit(Base): + __tablename__ = 'organizational_unit'; + id = Column(Integer, primary_key=True) + name = Column(String) + parent = Column(Integer, ForeignKey('organizational_unit.id')) + acronym = Column(String) + post_address = Column(Integer) + organizational_unit_type = Column(Integer, ForeignKey('organizational_unit_type.id')) + oParent = relationship("HsHOrganizationalUnit", remote_side=[id]) + class HsHMembership(Base): __tablename__ = 'membership' id = Column(Integer, primary_key=True) @@ -13,9 +29,16 @@ class HsHMembership(Base): start_date = Column(DateTime) end_date = Column(DateTime) active = Column(String) - business_role = Column(Integer) + business_role = Column(Integer, ForeignKey('business_role.id')) mPerson = relationship("HsHPerson") -""" + mBusinessRole = relationship("HsHBusinessRole") + +class HsHBusinessRole(Base): + __tablename__ = 'business_role' + id = Column(Integer, primary_key=True) + name = Column(String) + description = Column(String) + class HsHPerson(Base): __tablename__ = 'person' id = Column(Integer, primary_key=True) @@ -25,8 +48,8 @@ class HsHPerson(Base): title_suffix = Column(String) account = Column(Integer) gender = Column(String) - birthday = Column(DateTime) #TODO: without timezone! - #memberships = relationship("HsHMembership", backref='person') + #birthday = Column(DateTime) #TODO: without timezone! + memberships = relationship("HsHMembership") class HsHTelephone(Base): __tablename__ = 'telephone' @@ -46,31 +69,28 @@ from rdflib.namespace import XSD foaf = Namespace('http://xmlns.com/foaf/0.1/') core = Namespace('http://vivoweb.org/ontology/core#') vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7') +vivo = Namespace('http://vivoweb.org/ontology/core#') vcard = Namespace('http://www.w3.org/2006/vcard/ns#') obo = Namespace('http://purl.obolibrary.org/obo/') -local = Namespace('http://vivo.bib.hs-hannover.de/person') hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#') +localPerson = Namespace('http://vivo.bib.hs-hannover.de/person') +localOrg = Namespace('http://vivo.bib.hs-hannover.de/org') + def get_graph(): """Small little helper to create graph with namespaces ;-)""" g = rdfSubject.db g.bind('foaf', foaf) g.bind('core', core) g.bind('vitro', vitro) + g.bind('vivo', vivo) g.bind('vcard', vcard) g.bind('obo', obo) - g.bind('local', local) + g.bind('localperson', localPerson) + g.bind('localorg', localOrg) g.bind('hsh', hsh) return g -class IdSequence: - """Small Helper for easy sequences""" - def __init__(self, start): - self.num = start - 1 - def getNext(self): - self.num += 1 - return self.num - class Thing(rdfSubject): rdf_type = OWL.Thing label = rdfSingle(RDFS.label) @@ -84,62 +104,142 @@ class Person(hshThing): name = rdfSingle(vcard.familyName) hasContactInfo = rdfSingle(obo.ARG_2000028) label = rdfSingle(RDFS.label) + associatedOe = rdfSingle(vivo.relatedBy) + +class FacultyMember(Person): + rdf_type = vivo.FacultyMember + +class NonFacultyAcademic(Person): + rdf_type = vivo.NonFacultyAcademic + +class NonAcademic(Person): + rdf_type = vivo.NonAcademic class ContactInfo(Thing): rdf_type = vcard.Individual - hasTitle = rdfSingle(vcard.hasTitle) - contactInformationFor = rdfSingle(obo.ARG_2000029) + hasTitle = rdfSingle(vcard.hasTitle,range_type=vcard.Title) + contactInformationFor = rdfSingle(obo.ARG_2000029,range_type=foaf.Person) class Title(Thing): rdf_type = vcard.Title - title = rdfSingle(vcard.title) + title = rdfSingle(vcard.title,range_type=XSD.String) -def createRDFFromSQL(): - """Fetch data from postgres and map them into RDF""" +class Organization(hshThing): + rdf_type = foaf.Organization + label = rdfSingle(RDFS.label) + parentOe = rdfSingle(obo.BFO_0000050) + +class IdSequence: + """Small Helper for easy sequences""" + def __init__(self, start): + self.num = start - 1 + def getNext(self): + self.num += 1 + return self.num + +def addPersonToGraph(sqlP, additionalIdSeq): + """Add a given HsHPerson to the graph.""" + combined_title = '' + if sqlP.title_prefix != None: + combined_title = sqlP.title_prefix + if sqlP.title_suffix != None: + if combined_title == '': + combined_title = sqlP.title_suffix + else: + combined_title += ' ' + sqlP.title_suffix + + person_uri = URIRef("%s/%s" % (localPerson, sqlP.id)) + hshThing(person_uri) + rdfP = Person(person_uri) #WTF?! + rdfP.firstname = sqlP.firstname; + rdfP.name = sqlP.name; + rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname) + # If there is a title to add, do it. + if(combined_title != ''): + title_uri = URIRef("%s/%s" % (localPerson, additionalIdSeq.getNext())) + rdfTitle = Title(title_uri) + rdfTitle.title = combined_title + + contact_info_uri = URIRef("%s/%s" % (localPerson, additionalIdSeq.getNext())) + rdfCi = ContactInfo(contact_info_uri) + rdfCi.hasTitle = rdfTitle + rdfCi.contactInformationFor = rdfP + + rdfP.hasContactInfo = rdfCi + return person_uri + +def processPersons(session, additionalIdSeq): + """Fetch persons, create triples for them.""" + persons = session.query(HsHPerson) + for count, sqlP in enumerate(persons): + if sqlP.memberships == []: + continue #SKIP PEOPLE WITHOUT ANY MEMBERSHIPS AT ALL! + + #create person + personUri = addPersonToGraph(sqlP, additionalIdSeq) + + #used to determine whether person is non-academic type + isNonAcademic = True + + #use memberships to determine person type + for i, membership in enumerate(sqlP.memberships): + if membership.active != 'Y': + continue #SKIP INACTIVE MEMBERSHIPS! + + #This is an active membership with an id and a business_role. + if membership.mBusinessRole.name == 'ProfessorIn': + isNonAcademic = False + organization_uri = URIRef("%s/%s" % (localOrg, membership.organizational_unit)) + rdfP = FacultyMember(personUri) + rdfP.associatedOe = organization_uri + + if membership.mBusinessRole.name == 'WiMi': + isNonAcademic = False + organization_uri = URIRef("%s/%s" % (localOrg, membership.organizational_unit)) + rdfP = NonFacultyAcademic(personUri) + rdfP.associatedOe = organization_uri + + #assign non-academic person type if neccessary + if isNonAcademic: + NonAcademic(personUri) + pass + +def processOrganizations(session, additionalIdSeq): + """Generate triples for organizational units.""" + organizations = session.query(HsHOrganizationalUnit) + for count, sqlO in enumerate(organizations): + organization_uri = URIRef("%s/%s" % (localOrg, sqlO.id)) + rdfO = hshThing(organization_uri) + rdfO = Organization(organization_uri) + rdfO.label = sqlO.name + if sqlO.oParent != None: + parent_organization_uri = URIRef("%s/%s" % (localOrg, sqlO.oParent.id)) + rdfO.parentOe = parent_organization_uri + +def createTriples(): + """Fetch memberships, evaluate them, create persons,...""" engine = create_engine("postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo") session = sessionmaker(bind=engine)() - persons = session.query(HsHPerson) g = get_graph() - additionalIdSeq = IdSequence(2000000000) #don't care sequence - for count, sqlP in enumerate(persons): - combined_title = '' - if sqlP.title_prefix != None: - combined_title = sqlP.title_prefix - if sqlP.title_suffix != None: - if combined_title == '': - combined_title = sqlP.title_suffix - else: - combined_title += ' ' + sqlP.title_suffix - person_uri = URIRef("%s/%s" % (local, sqlP.id)) - rdfP = hshThing(person_uri) - rdfP = Person(person_uri) #WTF?! - rdfP.firstname = sqlP.firstname; - rdfP.name = sqlP.name; - rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname) - # If there is a title to add, do it. - if(combined_title != ''): - title_uri = URIRef("%s/%s" % (local, additionalIdSeq.getNext())) - rdfTitle = Title(title_uri) - rdfTitle.title = combined_title + processOrganizations(session, additionalIdSeq) + processPersons(session, additionalIdSeq) - contact_info_uri = URIRef("%s/%s" % (local, additionalIdSeq.getNext())) - rdfCi = ContactInfo(contact_info_uri) - rdfCi.hasTitle = rdfTitle - rdfCi.contactInformationFor = rdfP - - rdfP.hasContactInfo = rdfCi - break + #we're done. triples = g.serialize(format='n3') g.close() - print(triples) - - f = open('data.n3', 'wb') - f.write(triples) - f.close() + return triples if __name__ == '__main__': - createRDFFromSQL() \ No newline at end of file + writeToFile = True + + triples = createTriples() + print(triples) + + if writeToFile == True: + f = open('data.n3', 'wb') + f.write(triples) + f.close() \ No newline at end of file