"""Export persons and organizational units from the HsH SQL database as RDF.

The module maps a handful of tables with SQLAlchemy, mirrors the rows as
rdfAlchemy subjects (VIVO / vCard / FOAF vocabulary) and serializes the
resulting graph as N3.
"""

# SQL
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import relationship, backref

# RDF
from rdflib import Namespace
from rdfalchemy import rdfSingle
from rdfalchemy.rdfSubject import rdfSubject
from rdflib import Literal, BNode, Namespace, URIRef
from rdflib import RDF, RDFS, Graph, OWL
from rdflib.namespace import XSD


# ---------------------------------------------------------------------------
# SQL model
# ---------------------------------------------------------------------------

Base = declarative_base()


class HsHOrganizationalUnitType(Base):
    """Row of the ``organizational_unit_type`` table."""

    __tablename__ = 'organizational_unit_type'

    id = Column(Integer, primary_key=True)
    name = Column(String)


class HsHOrganizationalUnit(Base):
    """Row of the ``organizational_unit`` table (self-referencing tree)."""

    __tablename__ = 'organizational_unit'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    parent = Column(Integer, ForeignKey('organizational_unit.id'))
    acronym = Column(String)
    post_address = Column(Integer)
    organizational_unit_type = Column(
        Integer, ForeignKey('organizational_unit_type.id'))

    # Many-to-one link to the parent unit; ``remote_side`` marks ``id`` as
    # the "one" side of the self-referential relationship.
    oParent = relationship("HsHOrganizationalUnit", remote_side=[id])


class HsHMembership(Base):
    """Row of the ``membership`` table linking a person to a unit and role."""

    __tablename__ = 'membership'

    id = Column(Integer, primary_key=True)
    person = Column(Integer, ForeignKey('person.id'))
    # Plain integer column: no ForeignKey is declared for the unit here.
    organizational_unit = Column(Integer)
    start_date = Column(DateTime)
    end_date = Column(DateTime)
    # Compared against the string 'Y' by processPersons().
    active = Column(String)
    business_role = Column(Integer, ForeignKey('business_role.id'))

    mPerson = relationship("HsHPerson")
    mBusinessRole = relationship("HsHBusinessRole")


class HsHBusinessRole(Base):
    """Row of the ``business_role`` table."""

    __tablename__ = 'business_role'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    description = Column(String)


class HsHPerson(Base):
    """Row of the ``person`` table."""

    __tablename__ = 'person'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    firstname = Column(String)
    title_prefix = Column(String)
    title_suffix = Column(String)
    account = Column(Integer)
    gender = Column(String)
    # TODO: map ``birthday`` as a DateTime column (without timezone!).

    memberships = relationship("HsHMembership")


class HsHTelephone(Base):
    """Row of the ``telephone`` table."""

    __tablename__ = 'telephone'

    id = Column(Integer, primary_key=True)
    call_number = Column(String)
    description = Column(String)


# ---------------------------------------------------------------------------
# RDF model
# ---------------------------------------------------------------------------

foaf = Namespace('http://xmlns.com/foaf/0.1/')
# NOTE: ``core`` and ``vivo`` point at the same namespace URI.
core = Namespace('http://vivoweb.org/ontology/core#')
# NOTE(review): this URI has no trailing '#' or '/'; it is only bound as a
# prefix in this file, never used to build terms -- confirm it is intended.
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7')
vivo = Namespace('http://vivoweb.org/ontology/core#')
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#')
localPerson = Namespace('http://vivo.bib.hs-hannover.de/individual/person')
localOrg = Namespace('http://vivo.bib.hs-hannover.de/individual/org')

# Connection URL used by createTriples() when the caller passes none.
# NOTE(review): credentials are embedded in source; consider supplying the
# URL from configuration via the ``db_url`` parameter instead.
DEFAULT_DB_URL = "postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo"


def get_graph():
    """Return the shared rdfAlchemy graph with all namespace prefixes bound."""
    g = rdfSubject.db
    prefixes = (
        ('foaf', foaf),
        ('core', core),
        ('vitro', vitro),
        ('vivo', vivo),
        ('vcard', vcard),
        ('obo', obo),
        ('localperson', localPerson),
        ('localorg', localOrg),
        ('hsh', hsh),
    )
    for prefix, namespace in prefixes:
        g.bind(prefix, namespace)
    return g


class Thing(rdfSubject):
    """Base subject typed ``owl:Thing`` with an ``rdfs:label``."""

    rdf_type = OWL.Thing
    label = rdfSingle(RDFS.label)


class hshThing(Thing):
    """Subject typed with the local ``hsh:hshLocal`` marker class."""

    rdf_type = hsh.hshLocal


class Person(hshThing):
    """A ``foaf:Person`` with name parts, contact info and related unit."""

    rdf_type = foaf.Person
    firstname = rdfSingle(vcard.givenName)
    name = rdfSingle(vcard.familyName)
    hasContactInfo = rdfSingle(obo.ARG_2000028)
    label = rdfSingle(RDFS.label)
    # Single-valued: each assignment in processPersons() goes through the
    # same rdfSingle descriptor.
    # NOTE(review): presumably only the last assigned unit is kept -- confirm.
    associatedOe = rdfSingle(vivo.relatedBy)


class FacultyMember(Person):
    """Person typed ``vivo:FacultyMember``."""

    rdf_type = vivo.FacultyMember


class NonFacultyAcademic(Person):
    """Person typed ``vivo:NonFacultyAcademic``."""

    rdf_type = vivo.NonFacultyAcademic


class NonAcademic(Person):
    """Person typed ``vivo:NonAcademic``."""

    rdf_type = vivo.NonAcademic


class ContactInfo(Thing):
    """A ``vcard:Individual`` carrying a title and pointing back to a person."""

    rdf_type = vcard.Individual
    hasTitle = rdfSingle(vcard.hasTitle, range_type=vcard.Title)
    contactInformationFor = rdfSingle(obo.ARG_2000029, range_type=foaf.Person)


class Title(Thing):
    """A ``vcard:Title`` node holding the title text."""

    rdf_type = vcard.Title
    # XSD datatype IRIs are case-sensitive: the string type is ``xsd:string``.
    title = rdfSingle(vcard.title, range_type=XSD.string)


class Organization(hshThing):
    """A ``foaf:Organization`` with a label and an optional parent unit."""

    rdf_type = foaf.Organization
    label = rdfSingle(RDFS.label)
    parentOe = rdfSingle(obo.BFO_0000050)


class IdSequence:
    """Small helper handing out consecutive integers."""

    def __init__(self, start):
        # Stored one below ``start`` so the first getNext() returns ``start``.
        self.num = start - 1

    def getNext(self):
        """Advance the sequence and return the new value."""
        self.num += 1
        return self.num


# ---------------------------------------------------------------------------
# SQL -> RDF conversion
# ---------------------------------------------------------------------------

# Business role names that map to an academic person class.
_ROLE_TO_PERSON_CLASS = {
    'ProfessorIn': FacultyMember,
    'WiMi': NonFacultyAcademic,
}


def _combine_title(prefix, suffix):
    """Join the non-empty title parts with a single space ('' if none)."""
    return ' '.join(part for part in (prefix, suffix) if part)


def addPersonToGraph(sqlP, additionalIdSeq):
    """Add a given HsHPerson to the graph and return its URI.

    ``additionalIdSeq`` supplies ids for the auxiliary title and contact-info
    nodes, which are only created when the person has a title.
    """
    combined_title = _combine_title(sqlP.title_prefix, sqlP.title_suffix)

    person_uri = URIRef("%s/%s" % (localPerson, sqlP.id))
    # The same URI is instantiated as both hshThing and Person.
    # NOTE(review): presumably each rdfSubject instantiation records that
    # class's rdf:type for the URI -- confirm against rdfAlchemy.
    hshThing(person_uri)
    rdfP = Person(person_uri)
    rdfP.firstname = sqlP.firstname
    rdfP.name = sqlP.name
    rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)

    # If there is a title to add, do it.
    if combined_title:
        title_uri = URIRef(
            "%s/%s" % (localPerson, additionalIdSeq.getNext()))
        rdfTitle = Title(title_uri)
        rdfTitle.title = combined_title

        contact_info_uri = URIRef(
            "%s/%s" % (localPerson, additionalIdSeq.getNext()))
        rdfCi = ContactInfo(contact_info_uri)
        rdfCi.hasTitle = rdfTitle
        rdfCi.contactInformationFor = rdfP

        rdfP.hasContactInfo = rdfCi

    return person_uri


def processPersons(session, additionalIdSeq):
    """Fetch persons and create triples for them.

    Persons without any membership are skipped. Active memberships (``active
    == 'Y'``) whose role is listed in ``_ROLE_TO_PERSON_CLASS`` type the
    person accordingly; a person with no such membership becomes NonAcademic.
    """
    for sqlP in session.query(HsHPerson):
        if not sqlP.memberships:
            continue  # Skip people without any memberships at all.

        personUri = addPersonToGraph(sqlP, additionalIdSeq)

        # Stays True unless an active academic membership is found.
        isNonAcademic = True

        for membership in sqlP.memberships:
            if membership.active != 'Y':
                continue  # Skip inactive memberships.

            # A membership without a business role cannot match any academic
            # role; guard so it does not raise AttributeError.
            role = membership.mBusinessRole
            role_name = role.name if role is not None else None
            person_class = _ROLE_TO_PERSON_CLASS.get(role_name)
            if person_class is None:
                continue

            isNonAcademic = False
            organization_uri = URIRef(
                "%s/%s" % (localOrg, membership.organizational_unit))
            rdfP = person_class(personUri)
            rdfP.associatedOe = organization_uri

        # Assign the non-academic person type if necessary.
        if isNonAcademic:
            NonAcademic(personUri)


def processOrganizations(session, additionalIdSeq):
    """Generate triples for organizational units.

    ``additionalIdSeq`` is accepted for signature parity with
    processPersons() and is not used here.
    """
    for sqlO in session.query(HsHOrganizationalUnit):
        organization_uri = URIRef("%s/%s" % (localOrg, sqlO.id))
        hshThing(organization_uri)
        rdfO = Organization(organization_uri)
        rdfO.label = sqlO.name

        if sqlO.oParent is not None:
            parent_organization_uri = URIRef(
                "%s/%s" % (localOrg, sqlO.oParent.id))
            rdfO.parentOe = parent_organization_uri


def createTriples(db_url=DEFAULT_DB_URL):
    """Build the graph from the database and return it serialized as N3.

    ``db_url`` is the SQLAlchemy connection URL; it defaults to the value the
    script has always used. The return value is whatever
    ``Graph.serialize(format='n3')`` yields (``bytes`` or ``str`` depending on
    the rdflib version). The session and the graph are closed even when
    processing fails.
    """
    engine = create_engine(db_url)
    session = sessionmaker(bind=engine)()
    try:
        g = get_graph()
        try:
            additionalIdSeq = IdSequence(2000000000)  # don't-care sequence

            processOrganizations(session, additionalIdSeq)
            processPersons(session, additionalIdSeq)

            # We're done.
            return g.serialize(format='n3')
        finally:
            g.close()
    finally:
        session.close()


if __name__ == '__main__':
    writeToFile = True

    triples = createTriples()
    print(triples)

    if writeToFile:
        # serialize() may return text or bytes; the file is opened in binary
        # mode, so encode only when text was returned.
        data = triples if isinstance(triples, bytes) else triples.encode('utf-8')
        with open('data.n3', 'wb') as f:
            f.write(data)