# Origin: commit bfc2ad53730ec4ec6c51bf66f60f70c8f683ff59
# Author: Jan Philipp Timme
# Date:   Fri Feb 14 15:01:34 2014 +0100
# [TASK] Initial commit. Working tool.
# File:   triplify.py
"""Map person rows from a SQL database to RDF and dump them as N3.

The SQL side is declared with SQLAlchemy's declarative base, the RDF side
with rdfalchemy ``rdfSubject`` classes.  ``createRDFFromSQL`` ties the two
together and writes the serialized graph to a file.
"""

from sqlalchemy import Column
from sqlalchemy import DateTime
from sqlalchemy import ForeignKey
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

from rdflib import BNode, Graph, Literal, Namespace, OWL, RDF, RDFS, URIRef
from rdflib.namespace import XSD
from rdfalchemy import rdfSingle
from rdfalchemy.rdfSubject import rdfSubject


# --------------------------------------------------------------------------
# SQL
# --------------------------------------------------------------------------

Base = declarative_base()

# Disabled mapping, kept for reference (it was disabled in the original too).
# Re-enabling it additionally needs ``relationship`` from ``sqlalchemy.orm``.
#
# class HsHMembership(Base):
#     __tablename__ = 'membership'
#     id = Column(Integer, primary_key=True)
#     person = Column(Integer, ForeignKey('person.id'))
#     organizational_unit = Column(Integer)
#     start_date = Column(DateTime)
#     end_date = Column(DateTime)
#     active = Column(String)
#     business_role = Column(Integer)
#     mPerson = relationship("HsHPerson")


class HsHPerson(Base):
    """Row of the ``person`` table."""

    __tablename__ = 'person'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    firstname = Column(String)
    title_prefix = Column(String)
    title_suffix = Column(String)
    account = Column(Integer)
    gender = Column(String)
    birthday = Column(DateTime)  # TODO: without timezone!

    # memberships = relationship("HsHMembership", backref='person')


class HsHTelephone(Base):
    """Row of the ``telephone`` table."""

    __tablename__ = 'telephone'
    id = Column(Integer, primary_key=True)
    call_number = Column(String)
    description = Column(String)


# --------------------------------------------------------------------------
# RDF
# --------------------------------------------------------------------------

foaf = Namespace('http://xmlns.com/foaf/0.1/')
core = Namespace('http://vivoweb.org/ontology/core#')
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7')
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
local = Namespace('http://vivo.bib.hs-hannover.de/person')
hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#')

# Default connection URL used by createRDFFromSQL().
# NOTE(review): this embeds credentials in source control; callers can pass
# their own ``db_url`` instead -- consider moving the default to configuration.
DEFAULT_DB_URL = "postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo"


def get_graph():
    """Return ``rdfSubject.db`` with all namespace prefixes of this module bound."""
    g = rdfSubject.db
    g.bind('foaf', foaf)
    g.bind('core', core)
    g.bind('vitro', vitro)
    g.bind('vcard', vcard)
    g.bind('obo', obo)
    g.bind('local', local)
    g.bind('hsh', hsh)
    return g


class IdSequence(object):
    """Counter that hands out consecutive integers beginning at ``start``."""

    def __init__(self, start):
        # Stored one below ``start`` so the first getNext() returns ``start``.
        self.num = start - 1

    def getNext(self):
        """Advance the counter by one and return the new value."""
        self.num += 1
        return self.num


class Thing(rdfSubject):
    rdf_type = OWL.Thing
    label = rdfSingle(RDFS.label)


class hshThing(Thing):
    rdf_type = hsh.hshLocal


class Person(hshThing):
    rdf_type = foaf.Person
    firstname = rdfSingle(vcard.givenName)
    name = rdfSingle(vcard.familyName)
    hasContactInfo = rdfSingle(obo.ARG_2000028)
    label = rdfSingle(RDFS.label)


class ContactInfo(Thing):
    rdf_type = vcard.Individual
    hasTitle = rdfSingle(vcard.hasTitle)
    contactInformationFor = rdfSingle(obo.ARG_2000029)


class Title(Thing):
    rdf_type = vcard.Title
    title = rdfSingle(vcard.title)


def _combine_title(prefix, suffix):
    """Join title prefix and suffix with one space; ``None`` parts are skipped.

    Returns the empty string when neither part contributes anything.
    """
    combined = prefix if prefix is not None else ''
    if suffix is not None:
        combined = suffix if combined == '' else combined + ' ' + suffix
    return combined


def createRDFFromSQL(db_url=DEFAULT_DB_URL, output_path='data.n3'):
    """Fetch persons from the SQL database, map them to RDF and dump N3.

    The serialized graph is printed to stdout and written to ``output_path``.

    :param db_url: SQLAlchemy connection URL; defaults to ``DEFAULT_DB_URL``.
    :param output_path: file the N3 serialization is written to.
    """
    engine = create_engine(db_url)
    session = sessionmaker(bind=engine)()

    g = get_graph()

    additionalIdSeq = IdSequence(2000000000)  # don't care sequence
    try:
        for sqlP in session.query(HsHPerson):
            combined_title = _combine_title(sqlP.title_prefix,
                                            sqlP.title_suffix)

            person_uri = URIRef("%s/%s" % (local, sqlP.id))
            # The same URI is instantiated as hshThing and then as Person
            # (marked "WTF?!" in the original).  Presumably this makes the
            # resource carry both rdf:type values -- TODO confirm.
            hshThing(person_uri)
            rdfP = Person(person_uri)
            rdfP.firstname = sqlP.firstname
            rdfP.name = sqlP.name
            rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)

            # If there is a title to add, do it.
            if combined_title != '':
                title_uri = URIRef(
                    "%s/%s" % (local, additionalIdSeq.getNext()))
                rdfTitle = Title(title_uri)
                rdfTitle.title = combined_title

                contact_info_uri = URIRef(
                    "%s/%s" % (local, additionalIdSeq.getNext()))
                rdfCi = ContactInfo(contact_info_uri)
                rdfCi.hasTitle = rdfTitle
                rdfCi.contactInformationFor = rdfP

                rdfP.hasContactInfo = rdfCi

            # NOTE(review): the original contains this ``break`` but its
            # indentation was lost; it is assumed to sit at loop level, so
            # only the first person is exported.  Confirm that this limit is
            # intended before removing it.
            break
    finally:
        # Release the database connection even if the mapping fails.
        session.close()

    triples = g.serialize(format='n3')
    g.close()

    print(triples)

    # Depending on the rdflib version serialize() yields bytes or text; the
    # file is opened in binary mode, so text has to be encoded first.
    payload = triples if isinstance(triples, bytes) else triples.encode('utf-8')
    with open(output_path, 'wb') as f:
        f.write(payload)


if __name__ == '__main__':
    createRDFFromSQL()