[TASK] Now supports OE-Tree and OE<->Person relationships.
Also supports different types of persons.
This commit is contained in:
		
							parent
							
								
									bfc2ad5373
								
							
						
					
					
						commit
						e7e9ab3d23
					
				
							
								
								
									
										210
									
								
								triplify.py
									
									
									
									
									
								
							
							
						
						
									
										210
									
								
								triplify.py
									
									
									
									
									
								
							| @ -2,9 +2,25 @@ | ||||
| from sqlalchemy import * | ||||
| from sqlalchemy.ext.declarative import declarative_base | ||||
| from sqlalchemy.orm import sessionmaker | ||||
| from sqlalchemy.orm import relationship, backref | ||||
| 
 | ||||
| Base = declarative_base() | ||||
| """ | ||||
| 
 | ||||
class HsHOrganizationalUnitType(Base):
	"""Declarative mapping for rows of the ``organizational_unit_type`` table.

	HsHOrganizationalUnit.organizational_unit_type holds a foreign key to
	this table's ``id`` column.
	"""
	__tablename__ = 'organizational_unit_type'

	# Integer primary key.
	id = Column(Integer, primary_key=True)
	# Name of the unit type.
	name = Column(String)
| 
 | ||||
class HsHOrganizationalUnit(Base):
	"""Declarative mapping for rows of the ``organizational_unit`` table.

	Units form a tree: ``parent`` is a foreign key back to this same table,
	and ``oParent`` exposes the referenced parent row as an object.
	"""
	__tablename__ = 'organizational_unit'

	# Integer primary key; also the target of the self-referential `parent` key.
	id = Column(Integer, primary_key=True)
	name = Column(String)
	# Id of the parent unit.
	parent = Column(Integer, ForeignKey('organizational_unit.id'))
	acronym = Column(String)
	# Plain integer; no foreign key is declared for it here.
	post_address = Column(Integer)
	organizational_unit_type = Column(Integer, ForeignKey('organizational_unit_type.id'))
	# remote_side=[id] points the self-referential relationship from a unit
	# towards its parent (many-to-one) instead of towards its children.
	oParent = relationship("HsHOrganizationalUnit", remote_side=[id])
| 
 | ||||
| class HsHMembership(Base): | ||||
| 	__tablename__ = 'membership' | ||||
| 	id = Column(Integer, primary_key=True) | ||||
| @ -13,9 +29,16 @@ class HsHMembership(Base): | ||||
| 	start_date = Column(DateTime) | ||||
| 	end_date = Column(DateTime) | ||||
| 	active = Column(String) | ||||
| 	business_role = Column(Integer) | ||||
| 	business_role = Column(Integer, ForeignKey('business_role.id')) | ||||
| 	mPerson = relationship("HsHPerson") | ||||
| """ | ||||
| 	mBusinessRole = relationship("HsHBusinessRole") | ||||
| 
 | ||||
class HsHBusinessRole(Base):
	"""Declarative mapping for rows of the ``business_role`` table.

	processPersons() compares ``name`` against 'ProfessorIn' and 'WiMi' to
	pick the RDF person type.
	"""
	__tablename__ = 'business_role'

	# Integer primary key; HsHMembership.business_role references it.
	id = Column(Integer, primary_key=True)
	# Role name.
	name = Column(String)
	# Free-text description of the role.
	description = Column(String)
| 
 | ||||
| class HsHPerson(Base): | ||||
| 	__tablename__ = 'person' | ||||
| 	id = Column(Integer, primary_key=True) | ||||
| @ -25,8 +48,8 @@ class HsHPerson(Base): | ||||
| 	title_suffix = Column(String) | ||||
| 	account = Column(Integer) | ||||
| 	gender = Column(String) | ||||
| 	birthday = Column(DateTime) #TODO: without timezone! | ||||
| 	#memberships = relationship("HsHMembership", backref='person') | ||||
| 	#birthday = Column(DateTime) #TODO: without timezone! | ||||
| 	memberships = relationship("HsHMembership") | ||||
| 
 | ||||
| class HsHTelephone(Base): | ||||
| 	__tablename__ = 'telephone' | ||||
| @ -46,31 +69,28 @@ from rdflib.namespace import XSD | ||||
# RDF namespaces used when building the graph.
foaf = Namespace('http://xmlns.com/foaf/0.1/')
core = Namespace('http://vivoweb.org/ontology/core#')
# The Vitro vocabulary IRI ends in '#'; without it, attribute access such as
# vitro.someTerm would yield '.../0.7someTerm' instead of '.../0.7#someTerm'.
vitro = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/0.7#')
# Same IRI as `core`; this is the name the person classes in this file use.
vivo = Namespace('http://vivoweb.org/ontology/core#')
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
obo = Namespace('http://purl.obolibrary.org/obo/')
# No trailing slash on the local namespaces: resource URIs are assembled
# with "%s/%s" formatting.
local = Namespace('http://vivo.bib.hs-hannover.de/person')
hsh = Namespace('http://vivo.bib.hs-hannover.de/ontology/hshOntologie#')

localPerson = Namespace('http://vivo.bib.hs-hannover.de/person')
localOrg = Namespace('http://vivo.bib.hs-hannover.de/org')
| 
 | ||||
def get_graph():
	"""Return the graph behind ``rdfSubject.db`` with the file's prefixes bound."""
	graph = rdfSubject.db
	# (prefix, namespace) pairs, bound in this order.
	prefix_bindings = (
		('foaf', foaf),
		('core', core),
		('vitro', vitro),
		('vivo', vivo),
		('vcard', vcard),
		('obo', obo),
		('local', local),
		('localperson', localPerson),
		('localorg', localOrg),
		('hsh', hsh),
	)
	for prefix, namespace in prefix_bindings:
		graph.bind(prefix, namespace)
	return graph
| 
 | ||||
class IdSequence:
	"""Hand out consecutive integers, beginning with ``start``."""

	def __init__(self, start):
		# Store one less than ``start`` so that the first getNext() call
		# returns ``start`` itself.
		self.num = start - 1

	def getNext(self):
		"""Advance the counter by one and return the new value."""
		self.num = self.num + 1
		return self.num
| 
 | ||||
| class Thing(rdfSubject): | ||||
|     rdf_type = OWL.Thing | ||||
|     label = rdfSingle(RDFS.label) | ||||
| @ -84,62 +104,142 @@ class Person(hshThing): | ||||
|     name = rdfSingle(vcard.familyName) | ||||
|     hasContactInfo = rdfSingle(obo.ARG_2000028) | ||||
|     label = rdfSingle(RDFS.label) | ||||
|     associatedOe = rdfSingle(vivo.relatedBy) | ||||
| 
 | ||||
class FacultyMember(Person):
	"""Person typed as ``vivo:FacultyMember``.

	processPersons() uses this class for active 'ProfessorIn' memberships.
	"""
	rdf_type = vivo.FacultyMember
| 
 | ||||
class NonFacultyAcademic(Person):
	"""Person typed as ``vivo:NonFacultyAcademic``.

	processPersons() uses this class for active 'WiMi' memberships.
	"""
	rdf_type = vivo.NonFacultyAcademic
| 
 | ||||
class NonAcademic(Person):
	"""Person typed as ``vivo:NonAcademic``.

	processPersons() falls back to this class when no active membership
	carries one of the academic roles.
	"""
	rdf_type = vivo.NonAcademic
| 
 | ||||
class ContactInfo(Thing):
	"""vCard individual that links a person to contact details such as a title.

	Each property is declared once; an earlier declaration without
	``range_type`` was immediately overwritten by the later one and is dropped.
	"""
	rdf_type = vcard.Individual
	# Title resource attached to this contact record.
	hasTitle = rdfSingle(vcard.hasTitle, range_type=vcard.Title)
	# Back-reference to the person this contact record belongs to.
	contactInformationFor = rdfSingle(obo.ARG_2000029, range_type=foaf.Person)
| 
 | ||||
class Title(Thing):
	"""vCard title resource holding a person's combined title text."""
	rdf_type = vcard.Title
	# The XSD datatype is spelled lower-case (`xsd:string`); `XSD.String`
	# names an IRI that is not part of XML Schema.
	title = rdfSingle(vcard.title, range_type=XSD.string)
| 
 | ||||
| def createRDFFromSQL(): | ||||
| 	"""Fetch data from postgres and map them into RDF""" | ||||
class Organization(hshThing):
	"""Organizational unit represented as a ``foaf:Organization``."""
	rdf_type = foaf.Organization
	# Display label; processOrganizations() fills it with the unit's name.
	label = rdfSingle(RDFS.label)
	# Link to the parent unit.
	# NOTE(review): obo:BFO_0000050 is presumably the "part of" relation - confirm.
	parentOe = rdfSingle(obo.BFO_0000050)
| 
 | ||||
class IdSequence:
	"""Minimal incrementing counter; the first value returned is ``start``."""

	def __init__(self, start):
		# ``num`` always holds the most recently issued value, so it begins
		# one below the first value to issue.
		self.num = start - 1

	def getNext(self):
		"""Return the next value in the sequence and remember it in ``num``."""
		following = self.num + 1
		self.num = following
		return following
| 
 | ||||
def addPersonToGraph(sqlP, additionalIdSeq):
	"""Add a given HsHPerson to the graph.

	Args:
		sqlP: HsHPerson row; ``id``, ``name``, ``firstname``,
			``title_prefix`` and ``title_suffix`` are read.
		additionalIdSeq: object whose ``getNext()`` supplies the ids for the
			auxiliary title and contact-info resources.

	Returns:
		The URIRef created for the person.
	"""
	# Join whichever title parts are present. Filtering on truthiness (not
	# only on None) keeps an empty-string suffix from leaving a trailing space.
	title_parts = (sqlP.title_prefix, sqlP.title_suffix)
	combined_title = ' '.join(part for part in title_parts if part)

	person_uri = URIRef("%s/%s" % (localPerson, sqlP.id))
	# NOTE(review): the hshThing wrapper is created and discarded before
	# Person wraps the same URI - presumably so that both rdf types get
	# recorded for the resource; confirm against rdfalchemy.
	hshThing(person_uri)
	rdfP = Person(person_uri)
	rdfP.firstname = sqlP.firstname
	rdfP.name = sqlP.name
	rdfP.label = "%s, %s" % (sqlP.name, sqlP.firstname)

	# Only persons with a title get the Title / ContactInfo resources.
	if combined_title:
		# The title id is drawn before the contact-info id.
		title_uri = URIRef("%s/%s" % (localPerson, additionalIdSeq.getNext()))
		rdfTitle = Title(title_uri)
		rdfTitle.title = combined_title

		contact_info_uri = URIRef("%s/%s" % (localPerson, additionalIdSeq.getNext()))
		rdfCi = ContactInfo(contact_info_uri)
		rdfCi.hasTitle = rdfTitle
		rdfCi.contactInformationFor = rdfP

		rdfP.hasContactInfo = rdfCi
	return person_uri
| 
 | ||||
def processPersons(session, additionalIdSeq):
	"""Fetch persons, create triples for them.

	Persons without any membership are skipped. Every other person is added
	via addPersonToGraph() and then typed from the active memberships:
	'ProfessorIn' gives FacultyMember, 'WiMi' gives NonFacultyAcademic, and
	a person with neither is typed NonAcademic.

	Args:
		session: SQLAlchemy session used to query HsHPerson.
		additionalIdSeq: id sequence passed through to addPersonToGraph().
	"""
	# Business-role name -> person class asserted for that role.
	academic_types = {
		'ProfessorIn': FacultyMember,
		'WiMi': NonFacultyAcademic,
	}

	for sqlP in session.query(HsHPerson):
		if not sqlP.memberships:
			continue  # skip people without any memberships at all

		personUri = addPersonToGraph(sqlP, additionalIdSeq)

		# Stays True unless an active membership carries an academic role.
		isNonAcademic = True

		for membership in sqlP.memberships:
			if membership.active != 'Y':
				continue  # skip inactive memberships

			# A membership without a related business role cannot match an
			# academic role; treat it like any other non-academic membership
			# instead of failing on `None.name`.
			role = membership.mBusinessRole
			person_type = academic_types.get(role.name) if role is not None else None
			if person_type is None:
				continue

			isNonAcademic = False
			rdfP = person_type(personUri)
			# NOTE(review): associatedOe is declared with rdfSingle, so a later
			# matching membership presumably replaces the unit set by an
			# earlier one - confirm that is intended.
			rdfP.associatedOe = URIRef("%s/%s" % (localOrg, membership.organizational_unit))

		# Assign the non-academic person type if necessary.
		if isNonAcademic:
			NonAcademic(personUri)
| 
 | ||||
def processOrganizations(session, additionalIdSeq):
	"""Generate triples for organizational units.

	Each HsHOrganizationalUnit row becomes an Organization labelled with the
	unit's name. A unit with a parent also gets a ``parentOe`` link to the
	parent's URI.

	Args:
		session: SQLAlchemy session used to query HsHOrganizationalUnit.
		additionalIdSeq: not used by this function.
	"""
	for sqlO in session.query(HsHOrganizationalUnit):
		organization_uri = URIRef("%s/%s" % (localOrg, sqlO.id))
		# NOTE(review): the hshThing wrapper is created and discarded before
		# Organization wraps the same URI - presumably so that both rdf
		# types get recorded; confirm against rdfalchemy.
		hshThing(organization_uri)
		rdfO = Organization(organization_uri)
		rdfO.label = sqlO.name

		parent_unit = sqlO.oParent
		if parent_unit is not None:
			rdfO.parentOe = URIRef("%s/%s" % (localOrg, parent_unit.id))
| 
 | ||||
def createTriples(db_url="postgresql://hshinfo:hshinfotest@141.71.2.152/hshinfo"):
	"""Fetch memberships, evaluate them, create persons,...

	Organizations are processed before persons, then the whole graph is
	serialized as N3.

	Args:
		db_url: SQLAlchemy database URL. The default preserves the
			connection string this function previously hard-coded.
			NOTE(review): that default embeds credentials in source;
			consider supplying the URL from configuration instead.

	Returns:
		The graph serialized in 'n3' format.
	"""
	engine = create_engine(db_url)
	session = sessionmaker(bind=engine)()

	g = get_graph()
	try:
		# Don't-care sequence for the auxiliary title/contact-info resources.
		additionalIdSeq = IdSequence(2000000000)
		processOrganizations(session, additionalIdSeq)
		processPersons(session, additionalIdSeq)

		# We're done.
		triples = g.serialize(format='n3')
	finally:
		# Release the database session and the graph even if processing fails.
		session.close()
		g.close()
	return triples
| 
 | ||||
if __name__ == '__main__':
	# Toggle for also writing the serialized graph to data.n3.
	writeToFile = True

	triples = createTriples()
	print(triples)

	if writeToFile:
		# `with` closes the file even if the write fails.
		with open('data.n3', 'wb') as f:
			f.write(triples)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user