#!/usr/bin/python3
# -*- coding: utf-8 -*-

import Graph as gr # Graph library from part 1 of the project

def loadOBO(filename):
	"""Parse an OBO file into a directed, unweighted graph of ontology terms.

	Every non-obsolete [Term] stanza becomes a node carrying 'id', 'type'
	('GOTerm') and, when present, 'name', 'namespace' and 'def'. Each is_a
	and part_of relationship becomes an edge from the term to its parent,
	tagged through e['type']. All other tags are ignored. Reading stops at
	the first [Typedef] stanza found after the terms, or at end of file.

	Args:
		filename: path of the OBO file to read.

	Returns:
		The graph obtained from gr.createGraph(directed=True, weighted=False).
		No node is added to it when the file contains no [Term] stanza.
	"""
	# Example of a stanza handled by parseTerm:
	# [Term]
	# id: GO:0000028
	# name: ribosomal small subunit assembly
	# namespace: biological_process
	# def: "The aggregation, arrangement and bonding together of constituent RNAs and proteins to form the small ribosomal subunit." [GOC:jl]
	# subset: gosubset_prok
	# synonym: "30S ribosomal subunit assembly" NARROW [GOC:mah]
	# synonym: "40S ribosomal subunit assembly" NARROW [GOC:mah]
	# is_a: GO:0022618 ! ribonucleoprotein complex assembly
	# relationship: part_of GO:0042255 ! ribosome assembly
	# relationship: part_of GO:0042274 ! ribosomal small subunit biogenesis
	part_of_tag = 'relationship: part_of '

	def parseTerm(lines):
		# Turn the buffered lines of one [Term] stanza into a node and its edges.
		# obsolete terms are discarded
		if any(l.startswith('is_obsolete: true') for l in lines):
			return
		# The id is normally the first tag of the stanza; a stanza without
		# one (e.g. an empty buffer) is skipped instead of raising.
		term_id = next((l[3:].strip() for l in lines if l.startswith('id:')), '')
		if not term_id:
			return
		term = gr.add_node(g, term_id)
		term['id'] = term_id
		term['type'] = 'GOTerm'
		for line in lines:
			# attributes (name, namespace, def)
			if line.startswith('name:'):
				term['name'] = line[6:]
			elif line.startswith('namespace:'):
				term['namespace'] = line[11:]
			elif line.startswith('def:'):
				term['def'] = line[5:]
			# relationships; the trailing "! label" comment is optional
			elif line.startswith('is_a:'):
				parent = line[5:].partition('!')[0].strip()
				if parent:
					# add_egde: spelled as everywhere else in this file
					# (presumably the Graph module's actual function name)
					e = gr.add_egde(g, term_id, parent)
					e['type'] = 'is_a'
			elif line.startswith(part_of_tag):
				# first token after the tag is the target term id
				target = line[len(part_of_tag):].partition('!')[0].split()
				if target:
					e = gr.add_egde(g, term_id, target[0])
					e['type'] = 'part_of'

	g = gr.createGraph(directed=True, weighted=False)
	with open(filename) as f:
		# buff stays None while skipping the header, i.e. until the first
		# [Term] line; iterating the file ends cleanly at EOF even when no
		# [Term] is ever found.
		buff = None
		for line in f:
			line = line.rstrip()
			if buff is None:
				if line.startswith('[Term]'):
					buff = []
			elif line.startswith('[Term]'):
				# next Term found: flush the previous one and start a new buffer
				parseTerm(buff)
				buff = []
			elif line.startswith('[Typedef]'):
				# terms are over; the pending one is flushed below
				break
			else:
				buff.append(line)
		# flush the last Term, whether reading ended on [Typedef] or on EOF
		if buff is not None:
			parseTerm(buff)
	return g

def loadGOA(go, filename):
	"""Load gene product annotations from a GAF file into the graph *go*.

	For every data row, a 'GeneProduct' node is created for the gene
	product id (column 1) if it is not yet in go['nodes'], its 'name',
	'desc' and 'aliases' attributes are set from columns 2, 9 and 10, and
	an edge of type 'annotation' is added from the gene product to the GO
	term of column 4, with the evidence code of column 6 appended to the
	edge's 'evidence-codes' list. A GO id missing from go['nodes'] gets a
	node without attributes. The qualifier column (e.g. NOT) is not examined.

	go['names'] is replaced by a new dict mapping gene product name to id;
	an entry is registered when the gene product node is created.

	Lines starting with '!' and blank lines are skipped.

	Args:
		go: graph (as built by loadOBO) that is modified in place.
		filename: path of the tab-separated GAF file.

	Raises:
		IndexError: if a data row has fewer than 11 tab-separated columns.
	"""
	# !gaf-version: 2.1
	# !GO-version: http://purl.obolibrary.org/obo/go/releases/2016-10-29/go.owl
	# UniProtKB  A5A605  ykfM      GO:0006974  PMID:20128927   IMP              P  Uncharacterized protein YkfM    YKFM_ECOLI|ykfM|b4586         protein taxon:83333  20100901  EcoCyc
	# UniProtKB  A5A605  ykfM      GO:0016020  GO_REF:0000037  IEA              C  Uncharacterized protein YkfM    YKFM_ECOLI|ykfM|b4586         protein taxon:83333  20161029  UniProt
	# UniProtKB  P00448  sodA      GO:0004784  GO_REF:0000003  IEA  EC:1.15.1.1 F  Superoxide dismutase [Mn]       SODM_ECOLI|sodA|JW3879|b3908  protein taxon:83333  20161029  UniProt
	# UniProtKB  P00393  ndh  NOT  GO:0005737  PMID:6784762    IDA              C  NADH dehydrogenase              DHNA_ECOLI|ndh|JW1095|b1109   protein taxon:83333  20100621  EcoliWiki
	#     0        1       2   3       4             5          6        7      8             9                              10
	#              id    name        go_id               evidence-codes                     desc                           aliases
	names = {}
	go['names'] = names
	with open(filename) as f:
		for line in f:
			# header/comment lines and blank lines carry no annotation
			if line.startswith('!') or not line.strip():
				continue
			# strip only the line terminator so that empty trailing fields
			# (tabs at end of line) are not lost before splitting
			cols = line.rstrip('\r\n').split('\t')
			gp_id = cols[1]
			if gp_id not in go['nodes']:
				created = gr.add_node(go, gp_id)
				created['id'] = gp_id
				created['type'] = 'GeneProduct'
				names[cols[2]] = gp_id
			gp = go['nodes'][gp_id]
			gp['name'] = cols[2]
			gp['desc'] = cols[9]
			gp['aliases'] = cols[10]
			go_id = cols[4]
			if go_id not in go['nodes']:
				gr.add_node(go, go_id)
			# add_egde: spelled as everywhere else in this file
			# (presumably the Graph module's actual function name)
			e = gr.add_egde(go, gp_id, go_id)
			e['type'] = 'annotation'
			# NOTE(review): the list is only reused if add_egde returns the
			# existing edge for a repeated (gene product, term) pair - confirm
			if 'evidence-codes' not in e:
				e['evidence-codes'] = []
			e['evidence-codes'].append(cols[6])


##### lib tests #####
if __name__ == "__main__":
	# Script entry point: no actual test is run yet, only a banner is printed.
	banner = 'GeneOntology lib tests'
	print(banner)