#!/usr/bin/env python
# Benjamin Vernot (using code from blastq.py, by Jacob Joseph)

from JJutil import pgutils
import os

class orf_q:
    """Lookup helper for SGD ORF / gene-name data stored in DurandLab2.

    Every ORF / gene-name query is restricted to ``self.sgd_ver``, the
    highest ``source_ver_id`` recorded for the 'SGD' source when the
    instance was constructed.

    NOTE(review): ``pgutils.dbwrap`` is not visible from this file.  The
    code below assumes ``fetchsingle`` yields one scalar value (falsy /
    None when nothing matches) and that ``execute`` and ``fetchsingle``
    accept a parameter mapping for ``%(name)s`` placeholders, as
    ``fetch_SGD_gene_description`` has always relied on -- confirm
    against JJutil.pgutils.
    """

    # the database handle
    dbw = None

    # the most recent version of SGD (highest source_ver_id for 'SGD')
    sgd_ver = None

    def __init__(self):
        """Open the database wrapper and cache the current SGD version id."""
        self.dbw = pgutils.dbwrap(debug=False)
        self.sgd_ver = self.fetch_source_ver()

    ##
    #  Helper functions for getting non orf info out of DurandLab2
    ##

    def fetch_SGD_gene_description(self, gene):
        """Return the ``sgd_feature.description`` value for an ORF or gene name.

        inputs - gene : string, ORF name or gene name to describe

        Returns the string "No Description" when the name cannot be
        resolved to a seq_id; otherwise returns whatever the description
        query yields.
        """
        seq_id = self.fetch_seq_id_from_orf(gene)
        if not seq_id:
            return "No Description"

        q = """select description from sgd_feature
        where seq_id = %(seq_id)s
        """
        # Pass only the value the query needs instead of the whole locals().
        return self.dbw.fetchsingle(q, {'seq_id': seq_id})

    def fetch_source_id(self, source_name='SGD'):
        """Return the source_id for a given source (specified with a string).

        inputs - source_name : string representing source to lookup
        """
        q = """select source_id from prot_seq_source
        where source_name = %s
        """
        # NOTE(review): the parameter is handed over as a bare string, exactly
        # as before; how dbwrap.execute wraps it is not visible here.
        self.dbw.execute(q, source_name)
        return self.dbw.fetchsingle()

    def fetch_source_ver(self, ver=None):
        """Return the highest source_ver_id recorded for the SGD source.

        inputs - ver : currently unused; kept so existing callers still work

        If the SGD source_id lookup yields None, the ``%d`` formatting
        below raises TypeError.
        """
        q = """ select source_ver_id from prot_seq_source_ver
        where source_id = %d order by source_ver_id desc limit 1
        """
        source_id = self.fetch_source_id()
        self.dbw.execute(q % source_id)
        return self.dbw.fetchsingle()

    ##
    # Orf retrieval and conversion functions
    ##

    def fetch_seq_id_from_orf(self, orf_name):
        """Convert an orf (or gene name) to a DurandLab2 id ( seq_id ).

        inputs - orf_name : string that represents the
                            orf_name we want the seq_id for

        The name is first matched against ``sgd_feature.orf_name``; when
        that gives nothing truthy it is retried as a gene name against
        ``sgd_gene_name.gene_name``.  The result of that second lookup is
        returned unchanged, whatever it is.
        """
        # Values travel as bound parameters so that a name containing a
        # quote character cannot break or alter the SQL text.
        params = {'sgd_ver': self.sgd_ver, 'name': orf_name}

        q = """ select seq_id
        from sgd_feature join
        prot_seq_version using (seq_id)
        where source_ver_id = %(sgd_ver)s and orf_name = %(name)s
        """

        q_alias = """ select seq_id
        from sgd_gene_name join
        prot_seq_version using (seq_id)
        where source_ver_id = %(sgd_ver)s and gene_name = %(name)s
        """

        seq_id = self.dbw.fetchsingle(q, params)
        if seq_id:
            return seq_id
        return self.dbw.fetchsingle(q_alias, params)

    def fetch_orf_from_seq_id(self, seq_id):
        """Convert a DurandLab2 id ( seq_id ) to an orf name.

        inputs - seq_id : seq_id we want an orf for

        Returns the orf name, or False when nothing matches (including
        when seq_id is None, e.g. the result of a failed
        fetch_seq_id_from_orf call).
        """
        if seq_id is None:
            # Nothing to look up; avoid a pointless round trip.
            return False

        q = """ select orf_name from sgd_feature
        join prot_seq_version using (seq_id)
        where source_ver_id = %(sgd_ver)s and seq_id = %(seq_id)s
        """
        orf = self.dbw.fetchsingle(
            q, {'sgd_ver': self.sgd_ver, 'seq_id': seq_id})
        if orf:
            return orf
        return False

    def fetch_is_orf(self, orf):
        """Determine if the name specified resolves to a seq_id.

        inputs - orf : string representing orf name to lookup

        Because fetch_seq_id_from_orf falls back to gene names, a gene
        name that resolves also yields True.
        """
        return bool(self.fetch_seq_id_from_orf(orf))

    def fetch_orf_from_name(self, name):
        """Get orf name from gene name.

        inputs - name : string representing gene name

        Returns the given name unchanged when the lookup yields None, so
        callers always get some string to work with.
        """
        q = """select orf_name from sgd_feature join sgd_gene_name
        using (seq_id) join prot_seq_version using (seq_id) where
        source_ver_id = %(sgd_ver)s and gene_name = %(name)s
        """
        orf = self.dbw.fetchsingle(
            q, {'sgd_ver': self.sgd_ver, 'name': name})
        if orf is not None:
            return orf
        return name

    def fetch_name_from_orf(self, orf):
        """Get the first gene name for an orf name.

        inputs - orf : string representing orf name

        Returns False when the name does not resolve to a seq_id.
        """
        if not self.fetch_is_orf(orf):
            return False
        # The existence check was just done; go straight to the name query
        # rather than repeating it through fetch_names_from_orf.
        return self._gene_names_for_orf(orf)[0]

    def fetch_names_from_orf(self, orf):
        """Get list of gene names from orf name.

        inputs - orf : string representing orf name

        Will return [orf] if the value is not an orf, or if no gene names
        are recorded for it.  This is probably not the best behaviour,
        but it guarantees that we get some string that we can work with.
        """
        if not self.fetch_is_orf(orf):
            print("%s is not an orf (in orf_q.fetch_names..)" % (orf,))
            return [orf]
        return self._gene_names_for_orf(orf)

    def _gene_names_for_orf(self, orf):
        """Query the gene names for orf; [orf] when the query yields no rows."""
        q = """ select gene_name
        from sgd_gene_name join sgd_feature using (seq_id)
        join prot_seq_version using (seq_id)
        where source_ver_id = %(sgd_ver)s and orf_name = %(name)s
        """
        # NOTE(review): assumes dbwrap.execute takes a parameter mapping the
        # same way dbwrap.fetchsingle does -- confirm against JJutil.pgutils.
        self.dbw.execute(q, {'sgd_ver': self.sgd_ver, 'name': orf})
        rows = self.dbw.fetchall()
        if not rows:
            return [orf]
        # Each row is indexed for its first (only selected) column.
        return [row[0] for row in rows]
 
        
if __name__ == '__main__':
    # Manual smoke test: needs a reachable database behind pgutils.dbwrap.
    # Single-argument print(...) calls produce the same output under both
    # Python 2 and Python 3.
    orf_q_i = orf_q()
    print(orf_q_i.sgd_ver)

    orf_name = "YGR271C-A"
    if orf_q_i.fetch_is_orf(orf_name):
        # Resolve the seq_id once and reuse it for the round-trip check.
        seq_id = orf_q_i.fetch_seq_id_from_orf(orf_name)
        print(seq_id)
        print(orf_q_i.fetch_orf_from_seq_id(seq_id))
        print("found orf")
    else:
        print("no orf")
    # Other lookups to try by hand:
    #   orf_q_i.fetch_orf_from_name("MNN6")
    #   orf_q_i.fetch_names_from_orf(orf_name)
