#!/usr/bin/env python
"""Lookup helpers for SGD ORF / gene-name data stored in DurandLab2.

All queries are issued through ``pgutils.dbwrap`` using named bound
parameters (``%(name)s`` plus a mapping), the calling style already used
by ``fetch_SGD_gene_description``.  Values are never spliced into the SQL
text, so names containing quotes cannot break or alter a query.

NOTE(review): this assumes ``dbwrap.execute`` accepts the same
named-parameter mapping that ``dbwrap.fetchsingle`` does -- confirm
against JJutil.pgutils.
"""

# Benjamin Vernot (using code from blastq.py, by Jacob Joseph)

import os

from JJutil import pgutils


class orf_q:
    """Query wrapper for converting between ORF names, gene names and seq_ids."""

    # the database handle
    dbw = None

    # the most recent version of SGD
    sgd_ver = None

    def __init__(self):
        """Open the database wrapper and cache the newest SGD source version."""
        self.dbw = pgutils.dbwrap(debug=False)
        self.sgd_ver = self.fetch_source_ver()

    ##
    # Helper functions for getting non orf info out of DurandLab2
    ##

    def fetch_SGD_gene_description(self, gene):
        """Return the one-line description stored for ``gene``.

        ``gene`` is resolved with ``fetch_seq_id_from_orf``, so it may be
        either an ORF name or a gene name.  Returns the string
        "No Description" when the name cannot be resolved to a seq_id;
        otherwise returns whatever the database holds in
        ``sgd_feature.description`` for that seq_id.
        """
        seq_id = self.fetch_seq_id_from_orf(gene)
        if not seq_id:
            return "No Description"

        q = """select description
               from sgd_feature
               where seq_id = %(seq_id)s"""
        return self.dbw.fetchsingle(q, {"seq_id": seq_id})

    def fetch_source_id(self, source_name='SGD'):
        """Return the source_id for the source called ``source_name``.

        inputs - source_name : string naming the source to look up
        """
        q = """select source_id
               from prot_seq_source
               where source_name = %(source_name)s"""
        return self.dbw.fetchsingle(q, {"source_name": source_name})

    def fetch_source_ver(self, ver=None):
        """Return the highest source_ver_id recorded for the default source.

        ``ver`` is accepted for backward compatibility but is not used.
        Returns None when the source itself is not found.
        """
        source_id = self.fetch_source_id()
        if source_id is None:
            # Nothing to look up; previously this failed while formatting
            # the query text.
            return None

        q = """select source_ver_id
               from prot_seq_source_ver
               where source_id = %(source_id)s
               order by source_ver_id desc
               limit 1"""
        return self.dbw.fetchsingle(q, {"source_id": source_id})

    ##
    # Orf retrieval and conversion functions
    ##

    def fetch_seq_id_from_orf(self, orf_name):
        """Convert an ORF name to a DurandLab2 id (seq_id).

        The name is first matched against ``sgd_feature.orf_name``; when
        that yields nothing it is matched against
        ``sgd_gene_name.gene_name`` instead, so gene names resolve too.
        Both lookups are restricted to the cached SGD source version.

        inputs - orf_name : string naming the orf we want the seq_id for
        Returns the seq_id, or the (falsy) result of the gene-name lookup
        when neither query matches.
        """
        params = {"sgd_ver": self.sgd_ver, "name": orf_name}

        q = """select seq_id
               from sgd_feature
               join prot_seq_version using (seq_id)
               where source_ver_id = %(sgd_ver)s
                 and orf_name = %(name)s"""
        seq_id = self.dbw.fetchsingle(q, params)
        if seq_id:
            return seq_id

        q_alias = """select seq_id
                     from sgd_gene_name
                     join prot_seq_version using (seq_id)
                     where source_ver_id = %(sgd_ver)s
                       and gene_name = %(name)s"""
        return self.dbw.fetchsingle(q_alias, params)

    def fetch_orf_from_seq_id(self, seq_id):
        """Convert a DurandLab2 id (seq_id) to an ORF name.

        inputs - seq_id : seq_id we want an orf for
        Returns the orf name, or False when no row matches.
        """
        q = """select orf_name
               from sgd_feature
               join prot_seq_version using (seq_id)
               where source_ver_id = %(sgd_ver)s
                 and seq_id = %(seq_id)s"""
        orf = self.dbw.fetchsingle(
            q, {"sgd_ver": self.sgd_ver, "seq_id": seq_id})
        if orf:
            return orf
        return False

    def fetch_is_orf(self, orf):
        """Return True when ``orf`` resolves to a seq_id, else False.

        Resolution goes through ``fetch_seq_id_from_orf``, so a gene name
        that resolves is also reported as True.

        inputs - orf : string representing orf name to lookup
        """
        return bool(self.fetch_seq_id_from_orf(orf))

    def fetch_orf_from_name(self, name):
        """Return the ORF name for the gene called ``name``.

        When no ORF is found the input ``name`` is returned unchanged, so
        callers always get a value back.

        inputs - name : string representing gene name
        """
        q = """select orf_name
               from sgd_feature
               join sgd_gene_name using (seq_id)
               join prot_seq_version using (seq_id)
               where source_ver_id = %(sgd_ver)s
                 and gene_name = %(name)s"""
        orf = self.dbw.fetchsingle(
            q, {"sgd_ver": self.sgd_ver, "name": name})
        if orf is not None:
            return orf
        return name

    def fetch_name_from_orf(self, orf):
        """Return the first gene name for ``orf``, or False if unknown.

        inputs - orf : string representing orf name
        """
        if not self.fetch_is_orf(orf):
            return False
        # fetch_names_from_orf never returns an empty list (it falls back
        # to [orf]), so indexing the first element is safe.
        return self.fetch_names_from_orf(orf)[0]

    def fetch_names_from_orf(self, orf):
        """Return the list of gene names recorded for ``orf``.

        When ``orf`` is not recognised, or has no gene names, the list
        ``[orf]`` is returned instead.  Returning the input is probably
        not the best behaviour, but it guarantees that callers get some
        string they can work with.

        inputs - orf : string representing orf name
        """
        if not self.fetch_is_orf(orf):
            print("%s is not an orf (in orf_q.fetch_names..)" % orf)
            return [orf]

        q = """select gene_name
               from sgd_gene_name
               join sgd_feature using (seq_id)
               join prot_seq_version using (seq_id)
               where source_ver_id = %(sgd_ver)s
                 and orf_name = %(name)s"""
        self.dbw.execute(q, {"sgd_ver": self.sgd_ver, "name": orf})
        rows = self.dbw.fetchall()
        if not rows:
            return [orf]

        # Each row is a one-column result; keep just the gene name.
        return [row[0] for row in rows]


if __name__ == '__main__':
    orf_q_i = orf_q()
    print(orf_q_i.sgd_ver)

    orf_name = "YGR271C-A"
    gene_name = "MNN6"

    if orf_q_i.fetch_is_orf(orf_name):
        seq_id = orf_q_i.fetch_seq_id_from_orf(orf_name)
        print(seq_id)
        print(orf_q_i.fetch_orf_from_seq_id(seq_id))
        print("found orf")
    else:
        print("no orf")

    # Further examples:
    # print(orf_q_i.fetch_orf_from_name(gene_name))
    # print(orf_q_i.fetch_names_from_orf(orf_name))