#!/usr/bin/env python

from JJutil import mysqlutils
import time


def parse_hgnc_full():
    """Parse fields in the full hgnc dump.  Tab-delimited columns:
hgnc_id, approved_symbol, approved_name, status, locus_type,
previous_symbols, previous_names, aliases, chromosome, date_approved,
date_modified, date_name_changed, accession_numbers, enzyme_id,
entrez_gene_id, mgd_id, misc_ids, pubmed_ids, refseq_ids,
gene_family_name, gdb_id_mapped, entrez_gene_id_mapped,
omim_id_mapped, refseq_mapped, swissprot_id_mapped"""

    f = open(infile)
    for l in f:
        if l.startswith('HGNC ID'): continue # header
        entry = l.split('\t')

        if len(entry) != 25:
            print entry
            break

        # drop the trailing newline
        #if entry[-1] == "\n": entry = entry[:-1]

        # strip any whitespace
        for i in range(len(entry)):
            entry[i] = entry[i].strip()

        # set missing integer columns to NULL
        # hgnc_id, entrez_gene_id, entrez_gene_id_mapped,
        # omim_id_mapped
        for i in (0,14, 21, 22):
            if entry[i]=='': entry[i]='NULL'
            else: entry[i] = int(entry[i])
                
        # format dates appropriately
        # fields 9, 10, 11
        for i in range(9,12):
            if entry[i]=='':
                entry[i]='NULL'
            else:
                entry[i] = time.strftime(
                    '%Y-%m-%d', time.strptime(entry[i], '%d/%m/%Y'))

        insert_hgnc_entry( entry)

def insert_hgnc_entry( entry):
    q = """INSERT INTO %s VALUES ('%s', '%s', '%s', '%s', '%s',
    '%s', '%s', '%s', '%s', '%s',
    '%s', '%s', '%s', '%s',
    '%s', '%s', '%s', '%s', '%s',
    '%s', '%s', '%s',
    '%s', '%s', '%s')"""

    inputtup = (db_table,) + tuple(entry)
    inputtup = tuple(map(mysqlutils.quote, inputtup))

    dbw.execute( q % inputtup)
    dbw.commit()
    return

if __name__ == "__main__":
    dbhost = "goby.compbio.cs.cmu.edu"
    dbuser = "jmjoseph"
    dbpass = "pQ1rng8c"
    db = "DurandLab2"

    dbw = mysqlutils.dbwrap( dbhost, dbuser, dbpass, db)

    infile = "/net/goby/usr1/jmjoseph/tmp/hgnc_2007_05_21"
    db_table = "hgnc_20070521"
    
    parse_hgnc_full()
