#!/usr/bin/env python

# Jacob Joseph
# 5 Jan 2010

# Output a list of hierarchical clusters formed by cutting the tree at
# some distance.

import sys
from JJcluster import cluster_sql
from DurandDB import seqq

def parse_args(argv):
    """Parse the command line into (cluster_run_id, distance, set_id_filter).

    argv is the full argument vector, including the program name at
    index 0.  The first two arguments are required; the third is
    optional and yields None when absent.  Any arguments beyond the
    third are ignored.

    Exits with a usage message when a required argument is missing.
    Raises ValueError when an argument is not a valid number.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else "cut_tree"
        sys.exit("Usage: %s <cluster_run_id> <distance> [set_id_filter]"
                 % prog)

    cr_id = int(argv[1])
    distance = float(argv[2])
    # Honour the filter whenever it is supplied, not only when it
    # happens to be the last argument.
    set_id_filter = int(argv[3]) if len(argv) > 3 else None

    return cr_id, distance, set_id_filter


def main(argv=None):
    """Print the clusters obtained by cutting a cluster run's tree.

    argv defaults to sys.argv.  Clusters are listed largest first; for
    each one a header line with its id and size is printed, followed by
    one description line per member.
    """
    if argv is None:
        argv = sys.argv

    cr_id, distance, set_id_filter = parse_args(argv)

    sq = seqq.seqq(debug=False)
    csql = cluster_sql.hcluster(cluster_run_id=cr_id)

    # Single-argument print(...) behaves identically as a Python 2
    # print statement and as the Python 3 print function.
    print("Cluster run %d: %s" % (cr_id, csql.cluster_comment))
    # The cut is reported as a similarity, i.e. 1 - distance.
    print("Cut at similarity:  %s" % (1.0 - distance))
    print("==============================================")

    # The result is used as a mapping: cluster id -> collection of members.
    clusters = csql.cut_tree(distance, set_id_filter=set_id_filter)

    # Largest clusters first; the sort is stable, so equally sized
    # clusters keep the order in which the mapping yields them.
    by_size = sorted(clusters.items(),
                     key=lambda pair: len(pair[1]),
                     reverse=True)

    for c, items in by_size:
        print("\nCluster %d -- Size: %d" % (c, len(items)))
        print("----------------------------------------------")

        # One description per member.  (An earlier variant printed
        # sq.fetch_seq_acc(x) instead.)
        for x in items:
            print(sq.fetch_fa_descr(x))


if __name__ == "__main__":
    main()


    
    
    
