#!/usr/bin/env python
# Jacob Joseph
# 5 Jan 2010

# Output a list of hierarchical clusters formed by cutting the tree at
# some distance.
#
# Usage: <script> CLUSTER_RUN_ID DISTANCE [SET_ID_FILTER]

import sys

USAGE = "usage: %s CLUSTER_RUN_ID DISTANCE [SET_ID_FILTER]"


def parse_args(argv):
    """Parse the command line into (cr_id, distance, set_id_filter).

    argv is the full argument vector, program name first.  The cluster
    run id and the optional set id filter are parsed as integers, the
    cut distance as a float.  set_id_filter is None when the third
    positional argument is absent.

    Raises SystemExit carrying a usage message when the argument count
    is wrong or a value is not numeric, instead of letting an
    IndexError / ValueError traceback escape.
    """
    prog = argv[0] if argv else "<script>"
    if len(argv) not in (3, 4):
        raise SystemExit(USAGE % prog)

    try:
        cr_id = int(argv[1])
        distance = float(argv[2])
        set_id_filter = int(argv[3]) if len(argv) == 4 else None
    except ValueError as err:
        raise SystemExit("%s\nerror: %s" % (USAGE % prog, err))

    return cr_id, distance, set_id_filter


def clusters_by_size(clusters):
    """Return the (cluster_id, items) pairs of a mapping, largest first.

    sorted() is stable, so clusters of equal size keep the order in
    which the mapping yields them.  Unlike calling .sort() on
    dict.items(), this also works where items() returns a view.
    """
    return sorted(clusters.items(), key=lambda pair: len(pair[1]),
                  reverse=True)


def main(argv):
    """Cut the hierarchical clustering of a run and print each cluster.

    Prints a header naming the cluster run and the similarity cutoff
    (1.0 - distance), then every cluster in order of decreasing size,
    with one sq.fetch_fa_descr() result per member.
    """
    cr_id, distance, set_id_filter = parse_args(argv)

    # Imported only after the arguments are validated, so that a usage
    # error is reported without needing the project packages.
    from JJcluster import cluster_sql
    from DurandDB import seqq

    sq = seqq.seqq(debug=False)
    csql = cluster_sql.hcluster(cluster_run_id=cr_id)

    print("Cluster run %d: %s" % (cr_id, csql.cluster_comment))
    # The cutoff is reported as a similarity rather than as the raw
    # distance.  Label and value are joined by a single space, as the
    # original two-operand print statement did.
    print("%s %s" % ("Cut at similarity: ", 1.0 - distance))
    print("==============================================")

    clusters = csql.cut_tree(distance, set_id_filter=set_id_filter)

    for c, items in clusters_by_size(clusters):
        print("\nCluster %d -- Size: %d" % (c, len(items)))
        print("----------------------------------------------")

        # sq.fetch_seq_acc(x) can be substituted here to list sequence
        # accessions instead of descriptions.
        for x in items:
            print(sq.fetch_fa_descr(x))


if __name__ == "__main__":
    main(sys.argv)