Friday, March 18, 2011

Mutual information (2)

We're working on a paper from Michael Laub's lab at MIT (Skerker et al 2009 PMID 18555780). The first post is here.

In this part we'll load the alignment (supplementary data file S4; the annotation on the page is incorrect) and crunch the numbers. For now the script just writes the results to disk.

We'll do the analysis in another post.

python info.py > results.txt


import sys
from utils import load_data
import info_helper as ih

# Toy alignment, kept for quick manual experiments.
#aln = 'AASSASSTTT\nNMWWNTTKKS\nGTSNTYRSTA\nGGGGGGGGGG'
fn = 'cell3925mmc4.fa'
# NOTE(review): load_data comes from the project's utils module (not shown);
# the code below treats its return value as one FASTA-formatted string.
data = load_data(fn)
# Split into records on '>'; element 0 is whatever precedes the first
# header (normally the empty string), so it is dropped.
data = data.strip().split('>')[1:]
# Keep only the sequence of each record.  Every line after the header is
# joined, so records whose sequence is wrapped over several lines are read
# in full rather than truncated to their first line.
data = [''.join(line.strip() for line in e.split('\n')[1:]) for e in data]

def show(data):
    """Print how many sequences pass each gap threshold, then exit.

    Debugging helper used to choose the gap cutoff.  Prints the total
    number of sequences, then for each threshold i in 0..6 one line
    "i count", where count is the number of sequences containing at
    most i '-' characters.

    data -- list of aligned sequence strings.

    Does not return: calls sys.exit() once the table is printed.
    """
    # %-formatting with a single argument gives the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('starting: %d' % len(data))
    for i in range(7):
        kept = [e for e in data if e.count('-') <= i]
        print('%d %d' % (i, len(kept)))
    sys.exit()
#show(data)

def transpose(L):
    """Return the columns of L as a list of strings.

    L -- sequence of equal-length strings (the rows of an alignment).

    Element i of the result is the concatenation of character i of every
    row, in row order.  The number of columns is taken from the first
    row; a later row that is shorter raises IndexError, and characters
    beyond the first row's length are ignored.

    An empty L yields an empty list.
    """
    if not L:
        # No rows means no columns; avoids indexing L[0] on an empty list.
        return []
    width = len(L[0])
    return [''.join([row[i] for row in L]) for i in range(width)]

# Keep only sequences with at most 4 gap characters ('-').
data = [e for e in data if e.count('-') <= 4]
#data = data[:100]   # uncomment to try the script on a small subset
cols = transpose(data)
# NOTE(review): make_prob_dict and get_info live in info_helper (not shown).
# Per the post's title they presumably compute per-column probabilities and
# the mutual information between two columns -- confirm against that module.
pD = ih.make_prob_dict(cols)
info = dict()

# Fill the lower triangle including the diagonal: every pair with j <= i.
for i in range(len(cols)):
    for j in range(i + 1):
        info[(i, j)] = ih.get_info(i, j, cols, pD, v=False)

# One "i j value" line per pair, in sorted (i, j) order, value rounded to
# 3 decimals.  Single-argument print with %-formatting produces the same
# output under Python 2 and Python 3.
for i, j in sorted(info):
    print('%d %d %s' % (i, j, round(info[(i, j)], 3)))