# xpktools.py

# xpktools.py: A python module containing function definitions and classes
# useful for manipulating data from nmrview .xpk peaklist files.
#
# ********** INDEX of functions and classes **********
#
# XpkEntry class: A class suited for handling single lines of
#                 non-header data from an nmrview .xpk file.  This class
#                 provides methods for extracting data by the field name
#                 which is listed in the last line of the peaklist header.

import string
import sys
# * * * * * INITIALIZATIONS * * * * *
# Presumably the number of header lines at the top of an .xpk file (Peaklist
# reads six header lines before the data); this constant is not referenced by
# the code visible in this file -- TODO confirm before relying on it.
HEADERLEN=6
# * * * * * _______________ * * * * *


class XpkEntry:
    """Hold one data (non-header) line of an nmrview .xpk peaklist file.

    Usage: XpkEntry(xpkentry, xpkheadline) where xpkentry is a data line
    from an nmrview .xpk file and xpkheadline is the header line that gives
    the names of the entries, which is typically the sixth line of the
    header (counting from 1).

    Values are accessed through the ``fields`` dictionary by the name used
    in the header line: ``self.fields["H1.P"]`` returns the H1.P entry and
    ``self.fields["entrynum"]`` returns the line number (1st field of the
    line).  All values are kept as strings.
    """

    def __init__(self, entry, headline):
        """Split ``entry`` and label its values with the names in ``headline``.

        The first token of ``entry`` is the entry number and has no label in
        the header line, so header label i names data token i + 1.  Labels
        and values are paired positionally; surplus tokens on either side
        are ignored.  An empty ``entry`` yields an empty ``fields`` dict.
        """
        datlist = entry.split()
        headlist = headline.split()

        # Holds all fields from the input line; keys are the data labels
        # from the .xpk header.
        self.fields = dict(zip(headlist, datlist[1:]))

        # Set last so that it wins over a header label of the same name.
        if datlist:
            self.fields["entrynum"] = datlist[0]


class Peaklist:
    """Read an entire .xpk file and expose its header and data lines.

    The first six lines of the file are available (without their trailing
    newline) as the attributes ``firstline``, ``axislabels``, ``dataset``,
    ``sw``, ``sf`` and ``datalabels``.  Every following line is stored, in
    file order and without its trailing newline, in the list ``data``.
    """

    def __init__(self, infn):
        """Load the peaklist stored in the file named ``infn``.

        Raises IOError (OSError on Python 3) if the file cannot be opened.
        """
        self.data = []  # init the data line list

        # 'with' guarantees the file is closed even if reading fails.
        with open(infn, "r") as infile:
            # Read in the header lines
            self.firstline = infile.readline().split("\012")[0]
            self.axislabels = infile.readline().split("\012")[0]
            self.dataset = infile.readline().split("\012")[0]
            self.sw = infile.readline().split("\012")[0]
            self.sf = infile.readline().split("\012")[0]
            self.datalabels = infile.readline().split("\012")[0]

            # Read in the data lines to a list; readline() returns "" only
            # at end of file, so blank lines inside the file are kept.
            for line in iter(infile.readline, ""):
                self.data.append(line.split("\012")[0])

    def residue_dict(self, index):
        """Return the data lines grouped by residue number.

        ``index`` names the nucleus to key on, in the same form as it
        appears in the xpk label line (H1, 15N for example).  For each data
        line the ``<index>.L`` field is read and the text before its first
        "." is converted to an integer residue number.

        The returned dictionary (also stored as ``self.dict``) maps
        ``str(residue)`` to the list of raw data lines for that residue and
        additionally holds the integer entries ``"maxres"`` and
        ``"minres"``; both are -1 when there are no data lines.

        Raises KeyError if a line has no ``<index>.L`` field and ValueError
        if the residue part of the label is not an integer.
        """
        label_key = index + ".L"
        residues = []

        self.dict = {}
        for line in self.data:
            # Cast the data line into the XpkEntry class to look up the label
            label = XpkEntry(line, self.datalabels).fields[label_key]
            res = int(label.split(".")[0])
            residues.append(res)
            # Start a new list for a new residue, else extend the existing one
            self.dict.setdefault(str(res), []).append(line)

        # Computed from the collected numbers rather than with an in-band -1
        # sentinel, so that negative residue numbers are handled correctly.
        self.dict["maxres"] = max(residues) if residues else -1
        self.dict["minres"] = min(residues) if residues else -1

        return self.dict

    def write_header(self, outfn):
        """Write the six stored header lines to the file named ``outfn``.

        The file is opened with ``_try_open_write`` and each header line is
        written followed by a newline.
        """
        outfile = _try_open_write(outfn)
        try:
            for header_line in (
                self.firstline,
                self.axislabels,
                self.dataset,
                self.sw,
                self.sf,
                self.datalabels,
            ):
                outfile.write(header_line)
                outfile.write("\012")
        finally:
            # Close the handle even if one of the writes fails.
            outfile.close()
def _try_open_read(fn):
    """Open the file named ``fn`` for reading and return the file object.

    If the file cannot be opened (IOError), a message naming the file is
    printed and the program is terminated with ``sys.exit(0)``.
    """
    try:
        infile = open(fn, "r")
    except IOError:
        print("file %s could not be opened for reading - quitting." % (fn,))
        sys.exit(0)
    return infile
def _try_open_write(fn):
    """Open the file named ``fn`` for writing and return the file object.

    An existing file is truncated.  If the file cannot be opened (IOError),
    a message naming the file is printed and the program is terminated with
    ``sys.exit(0)``.
    """
    try:
        infile = open(fn, "w")
    except IOError:
        print("file %s could not be opened for writing - quitting." % (fn,))
        sys.exit(0)
    return infile
def replace_entry(line, fieldn, newentry):
    """Return ``line`` with its ``fieldn``'th field replaced by ``newentry``.

    Fields are counted from 1.  ``newentry`` is converted with ``str()``.
    No padding is implemented currently: spacing will change if the
    original field entry and the new field entry are of different lengths.

    This function depends on ``_find_start_entry`` to locate the field.
    Raises IndexError if no field text is found at the located position.
    """
    start = _find_start_entry(line, fieldn)

    # _find_start_entry reports position 0 for field 1 even when the line
    # begins with blanks; skip any whitespace so that the replacement
    # covers the field text itself rather than the blanks in front of it.
    rest = line[start:]
    start += len(rest) - len(rest.lstrip())

    leng = len(line[start:].split()[0])
    return line[:start] + str(newentry) + line[start + leng:]
def _find_start_entry(line, n):
    """Return the index of the first character of the n'th field of ``line``.

    ``line`` is treated as space delimited and ``n`` is counted starting
    with 1.  The n=1 field by definition begins at the first character, so
    0 is returned for it even if the line starts with spaces.  If the line
    has fewer than ``n`` fields the index of its last character is returned
    (0 for an empty line).

    This function is used by replace_entry.
    """
    if n == 1:
        return 0  # Special case

    if not line:
        return 0  # Nothing to scan; avoids indexing an empty string

    # Initialize the state according to whether the first character
    # is a space or belongs to the first field.
    if line[0] == " ":
        infield = 0  # Flag: the scan position is currently inside a field
        field = 0    # Number of fields whose start has been seen so far
    else:
        infield = 1
        field = 1

    # Count the number of fields by tracking space/non-space transitions
    c = 1
    leng = len(line)
    while c < leng and field < n:
        if infield:
            if line[c] == " " and not (line[c - 1] == " "):
                infield = 0
        else:
            if not line[c] == " ":
                infield = 1
                field = field + 1
        c = c + 1

    # c was advanced once past the character that started the field
    return c - 1
def data_table(fn_list, datalabel, keyatom):
    """Build a data table from a list of input xpk files ``fn_list``.

    The data element reported is ``datalabel`` and the table is indexed by
    the residue number of the nucleus indicated by ``keyatom``.

    Returns a list of strings, one per residue number from the smallest
    "minres" to the largest "maxres" found over all files.  Each string is
    the residue number followed by one tab-separated column per input file
    and a trailing newline.  A column holds the ``datalabel`` field of the
    first peak recorded for that residue in that file, or "*" if the file
    has no peak for the residue.  An empty ``fn_list`` gives an empty list.
    """
    dict_list, label_line_list = _read_dicts(fn_list, keyatom)
    if not dict_list:
        return []

    # Find global max and min residue numbers
    minr = min(residues["minres"] for residues in dict_list)
    maxr = max(residues["maxres"] for residues in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):  # s.t. res numbers
        key = str(res)
        cells = [key]
        # s.t. dictionaries, each paired with its own file's label line
        for residues, label in zip(dict_list, label_line_list):
            if key in residues:
                cells.append(XpkEntry(residues[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist
def _sort_keys(dict):
    """Return a new list holding the keys of ``dict`` in ascending order."""
    # list.sort() sorts in place and returns None, so its result must not be
    # returned; sorted() builds and returns the ordered list instead.
    return sorted(dict.keys())
def _read_dicts(fn_list, keyatom):
    """Read multiple xpk files into a list of residue dictionaries.

    For every file name in ``fn_list`` a Peaklist is loaded and its
    ``residue_dict(keyatom)`` result and ``datalabels`` header line are
    collected.  Returns the two-element list
    ``[residue_dictionaries, datalabel_lines]``, both in ``fn_list`` order.
    """
    residue_dicts = []
    label_lines = []
    for filename in fn_list:
        parsed = Peaklist(filename)
        residue_dicts.append(parsed.residue_dict(keyatom))
        label_lines.append(parsed.datalabels)
    return [residue_dicts, label_lines]