#!/usr/bin/env python"""Application to convert AXT file to LAV file. Reads an AXT file from standard input and writes a LAV file to standard out; some statistics are written to standard error.usage: %prog primary_spec secondary_spec [--silent] < axt_file > lav_fileEach spec is of the form seq_file[:species_name]:lengths_file.- seq_file should be a format string for the file names for the individual sequences, with %s to be replaced by the alignment's src field. For example, "hg18/%s.nib" would prescribe files named "hg18/chr1.nib", "hg18/chr2.nib", etc.- species_name is optional. If present, it is prepended to the alignment's src field.- Lengths files provide the length of each chromosome (lav format needs this information but axt file does not contain it). The format is a series of lines of the form: <chromosome name> <length> The chromosome field in each axt block must match some <chromosome name> in the lengths file."""__author__="Bob Harris (rsharris@bx.psu.edu)"importsysimportcopyimportbx.align.axtimportbx.align.lavdefusage(s=None):message=__doc__if(s==None):sys.exit(message)else:sys.exit("%s\n%s"%(s,message))defmain():globaldebug# parse the command lineprimary=Nonesecondary=Nonesilent=False# pick off optionsargs=sys.argv[1:]while(len(args)>0):arg=args.pop(0)val=Nonefields=arg.split("=",1)if(len(fields)==2):arg=fields[0]val=fields[1]if(val==""):usage("missing a value in %s="%arg)if(arg=="--silent")and(val==None):silent=Trueelif(primary==None)and(val==None):primary=argelif(secondary==None)and(val==None):secondary=argelse:usage("unknown argument: %s"%arg)if(primary==None):usage("missing primary file name and length")if(secondary==None):usage("missing secondary file name and length")try:(primaryFile,primary,primaryLengths)=parse_spec(primary)except:usage("bad primary spec (must be seq_file[:species_name]:lengths_file")try:(secondaryFile,secondary,secondaryLengths)=parse_spec(secondary)except:usage("bad secondary spec (must be seq_file[:species_name]:lengths_file")# read the lengthsspeciesToLengths={}speciesToLengths[primary]=read_lengths(primaryLengths)speciesToLengths[secondary]=read_lengths(secondaryLengths)# read the alignmentsout=bx.align.lav.Writer(sys.stdout, \
attributes={"name_format_1":primaryFile,"name_format_2":secondaryFile})axtsRead=0axtsWritten=0foraxtBlockinbx.align.axt.Reader(sys.stdin, \
species_to_lengths=speciesToLengths,species1=primary,species2=secondary,support_ids=True):axtsRead+=1out.write(axtBlock)axtsWritten+=1out.close()if(notsilent):sys.stderr.write("%d blocks read, %d written\n"%(axtsRead,axtsWritten))defparse_spec(spec):# returns (seq_file,species_name,lengths_file)fields=spec.split(":")if(len(fields)==2):return(fields[0],"",fields[1])elif(len(fields)==3):return(fields[0],fields[1],fields[2])else:raiseValueErrordefread_lengths(fileName):chromToLength={}f=file(fileName,"r")forlineNumber,lineinenumerate(f):line=line.strip()if(line==""):continueif(line.startswith("#")):continuefields=line.split()if(len(fields)!=2):raise"bad lengths line (%s:%d): %s"%(fileName,lineNumber,line)chrom=fields[0]try:length=int(fields[1])except:raise"bad lengths line (%s:%d): %s"%(fileName,lineNumber,line)if(chrominchromToLength):raise"%s appears more than once (%s:%d): %s" \
%(chrom,fileName,lineNumber)chromToLength[chrom]=lengthf.close()returnchromToLengthif__name__=="__main__":main()