module Bio.SeqLoc.Transcript
       ( -- * Type for splice junctions
         Junction(..)
       , fromDonorAcceptor, donor, acceptor
       , junctions
         -- * Representation of transcript
       , Transcript(..), utr5, utr3, cdsLocation
       , sortContigs
       )
       where

import Control.Applicative
import Control.Monad
import qualified Data.ByteString.Char8 as BS
import Data.List
import Data.Ord

import qualified Data.Attoparsec.Zepto as ZP

import Bio.SeqLoc.LocRepr
import qualified Bio.SeqLoc.Location as Loc
import Bio.SeqLoc.OnSeq
import qualified Bio.SeqLoc.Position as Pos
import qualified Bio.SeqLoc.SpliceLocation as SpLoc
import Bio.SeqLoc.Strand

-- | Splice junctions, which are isomorphic to the introns they span,
-- but which support other biologically relevant constructors and
-- accessors.
newtype Junction = Junction
  { intron :: Loc.ContigLoc -- ^ The intron spanned by the junction
  } deriving (Show)

-- | Separator written between the donor and the acceptor in the
-- textual representation of a 'Junction'.
slash :: BS.ByteString
slash = BS.pack "/"

-- | A junction is represented as the donor position, a @/@, and then
-- the acceptor position; parsing reads the same three pieces and
-- rebuilds the junction with 'fromDonorAcceptor'.
instance LocRepr Junction where
  repr j = BS.concat [ repr . donor $ j, slash, repr . acceptor $ j ]
  unrepr = fromDonorAcceptor <$> unrepr <*> (ZP.string slash *> unrepr)

-- | Create a splice junction from a donor position (the last position
-- in the 5' exon) and the acceptor position (the first position in
-- the 3' exon).
--
-- The intron starts one position past the donor, in the direction
-- given by the donor's strand, and stops one position short of the
-- acceptor.  Its length is therefore @abs (acceptor - donor) - 1@:
-- both flanking exon positions are excluded.  Only the strand of the
-- donor is consulted; the strand of the acceptor is ignored.
--
-- NOTE(review): the length assumes that 'Loc.fromPosLen',
-- 'Loc.extend' and 'Pos.slide' behave the way 'donor', 'acceptor' and
-- 'junctions' use them, so that @donor (fromDonorAcceptor d a)@ and
-- @acceptor (fromDonorAcceptor d a)@ give back @d@ and @a@ -- confirm
-- against "Bio.SeqLoc.Location".
fromDonorAcceptor :: Pos.Pos -> Pos.Pos -> Junction
fromDonorAcceptor d a = Junction $! Loc.fromPosLen intronStart len
  where
    -- Number of positions strictly between the donor and the acceptor.
    len = abs (Pos.offset a - Pos.offset d) - 1
    -- First intron position: one step away from the donor, downstream
    -- on the donor's strand.
    intronStart = case Pos.strand d of
      Plus  -> Pos.slide d 1
      Minus -> Pos.slide d (-1)

-- | Donor position, i.e., the last position in the 5' exon around a
-- junction.  Obtained by extending the intron by one position at its
-- 5' end and taking the start of the result.
donor :: Junction -> Pos.Pos
donor = Loc.startPos . Loc.extend (1, 0) . intron

-- | Acceptor position, i.e., the first position in the 3' exon around
-- a junction.  Obtained by extending the intron by one position at
-- its 3' end and taking the end of the result.
acceptor :: Junction -> Pos.Pos
acceptor = Loc.endPos . Loc.extend (0, 1) . intron

-- | List of splice junctions from a spliced location, in order.  One
-- junction is produced for every pair of adjacent contigs, so a
-- location with fewer than two contigs yields an empty list.
junctions :: SpLoc.SpliceLoc -> [Junction]
junctions sploc = zipWith junction contigs (drop 1 contigs)
  where
    contigs = Loc.toContigs sploc
    junction c5 c3 = Junction $ Loc.fromPosLen p5 len
      where
        -- Position just past the 3' end of the upstream contig.
        p5 = Loc.endPos . Loc.extend (0, 1) $ c5
        -- Position just before the 5' end of the downstream contig.
        p3 = Loc.startPos . Loc.extend (1, 0) $ c3
        -- Inclusive distance between those two positions.
        len = 1 + abs (Pos.offset p3 - Pos.offset p5)

-- | Representation of a genomic transcript, with a gene and a
-- transcript identifier, along with the genomic location of the
-- processed transcript and an optional coding sequence on that
-- transcript.
data Transcript = Transcript
  { geneId :: !SeqLabel
    -- ^ Gene or locus name for a collection of transcripts
  , trxId :: !SeqLabel
    -- ^ Specific transcript identifier
  , location :: !SpliceSeqLoc
    -- ^ Sequence location of processed transcript
  , cds :: !(Maybe Loc.ContigLoc)
    -- ^ Location of CDS on the transcript
  }

-- | Location of the 5' UTR in transcript coordinates.  The result is
-- 'Nothing' when the transcript has no 'cds', and also when the 'cds'
-- starts at the very first nucleotide, so any region returned has
-- positive length.  A 'cds' whose start position is not on the 'Plus'
-- strand also gives 'Nothing'.
utr5 :: Transcript -> Maybe Loc.ContigLoc
utr5 trx = do
  cdsloc <- cds trx
  case Loc.startPos cdsloc of
    Pos.Pos cdsStart Plus
      | cdsStart > 0 -> Just $! Loc.fromBoundsStrand 0 (cdsStart - 1) Plus
    _ -> Nothing

-- | Location of the 3' UTR in transcript coordinates.  The result is
-- 'Nothing' when the transcript has no 'cds', and also when the 'cds'
-- ends at the very last nucleotide, so any region returned has
-- positive length.  A 'cds' whose end position is not on the 'Plus'
-- strand also gives 'Nothing'.
utr3 :: Transcript -> Maybe Loc.ContigLoc
utr3 trx = do
  cdsloc <- cds trx
  case Loc.endPos cdsloc of
    Pos.Pos cdsEnd Plus
      | cdsEnd < lastOff -> Just $! Loc.fromBoundsStrand (cdsEnd + 1) lastOff Plus
    _ -> Nothing
  where
    -- Offset of the final nucleotide of the processed transcript.
    lastOff = (Loc.length . unOnSeq . location $ trx) - 1

-- | Genomic location of the CDS: the 'cds' region mapped out of the
-- transcript 'location' and placed on the same named sequence.
-- 'Nothing' when there is no 'cds' or when the mapping fails.
cdsLocation :: Transcript -> Maybe SpliceSeqLoc
cdsLocation trx = do
  cdsloc <- cds trx
  fmap (OnSeq name) (Loc.clocOutof cdsloc loc)
  where
    OnSeq name loc = location trx

-- | The input contigs sorted in stranded order, provided they all
-- share one strand.  An empty input, or contigs on differing strands,
-- gives 'Nothing'.
sortContigs :: [Loc.ContigLoc] -> Maybe [Loc.ContigLoc]
sortContigs [] = Nothing
sortContigs cs@(c0:_)
  | all onFirstStrand cs = Just (inStrandOrder firstStrand)
  | otherwise = Nothing
  where
    firstStrand = Loc.strand c0
    onFirstStrand c = Loc.strand c == firstStrand
    -- Ascending 'Loc.offset5' on the plus strand, descending on the
    -- minus strand.
    inStrandOrder Plus = sortBy (comparing Loc.offset5) cs
    inStrandOrder Minus = sortBy (comparing (negate . Loc.offset5)) cs