{-# LANGUAGE CPP #-}
#include "fusion-phases.h"

-- | Parallel segment descriptors.
--
--   See "Data.Array.Parallel.Unlifted" for how this works.
--
module Data.Array.Parallel.Unlifted.Parallel.UPSegd
        ( -- * Types
          UPSegd(..)
        , valid

          -- * Constructors
        , mkUPSegd
        , fromUSegd
        , empty
        , singleton
        , fromLengths

          -- * Projections
        , length
        , takeUSegd
        , takeDistributed
        , takeLengths
        , takeIndices
        , takeElements

          -- * Indices
        , indicesP

          -- * Replicate
        , replicateWithP

          -- * Segmented Folds
        , foldWithP
        , fold1WithP
        , sumWithP
        , foldSegsWithP
        )
where
import Data.Array.Parallel.Unlifted.Distributed
import Data.Array.Parallel.Unlifted.Sequential.USegd            (USegd)
import qualified Data.Array.Parallel.Unlifted.Distributed.USegd as USegd
import qualified Data.Array.Parallel.Unlifted.Sequential        as Seq
import qualified Data.Array.Parallel.Unlifted.Sequential.Vector as US
import qualified Data.Array.Parallel.Unlifted.Sequential.USegd  as USegd
import Data.Array.Parallel.Pretty                               hiding (empty)
import Data.Array.Parallel.Unlifted.Sequential.Vector           (Vector, MVector, Unbox)
import Control.Monad.ST
import Prelude                                                  hiding (length)


-- | Prefix a function name with the fully qualified name of this module.
--
--   The resulting string is handed to the checked vector and distributed
--   array operations used in this module (`US.slice`, `US.index`, `indexD`)
--   as their first argument -- presumably so that failures name the caller.
here :: String -> String
here = ("Data.Array.Parallel.Unlifted.Parallel.UPSegd." ++)


-- | A parallel segment descriptor holds a global (undistributed) segment
--   descriptor, as well as a distributed version. The distributed version
--   describes how to split work on the segmented array over the gang.
data UPSegd
        = UPSegd
        { upsegd_usegd :: !USegd
          -- ^ Global segment descriptor, covering the whole array.

        , upsegd_dsegd :: Dist ((USegd, Int), Int)
          -- ^ One segment descriptor per chunk, paired with the segment id
          --   of the first slice in that chunk and the offset of that slice
          --   within its segment.
          --   See the docs of `splitSegdOnElemsD` for an example.
        }


-- Pretty ---------------------------------------------------------------------
instance PprPhysical UPSegd where
 pprp (UPSegd global distributed)
        = text "UPSegd" $$ nest 7 (vcat fields)
        where fields
                = [ text "usegd: " <+> pprp global
                  , text "dsegd: " <+> pprp distributed ]


-- Valid ----------------------------------------------------------------------
-- | O(1).
--   Check the internal consistency of a parallel segment descriptor.
--
--   * TODO: this is still a stub. No checks are performed and the result
--     is always `True`.
valid :: UPSegd -> Bool
valid = const True
{-# NOINLINE valid #-}
--  NOINLINE because it's only used during debugging anyway.


-- Constructors ---------------------------------------------------------------
-- | O(1). Construct a new parallel segment descriptor.
--
--   Builds the global descriptor with `USegd.mkUSegd` and then distributes
--   it with `fromUSegd`.
mkUPSegd
        :: Vector Int   -- ^ Length of each segment.
        -> Vector Int   -- ^ Starting index of each segment.
        -> Int          -- ^ Total number of elements in the flat array.
        -> UPSegd

mkUPSegd lens idxs n
        = fromUSegd global
        where global = USegd.mkUSegd lens idxs n
{-# INLINE_UP mkUPSegd #-}


-- | Convert a global `USegd` to a parallel `UPSegd` by distributing
--   it across the gang.
--
--   The global descriptor is kept as-is; the distributed part is produced
--   by `USegd.splitSegdOnElemsD` over `theGang`.
fromUSegd :: USegd -> UPSegd
fromUSegd segd
        = UPSegd
        { upsegd_usegd = segd
        , upsegd_dsegd = USegd.splitSegdOnElemsD theGang segd }
{-# INLINE_UP fromUSegd #-}


-- | O(1). Construct an empty segment descriptor, with no elements or segments.
empty :: UPSegd
empty = fromUSegd USegd.empty
{-# INLINE_UP empty #-}


-- | O(1). Construct a singleton segment descriptor.
--   The single segment covers the given number of elements.
singleton :: Int -> UPSegd
singleton n = fromUSegd (USegd.singleton n)
{-# INLINE_UP singleton #-}


-- | O(n). Convert an array of segment lengths into a parallel segment descriptor.
--
--   The array contains the length of each segment, and we compute the
--   indices from that.
--   Runtime is O(n) in the number of segments.
--
fromLengths :: Vector Int -> UPSegd
fromLengths = fromUSegd . USegd.fromLengths
{-# INLINE_UP fromLengths #-}


-- Projections ----------------------------------------------------------------
-- INLINE trivial projections as they'll expand to a single record selector.
-- The composite projections below go through `takeUSegd`, which is itself
-- nothing more than the `upsegd_usegd` selector.

-- | O(1). Yield the overall number of segments.
length :: UPSegd -> Int
length = USegd.length . takeUSegd
{-# INLINE length #-}


-- | O(1). Yield the global `USegd` of a `UPSegd`.
takeUSegd :: UPSegd -> USegd
takeUSegd = upsegd_usegd
{-# INLINE takeUSegd #-}


-- | O(1). Yield the distributed `USegd` of a `UPSegd`.
--
--   Each chunk carries a plain `USegd`, the segment id of the first
--   slice in the chunk, and the starting offset of that slice in its segment.
--
takeDistributed :: UPSegd -> Dist ((USegd, Int), Int)
takeDistributed = upsegd_dsegd
{-# INLINE takeDistributed #-}


-- | O(1). Yield the lengths of the individual segments.
takeLengths :: UPSegd -> Vector Int
takeLengths = USegd.takeLengths . takeUSegd
{-# INLINE takeLengths #-}


-- | O(1). Yield the segment indices.
takeIndices :: UPSegd -> Vector Int
takeIndices = USegd.takeIndices . takeUSegd
{-# INLINE takeIndices #-}


-- | O(1). Yield the total number of array elements.
--
--   @takeElements upsegd = sum (takeLengths upsegd)@
--
takeElements :: UPSegd -> Int
takeElements = USegd.takeElements . takeUSegd
{-# INLINE takeElements #-}


-- Indices --------------------------------------------------------------------
-- | O(n). Yield a vector containing indices that give the position of each
--   member of the flat array in its corresponding segment.
--
--   @indicesP (fromLengths [5, 2, 3]) = [0,1,2,3,4,0,1,0,1,2]@
--
--   Every gang member runs `Seq.indicesSU'` on its own chunk descriptor,
--   starting from that chunk's offset into its first segment; the per-chunk
--   results are then joined back into one vector.
--
indicesP :: UPSegd -> Vector Int
indicesP
        = joinD theGang balanced
        . mapD  theGang chunkIndices
        . takeDistributed
        where
          -- The id of the chunk's first segment is not needed here.
          chunkIndices ((chunkSegd, _firstSeg), startOffset)
                = Seq.indicesSU' startOffset chunkSegd
{-# NOINLINE indicesP #-}
--  NOINLINE because we're not using it yet.


-- Replicate ------------------------------------------------------------------
-- | Copying segmented replication.
--   Each element of the vector is physically
--   copied according to the length of each segment in the segment descriptor.
--
--   @replicateWithP (fromLengths [3, 1, 2]) [5, 6, 7] = [5, 5, 5, 6, 7, 7]@
--
--   Each gang member slices out the source elements belonging to the
--   segments of its chunk and replicates them with `Seq.replicateSU`.
--
replicateWithP :: Unbox a => UPSegd -> Vector a -> Vector a
replicateWithP segd !xs
        = joinD theGang balanced
                (mapD theGang replicateChunk (takeDistributed segd))
        where
          -- The slice starts at the id of the chunk's first segment and has
          -- one source element per segment in the chunk's descriptor.
          replicateChunk ((chunkSegd, firstSeg), _)
                = Seq.replicateSU chunkSegd
                        (US.slice (here "replicateWithP")
                                  xs firstSeg (USegd.length chunkSegd))
{-# INLINE_UP replicateWithP #-}


-- Fold -----------------------------------------------------------------------
-- | Fold segments specified by a `UPSegd`.
--
--   The combining function is used both inside each chunk (via
--   `Seq.foldlSU`) and to merge partial results of split segments.
foldWithP :: Unbox a => (a -> a -> a) -> a -> UPSegd -> Vector a -> Vector a
foldWithP f !z
        = foldSegsWithP f foldChunk
        where foldChunk = Seq.foldlSU f z
{-# INLINE_UP foldWithP #-}


-- | Fold segments specified by a `UPSegd`, with a non-empty vector.
fold1WithP :: Unbox a => (a -> a -> a) -> UPSegd -> Vector a -> Vector a
fold1WithP f
        = foldSegsWithP f foldChunk
        where foldChunk = Seq.fold1SU f
{-# INLINE_UP fold1WithP #-}


-- | Sum up segments specified by a `UPSegd`.
sumWithP :: (Num e, Unbox e) => UPSegd -> Vector e -> Vector e
sumWithP = foldWithP (+) 0
{-# INLINE_UP sumWithP #-}


-- | Fold the segments specified by a `UPSegd`.
--
--   This low level function takes a per-element worker and a per-segment
--   worker. Every chunk is folded with the per-segment worker; the
--   per-element worker then repairs the results of segments that were
--   split across multiple threads.
--
--   The steps are:
--
--   1. Pair each chunk descriptor with its share of the elements and fold it.
--
--   2. If a chunk starts part-way through a segment (non-zero offset), its
--      first result is only a partial one. It is split off as a "carry",
--      tagged with the id of the segment it belongs to.
--
--   3. Join the remaining per-chunk results into a mutable vector, let
--      `fixupFold` merge the carries into it, and freeze the result.
--
foldSegsWithP
        :: Unbox a
        => (a -> a -> a)                        -- ^ Per-element worker.
        -> (USegd -> Vector a -> Vector a)      -- ^ Per-segment worker.
        -> UPSegd
        -> Vector a
        -> Vector a

{-# INLINE_UP foldSegsWithP #-}
foldSegsWithP fElem fSeg segd xs
        = carries `seq` results `seq`
          runST (do
                merged <- joinDM theGang results
                fixupFold fElem merged carries
                US.unsafeFreeze merged)
        where
          (carries, results)
                = unzipD
                $ mapD theGang foldChunk
                $ zipD (takeDistributed segd)
                       (splitD theGang balanced xs)

          foldChunk (((chunkSegd, firstSeg), startOffset), chunkElems)
           = let partials = fSeg chunkSegd chunkElems

                 -- Number of leading results that belong to a segment
                 -- begun in an earlier chunk: one if this chunk starts
                 -- part-way through a segment, none otherwise.
                 {-# INLINE [0] carryLen #-}
                 carryLen
                        | startOffset == 0 = 0
                        | otherwise        = 1

             in  ( (firstSeg, US.take carryLen partials)
                 , US.drop carryLen partials )


-- | Merge the carried partial results into the joined result vector.
--
--   For each gang index from 1 up to (but excluding) the gang size: if that
--   chunk has a non-empty carry, its first element is combined into the
--   slot of the segment it is tagged with, as @f current carried@.
--
--   The walk starts at index 1, so the carry of chunk 0 is never consulted
--   -- presumably because the first chunk cannot begin part-way through a
--   segment (TODO confirm).
--
fixupFold
        :: Unbox a
        => (a -> a -> a)
        -> MVector s a
        -> Dist (Int, Vector a)
        -> ST s ()
{-# NOINLINE fixupFold #-}
fixupFold f !mrs !dcarry = loop 1
        where
          !threads = gangSize theGang

          loop tid
                | tid >= threads = return ()
                | US.null carry  = next
                | otherwise
                = do    current <- US.read mrs seg
                        US.write mrs seg
                                (f current (US.index (here "fixupFold") carry 0))
                        next
                where
                  next         = loop (tid + 1)

                  -- Only demanded once the bounds guard above has failed.
                  (seg, carry) = indexD (here "fixupFold") dcarry tid