{-# OPTIONS -fno-warn-name-shadowing #-}moduleLanguage.Core.EncodingwhereimportData.CharimportNumeric-- tjc: TODO: Copied straight out of Encoding.hs.-- Ugh, maybe we can avoid this copy-pasta...-- ------------------------------------------------------------------------------- The Z-encoding{-
This is the main name-encoding and decoding function. It encodes any
string into a string that is acceptable as a C name. This is done
right before we emit a symbol name into the compiled C or asm code.
Z-encoding of strings is cached in the FastString interface, so we
never encode the same string more than once.
The basic encoding scheme is this.
* Tuples (,,,) are coded as Z3T
* Alphabetic characters (upper and lower) and digits
all translate to themselves;
except 'Z', which translates to 'ZZ'
and 'z', which translates to 'zz'
We need both so that we can preserve the variable/tycon distinction
* Most other printable characters translate to 'zx' or 'Zx' for some
alphabetic character x
* The others translate as 'znnnU' where 'nnn' is the decimal number
of the character
Before After
--------------------------
Trak Trak
foo_wib foozuwib
> zg
>1 zg1
foo# foozh
foo## foozhzh
foo##1 foozhzh1
fooZ fooZZ
:+ ZCzp
() Z0T 0-tuple
(,,,,) Z5T 5-tuple
(# #) Z1H unboxed 1-tuple (note the space)
(#,,,,#) Z5H unboxed 5-tuple
(NB: There is no Z1T nor Z0H.)
-}typeUserString=String-- As the user typed ittypeEncodedString=String-- Encoded formzEncodeString::UserString->EncodedStringzEncodeStringcs=casemaybe_tuplecsofJustn->n-- Tuples go to Z2T etcNothing->gocswherego[]=[]go(c:cs)=encode_chc++gocsunencodedChar::Char->Bool-- True for chars that don't need encodingunencodedChar'Z'=FalseunencodedChar'z'=FalseunencodedCharc=c>='a'&&c<='z'||c>='A'&&c<='Z'||c>='0'&&c<='9'encode_ch::Char->EncodedStringencode_chc|unencodedCharc=[c]-- Common case first-- Constructorsencode_ch'('="ZL"-- Needed for things like (,), and (->)encode_ch')'="ZR"-- For symmetry with (encode_ch'['="ZM"encode_ch']'="ZN"encode_ch':'="ZC"encode_ch'Z'="ZZ"-- Variablesencode_ch'z'="zz"encode_ch'&'="za"encode_ch'|'="zb"encode_ch'^'="zc"encode_ch'$'="zd"encode_ch'='="ze"encode_ch'>'="zg"encode_ch'#'="zh"encode_ch'.'="zi"encode_ch'<'="zl"encode_ch'-'="zm"encode_ch'!'="zn"encode_ch'+'="zp"encode_ch'\''="zq"encode_ch'\\'="zr"encode_ch'/'="zs"encode_ch'*'="zt"encode_ch'_'="zu"encode_ch'%'="zv"encode_chc='z':ifisDigit(headhex_str)thenhex_strelse'0':hex_strwherehex_str=showHex(ordc)"U"-- ToDo: we could improve the encoding here in various ways.-- eg. strings of unicode characters come out as 'z1234Uz5678U', we-- could remove the 'U' in the middle (the 'z' works as a separator).showHex=showIntAtBase16intToDigit-- needed because prior to GHC 6.2, Numeric.showHex added a "0x" prefixzDecodeString::EncodedString->UserStringzDecodeString[]=[]zDecodeString('Z':d:rest)|isDigitd=decode_tupledrest|otherwise=decode_upperd:zDecodeStringrestzDecodeString('z':d:rest)|isDigitd=decode_num_escdrest|otherwise=decode_lowerd:zDecodeStringrestzDecodeString(c:rest)=c:zDecodeStringrestdecode_upper,decode_lower::Char->Chardecode_upper'L'='('decode_upper'R'=')'decode_upper'M'='['decode_upper'N'=']'decode_upper'C'=':'decode_upper'Z'='Z'decode_upperch={-pprTrace "decode_upper" (char ch)-}chdecode_lower'z'='z'decode_lower'a'='&'decode_lower'b'='|'decode_lower'c'='^'decode_lower'd'='$'decode_lower'e'='='decode_lower'g'='>'decode_lower'h'='#'decode_lower'i'='.'decode_lower'l'='<'decode_lower'm'='-'decode_lower'n'='!'decode_lower'p'='+'decode_lower'q'='\''decode_lower'r'='\\'decode_lower's'='/'decode_lower't'='*'decode_lower'u'='_'decode_lower'v'='%'decode_lowerch={-pprTrace "decode_lower" (char ch)-}ch-- Characters not having a specific code are coded as z224U (in hex)decode_num_esc::Char->EncodedString->UserStringdecode_num_escdrest=go(digitToIntd)restwheregon(c:rest)|isHexDigitc=go(16*n+digitToIntc)restgon('U':rest)=chrn:zDecodeStringrestgonother=error("decode_num_esc: "++shown++' ':other)decode_tuple::Char->EncodedString->UserStringdecode_tupledrest=go(digitToIntd)restwhere-- NB. recurse back to zDecodeString after decoding the tuple, because-- the tuple might be embedded in a longer name.gon(c:rest)|isDigitc=go(10*n+digitToIntc)restgo0('T':rest)="()"++zDecodeStringrestgon('T':rest)='(':replicate(n-1)','++")"++zDecodeStringrestgo1('H':rest)="(# #)"++zDecodeStringrestgon('H':rest)='(':'#':replicate(n-1)','++"#)"++zDecodeStringrestgonother=error("decode_tuple: "++shown++' ':other){-
Tuples are encoded as
Z3T or Z3H
for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
Z<digit>
* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
There are no unboxed 0-tuples.
* "()" is the tycon for a boxed 0-tuple.
There are no boxed 1-tuples.
-}maybe_tuple::UserString->MaybeEncodedStringmaybe_tuple"(# #)"=Just("Z1H")maybe_tuple('(':'#':cs)=casecount_commas(0::Int)csof(n,'#':')':_)->Just('Z':shows(n+1)"H")_->Nothingmaybe_tuple"()"=Just("Z0T")maybe_tuple('(':cs)=casecount_commas(0::Int)csof(n,')':_)->Just('Z':shows(n+1)"T")_->Nothingmaybe_tuple_=Nothingcount_commas::Int->String->(Int,String)count_commasn(',':cs)=count_commas(n+1)cscount_commasncs=(n,cs)