{-# OPTIONS_GHC -XNoImplicitPrelude #-}
{-# OPTIONS -#include "WCsubst.h" #-}
{-# OPTIONS_HADDOCK hide #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.Unicode
-- Copyright   :  (c) The University of Glasgow, 2003
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  cvs-ghc@haskell.org
-- Stability   :  internal
-- Portability :  non-portable (GHC extensions)
--
-- Implementations for the character predicates (isLower, isUpper, etc.)
-- and the conversions (toUpper, toLower, toTitle).  The active
-- implementation always calls the functions contained in WCsubst.c
-- (imported below as @u_isw*@ \/ @u_tow*@); an ASCII\/Latin-1 fallback is
-- kept under @#else@ but is currently never compiled.
--
-----------------------------------------------------------------------------

-- #hide
module GHC.Unicode (
    isAscii, isLatin1, isControl,
    isAsciiUpper, isAsciiLower,
    isPrint, isSpace,  isUpper,
    isLower, isAlpha,  isDigit,
    isOctDigit, isHexDigit, isAlphaNum,
    toUpper, toLower, toTitle,
    wgencat,
  ) where

import GHC.Base
import GHC.Real        (fromIntegral)
import Foreign.C.Types (CInt)
import GHC.Num         (fromInteger)

#include "HsBaseConfig.h"

-- | Selects the first 128 characters of the Unicode character set,
-- corresponding to the ASCII character set.
isAscii                 :: Char -> Bool
isAscii c               =  c <  '\x80'

-- | Selects the first 256 characters of the Unicode character set,
-- corresponding to the ISO 8859-1 (Latin-1) character set.
isLatin1                :: Char -> Bool
isLatin1 c              =  c <= '\xff'

-- | Selects ASCII lower-case letters,
-- i.e. characters satisfying both 'isAscii' and 'isLower'.
isAsciiLower            :: Char -> Bool
isAsciiLower c          =  c >= 'a' && c <= 'z'

-- | Selects ASCII upper-case letters,
-- i.e. characters satisfying both 'isAscii' and 'isUpper'.
isAsciiUpper            :: Char -> Bool
isAsciiUpper c          =  c >= 'A' && c <= 'Z'

-- | Selects control characters, which are the non-printing characters of
-- the Latin-1 subset of Unicode.
isControl               :: Char -> Bool

-- | Selects printable Unicode characters
-- (letters, numbers, marks, punctuation, symbols and spaces).
isPrint                 :: Char -> Bool

-- | Selects white-space characters: the space character, the control
-- characters @\\t@, @\\n@, @\\r@, @\\f@ and @\\v@, the non-breaking space
-- (U+00A0), and any other character for which the @u_iswspace@ helper
-- returns a non-zero result.
isSpace                 :: Char -> Bool
-- isSpace includes non-breaking space
-- Done with explicit equalities both for efficiency, and to avoid a tiresome
-- recursion with GHC.List elem
isSpace c               =  c == ' '     ||
                           c == '\t'    ||
                           c == '\n'    ||
                           c == '\r'    ||
                           c == '\f'    ||
                           c == '\v'    ||
                           c == '\xa0'  ||
                           -- Anything not caught by the fast path above is
                           -- decided by the C helper.
                           iswspace (fromIntegral (ord c)) /= 0

-- | Selects upper-case or title-case alphabetic Unicode characters (letters).
-- Title case is used by a small number of letter ligatures like the
-- single-character form of /Lj/.
isUpper                 :: Char -> Bool

-- | Selects lower-case alphabetic Unicode characters (letters).
isLower                 :: Char -> Bool

-- | Selects alphabetic Unicode characters (lower-case, upper-case and
-- title-case letters, plus letters of caseless scripts and modifiers letters).
-- This function is equivalent to 'Data.Char.isLetter'.
isAlpha                 :: Char -> Bool

-- | Selects alphabetic or numeric digit Unicode characters.
--
-- Note that numeric digits outside the ASCII range are selected by this
-- function but not by 'isDigit'.  Such digits may be part of identifiers
-- but are not used by the printer and reader to represent numbers.
isAlphaNum              :: Char -> Bool

-- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@.
isDigit                 :: Char -> Bool
isDigit c               =  c >= '0' && c <= '9'

-- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@.
isOctDigit              :: Char -> Bool
isOctDigit c            =  c >= '0' && c <= '7'

-- | Selects ASCII hexadecimal digits,
-- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@.
isHexDigit              :: Char -> Bool
isHexDigit c            =  isDigit c || c >= 'A' && c <= 'F' ||
                                        c >= 'a' && c <= 'f'

-- | Convert a letter to the corresponding upper-case letter, if any.
-- Any other character is returned unchanged.
toUpper                 :: Char -> Char

-- | Convert a letter to the corresponding lower-case letter, if any.
-- Any other character is returned unchanged.
toLower                 :: Char -> Char

-- | Convert a letter to the corresponding title-case or upper-case
-- letter, if any.  (Title case differs from upper case only for a small
-- number of ligature letters.)
-- Any other character is returned unchanged.
toTitle                 :: Char -> Char

-- -----------------------------------------------------------------------------
-- Implementation with the supplied auto-generated Unicode character properties
-- table (default)

#if 1

-- Regardless of the O/S and Library, use the functions contained in WCsubst.c
--
-- Each predicate converts the character to its code point as a CInt, hands
-- it to the C helper, and treats any non-zero result as True.

isAlpha    c = iswalpha (fromIntegral (ord c)) /= 0
isAlphaNum c = iswalnum (fromIntegral (ord c)) /= 0
--isSpace    c = iswspace (fromIntegral (ord c)) /= 0
isControl  c = iswcntrl (fromIntegral (ord c)) /= 0
isPrint    c = iswprint (fromIntegral (ord c)) /= 0
isUpper    c = iswupper (fromIntegral (ord c)) /= 0
isLower    c = iswlower (fromIntegral (ord c)) /= 0

-- The conversions round-trip through CInt: code point in, code point out.
toLower c = chr (fromIntegral (towlower (fromIntegral (ord c))))
toUpper c = chr (fromIntegral (towupper (fromIntegral (ord c))))
toTitle c = chr (fromIntegral (towtitle (fromIntegral (ord c))))

foreign import ccall unsafe "u_iswalpha"
  iswalpha :: CInt -> CInt

foreign import ccall unsafe "u_iswalnum"
  iswalnum :: CInt -> CInt

foreign import ccall unsafe "u_iswcntrl"
  iswcntrl :: CInt -> CInt

foreign import ccall unsafe "u_iswspace"
  iswspace :: CInt -> CInt

foreign import ccall unsafe "u_iswprint"
  iswprint :: CInt -> CInt

foreign import ccall unsafe "u_iswlower"
  iswlower :: CInt -> CInt

foreign import ccall unsafe "u_iswupper"
  iswupper :: CInt -> CInt

foreign import ccall unsafe "u_towlower"
  towlower :: CInt -> CInt

foreign import ccall unsafe "u_towupper"
  towupper :: CInt -> CInt

foreign import ccall unsafe "u_towtitle"
  towtitle :: CInt -> CInt

foreign import ccall unsafe "u_gencat"
  wgencat :: CInt -> CInt

-- -----------------------------------------------------------------------------
-- No libunicode, so fall back to the ASCII-only implementation (never used, indeed)

#else

-- NOTE(review): this branch is dead while the guard above is "#if 1".
-- 'wgencat' is exported by the module but has no definition here, and
-- 'isSpace' above still refers to the foreign 'iswspace' import that only
-- the other branch provides, so both need attention before this branch
-- can be enabled.

isControl c             =  c < ' ' || c >= '\DEL' && c <= '\x9f'
isPrint c               =  not (isControl c)

-- The upper case ISO characters have the multiplication sign dumped
-- randomly in the middle of the range.  Go figure.
isUpper c               =  c >= 'A' && c <= 'Z' ||
                           c >= '\xC0' && c <= '\xD6' ||
                           c >= '\xD8' && c <= '\xDE'
-- The lower case ISO characters have the division sign dumped
-- randomly in the middle of the range.  Go figure.
isLower c               =  c >= 'a' && c <= 'z' ||
                           c >= '\xDF' && c <= '\xF6' ||
                           c >= '\xF8' && c <= '\xFF'

isAlpha c               =  isLower c || isUpper c
isAlphaNum c            =  isAlpha c || isDigit c

-- Case-changing operations

toUpper c@(C# c#)
  | isAsciiLower c    = C# (chr# (ord# c# -# 32#))
  | isAscii c         = c
    -- fall-through to the slower stuff.
    -- '\xDF' and '\xFF' are excluded because subtracting the 'a'-'A'
    -- offset from them would not yield an upper-case letter.
  | isLower c   && c /= '\xDF' && c /= '\xFF'
  = unsafeChr (ord c `minusInt` ord 'a' `plusInt` ord 'A')
  | otherwise
  = c

toLower c@(C# c#)
  | isAsciiUpper c = C# (chr# (ord# c# +# 32#))
  | isAscii c      = c
  | isUpper c      = unsafeChr (ord c `minusInt` ord 'A' `plusInt` ord 'a')
  | otherwise      =  c

-- 'toTitle' has an unconditional type signature above, so this branch must
-- bind it too.  The fallback only knows the Latin-1 letters handled by
-- 'toUpper', so title case is taken to be the same as upper case here.
toTitle c               =  toUpper c

#endif