{-# LANGUAGE DeriveDataTypeable #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Trifecta.CharSet.Unicode
-- Copyright   :  (c) Edward Kmett 2010
-- License     :  BSD3
-- Maintainer  :  ekmett@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Provides unicode general categories, which are typically denoted by
-- @\p{Ll}@ or @\p{Modifier_Letter}@. Lookups can be constructed using
-- 'unicodeCategories' or individual character sets can be used directly.
-----------------------------------------------------------------------------

module Text.Trifecta.CharSet.Unicode
    (
    -- * Unicode General Category
      UnicodeCategory(..)
    -- * Lookup
    , unicodeCategories
    -- * CharSets by UnicodeCategory
    -- ** Letter
    , modifierLetter, otherLetter, letter
    -- *** Letter\&
    , lowercaseLetter, uppercaseLetter, titlecaseLetter, letterAnd
    -- ** Mark
    , nonSpacingMark, spacingCombiningMark, enclosingMark, mark
    -- ** Separator
    , space, lineSeparator, paragraphSeparator, separator
    -- ** Symbol
    , mathSymbol, currencySymbol, modifierSymbol, otherSymbol, symbol
    -- ** Number
    , decimalNumber, letterNumber, otherNumber, number
    -- ** Punctuation
    , dashPunctuation, openPunctuation, closePunctuation, initialQuote
    , finalQuote, connectorPunctuation, otherPunctuation, punctuation
    -- ** Other
    , control, format, privateUse, surrogate, notAssigned, other
    ) where

import Data.Char
import Data.Data
import Text.Trifecta.CharSet

-- | One entry of the general-category lookup table.
--
-- The constructor is positional. As populated by 'unicodeCategories', the
-- fields are, in order:
--
--   1. the long category name, e.g. @\"Lowercase_Letter\"@
--
--   2. the abbreviated category name, e.g. @\"Ll\"@
--
--   3. the 'CharSet' associated with the category
--
--   4. a human-readable description of the category
data UnicodeCategory = UnicodeCategory String String CharSet String
    deriving (Show, Data, Typeable)

-- \p{Letter} or \p{Mc}

-- | Table of every category known to this module, pairing both spellings of
-- the category name (long and abbreviated) with its 'CharSet' and a short
-- description.
unicodeCategories :: [UnicodeCategory]
unicodeCategories =
    [ UnicodeCategory "Letter" "L" letter "any kind of letter from any language."
    , UnicodeCategory "Lowercase_Letter" "Ll" lowercaseLetter "a lowercase letter that has an uppercase variant"
    , UnicodeCategory "Uppercase_Letter" "Lu" uppercaseLetter "an uppercase letter that has a lowercase variant"
    , UnicodeCategory "Titlecase_Letter" "Lt" titlecaseLetter "a letter that appears at the start of a word when only the first letter of the word is capitalized"
    , UnicodeCategory "Letter&" "L&" letterAnd "a letter that exists in lowercase and uppercase variants (combination of Ll, Lu and Lt)"
    , UnicodeCategory "Modifier_Letter" "Lm" modifierLetter "a special character that is used like a letter"
    , UnicodeCategory "Other_Letter" "Lo" otherLetter "a letter or ideograph that does not have lowercase and uppercase variants"
    , UnicodeCategory "Mark" "M" mark "a character intended to be combined with another character (e.g. accents, umlauts, enclosing boxes, etc.)"
    , UnicodeCategory "Non_Spacing_Mark" "Mn" nonSpacingMark "a character intended to be combined with another character without taking up extra space (e.g. accents, umlauts, etc.)"
    , UnicodeCategory "Spacing_Combining_Mark" "Mc" spacingCombiningMark "a character intended to be combined with another character that takes up extra space (vowel signs in many Eastern languages)"
      -- Description typo fixed: was "the character is is combined with".
    , UnicodeCategory "Enclosing_Mark" "Me" enclosingMark "a character that encloses the character it is combined with (circle, square, keycap, etc.)"
    , UnicodeCategory "Separator" "Z" separator "any kind of whitespace or invisible separator"
    , UnicodeCategory "Space_Separator" "Zs" space "a whitespace character that is invisible, but does take up space"
    , UnicodeCategory "Line_Separator" "Zl" lineSeparator "line separator character U+2028"
    , UnicodeCategory "Paragraph_Separator" "Zp" paragraphSeparator "paragraph separator character U+2029"
    , UnicodeCategory "Symbol" "S" symbol "math symbols, currency signs, dingbats, box-drawing characters, etc."
    , UnicodeCategory "Math_Symbol" "Sm" mathSymbol "any mathematical symbol"
    , UnicodeCategory "Currency_Symbol" "Sc" currencySymbol "any currency sign"
    , UnicodeCategory "Modifier_Symbol" "Sk" modifierSymbol "a combining character (mark) as a full character on its own"
    , UnicodeCategory "Other_Symbol" "So" otherSymbol "various symbols that are not math symbols, currency signs, or combining characters"
    , UnicodeCategory "Number" "N" number "any kind of numeric character in any script"
    , UnicodeCategory "Decimal_Digit_Number" "Nd" decimalNumber "a digit zero through nine in any script except ideographic scripts"
    , UnicodeCategory "Letter_Number" "Nl" letterNumber "a number that looks like a letter, such as a Roman numeral"
    , UnicodeCategory "Other_Number" "No" otherNumber "a superscript or subscript digit, or a number that is not a digit 0..9 (excluding numbers from ideographic scripts)"
    , UnicodeCategory "Punctuation" "P" punctuation "any kind of punctuation character"
    , UnicodeCategory "Dash_Punctuation" "Pd" dashPunctuation "any kind of hyphen or dash"
    , UnicodeCategory "Open_Punctuation" "Ps" openPunctuation "any kind of opening bracket"
    , UnicodeCategory "Close_Punctuation" "Pe" closePunctuation "any kind of closing bracket"
    , UnicodeCategory "Initial_Punctuation" "Pi" initialQuote "any kind of opening quote"
    , UnicodeCategory "Final_Punctuation" "Pf" finalQuote "any kind of closing quote"
    , UnicodeCategory "Connector_Punctuation" "Pc" connectorPunctuation "a punctuation character such as an underscore that connects words"
    , UnicodeCategory "Other_Punctuation" "Po" otherPunctuation "any kind of punctuation character that is not a dash, bracket, quote or connector"
    , UnicodeCategory "Other" "C" other "invisible control characters and unused code points"
    , UnicodeCategory "Control" "Cc" control "an ASCII 0x00..0x1F or Latin-1 0x80..0x9F control character"
    , UnicodeCategory "Format" "Cf" format "invisible formatting indicator"
    , UnicodeCategory "Private_Use" "Co" privateUse "any code point reserved for private use"
    , UnicodeCategory "Surrogate" "Cs" surrogate "one half of a surrogate pair in UTF-16 encoding"
      -- Stray trailing word removed: the description used to end in
      -- "assigned.properties".
    , UnicodeCategory "Unassigned" "Cn" notAssigned "any code point to which no character has been assigned."
    ]

-- | Build the 'CharSet' for a single 'GeneralCategory' by handing 'build' a
-- predicate that holds exactly when a character's 'generalCategory' equals
-- the requested one.
cat :: GeneralCategory -> CharSet
cat category = build ((category ==) . generalCategory)
-- Letter

-- | Characters whose general category is 'LowercaseLetter'.
lowercaseLetter :: CharSet
lowercaseLetter = cat LowercaseLetter

-- | Characters whose general category is 'UppercaseLetter'.
uppercaseLetter :: CharSet
uppercaseLetter = cat UppercaseLetter

-- | Characters whose general category is 'TitlecaseLetter'.
titlecaseLetter :: CharSet
titlecaseLetter = cat TitlecaseLetter

-- | Union of 'lowercaseLetter', 'uppercaseLetter' and 'titlecaseLetter'.
letterAnd :: CharSet
letterAnd =
    lowercaseLetter
        `union` uppercaseLetter
        `union` titlecaseLetter

-- | Characters whose general category is 'ModifierLetter'.
modifierLetter :: CharSet
modifierLetter = cat ModifierLetter

-- | Characters whose general category is 'OtherLetter'.
otherLetter :: CharSet
otherLetter = cat OtherLetter

-- | Union of 'letterAnd', 'modifierLetter' and 'otherLetter'.
letter :: CharSet
letter =
    letterAnd
        `union` modifierLetter
        `union` otherLetter

-- Marks

-- | Characters whose general category is 'NonSpacingMark'.
nonSpacingMark :: CharSet
nonSpacingMark = cat NonSpacingMark

-- | Characters whose general category is 'SpacingCombiningMark'.
spacingCombiningMark :: CharSet
spacingCombiningMark = cat SpacingCombiningMark

-- | Characters whose general category is 'EnclosingMark'.
enclosingMark :: CharSet
enclosingMark = cat EnclosingMark

-- | Union of 'nonSpacingMark', 'spacingCombiningMark' and 'enclosingMark'.
mark :: CharSet
mark =
    nonSpacingMark
        `union` spacingCombiningMark
        `union` enclosingMark

-- Separators

-- | Characters whose general category is 'Space'.
space :: CharSet
space = cat Space

-- | Characters whose general category is 'LineSeparator'.
lineSeparator :: CharSet
lineSeparator = cat LineSeparator

-- | Characters whose general category is 'ParagraphSeparator'.
paragraphSeparator :: CharSet
paragraphSeparator = cat ParagraphSeparator

-- | Union of 'space', 'lineSeparator' and 'paragraphSeparator'.
separator :: CharSet
separator =
    space
        `union` lineSeparator
        `union` paragraphSeparator

-- Symbols

-- | Characters whose general category is 'MathSymbol'.
mathSymbol :: CharSet
mathSymbol = cat MathSymbol

-- | Characters whose general category is 'CurrencySymbol'.
currencySymbol :: CharSet
currencySymbol = cat CurrencySymbol

-- | Characters whose general category is 'ModifierSymbol'.
modifierSymbol :: CharSet
modifierSymbol = cat ModifierSymbol

-- | Characters whose general category is 'OtherSymbol'.
otherSymbol :: CharSet
otherSymbol = cat OtherSymbol

-- | Union of 'mathSymbol', 'currencySymbol', 'modifierSymbol' and
-- 'otherSymbol'.
symbol :: CharSet
symbol =
    mathSymbol
        `union` currencySymbol
        `union` modifierSymbol
        `union` otherSymbol

-- Numbers

-- | Characters whose general category is 'DecimalNumber'.
decimalNumber :: CharSet
decimalNumber = cat DecimalNumber

-- | Characters whose general category is 'LetterNumber'.
letterNumber :: CharSet
letterNumber = cat LetterNumber

-- | Characters whose general category is 'OtherNumber'.
otherNumber :: CharSet
otherNumber = cat OtherNumber

-- | Union of 'decimalNumber', 'letterNumber' and 'otherNumber'.
number :: CharSet
number =
    decimalNumber
        `union` letterNumber
        `union` otherNumber

-- Punctuation

-- | Characters whose general category is 'DashPunctuation'.
dashPunctuation :: CharSet
dashPunctuation = cat DashPunctuation

-- | Characters whose general category is 'OpenPunctuation'.
openPunctuation :: CharSet
openPunctuation = cat OpenPunctuation

-- | Characters whose general category is 'ClosePunctuation'.
closePunctuation :: CharSet
closePunctuation = cat ClosePunctuation

-- | Characters whose general category is 'InitialQuote'.
initialQuote :: CharSet
initialQuote = cat InitialQuote

-- | Characters whose general category is 'FinalQuote'.
finalQuote :: CharSet
finalQuote = cat FinalQuote

-- | Characters whose general category is 'ConnectorPunctuation'.
connectorPunctuation :: CharSet
connectorPunctuation = cat ConnectorPunctuation

-- | Characters whose general category is 'OtherPunctuation'.
otherPunctuation :: CharSet
otherPunctuation = cat OtherPunctuation

-- | Union of all seven punctuation sets defined above.
punctuation :: CharSet
punctuation =
    dashPunctuation
        `union` openPunctuation
        `union` closePunctuation
        `union` initialQuote
        `union` finalQuote
        `union` connectorPunctuation
        `union` otherPunctuation

-- Other

-- | Characters whose general category is 'Control'.
control :: CharSet
control = cat Control

-- | Characters whose general category is 'Format'.
format :: CharSet
format = cat Format

-- | Characters whose general category is 'PrivateUse'.
privateUse :: CharSet
privateUse = cat PrivateUse

-- | Characters whose general category is 'Surrogate'.
surrogate :: CharSet
surrogate = cat Surrogate

-- | Characters whose general category is 'NotAssigned'.
notAssigned :: CharSet
notAssigned = cat NotAssigned

-- | Union of 'control', 'format', 'privateUse', 'surrogate' and
-- 'notAssigned'.
other :: CharSet
other =
    control
        `union` format
        `union` privateUse
        `union` surrogate
        `union` notAssigned