{-# LANGUAGE CPP #-}---------------------------------------------------------------------------------- |-- Module : Network.URI-- Copyright : (c) 2004, Graham Klyne-- License : BSD-style (see end of this file)---- Maintainer : Graham Klyne <gk@ninebynine.org>-- Stability : provisional-- Portability : portable---- This module defines functions for handling URIs. It presents substantially the-- same interface as the older GHC Network.URI module, but is implemented using-- Parsec rather than a Regex library that is not available with Hugs. The internal-- representation of URI has been changed so that URI strings are more-- completely preserved when round-tripping to a URI value and back.---- In addition, four methods are provided for parsing different-- kinds of URI string (as noted in RFC3986):-- 'parseURI',-- 'parseURIReference',-- 'parseRelativeReference' and-- 'parseAbsoluteURI'.---- Further, four methods are provided for classifying different-- kinds of URI string (as noted in RFC3986):-- 'isURI',-- 'isURIReference',-- 'isRelativeReference' and-- 'isAbsoluteURI'.---- The long-standing official reference for URI handling was RFC2396 [1],-- as updated by RFC 2732 [2], but this was replaced by a new specification,-- RFC3986 [3] in January 2005. This latter specification has been used-- as the primary reference for constructing the URI parser implemented-- here, and it is intended that there is a direct relationship between-- the syntax definition in that document and this parser implementation.---- RFC 1808 [4] contains a number of test cases for relative URI handling.-- Dan Connolly's Python module @uripath.py@ [5] also contains useful details-- and test cases.---- Some of the code has been copied from the previous GHC implementation,-- but the parser is replaced with one that performs more complete-- syntax checking of the URI itself, according to RFC3986 [3].---- References---- (1) <http://www.ietf.org/rfc/rfc2396.txt>---- (2) <http://www.ietf.org/rfc/rfc2732.txt>---- (3) <http://www.ietf.org/rfc/rfc3986.txt>---- (4) <http://www.ietf.org/rfc/rfc1808.txt>---- (5) <http://www.w3.org/2000/10/swap/uripath.py>----------------------------------------------------------------------------------moduleNetwork.URI(-- * The URI typeURI(..),URIAuth(..),nullURI-- * Parsing,parseURI,parseURIReference,parseRelativeReference,parseAbsoluteURI-- * Test for strings containing various kinds of URI,isURI,isURIReference,isRelativeReference,isAbsoluteURI,isIPv6address,isIPv4address-- * Predicates,uriIsAbsolute,uriIsRelative-- * Relative URIs,relativeTo,nonStrictRelativeTo,relativeFrom-- * Operations on URI strings-- | Support for putting strings into URI-friendly-- escaped format and getting them back again.-- This can't be done transparently in all cases, because certain-- characters have different meanings in different kinds of URI.-- The URI spec [3], section 2.4, indicates that all URI components-- should be escaped before they are assembled as a URI:-- \"Once produced, a URI is always in its percent-encoded form\",uriToString,isReserved,isUnreserved,isAllowedInURI,isUnescapedInURI,isUnescapedInURIComponent,escapeURIChar,escapeURIString,unEscapeString-- * URI Normalization functions,normalizeCase,normalizeEscape,normalizePathSegments-- * Deprecated functions,parseabsoluteURI,escapeString,reserved,unreserved,scheme,authority,path,query,fragment)whereimportText.ParserCombinators.Parsec(GenParser,ParseError,parse,(<|>),(<?>),try,option,many,many1,count,notFollowedBy,char,satisfy,oneOf,string,eof,unexpected)importControl.Monad(MonadPlus(..))importData.Char(ord,chr,isHexDigit,toLower,toUpper,digitToInt)importData.Bits((.|.),(.&.),shiftL,shiftR)importDebug.Trace(trace)importNumeric(showIntAtBase)importData.Typeable(Typeable)#if MIN_VERSION_base(4,0,0)importData.Data(Data)#elseimportData.Generics(Data)#endif-------------------------------------------------------------- The URI datatype-------------------------------------------------------------- |Represents a general universal resource identifier using-- its component parts.---- For example, for the URI---- > foo://anonymous@www.haskell.org:42/ghc?query#frag---- the components are:--dataURI=URI{uriScheme::String-- ^ @foo:@,uriAuthority::MaybeURIAuth-- ^ @\/\/anonymous\@www.haskell.org:42@,uriPath::String-- ^ @\/ghc@,uriQuery::String-- ^ @?query@,uriFragment::String-- ^ @#frag@}deriving(Eq,Ord,Typeable,Data)-- |Type for authority value within a URIdataURIAuth=URIAuth{uriUserInfo::String-- ^ @anonymous\@@,uriRegName::String-- ^ @www.haskell.org@,uriPort::String-- ^ @:42@}deriving(Eq,Ord,Show,Typeable,Data)-- |Blank URInullURI::URInullURI=URI{uriScheme="",uriAuthority=Nothing,uriPath="",uriQuery="",uriFragment=""}-- URI as instance of Show. Note that for security reasons, the default-- behaviour is to suppress any userinfo field (see RFC3986, section 7.5).-- This can be overridden by using uriToString directly with first-- argument @id@ (noting that this returns a ShowS value rather than a string).---- [[[Another design would be to embed the userinfo mapping function in-- the URIAuth value, with the default value suppressing userinfo formatting,-- but providing a function to return a new URI value with userinfo-- data exposed by show.]]]--instanceShowURIwhereshowsPrec_=uriToStringdefaultUserInfoMapdefaultUserInfoMap::String->StringdefaultUserInfoMapuinf=user++newpasswhere(user,pass)=break(==':')uinfnewpass=ifnullpass||(pass=="@")||(pass==":@")thenpasselse":...@"testDefaultUserInfoMap::[Bool]testDefaultUserInfoMap=[defaultUserInfoMap""=="",defaultUserInfoMap"@"=="@",defaultUserInfoMap"user@"=="user@",defaultUserInfoMap"user:@"=="user:@",defaultUserInfoMap"user:anonymous@"=="user:...@",defaultUserInfoMap"user:pass@"=="user:...@",defaultUserInfoMap"user:pass"=="user:...@",defaultUserInfoMap"user:anonymous"=="user:...@"]-------------------------------------------------------------- Parse a URI-------------------------------------------------------------- |Turn a string containing a URI into a 'URI'.-- Returns 'Nothing' if the string is not a valid URI;-- (an absolute URI with optional fragment identifier).---- NOTE: this is different from the previous network.URI,-- whose @parseURI@ function works like 'parseURIReference'-- in this module.--parseURI::String->MaybeURIparseURI=parseURIAnyuri-- |Parse a URI reference to a 'URI' value.-- Returns 'Nothing' if the string is not a valid URI reference.-- (an absolute or relative URI with optional fragment identifier).--parseURIReference::String->MaybeURIparseURIReference=parseURIAnyuriReference-- |Parse a relative URI to a 'URI' value.-- Returns 'Nothing' if the string is not a valid relative URI.-- (a relative URI with optional fragment identifier).--parseRelativeReference::String->MaybeURIparseRelativeReference=parseURIAnyrelativeRef-- |Parse an absolute URI to a 'URI' value.-- Returns 'Nothing' if the string is not a valid absolute URI.-- (an absolute URI without a fragment identifier).--parseAbsoluteURI::String->MaybeURIparseAbsoluteURI=parseURIAnyabsoluteURI-- |Test if string contains a valid URI-- (an absolute URI with optional fragment identifier).--isURI::String->BoolisURI=isValidParseuri-- |Test if string contains a valid URI reference-- (an absolute or relative URI with optional fragment identifier).--isURIReference::String->BoolisURIReference=isValidParseuriReference-- |Test if string contains a valid relative URI-- (a relative URI with optional fragment identifier).--isRelativeReference::String->BoolisRelativeReference=isValidParserelativeRef-- |Test if string contains a valid absolute URI-- (an absolute URI without a fragment identifier).--isAbsoluteURI::String->BoolisAbsoluteURI=isValidParseabsoluteURI-- |Test if string contains a valid IPv6 address--isIPv6address::String->BoolisIPv6address=isValidParseipv6address-- |Test if string contains a valid IPv4 address--isIPv4address::String->BoolisIPv4address=isValidParseipv4address-- |Test function: parse and reconstruct a URI reference--testURIReference::String->StringtestURIReferenceuristr=show(parseAlluriReference""uristr)-- Helper function for turning a string into a URI--parseURIAny::URIParserURI->String->MaybeURIparseURIAnyparseruristr=caseparseAllparser""uristrofLeft_->NothingRightu->Justu-- Helper function to test a string match to a parser--isValidParse::URIParsera->String->BoolisValidParseparseruristr=caseparseAllparser""uristrof-- Left e -> error (show e)Left_->FalseRight_->TrueparseAll::URIParsera->String->String->EitherParseErroraparseAllparserfilenameuristr=parsenewparserfilenameuristrwherenewparser=do{res<-parser;eof;returnres}-------------------------------------------------------------- Predicates------------------------------------------------------------uriIsAbsolute::URI->BooluriIsAbsolute(URI{uriScheme=scheme})=scheme/=""uriIsRelative::URI->BooluriIsRelative=not.uriIsAbsolute-------------------------------------------------------------- URI parser body based on Parsec elements and combinators-------------------------------------------------------------- Parser parser type.-- CurrentlytypeURIParsera=GenParserChar()a-- RFC3986, section 2.1---- Parse and return a 'pct-encoded' sequence--escaped::URIParserStringescaped=do{char'%';h1<-hexDigitChar;h2<-hexDigitChar;return$['%',h1,h2]}-- RFC3986, section 2.2---- |Returns 'True' if the character is a \"reserved\" character in a-- URI. To include a literal instance of one of these characters in a-- component of a URI, it must be escaped.--isReserved::Char->BoolisReservedc=isGenDelimsc||isSubDelimscisGenDelims::Char->BoolisGenDelimsc=c`elem`":/?#[]@"isSubDelims::Char->BoolisSubDelimsc=c`elem`"!$&'()*+,;="genDelims::URIParserStringgenDelims=do{c<-satisfyisGenDelims;return[c]}subDelims::URIParserStringsubDelims=do{c<-satisfyisSubDelims;return[c]}-- RFC3986, section 2.3---- |Returns 'True' if the character is an \"unreserved\" character in-- a URI. These characters do not need to be escaped in a URI. The-- only characters allowed in a URI are either \"reserved\",-- \"unreserved\", or an escape sequence (@%@ followed by two hex digits).--isUnreserved::Char->BoolisUnreservedc=isAlphaNumCharc||(c`elem`"-_.~")unreservedChar::URIParserStringunreservedChar=do{c<-satisfyisUnreserved;return[c]}-- RFC3986, section 3---- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]---- hier-part = "//" authority path-abempty-- / path-abs-- / path-rootless-- / path-emptyuri::URIParserURIuri=do{us<-tryuscheme-- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )-- ; up <- upath;(ua,up)<-hierPart;uq<-option""(do{char'?';uquery});uf<-option""(do{char'#';ufragment});return$URI{uriScheme=us,uriAuthority=ua,uriPath=up,uriQuery=uq,uriFragment=uf}}hierPart::URIParser((MaybeURIAuth),String)hierPart=do{try(string"//");ua<-uauthority;up<-pathAbEmpty;return(ua,up)}<|>do{up<-pathAbs;return(Nothing,up)}<|>do{up<-pathRootLess;return(Nothing,up)}<|>do{return(Nothing,"")}-- RFC3986, section 3.1uscheme::URIParserStringuscheme=do{s<-oneThenManyalphaChar(satisfyisSchemeChar);char':';return$s++":"}-- RFC3986, section 3.2uauthority::URIParser(MaybeURIAuth)uauthority=do{uu<-option""(tryuserinfo);uh<-host;up<-option""port;return$Just$URIAuth{uriUserInfo=uu,uriRegName=uh,uriPort=up}}-- RFC3986, section 3.2.1userinfo::URIParserStringuserinfo=do{uu<-many(uchar";:&=+$,");char'@';return(concatuu++"@")}-- RFC3986, section 3.2.2host::URIParserStringhost=ipLiteral<|>tryipv4address<|>regNameipLiteral::URIParserStringipLiteral=do{char'[';ua<-(ipv6address<|>ipvFuture);char']';return$"["++ua++"]"}<?>"IP address literal"ipvFuture::URIParserStringipvFuture=do{char'v';h<-hexDigitChar;char'.';a<-many1(satisfyisIpvFutureChar);return$'v':h:'.':a}isIpvFutureChar::Char->BoolisIpvFutureCharc=isUnreservedc||isSubDelimsc||(c==';')ipv6address::URIParserStringipv6address=try(do{a2<-count6h4c;a3<-ls32;return$concata2++a3})<|>try(do{string"::";a2<-count5h4c;a3<-ls32;return$"::"++concata2++a3})<|>try(do{a1<-opt_n_h4c_h40;string"::";a2<-count4h4c;a3<-ls32;return$a1++"::"++concata2++a3})<|>try(do{a1<-opt_n_h4c_h41;string"::";a2<-count3h4c;a3<-ls32;return$a1++"::"++concata2++a3})<|>try(do{a1<-opt_n_h4c_h42;string"::";a2<-count2h4c;a3<-ls32;return$a1++"::"++concata2++a3})<|>try(do{a1<-opt_n_h4c_h43;string"::";a2<-h4c;a3<-ls32;return$a1++"::"++a2++a3})<|>try(do{a1<-opt_n_h4c_h44;string"::";a3<-ls32;return$a1++"::"++a3})<|>try(do{a1<-opt_n_h4c_h45;string"::";a3<-h4;return$a1++"::"++a3})<|>try(do{a1<-opt_n_h4c_h46;string"::";return$a1++"::"})<?>"IPv6 address"opt_n_h4c_h4::Int->URIParserStringopt_n_h4c_h4n=option""$do{a1<-countMinMax0nh4c;a2<-h4;return$concata1++a2}ls32::URIParserStringls32=try(do{a1<-h4c;a2<-h4;return(a1++a2)})<|>ipv4addressh4c::URIParserStringh4c=try$do{a1<-h4;char':';notFollowedBy(char':');return$a1++":"}h4::URIParserStringh4=countMinMax14hexDigitCharipv4address::URIParserStringipv4address=do{a1<-decOctet;char'.';a2<-decOctet;char'.';a3<-decOctet;char'.';a4<-decOctet;notFollowedByregName;return$a1++"."++a2++"."++a3++"."++a4}<?>"IPv4 Address"decOctet::URIParserStringdecOctet=do{a1<-countMinMax13digitChar;if(reada1::Integer)>255thenfail"Decimal octet value too large"elsereturna1}regName::URIParserStringregName=do{ss<-countMinMax0255(unreservedChar<|>escaped<|>subDelims);return$concatss}<?>"Registered name"-- RFC3986, section 3.2.3port::URIParserStringport=do{char':';p<-manydigitChar;return(':':p)}---- RFC3986, section 3.3---- path = path-abempty ; begins with "/" or is empty-- / path-abs ; begins with "/" but not "//"-- / path-noscheme ; begins with a non-colon segment-- / path-rootless ; begins with a segment-- / path-empty ; zero characters---- path-abempty = *( "/" segment )-- path-abs = "/" [ segment-nz *( "/" segment ) ]-- path-noscheme = segment-nzc *( "/" segment )-- path-rootless = segment-nz *( "/" segment )-- path-empty = 0<pchar>---- segment = *pchar-- segment-nz = 1*pchar-- segment-nzc = 1*( unreserved / pct-encoded / sub-delims / "@" )---- pchar = unreserved / pct-encoded / sub-delims / ":" / "@"{-
upath :: URIParser String
upath = pathAbEmpty
<|> pathAbs
<|> pathNoScheme
<|> pathRootLess
<|> pathEmpty
-}pathAbEmpty::URIParserStringpathAbEmpty=do{ss<-manyslashSegment;return$concatss}pathAbs::URIParserStringpathAbs=do{char'/';ss<-option""pathRootLess;return$'/':ss}pathNoScheme::URIParserStringpathNoScheme=do{s1<-segmentNzc;ss<-manyslashSegment;return$concat(s1:ss)}pathRootLess::URIParserStringpathRootLess=do{s1<-segmentNz;ss<-manyslashSegment;return$concat(s1:ss)}slashSegment::URIParserStringslashSegment=do{char'/';s<-segment;return('/':s)}segment::URIParserStringsegment=do{ps<-manypchar;return$concatps}segmentNz::URIParserStringsegmentNz=do{ps<-many1pchar;return$concatps}segmentNzc::URIParserStringsegmentNzc=do{ps<-many1(uchar"@");return$concatps}pchar::URIParserStringpchar=uchar":@"-- helper function for pchar and friendsuchar::String->URIParserStringucharextras=unreservedChar<|>escaped<|>subDelims<|>do{c<-oneOfextras;return[c]}-- RFC3986, section 3.4uquery::URIParserStringuquery=do{ss<-many$uchar(":@"++"/?");return$'?':concatss}-- RFC3986, section 3.5ufragment::URIParserStringufragment=do{ss<-many$uchar(":@"++"/?");return$'#':concatss}-- Reference, Relative and Absolute URI forms---- RFC3986, section 4.1uriReference::URIParserURIuriReference=uri<|>relativeRef-- RFC3986, section 4.2---- relative-URI = relative-part [ "?" query ] [ "#" fragment ]---- relative-part = "//" authority path-abempty-- / path-abs-- / path-noscheme-- / path-emptyrelativeRef::URIParserURIrelativeRef=do{notMatchinguscheme-- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )-- ; up <- upath;(ua,up)<-relativePart;uq<-option""(do{char'?';uquery});uf<-option""(do{char'#';ufragment});return$URI{uriScheme="",uriAuthority=ua,uriPath=up,uriQuery=uq,uriFragment=uf}}relativePart::URIParser((MaybeURIAuth),String)relativePart=do{try(string"//");ua<-uauthority;up<-pathAbEmpty;return(ua,up)}<|>do{up<-pathAbs;return(Nothing,up)}<|>do{up<-pathNoScheme;return(Nothing,up)}<|>do{return(Nothing,"")}-- RFC3986, section 4.3absoluteURI::URIParserURIabsoluteURI=do{us<-uscheme-- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )-- ; up <- upath;(ua,up)<-hierPart;uq<-option""(do{char'?';uquery});return$URI{uriScheme=us,uriAuthority=ua,uriPath=up,uriQuery=uq,uriFragment=""}}-- Imports from RFC 2234-- NOTE: can't use isAlphaNum etc. because these deal with ISO 8859-- (and possibly Unicode!) chars.-- [[[Above was a comment originally in GHC Network/URI.hs:-- when IRIs are introduced then most codepoints above 128(?) should-- be treated as unreserved, and higher codepoints for letters should-- certainly be allowed.-- ]]]isAlphaChar::Char->BoolisAlphaCharc=(c>='A'&&c<='Z')||(c>='a'&&c<='z')isDigitChar::Char->BoolisDigitCharc=(c>='0'&&c<='9')isAlphaNumChar::Char->BoolisAlphaNumCharc=isAlphaCharc||isDigitCharcisHexDigitChar::Char->BoolisHexDigitCharc=isHexDigitcisSchemeChar::Char->BoolisSchemeCharc=(isAlphaNumCharc)||(c`elem`"+-.")alphaChar::URIParserCharalphaChar=satisfyisAlphaChar-- or: Parsec.letter ?digitChar::URIParserChardigitChar=satisfyisDigitChar-- or: Parsec.digit ?alphaNumChar::URIParserCharalphaNumChar=satisfyisAlphaNumCharhexDigitChar::URIParserCharhexDigitChar=satisfyisHexDigitChar-- or: Parsec.hexDigit ?-- Additional parser combinators for common patternsoneThenMany::GenParsertsa->GenParsertsa->GenParserts[a]oneThenManyp1pr=do{a1<-p1;ar<-manypr;return(a1:ar)}countMinMax::Int->Int->GenParsertsa->GenParserts[a]countMinMaxmnp|m>0=do{a1<-p;ar<-countMinMax(m-1)(n-1)p;return(a1:ar)}countMinMax_n_|n<=0=return[]countMinMax_np=option[]$do{a1<-p;ar<-countMinMax0(n-1)p;return(a1:ar)}notMatching::Showa=>GenParsertoksta->GenParsertokst()notMatchingp=do{a<-tryp;unexpected(showa)}<|>return()-------------------------------------------------------------- Reconstruct a URI string---------------------------------------------------------------- |Turn a 'URI' into a string.---- Uses a supplied function to map the userinfo part of the URI.---- The Show instance for URI uses a mapping that hides any password-- that may be present in the URI. Use this function with argument @id@-- to preserve the password in the formatted output.--uriToString::(String->String)->URI->ShowSuriToStringuserinfomapURI{uriScheme=myscheme,uriAuthority=myauthority,uriPath=mypath,uriQuery=myquery,uriFragment=myfragment}=(myscheme++).(uriAuthToStringuserinfomapmyauthority).(mypath++).(myquery++).(myfragment++)uriAuthToString::(String->String)->(MaybeURIAuth)->ShowSuriAuthToString_Nothing=id-- shows ""uriAuthToStringuserinfomap(JustURIAuth{uriUserInfo=myuinfo,uriRegName=myregname,uriPort=myport})=("//"++).(ifnullmyuinfothenidelse((userinfomapmyuinfo)++)).(myregname++).(myport++)-------------------------------------------------------------- Character classes-------------------------------------------------------------- | Returns 'True' if the character is allowed in a URI.--isAllowedInURI::Char->BoolisAllowedInURIc=isReservedc||isUnreservedc||c=='%'-- escape char-- | Returns 'True' if the character is allowed unescaped in a URI.--isUnescapedInURI::Char->BoolisUnescapedInURIc=isReservedc||isUnreservedc-- | Returns 'True' if the character is allowed unescaped in a URI component.--isUnescapedInURIComponent::Char->BoolisUnescapedInURIComponentc=not(isReservedc||not(isUnescapedInURIc))-------------------------------------------------------------- Escape sequence handling-------------------------------------------------------------- |Escape character if supplied predicate is not satisfied,-- otherwise return character as singleton string.--escapeURIChar::(Char->Bool)->Char->StringescapeURICharpc|pc=[c]|otherwise=concatMap(\i->'%':myShowHexi"")(utf8EncodeCharc)wheremyShowHex::Int->ShowSmyShowHexnr=caseshowIntAtBase16(toChrHex)nrof[]->"00"[x]->['0',x]cs->cstoChrHexd|d<10=chr(ord'0'+fromIntegrald)|otherwise=chr(ord'A'+fromIntegral(d-10))-- From http://hackage.haskell.org/package/utf8-string-- by Eric Mertens, BSD3-- Returns [Int] for use with showIntAtBaseutf8EncodeChar::Char->[Int]utf8EncodeChar=mapfromIntegral.go.ordwheregooc|oc<=0x7f=[oc]|oc<=0x7ff=[0xc0+(oc`shiftR`6),0x80+oc.&.0x3f]|oc<=0xffff=[0xe0+(oc`shiftR`12),0x80+((oc`shiftR`6).&.0x3f),0x80+oc.&.0x3f]|otherwise=[0xf0+(oc`shiftR`18),0x80+((oc`shiftR`12).&.0x3f),0x80+((oc`shiftR`6).&.0x3f),0x80+oc.&.0x3f]-- |Can be used to make a string valid for use in a URI.--escapeURIString::(Char->Bool)-- ^ a predicate which returns 'False'-- if the character should be escaped->String-- ^ the string to process->String-- ^ the resulting URI stringescapeURIStringps=concatMap(escapeURICharp)s-- |Turns all instances of escaped characters in the string back-- into literal characters.--unEscapeString::String->StringunEscapeString[]=""unEscapeStrings@(c:cs)=caseunEscapeBytesofJust(byte,rest)->unEscapeUtf8byterestNothing->c:unEscapeStringcsunEscapeByte::String->Maybe(Int,String)unEscapeByte('%':x1:x2:s)|isHexDigitx1&&isHexDigitx2=Just(digitToIntx1*16+digitToIntx2,s)unEscapeByte_=Nothing-- Adapted from http://hackage.haskell.org/package/utf8-string-- by Eric Mertens, BSD3unEscapeUtf8::Int->String->StringunEscapeUtf8crest|c<0x80=chrc:unEscapeStringrest|c<0xc0=replacement_character:unEscapeStringrest|c<0xe0=multi1|c<0xf0=multi_byte20xf0x800|c<0xf8=multi_byte30x70x10000|c<0xfc=multi_byte40x30x200000|c<0xfe=multi_byte50x10x4000000|otherwise=replacement_character:unEscapeStringrestwherereplacement_character='\xfffd'multi1=caseunEscapeByterestofJust(c1,ds)|c1.&.0xc0==0x80->letd=((fromEnumc.&.0x1f)`shiftL`6).|.fromEnum(c1.&.0x3f)inifd>=0x000080thentoEnumd:unEscapeStringdselsereplacement_character:unEscapeStringds_->replacement_character:unEscapeStringrestmulti_byteimaskoverlong=auxirest(unEscapeByterest)(c.&.mask)whereaux0rs_acc|overlong<=acc&&acc<=0x10ffff&&(acc<0xd800||0xdfff<acc)&&(acc<0xfffe||0xffff<acc)=chracc:unEscapeStringrs|otherwise=replacement_character:unEscapeStringrsauxn_(Just(r,rs))acc|r.&.0xc0==0x80=aux(n-1)rs(unEscapeByters)$!shiftLacc6.|.(r.&.0x3f)aux_rs__=replacement_character:unEscapeStringrs-------------------------------------------------------------- Resolving a relative URI relative to a base URI-------------------------------------------------------------- |Returns a new 'URI' which represents the value of the-- first 'URI' interpreted as relative to the second 'URI'.-- For example:---- > "foo" `relativeTo` "http://bar.org/" = "http://bar.org/foo"-- > "http:foo" `nonStrictRelativeTo` "http://bar.org/" = "http://bar.org/foo"---- Algorithm from RFC3986 [3], section 5.2.2--nonStrictRelativeTo::URI->URI->URInonStrictRelativeTorefbase=relativeToref'basewhereref'=ifuriSchemeref==uriSchemebasethenref{uriScheme=""}elserefisDefined::(MonadPlusm,Eq(ma))=>ma->BoolisDefineda=a/=mzero-- | Returns a new 'URI' which represents the value of the first 'URI'-- interpreted as relative to the second 'URI'.---- Algorithm from RFC3986 [3], section 5.2relativeTo::URI->URI->URIrelativeTorefbase|isDefined(uriSchemeref)=just_segmentsref|isDefined(uriAuthorityref)=just_segmentsref{uriScheme=uriSchemebase}|isDefined(uriPathref)=if(head(uriPathref)=='/')thenjust_segmentsref{uriScheme=uriSchemebase,uriAuthority=uriAuthoritybase}elsejust_segmentsref{uriScheme=uriSchemebase,uriAuthority=uriAuthoritybase,uriPath=mergePathsbaseref}|isDefined(uriQueryref)=just_segmentsref{uriScheme=uriSchemebase,uriAuthority=uriAuthoritybase,uriPath=uriPathbase}|otherwise=just_segmentsref{uriScheme=uriSchemebase,uriAuthority=uriAuthoritybase,uriPath=uriPathbase,uriQuery=uriQuerybase}wherejust_segmentsu=u{uriPath=removeDotSegments(uriPathu)}mergePathsbr|isDefined(uriAuthorityb)&&nullpb='/':pr|otherwise=dropLastpb++prwherepb=uriPathbpr=uriPathrdropLast=fst.splitLast-- reverse . dropWhile (/='/') . reverse-- Remove dot segments, but protect leading '/' characterremoveDotSegments::String->StringremoveDotSegments('/':ps)='/':elimDotsps[]removeDotSegmentsps=elimDotsps[]-- Second arg accumulates segments processed so far in reverse orderelimDots::String->[String]->String-- elimDots ps rs | traceVal "\nps " ps $ traceVal "rs " rs $ False = error ""elimDots[][]=""elimDots[]rs=concat(reversers)elimDots('.':'/':ps)rs=elimDotspsrselimDots('.':[])rs=elimDots[]rselimDots('.':'.':'/':ps)rs=elimDotsps(drop1rs)elimDots('.':'.':[])rs=elimDots[](drop1rs)elimDotspsrs=elimDotsps1(r:rs)where(r,ps1)=nextSegmentps-- Returns the next segment and the rest of the path from a path string.-- Each segment ends with the next '/' or the end of string.--nextSegment::String->(String,String)nextSegmentps=casebreak(=='/')psof(r,'/':ps1)->(r++"/",ps1)(r,_)->(r,[])-- Split last (name) segment from path, returning (path,name)splitLast::String->(String,String)splitLastp=(reverserevpath,reverserevname)where(revname,revpath)=break(=='/')$reversep-------------------------------------------------------------- Finding a URI relative to a base URI-------------------------------------------------------------- |Returns a new 'URI' which represents the relative location of-- the first 'URI' with respect to the second 'URI'. Thus, the-- values supplied are expected to be absolute URIs, and the result-- returned may be a relative URI.---- Example:---- > "http://example.com/Root/sub1/name2#frag"-- > `relativeFrom` "http://example.com/Root/sub2/name2#frag"-- > == "../sub1/name2#frag"---- There is no single correct implementation of this function,-- but any acceptable implementation must satisfy the following:---- > (uabs `relativeFrom` ubase) `relativeTo` ubase == uabs---- For any valid absolute URI.-- (cf. <http://lists.w3.org/Archives/Public/uri/2003Jan/0008.html>-- <http://lists.w3.org/Archives/Public/uri/2003Jan/0005.html>)--relativeFrom::URI->URI->URIrelativeFromuabsbase|diffuriSchemeuabsbase=uabs|diffuriAuthorityuabsbase=uabs{uriScheme=""}|diffuriPathuabsbase=uabs{uriScheme="",uriAuthority=Nothing,uriPath=relPathFrom(removeBodyDotSegments$uriPathuabs)(removeBodyDotSegments$uriPathbase)}|diffuriQueryuabsbase=uabs{uriScheme="",uriAuthority=Nothing,uriPath=""}|otherwise=uabs-- Always carry fragment from uabs{uriScheme="",uriAuthority=Nothing,uriPath="",uriQuery=""}wherediff::Eqb=>(a->b)->a->a->Booldiffselu1u2=selu1/=selu2-- Remove dot segments except the final segmentremoveBodyDotSegmentsp=removeDotSegmentsp1++p2where(p1,p2)=splitLastprelPathFrom::String->String->StringrelPathFrom[]_="/"relPathFrompabs[]=pabsrelPathFrompabsbase=-- Construct a relative path segmentsifsa1==sb1-- if the paths share a leading segmentthenif(sa1=="/")-- other than a leading '/'thenif(sa2==sb2)thenrelPathFrom1ra2rb2elsepabselserelPathFrom1ra1rb1elsepabswhere(sa1,ra1)=nextSegmentpabs(sb1,rb1)=nextSegmentbase(sa2,ra2)=nextSegmentra1(sb2,rb2)=nextSegmentrb1-- relPathFrom1 strips off trailing names from the supplied paths,-- and calls difPathFrom to find the relative path from base to-- targetrelPathFrom1::String->String->StringrelPathFrom1pabsbase=relNamewhere(sa,na)=splitLastpabs(sb,nb)=splitLastbaserp=relSegsFromsasbrelName=ifnullrpthenif(na==nb)then""elseifprotectnathen"./"++naelsenaelserp++na-- Precede name with some path if it is null or contains a ':'protects=nulls||':'`elem`s-- relSegsFrom discards any common leading segments from both paths,-- then invokes difSegsFrom to calculate a relative path from the end-- of the base path to the end of the target path.-- The final name is handled separately, so this deals only with-- "directory" segtments.--relSegsFrom::String->String->String{-
relSegsFrom sabs base
| traceVal "\nrelSegsFrom\nsabs " sabs $ traceVal "base " base $
False = error ""
-}relSegsFrom[][]=""-- paths are identicalrelSegsFromsabsbase=ifsa1==sb1thenrelSegsFromra1rb1elsedifSegsFromsabsbasewhere(sa1,ra1)=nextSegmentsabs(sb1,rb1)=nextSegmentbase-- difSegsFrom calculates a path difference from base to target,-- not including the final name at the end of the path-- (i.e. results always ends with '/')---- This function operates under the invariant that the supplied-- value of sabs is the desired path relative to the beginning of-- base. Thus, when base is empty, the desired path has been found.--difSegsFrom::String->String->String{-
difSegsFrom sabs base
| traceVal "\ndifSegsFrom\nsabs " sabs $ traceVal "base " base $
False = error ""
-}difSegsFromsabs""=sabsdifSegsFromsabsbase=difSegsFrom("../"++sabs)(snd$nextSegmentbase)-------------------------------------------------------------- Other normalization functions-------------------------------------------------------------- |Case normalization; cf. RFC3986 section 6.2.2.1-- NOTE: authority case normalization is not performed--normalizeCase::String->StringnormalizeCaseuristr=ncSchemeuristrwherencScheme(':':cs)=':':ncEscapecsncScheme(c:cs)|isSchemeCharc=toLowerc:ncSchemecsncScheme_=ncEscapeuristr-- no scheme presentncEscape('%':h1:h2:cs)='%':toUpperh1:toUpperh2:ncEscapecsncEscape(c:cs)=c:ncEscapecsncEscape[]=[]-- |Encoding normalization; cf. RFC3986 section 6.2.2.2--normalizeEscape::String->StringnormalizeEscape('%':h1:h2:cs)|isHexDigith1&&isHexDigith2&&isUnreservedescval=escval:normalizeEscapecswhereescval=chr(digitToInth1*16+digitToInth2)normalizeEscape(c:cs)=c:normalizeEscapecsnormalizeEscape[]=[]-- |Path segment normalization; cf. RFC3986 section 6.2.2.4--normalizePathSegments::String->StringnormalizePathSegmentsuristr=normstrjuriwherejuri=parseURIuristrnormstrNothing=uristrnormstr(Justu)=show(normuriu)normuriu=u{uriPath=removeDotSegments(uriPathu)}-------------------------------------------------------------- Local trace helper functions------------------------------------------------------------traceShow::Showa=>String->a->atraceShowmsgx=trace(msg++showx)xtraceVal::Showa=>String->a->b->btraceValmsgxy=trace(msg++showx)y-------------------------------------------------------------- Deprecated functions------------------------------------------------------------{-# DEPRECATED parseabsoluteURI "use parseAbsoluteURI" #-}parseabsoluteURI::String->MaybeURIparseabsoluteURI=parseAbsoluteURI{-# DEPRECATED escapeString "use escapeURIString, and note the flipped arguments" #-}escapeString::String->(Char->Bool)->StringescapeString=flipescapeURIString{-# DEPRECATED reserved "use isReserved" #-}reserved::Char->Boolreserved=isReserved{-# DEPRECATED unreserved "use isUnreserved" #-}unreserved::Char->Boolunreserved=isUnreserved-- Additional component access functions for backward compatibility{-# DEPRECATED scheme "use uriScheme" #-}scheme::URI->Stringscheme=orNullinit.uriScheme{-# DEPRECATED authority "use uriAuthority, and note changed functionality" #-}authority::URI->Stringauthority=dropss.($"").uriAuthToStringid.uriAuthoritywhere-- Old-style authority component does not include leading '//'dropss('/':'/':s)=sdropsss=s{-# DEPRECATED path "use uriPath" #-}path::URI->Stringpath=uriPath{-# DEPRECATED query "use uriQuery, and note changed functionality" #-}query::URI->Stringquery=orNulltail.uriQuery{-# DEPRECATED fragment "use uriFragment, and note changed functionality" #-}fragment::URI->Stringfragment=orNulltail.uriFragmentorNull::([a]->[a])->[a]->[a]orNull_[]=[]orNullfas=fas------------------------------------------------------------------------------------ Copyright (c) 2004, G. KLYNE. All rights reserved.-- Distributed as free software under the following license.---- Redistribution and use in source and binary forms, with or without-- modification, are permitted provided that the following conditions-- are met:---- - Redistributions of source code must retain the above copyright notice,-- this list of conditions and the following disclaimer.---- - Redistributions in binary form must reproduce the above copyright-- notice, this list of conditions and the following disclaimer in the-- documentation and/or other materials provided with the distribution.---- - Neither name of the copyright holders nor the names of its-- contributors may be used to endorse or promote products derived from-- this software without specific prior written permission.---- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT-- HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,-- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS-- OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR-- TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE-- USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.----------------------------------------------------------------------------------