Código fonte para django.utils.encoding

import codecs
import datetime
import locale
from decimal import Decimal
from urllib.parse import quote

from django.utils import six
from django.utils.functional import Promise


class DjangoUnicodeDecodeError(UnicodeDecodeError):
    """A UnicodeDecodeError that also remembers the object that failed to decode."""

    def __init__(self, obj, *args):
        # Keep the offending object so __str__ can show it; the remaining
        # positional arguments are passed to UnicodeDecodeError unchanged.
        self.obj = obj
        super().__init__(*args)

    def __str__(self):
        """Return the standard message followed by the repr and type of the object."""
        return '%s. You passed in %r (%s)' % (super().__str__(), self.obj, type(self.obj))


# For backwards compatibility. (originally in Django, then added to six 1.9)
python_2_unicode_compatible = six.python_2_unicode_compatible

def smart_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Return a string representing 's'. Treat bytestrings using the 'encoding'
    codec.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if isinstance(s, Promise):
        # The input is the result of a gettext_lazy() call.
        return s
    # Everything that is not lazy is converted by force_text(), mirroring how
    # smart_bytes() delegates to force_bytes().
    return force_text(s, encoding, strings_only, errors)

# NOTE(review): this tuple is not present anywhere in the extracted source,
# yet is_protected_type() needs it. The members below are reconstructed to
# match upstream Django -- confirm against the original module.
_PROTECTED_TYPES = (
    type(None), int, float, Decimal, datetime.datetime, datetime.date,
    datetime.time,
)


def is_protected_type(obj):
    """Determine if the object instance is of a protected type.

    Objects of protected types are preserved as-is when passed to
    force_text(strings_only=True).
    """
    return isinstance(obj, _PROTECTED_TYPES)

def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Similar to smart_text, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects.

    Raise DjangoUnicodeDecodeError if decoding fails.
    """
    # Handle the common case first for performance reasons.
    if issubclass(type(s), str):
        return s
    if strings_only and is_protected_type(s):
        return s
    try:
        if isinstance(s, bytes):
            s = str(s, encoding, errors)
        else:
            s = str(s)
    except UnicodeDecodeError as e:
        # Re-raise with the offending object attached for easier debugging.
        raise DjangoUnicodeDecodeError(s, *e.args)
    return s

def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Return a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if isinstance(s, Promise):
        # The input is the result of a gettext_lazy() call.
        return s
    return force_bytes(s, encoding, strings_only, errors)

def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Similar to smart_bytes, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    # Handle the common case first for performance reasons.
    if isinstance(s, bytes):
        if encoding == 'utf-8':
            return s
        else:
            # Bytes input is taken to be UTF-8 and transcoded to the target.
            return s.decode('utf-8', errors).encode(encoding, errors)
    if strings_only and is_protected_type(s):
        return s
    if isinstance(s, memoryview):
        return bytes(s)
    if isinstance(s, Promise) or not isinstance(s, str):
        return str(s).encode(encoding, errors)
    else:
        return s.encode(encoding, errors)


smart_str = smart_text
force_str = force_text

# smart_str/force_str are the very same function objects as
# smart_text/force_text, so these assignments also replace the docstrings
# seen through the *_text names.
smart_str.__doc__ = """
Apply smart_text in Python 3 and smart_bytes in Python 2.

This is suitable for writing to sys.stdout (for instance).
"""

force_str.__doc__ = """
Apply force_text in Python 3 and force_bytes in Python 2.
"""

# NOTE(review): only the final `return quote(iri, ...)` statement of this
# function survives in the extracted source. The name, the None guard
# (matching uri_to_iri/filepath_to_uri) and the lazy-string handling are
# reconstructed to match upstream Django -- confirm against the original.
def iri_to_uri(iri):
    """
    Percent-encode 'iri' so it can be used as a URI portion.

    Characters listed in the `safe` argument below are left unescaped.
    Return None unchanged.
    """
    if iri is None:
        return iri
    elif isinstance(iri, Promise):
        # Resolve lazy strings before quoting.
        iri = str(iri)
    return quote(iri, safe="/#%[]=:;$&()+,!?*@'~")


# List of byte values that uri_to_iri() decodes from percent encoding.
# First, the unreserved characters from RFC 3986:
_ascii_ranges = [[45, 46, 95, 126], range(65, 91), range(97, 123)]
_hextobyte = {
    (fmt % char).encode(): bytes((char,))
    for ascii_range in _ascii_ranges
    for char in ascii_range
    for fmt in ['%02x', '%02X']
}
# And then everything above 128, because bytes ≥ 128 are part of multibyte
# unicode characters.
_hexdig = '0123456789ABCDEFabcdef'
_hextobyte.update({
    (a + b).encode(): bytes.fromhex(a + b)
    for a in _hexdig[8:] for b in _hexdig
})

def uri_to_iri(uri):
    """
    Convert a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987, excluding step 4.

    Take an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and return
    a string containing the encoded result (e.g. '/I%20♥%20Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    # Fast selective unquote: First, split on '%' and then starting with the
    # second block, decode the first 2 bytes if they represent a hex code to
    # decode. The rest of the block is the part after '%AB', not containing
    # any '%'. Add that to the output without further processing.
    bits = uri.split(b'%')
    if len(bits) == 1:
        iri = uri
    else:
        parts = [bits[0]]
        # Bind the hot-loop lookups to local names once.
        append = parts.append
        hextobyte = _hextobyte
        for item in bits[1:]:
            hex_code = item[:2]
            if hex_code in hextobyte:
                append(hextobyte[hex_code])
                append(item[2:])
            else:
                # Not a code we decode: keep the percent sign literally.
                append(b'%')
                append(item)
        iri = b''.join(parts)
    # Re-encode any octets that do not form valid UTF-8, then return a str
    # as the docstring promises.
    return repercent_broken_unicode(iri).decode()

# NOTE(review): only the `return quote(path, ...)` statement of this function
# survives in the extracted source; the name and signature are reconstructed
# to match upstream Django -- confirm against the original.
def escape_uri_path(path):
    """
    Percent-encode 'path', leaving the characters listed in the `safe`
    argument below unescaped.
    """
    return quote(path, safe="/:@&+$,-_.!~*'()")


def repercent_broken_unicode(path):
    """
    As per section 3.2 of RFC 3987, step three of converting a URI into an
    IRI, repercent-encode any octet produced that is not part of a strictly
    legal UTF-8 octet sequence.

    'path' must be bytes; bytes are returned.
    """
    # Loop rather than recurse: every invalid byte sequence would otherwise
    # add a stack frame, so hostile input with many invalid bytes could
    # exhaust the recursion limit (CVE-2019-14235).
    while True:
        try:
            path.decode()
        except UnicodeDecodeError as e:
            repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
            # quote() returns ASCII-only text, so encode() is lossless.
            path = path[:e.start] + repercent.encode() + path[e.end:]
        else:
            return path

def filepath_to_uri(path):
    """Convert a file system path to a URI portion that is suitable for
    inclusion in a URL.

    Encode certain chars that would normally be recognized as special chars
    for URIs. Do not encode the ' character, as it is a valid character
    within URIs. See the encodeURIComponent() JavaScript function for details.
    """
    if path is None:
        return path
    # I know about `os.sep` and `os.altsep` but I want to leave
    # some flexibility for hardcoding separators.
    return quote(path.replace("\\", "/"), safe="/~!*()'")


def get_system_encoding():
    """
    The encoding of the default system locale. Fallback to 'ascii' if the
    encoding is unsupported by Python or could not be determined. See tickets
    #10335 and #5846.
    """
    try:
        encoding = locale.getdefaultlocale()[1] or 'ascii'
        # Raises LookupError when Python has no codec for this name.
        codecs.lookup(encoding)
    except Exception:
        # Deliberately broad: any failure while probing the locale falls
        # back to 'ascii'.
        encoding = 'ascii'
    return encoding


DEFAULT_LOCALE_ENCODING = get_system_encoding()