Source code for tornado.httputil

#!/usr/bin/env python
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""HTTP utility code shared by clients and servers.

This module also defines the `HTTPServerRequest` class which is exposed
via `tornado.web.RequestHandler.request`.
"""

from __future__ import absolute_import, division, print_function, with_statement

import calendar
import collections
import copy
import datetime
import email.utils
import numbers
import re
import time

from tornado.escape import native_str, parse_qs_bytes, utf8
from tornado.log import gen_log
from tornado.util import ObjectDict, PY3

if PY3:
    import http.cookies as Cookie
    from http.client import responses
    from urllib.parse import urlencode
else:
    import Cookie
    from httplib import responses
    from urllib import urlencode

# responses is unused in this file, but we re-export it to other files.
# Reference it so pyflakes doesn't complain.
responses

try:
    from ssl import SSLError
except ImportError:
    # ssl is unavailable on app engine.
    class _SSLError(Exception):
        pass
    # Hack around a mypy limitation. We can't simply put "type: ignore"
    # on the class definition itself; must go through an assignment.
    SSLError = _SSLError  # type: ignore

try:
    import typing
except ImportError:
    pass


# RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
# terminator and ignore any preceding CR.
_CRLF_RE = re.compile(r'\r?\n')


class _NormalizedHeaderCache(dict):
    """Dynamic cached mapping of header names to Http-Header-Case.

    Implemented as a dict subclass so that cache hits are as fast as a
    normal dict lookup, without the overhead of a python function call.

    >>> normalized_headers = _NormalizedHeaderCache(10)
    >>> normalized_headers["coNtent-TYPE"]
    'Content-Type'
    """
    def __init__(self, size):
        super(_NormalizedHeaderCache, self).__init__()
        self.size = size
        # FIFO of insertion order, used for eviction once the cache is full.
        self.queue = collections.deque()

    def __missing__(self, key):
        # Capitalize each dash-separated word: "coNtent-TYPE" -> "Content-Type".
        normalized = "-".join([w.capitalize() for w in key.split("-")])
        self[key] = normalized
        self.queue.append(key)
        if len(self.queue) > self.size:
            # Limit the size of the cache.  LRU would be better, but this
            # simpler approach should be fine.  In Python 2.7+ we could
            # use OrderedDict (or in 3.2+, @functools.lru_cache).
            old_key = self.queue.popleft()
            del self[old_key]
        return normalized


_normalized_headers = _NormalizedHeaderCache(1000)

def add(self, name, value):
    # type: (str, str) -> None
    """Adds a new value for the given key."""
    key = _normalized_headers[name]
    self._last_key = key
    if key not in self:
        # First occurrence: identical to plain item assignment.
        self[key] = value
    else:
        # Repeated header: the comma-joined form lives in _dict,
        # the individual values in _as_list.
        self._dict[key] = (native_str(self[key]) + ',' +
                           native_str(value))
        self._as_list[key].append(value)

def get_list(self, name):
    """Returns all values for the given header as a list."""
    return self._as_list.get(_normalized_headers[name], [])

def get_all(self):
    # type: () -> typing.Iterable[typing.Tuple[str, str]]
    """Returns an iterable of all (name, value) pairs.

    If a header has multiple values, multiple pairs will be
    returned with the same name.
    """
    for header, vals in self._as_list.items():
        for v in vals:
            yield (header, v)

# MutableMapping abstract method implementations.
def __setitem__(self, name, value):
    key = _normalized_headers[name]
    self._dict[key] = value
    self._as_list[key] = [value]

def __getitem__(self, name):
    # type: (str) -> str
    return self._dict[_normalized_headers[name]]

def __delitem__(self, name):
    key = _normalized_headers[name]
    del self._dict[key]
    del self._as_list[key]

def __len__(self):
    return len(self._dict)

def __iter__(self):
    return iter(self._dict)

def copy(self):
    # defined in dict but not in MutableMapping.
    return HTTPHeaders(self)

# Use our overridden copy method for the copy.copy module.
# This makes shallow copies one level deeper, but preserves
# the appearance that HTTPHeaders is a single container.
__copy__ = copy

def __str__(self):
    return "".join("%s: %s\n" % (name, value)
                   for name, value in self.get_all())

__unicode__ = __str__

class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method
       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri
       The requested uri.

    .. attribute:: path
       The path portion of `uri`

    .. attribute:: query
       The query portion of `uri`

    .. attribute:: version
       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers
       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body
       Request body, if present, as a byte string.

    .. attribute:: remote_ip
       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

       .. versionchanged:: 3.1
          The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol
       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host
       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments
       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments
       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments
       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files
       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection
       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """
    def __init__(self, method=None, uri=None, version="HTTP/1.0",
                 headers=None, body=None, host=None, files=None,
                 connection=None, start_line=None):
        # A start_line tuple, when given, overrides the individual
        # method/uri/version arguments.
        if start_line is not None:
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, 'context', None)
        self.remote_ip = getattr(context, 'remote_ip', None)
        self.protocol = getattr(context, 'protocol', "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.files = files or {}
        self.connection = connection
        self._start_time = time.time()
        self._finish_time = None

        self.path, sep, self.query = uri.partition('?')
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Deep-copied so later merging of body arguments into `arguments`
        # doesn't mutate the query-only view.
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}

    def supports_http_1_1(self):
        """Returns True if this request supports HTTP/1.1 semantics.

        .. deprecated:: 4.0
           Applications are less likely to need this information with
           the introduction of `.HTTPConnection`.  If you still need
           it, access the ``version`` attribute directly.
        """
        return self.version == "HTTP/1.1"

    @property
    def cookies(self):
        """A dictionary of Cookie.Morsel objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = Cookie.SimpleCookie()
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not.  Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def write(self, chunk, callback=None):
        """Writes the given chunk to the response stream.

        .. deprecated:: 4.0
           Use ``request.connection`` and the `.HTTPConnection` methods
           to write the response.
        """
        assert isinstance(chunk, bytes)
        assert self.version.startswith("HTTP/1."), \
            "deprecated interface only supported in HTTP/1.x"
        self.connection.write(chunk, callback=callback)

    def finish(self):
        """Finishes this HTTP request on the open connection.

        .. deprecated:: 4.0
           Use ``request.connection`` and the `.HTTPConnection` methods
           to write the response.
        """
        self.connection.finish()
        self._finish_time = time.time()

    def full_url(self):
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri

    def request_time(self):
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Request still in flight: measure against the current time.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(self, binary_form=False):
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            return self.connection.stream.socket.getpeercert(
                binary_form=binary_form)
        except SSLError:
            return None

def start_request(self, server_conn, request_conn):
    """This method is called by the server when a new request has started.

    :arg server_conn: is an opaque object representing the long-lived
        (e.g. tcp-level) connection.
    :arg request_conn: is a `.HTTPConnection` object for a single
        request/response exchange.

    This method should return a `.HTTPMessageDelegate`.
    """
    # Abstract: subclasses must provide the delegate factory.
    raise NotImplementedError()

def on_close(self, server_conn):
    """This method is called when a connection has been closed.

    :arg server_conn: is a server connection that has previously been
        passed to ``start_request``.
    """
    # Default implementation: no cleanup required.
    pass

def headers_received(self, start_line, headers):
    """Called when the HTTP headers have been received and parsed.

    :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
        depending on whether this is a client or server message.
    :arg headers: a `.HTTPHeaders` instance.

    Some `.HTTPConnection` methods can only be called during
    ``headers_received``.

    May return a `.Future`; if it does the body will not be read
    until it is done.
    """
    # Default implementation: accept the headers and do nothing.
    pass

def data_received(self, chunk):
    """Called when a chunk of data has been received.

    May return a `.Future` for flow control.
    """
    # Default implementation: discard the chunk.
    pass

def finish(self):
    """Called after the last chunk of data has been received."""
    # Default implementation: nothing to finalize.
    pass

def on_connection_close(self):
    """Called if the connection is closed without finishing the request.

    If ``headers_received`` is called, either ``finish`` or
    ``on_connection_close`` will be called, but not both.
    """
    # Default implementation: nothing to clean up.
    pass

class HTTPConnection(object):
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """
    def write_headers(self, start_line, headers, chunk=None, callback=None):
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an
            optimization so that small responses can be written in the same
            call as their headers.
        :arg callback: a callback to be run when the write is complete.

        The ``version`` field of ``start_line`` is ignored.

        Returns a `.Future` if no callback is given.
        """
        raise NotImplementedError()

    def write(self, chunk, callback=None):
        """Writes a chunk of body data.

        The callback will be run when the write is complete.  If no callback
        is given, returns a Future.
        """
        raise NotImplementedError()

    def finish(self):
        """Indicates that the last body data has been written.
        """
        raise NotImplementedError()

def url_concat(url, args):
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if not args:
        return url
    # url[-1:] (not url[-1]) so that an empty url does not raise
    # IndexError; an empty slice compares unequal to '?' and '&'.
    if url[-1:] not in ('?', '&'):
        # Start the query string if absent, otherwise extend it.
        url += '&' if ('?' in url) else '?'
    return url + urlencode(args)

def parse_body_arguments(content_type, body, arguments, files, headers=None):
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.
    """
    if headers and 'Content-Encoding' in headers:
        # We don't decompress request bodies; log and bail out.
        gen_log.warning("Unsupported Content-Encoding: %s",
                        headers['Content-Encoding'])
        return
    if content_type.startswith("application/x-www-form-urlencoded"):
        try:
            uri_arguments = parse_qs_bytes(native_str(body),
                                           keep_blank_values=True)
        except Exception as e:
            gen_log.warning('Invalid x-www-form-urlencoded body: %s', e)
            uri_arguments = {}
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        try:
            # Find the boundary parameter among the content-type fields.
            for field in content_type.split(";"):
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                raise ValueError("multipart boundary not found")
        except Exception as e:
            gen_log.warning("Invalid multipart/form-data: %s", e)

def parse_multipart_form_data(boundary, data, arguments, files):
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Header block ends at the first blank line.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Strip the trailing CRLF that precedes the next boundary.
        value = part[eoh + 4:-2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(HTTPFile(  # type: ignore
                filename=disp_params["filename"], body=value,
                content_type=ctype))
        else:
            arguments.setdefault(name, []).append(value)

def format_timestamp(ts):
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Normalize every accepted input type to seconds since the epoch.
    if isinstance(ts, (tuple, time.struct_time)):
        seconds = calendar.timegm(ts)
    elif isinstance(ts, datetime.datetime):
        # Naive datetimes are interpreted as UTC.
        seconds = calendar.timegm(ts.utctimetuple())
    elif isinstance(ts, numbers.Real):
        seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(seconds, usegmt=True)

# _parseparam and _parse_header are copied and modified from python2.7's cgi.py# The original 2.7 version of this code did not correctly support some# combinations of semicolons and double quotes.# It has also been modified to support valueless parameters as seen in# websocket extension negotiations.def_parseparam(s):whiles[:1]==';':s=s[1:]end=s.find(';')whileend>0and(s.count('"',0,end)-s.count('\\"',0,end))%2:end=s.find(';',end+1)ifend<0:end=len(s)f=s[:end]yieldf.strip()s=s[end:]def_parse_header(line):"""Parse a Content-type like header. Return the main content-type and a dictionary of options. """parts=_parseparam(';'+line)key=next(parts)pdict={}forpinparts:i=p.find('=')ifi>=0:name=p[:i].strip().lower()value=p[i+1:].strip()iflen(value)>=2andvalue[0]==value[-1]=='"':value=value[1:-1]value=value.replace('\\\\','\\').replace('\\"','"')pdict[name]=valueelse:pdict[p]=Nonereturnkey,pdictdef_encode_header(key,pdict):"""Inverse of _parse_header. >>> _encode_header('permessage-deflate', ... {'client_max_window_bits': 15, 'client_no_context_takeover': None}) 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover' """ifnotpdict:returnkeyout=[key]# Sort the parameters just to make it easy to test.fork,vinsorted(pdict.items()):ifvisNone:out.append(k)else:# TODO: quote if necessary.out.append('%s=%s'%(k,v))return'; '.join(out)defdoctests():importdoctestreturndoctest.DocTestSuite()

def split_host_and_port(netloc):
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    match = re.match(r'^(.+):(\d+)$', netloc)
    if match is None:
        # No trailing ":digits" suffix: the whole netloc is the host.
        return (netloc, None)
    return (match.group(1), int(match.group(2)))

_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = ''.join


def _unquote_cookie(str):
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.5 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters.  See RFC 2109.
    if str is None or len(str) < 2:
        return str
    if str[0] != '"' or str[-1] != '"':
        return str

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    str = str[1:-1]

    # Check for special sequences.  Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    i = 0
    n = len(str)
    res = []
    while 0 <= i < n:
        o_match = _OctalPatt.search(str, i)
        q_match = _QuotePatt.search(str, i)
        if not o_match and not q_match:              # Neither matched
            res.append(str[i:])
            break
        # else:
        j = k = -1
        if o_match:
            j = o_match.start(0)
        if q_match:
            k = q_match.start(0)
        if q_match and (not o_match or k < j):       # QuotePatt matched
            res.append(str[i:k])
            res.append(str[k + 1])
            i = k + 2
        else:                                        # OctalPatt matched
            res.append(str[i:j])
            res.append(chr(int(str[j + 1:j + 4], 8)))
            i = j + 4
    return _nulljoin(res)


def parse_cookie(cookie):
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    for chunk in cookie.split(str(';')):
        if str('=') in chunk:
            key, val = chunk.split(str('='), 1)
        else:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = str(''), chunk
        key, val = key.strip(), val.strip()
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict