diff options
Diffstat (limited to 'requests/packages/urllib3')
| -rw-r--r-- | requests/packages/urllib3/__init__.py | 40 | ||||
| -rw-r--r-- | requests/packages/urllib3/_collections.py | 167 | ||||
| -rw-r--r-- | requests/packages/urllib3/connectionpool.py | 561 | ||||
| -rw-r--r-- | requests/packages/urllib3/contrib/ntlmpool.py | 11 | ||||
| -rw-r--r-- | requests/packages/urllib3/contrib/pyopenssl.py | 344 | ||||
| -rw-r--r-- | requests/packages/urllib3/exceptions.py | 112 | ||||
| -rw-r--r-- | requests/packages/urllib3/fields.py | 177 | ||||
| -rw-r--r-- | requests/packages/urllib3/filepost.py | 94 | ||||
| -rw-r--r-- | requests/packages/urllib3/packages/ordered_dict.py | 260 | ||||
| -rw-r--r-- | requests/packages/urllib3/packages/six.py | 385 | ||||
| -rw-r--r-- | requests/packages/urllib3/packages/ssl_match_hostname/__init__.py | 67 | ||||
| -rw-r--r-- | requests/packages/urllib3/poolmanager.py | 238 | ||||
| -rw-r--r-- | requests/packages/urllib3/request.py | 57 | ||||
| -rw-r--r-- | requests/packages/urllib3/response.py | 204 | ||||
| -rw-r--r-- | requests/packages/urllib3/util.py | 626 |
15 files changed, 2840 insertions, 503 deletions
diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index 20b1fb4..73071f7 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -1,5 +1,5 @@ # urllib3/__init__.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -10,31 +10,25 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.0.2' +__version__ = 'dev' from .connectionpool import ( HTTPConnectionPool, HTTPSConnectionPool, - connection_from_url, - get_host, - make_headers) - - -from .exceptions import ( - HTTPError, - MaxRetryError, - SSLError, - TimeoutError) + connection_from_url +) +from . import exceptions +from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .filepost import encode_multipart_formdata +from .util import make_headers, get_host, Timeout # Set default logging handler to avoid "No handler found" warnings. import logging -try: +try: # Python 2.7+ from logging import NullHandler except ImportError: class NullHandler(logging.Handler): @@ -43,6 +37,22 @@ except ImportError: logging.getLogger(__name__).addHandler(NullHandler()) +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. + """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added an stderr logging handler to logger: %s' % __name__) + return handler + # ... Clean up. -del logging del NullHandler diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index 00b2cd5..282b8d5 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -1,131 +1,94 @@ # urllib3/_collections.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import deque - +from collections import MutableMapping from threading import RLock -__all__ = ['RecentlyUsedContainer'] - - -class AccessEntry(object): - __slots__ = ('key', 'is_valid') - - def __init__(self, key, is_valid=True): - self.key = key - self.is_valid = is_valid - - -class RecentlyUsedContainer(dict): - """ - Provides a dict-like that maintains up to ``maxsize`` keys while throwing - away the least-recently-used keys beyond ``maxsize``. - """ - - # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we - # will attempt to cleanup the invalidated entries in the access_log - # datastructure during the next 'get' operation. - CLEANUP_FACTOR = 10 - - def __init__(self, maxsize=10): - self._maxsize = maxsize - - self._container = {} - - # We use a deque to to store our keys ordered by the last access. - self.access_log = deque() - self.access_log_lock = RLock() +try: # Python 2.7+ + from collections import OrderedDict +except ImportError: + from .packages.ordered_dict import OrderedDict - # We look up the access log entry by the key to invalidate it so we can - # insert a new authorative entry at the head without having to dig and - # find the old entry for removal immediately. - self.access_lookup = {} - # Trigger a heap cleanup when we get past this size - self.access_log_limit = maxsize * self.CLEANUP_FACTOR +__all__ = ['RecentlyUsedContainer'] - def _invalidate_entry(self, key): - "If exists: Invalidate old entry and return it." - old_entry = self.access_lookup.get(key) - if old_entry: - old_entry.is_valid = False - return old_entry +_Null = object() - def _push_entry(self, key): - "Push entry onto our access log, invalidate the old entry if exists." - self._invalidate_entry(key) - new_entry = AccessEntry(key) - self.access_lookup[key] = new_entry +class RecentlyUsedContainer(MutableMapping): + """ + Provides a thread-safe dict-like container which maintains up to + ``maxsize`` keys while throwing away the least-recently-used keys beyond + ``maxsize``. - self.access_log_lock.acquire() - self.access_log.appendleft(new_entry) - self.access_log_lock.release() + :param maxsize: + Maximum number of recent elements to retain. - def _prune_entries(self, num): - "Pop entries from our access log until we popped ``num`` valid ones." - while num > 0: - self.access_log_lock.acquire() - p = self.access_log.pop() - self.access_log_lock.release() + :param dispose_func: + Every time an item is evicted from the container, + ``dispose_func(value)`` is called. Callback which will get called + """ - if not p.is_valid: - continue # Invalidated entry, skip + ContainerCls = OrderedDict - dict.pop(self, p.key, None) - self.access_lookup.pop(p.key, None) - num -= 1 + def __init__(self, maxsize=10, dispose_func=None): + self._maxsize = maxsize + self.dispose_func = dispose_func - def _prune_invalidated_entries(self): - "Rebuild our access_log without the invalidated entries." - self.access_log_lock.acquire() - self.access_log = deque(e for e in self.access_log if e.is_valid) - self.access_log_lock.release() + self._container = self.ContainerCls() + self.lock = RLock() - def _get_ordered_access_keys(self): - "Return ordered access keys for inspection. Used for testing." - self.access_log_lock.acquire() - r = [e.key for e in self.access_log if e.is_valid] - self.access_log_lock.release() + def __getitem__(self, key): + # Re-insert the item, moving it to the end of the eviction line. + with self.lock: + item = self._container.pop(key) + self._container[key] = item + return item - return r + def __setitem__(self, key, value): + evicted_value = _Null + with self.lock: + # Possibly evict the existing value of 'key' + evicted_value = self._container.get(key, _Null) + self._container[key] = value - def __getitem__(self, key): - item = dict.get(self, key) + # If we didn't evict an existing value, we might have to evict the + # least recently used item from the beginning of the container. + if len(self._container) > self._maxsize: + _key, evicted_value = self._container.popitem(last=False) - if not item: - raise KeyError(key) + if self.dispose_func and evicted_value is not _Null: + self.dispose_func(evicted_value) - # Insert new entry with new high priority, also implicitly invalidates - # the old entry. - self._push_entry(key) + def __delitem__(self, key): + with self.lock: + value = self._container.pop(key) - if len(self.access_log) > self.access_log_limit: - # Heap is getting too big, try to clean up any tailing invalidated - # entries. - self._prune_invalidated_entries() + if self.dispose_func: + self.dispose_func(value) - return item + def __len__(self): + with self.lock: + return len(self._container) - def __setitem__(self, key, item): - # Add item to our container and access log - dict.__setitem__(self, key, item) - self._push_entry(key) + def __iter__(self): + raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') - # Discard invalid and excess entries - self._prune_entries(len(self) - self._maxsize) + def clear(self): + with self.lock: + # Copy pointers to all values, then wipe the mapping + # under Python 2, this copies the list of values twice :-| + values = list(self._container.values()) + self._container.clear() - def __delitem__(self, key): - self._invalidate_entry(key) - self.access_lookup.pop(key, None) - dict.__delitem__(self, key) + if self.dispose_func: + for value in values: + self.dispose_func(value) - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default + def keys(self): + with self.lock: + return self._container.keys() diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 17f2f84..691d4e2 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -1,43 +1,84 @@ # urllib3/connectionpool.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +import errno import logging + +from socket import error as SocketError, timeout as SocketTimeout import socket +try: # Python 3 + from http.client import HTTPConnection, HTTPException + from http.client import HTTP_PORT, HTTPS_PORT +except ImportError: + from httplib import HTTPConnection, HTTPException + from httplib import HTTP_PORT, HTTPS_PORT + +try: # Python 3 + from queue import LifoQueue, Empty, Full +except ImportError: + from Queue import LifoQueue, Empty, Full + import Queue as _ # Platform-specific: Windows -from httplib import HTTPConnection, HTTPSConnection, HTTPException -from Queue import Queue, Empty, Full -from select import select -from socket import error as SocketError, timeout as SocketTimeout -from .packages.ssl_match_hostname import match_hostname, CertificateError +try: # Compiled with SSL? + HTTPSConnection = object + + class BaseSSLError(BaseException): + pass + + ssl = None + + try: # Python 3 + from http.client import HTTPSConnection + except ImportError: + from httplib import HTTPSConnection -try: import ssl BaseSSLError = ssl.SSLError -except ImportError: - ssl = None - BaseSSLError = None + +except (ImportError, AttributeError): # Platform-specific: No SSL. + pass -from .request import RequestMethods -from .response import HTTPResponse from .exceptions import ( - SSLError, - MaxRetryError, - TimeoutError, - HostChangedError, + ClosedPoolError, + ConnectTimeoutError, EmptyPoolError, + HostChangedError, + MaxRetryError, + SSLError, + ReadTimeoutError, + ProxyError, +) +from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages import six +from .request import RequestMethods +from .response import HTTPResponse +from .util import ( + assert_fingerprint, + get_host, + is_connection_dropped, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, + Timeout, ) +xrange = six.moves.xrange log = logging.getLogger(__name__) _Default = object() +port_by_scheme = { + 'http': HTTP_PORT, + 'https': HTTPS_PORT, +} + ## Connection objects (extension of httplib) @@ -48,31 +89,55 @@ class VerifiedHTTPSConnection(HTTPSConnection): """ cert_reqs = None ca_certs = None + ssl_version = None def set_cert(self, key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): - ssl_req_scheme = { - 'CERT_NONE': ssl.CERT_NONE, - 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, - 'CERT_REQUIRED': ssl.CERT_REQUIRED - } + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): self.key_file = key_file self.cert_file = cert_file - self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def connect(self): # Add certificate verification - sock = socket.create_connection((self.host, self.port), self.timeout) + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs) - if self.ca_certs: - match_hostname(self.sock.getpeercert(), self.host) + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) + ## Pool objects @@ -81,8 +146,23 @@ class ConnectionPool(object): Base class for all connection pools, such as :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. """ - pass + scheme = None + QueueCls = LifoQueue + + def __init__(self, host, port=None): + # httplib doesn't like it when we include brackets in ipv6 addresses + host = host.strip('[]') + + self.host = host + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % (type(self).__name__, + self.host, self.port) + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) class HTTPConnectionPool(ConnectionPool, RequestMethods): """ @@ -101,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): as a valid HTTP/1.0 or 1.1 status line, passed into :class:`httplib.HTTPConnection`. + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + :param timeout: - Socket timeout for each individual connection, can be a float. None - disables timeout. + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. :param maxsize: Number of connections to save that can be reused. More than 1 is useful @@ -121,19 +207,38 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param headers: Headers to include with all requests, unless other headers are given explicitly. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" """ scheme = 'http' - def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None): - self.host = host - self.port = port + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, _proxy=None, _proxy_headers=None): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + self.strict = strict + + # This is for backwards compatibility and can be removed once a timeout + # can only be set to a Timeout object + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + self.timeout = timeout - self.pool = Queue(maxsize) + + self.pool = self.QueueCls(maxsize) self.block = block - self.headers = headers or {} + + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): @@ -150,7 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, port=self.port) + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + + return HTTPConnection(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) + def _get_conn(self, timeout=None): """ @@ -168,18 +280,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - # If this is a persistent connection, check if it got disconnected - if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]: - # Either data is buffered (bad), or the connection is dropped. - log.info("Resetting dropped connection: %s" % self.host) - conn.close() + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") except Empty: if self.block: - raise EmptyPoolError("Pool reached maximum size and no more " + raise EmptyPoolError(self, + "Pool reached maximum size and no more " "connections are allowed.") pass # Oh well, we'll create a new connection then + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.info("Resetting dropped connection: %s" % self.host) + conn.close() + return conn or self._new_conn() def _put_conn(self, conn): @@ -190,50 +305,147 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Connection object for the current host and port as returned by :meth:`._new_conn` or :meth:`._get_conn`. - If the pool is already full, the connection is discarded because we - exceeded maxsize. If connections are discarded frequently, then maxsize - should be increased. + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. """ try: self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass except Full: # This should never happen if self.block == True log.warning("HttpConnectionPool is full, discarding connection: %s" % self.host) + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ Perform a request on a given httplib connection object taken from our pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. """ self.num_requests += 1 - if timeout is _Default: - timeout = self.timeout + timeout_obj = self._get_timeout(timeout) - conn.timeout = timeout # This only does anything in Py26+ + try: + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # request.py. It also calls makefile (recv) on the socket + conn.request(method, url, **httplib_request_kw) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, timeout_obj.connect_timeout)) - conn.request(method, url, **httplib_request_kw) - conn.sock.settimeout(timeout) - httplib_response = conn.getresponse() + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr + if hasattr(conn, 'sock') and \ + read_timeout is not None and \ + read_timeout is not Timeout.DEFAULT_TIMEOUT: + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + conn.sock.settimeout(read_timeout) - log.debug("\"%s %s %s\" %s %s" % - (method, url, - conn._http_vsn_str, # pylint: disable-msg=W0212 - httplib_response.status, httplib_response.length)) + # Receive the response from the server + try: + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + except SocketTimeout: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + + except SocketError as e: # Platform-specific: Python 2 + # See the above comment about EAGAIN in Python 3. In Python 2 we + # have to specifically catch it and throw the timeout error + if e.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + raise + + # AppEngine doesn't have a version attr. + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, + httplib_response.status, + httplib_response.length)) return httplib_response + def close(self): + """ + Close all pooled connections and disable the pool. + """ + # Disable access to the pool + old_pool, self.pool = self.pool, None + + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + + except Empty: + pass # Done. def is_same_host(self, url): """ Check if the given ``url`` is a member of the same host as this - conncetion pool. + connection pool. """ + if url.startswith('/'): + return True + # TODO: Add optional support for socket.gethostbyname checking. - return (url.startswith('/') or - get_host(url) == (self.scheme, self.host, self.port)) + scheme, host, port = get_host(url) + + if self.port and not port: + # Use explicit default port for comparison when none is given. + port = port_by_scheme.get(scheme) + + return (scheme, host, port) == (self.scheme, self.host, self.port) def urlopen(self, method, url, body=None, headers=None, retries=3, redirect=True, assert_same_host=True, timeout=_Default, @@ -246,7 +458,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): .. note:: More commonly, it's appropriate to use a convenience method provided - by :class:`.RequestMethods`, such as :meth:`.request`. + by :class:`.RequestMethods`, such as :meth:`request`. .. note:: @@ -272,8 +484,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Number of retries to allow before raising a MaxRetryError exception. :param redirect: - Automatically handle redirects (status codes 301, 302, 303, 307), - each redirect counts as a retry. + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -281,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): use the pool on an HTTP proxy and request foreign hosts. :param timeout: - If specified, overrides the default timeout for this one request. + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. :param pool_timeout: If set and the pool is set to block=True, then this method will @@ -306,27 +520,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): headers = self.headers if retries < 0: - raise MaxRetryError(url) - - if timeout is _Default: - timeout = self.timeout + raise MaxRetryError(self, url) if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - host = "%s://%s" % (self.scheme, self.host) - if self.port: - host = "%s:%d" % (host, self.port) - - raise HostChangedError(host, url, retries - 1) + raise HostChangedError(self, url, retries - 1) conn = None try: # Request a connection from the queue - # (Could raise SocketError: Bad file descriptor) conn = self._get_conn(timeout=pool_timeout) # Make the request on the httplib connection object @@ -351,45 +557,65 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.release_conn()`` is called (implicitly by # ``response.read()``) - except (Empty), e: + except Empty: # Timed out by queue - raise TimeoutError("Request timed out. (pool_timeout=%s)" % - pool_timeout) + raise ReadTimeoutError( + self, url, "Read timed out, no pool connections are available.") - except (SocketTimeout), e: + except SocketTimeout: # Timed out by socket - raise TimeoutError("Request timed out. (timeout=%s)" % - timeout) + raise ReadTimeoutError(self, url, "Read timed out.") - except (BaseSSLError), e: + except BaseSSLError as e: # SSL certificate error + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) - except (CertificateError), e: + except CertificateError as e: # Name mismatch raise SSLError(e) - except (HTTPException, SocketError), e: + except (HTTPException, SocketError) as e: + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + # Connection broken, discard. It will be replaced next _get_conn(). conn = None + # This is necessary so we can access e below + err = e + + if retries == 0: + raise MaxRetryError(self, url, e) finally: - if conn and release_conn: - # Put the connection back to be reused + if release_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. self._put_conn(conn) if not conn: + # Try again log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, e, url)) + "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, - redirect, assert_same_host) # Try again + redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() if redirect_location: + if response.status == 303: + method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host) + retries - 1, redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response @@ -400,11 +626,16 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:httplib.HTTPSConnection`. + instead of :class:`httplib.HTTPSConnection`. - The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters - are only used if :mod:`ssl` is available and are fed into - :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and + ``ssl_version`` are only used if :mod:`ssl` is available and are fed into + :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket + into an SSL socket. """ scheme = 'https' @@ -412,16 +643,48 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, - key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): + _proxy=None, _proxy_headers=None, + key_file=None, cert_file=None, cert_reqs=None, + ca_certs=None, ssl_version=None, + assert_hostname=None, assert_fingerprint=None): - super(HTTPSConnectionPool, self).__init__(host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, + block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _prepare_conn(self, connection): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(connection, VerifiedHTTPSConnection): + connection.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + connection.ssl_version = self.ssl_version + + if self.proxy is not None: + # Python 2.7+ + try: + set_tunnel = connection.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = connection._set_tunnel + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib + # would improperly set Host: header to proxy's IP:port. + connection.connect() + + return connection def _new_conn(self): """ @@ -431,94 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) - if not ssl: - return HTTPSConnection(host=self.host, port=self.port) - - connection = VerifiedHTTPSConnection(host=self.host, port=self.port) - connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) - return connection - - -## Helpers - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) + if not ssl: # Platform-specific: Python compiled without +ssl + if not HTTPSConnection or HTTPSConnection is object: + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + connection_class = HTTPSConnection else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' + connection_class = VerifiedHTTPSConnection - if basic_auth: - headers['authorization'] = 'Basic ' + \ - basic_auth.encode('base64').strip() + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + connection = connection_class(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) - return headers - - -def get_host(url): - """ - Given a url, return its scheme, host and port (None if it's not there). - - For example: :: - - >>> get_host('http://google.com/mail/') - ('http', 'google.com', None) - >>> get_host('google.com:80') - ('http', 'google.com', 80) - """ - # This code is actually similar to urlparse.urlsplit, but much - # simplified for our needs. - port = None - scheme = 'http' - if '://' in url: - scheme, url = url.split('://', 1) - if '/' in url: - url, _path = url.split('/', 1) - if '@' in url: - _auth, url = url.split('@', 1) - if ':' in url: - url, port = url.split(':', 1) - port = int(port) - return scheme, url, port + return self._prepare_conn(connection) def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py index c5f010e..b8cd933 100644 --- a/requests/packages/urllib3/contrib/ntlmpool.py +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -1,5 +1,5 @@ # urllib3/contrib/ntlmpool.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -10,7 +10,10 @@ NTLM authenticating pool, contributed by erikcederstran Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 """ -import httplib +try: + from http.client import HTTPSConnection +except ImportError: + from httplib import HTTPSConnection from logging import getLogger from ntlm import ntlm @@ -30,7 +33,7 @@ class NTLMConnectionPool(HTTPSConnectionPool): def __init__(self, user, pw, authurl, *args, **kwargs): """ authurl is a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\username format. + user is the Windows user, probably in the DOMAIN\\username format. pw is the password for the user. """ super(NTLMConnectionPool, self).__init__(*args, **kwargs) @@ -53,7 +56,7 @@ class NTLMConnectionPool(HTTPSConnectionPool): req_header = 'Authorization' resp_header = 'www-authenticate' - conn = httplib.HTTPSConnection(host=self.host, port=self.port) + conn = HTTPSConnection(host=self.host, port=self.port) # Send negotiation message headers[req_header] = ( diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py new file mode 100644 index 0000000..d43bcd6 --- /dev/null +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -0,0 +1,344 @@ +'''SSL with SNI-support for Python 2. + +This needs the following packages installed: + +* pyOpenSSL (tested with 0.13) +* ndg-httpsclient (tested with 0.3.2) +* pyasn1 (tested with 0.1.6) + +To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. +This can be done in a ``sitecustomize`` module, or at any other time before +your application begins using ``urllib3``, like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. +''' + +from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT +from ndg.httpsclient.subj_alt_name import SubjectAltName +import OpenSSL.SSL +from pyasn1.codec.der import decoder as der_decoder +from socket import _fileobject +import ssl +from cStringIO import StringIO + +from .. import connectionpool +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI only *really* works if we can read the subjectAltName of certificates. +HAS_SNI = SUBJ_ALT_NAME_SUPPORT + +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} +_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} + + +orig_util_HAS_SNI = util.HAS_SNI +orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' + + connectionpool.ssl_wrap_socket = ssl_wrap_socket + util.HAS_SNI = HAS_SNI + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + + connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + util.HAS_SNI = orig_util_HAS_SNI + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +def get_subj_alt_name(peer_cert): + # Search through extensions + dns_name = [] + if not SUBJ_ALT_NAME_SUPPORT: + return dns_name + + general_names = SubjectAltName() + for i in range(peer_cert.get_extension_count()): + ext = peer_cert.get_extension(i) + ext_name = ext.get_short_name() + if ext_name != 'subjectAltName': + continue + + # PyOpenSSL returns extension data in ASN.1 encoded form + ext_dat = ext.get_data() + decoded_dat = der_decoder.decode(ext_dat, + asn1Spec=general_names) + + for name in decoded_dat: + if not isinstance(name, SubjectAltName): + continue + for entry in range(len(name)): + component = name.getComponentByPosition(entry) + if component.getName() != 'dNSName': + continue + dns_name.append(str(component.getComponent())) + + return dns_name + + +class fileobject(_fileobject): + + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self._sock.recv + while True: + try: + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + except OpenSSL.SSL.WantReadError: + continue + break + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + + def __init__(self, connection, socket): + self.connection = connection + self.socket = socket + + def fileno(self): + return self.socket.fileno() + + def makefile(self, mode, bufsize=-1): + return fileobject(self.connection, mode, bufsize) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def sendall(self, data): + return self.connection.sendall(data) + + def close(self): + return self.connection.shutdown() + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + + if not x509: + return x509 + + if binary_form: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, + x509) + + return { + 'subject': ( + (('commonName', x509.get_subject().CN),), + ), + 'subjectAltName': [ + ('DNS', value) + for value in get_subj_alt_name(x509) + ] + } + + +def _verify_callback(cnx, x509, err_no, err_depth, return_code): + return err_no == 0 + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) + if certfile: + ctx.use_certificate_file(certfile) + if keyfile: + ctx.use_privatekey_file(keyfile) + if cert_reqs != ssl.CERT_NONE: + ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback) + if ca_certs: + try: + ctx.load_verify_locations(ca_certs, None) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + + cnx = OpenSSL.SSL.Connection(ctx, sock) + cnx.set_tlsext_host_name(server_hostname) + cnx.set_connect_state() + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + break + + return WrappedSocket(cnx, sock) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 47937f7..98ef9ab 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -1,45 +1,121 @@ # urllib3/exceptions.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -## Exceptions + +## Base Exceptions class HTTPError(Exception): "Base exception used by this module." pass -class SSLError(Exception): +class PoolError(HTTPError): + "Base exception for errors caused within a pool." + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, None) + + +class RequestError(PoolError): + "Base exception for PoolErrors that have associated URLs." + def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) + + +class SSLError(HTTPError): "Raised when SSL certificate fails in an HTTPS connection." pass -class MaxRetryError(HTTPError): - "Raised when the maximum number of retries is exceeded." - def __init__(self, url): - HTTPError.__init__(self, "Max retries exceeded for url: %s" % url) - self.url = url +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass -class TimeoutError(HTTPError): - "Raised when a socket timeout occurs." +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." pass -class HostChangedError(HTTPError): +## Leaf Exceptions + +class MaxRetryError(RequestError): + "Raised when the maximum number of retries is exceeded." + + def __init__(self, pool, url, reason=None): + self.reason = reason + + message = "Max retries exceeded with url: %s" % url + if reason: + message += " (Caused by %s: %s)" % (type(reason), reason) + else: + message += " (Caused by redirect)" + + RequestError.__init__(self, pool, url, message) + + +class HostChangedError(RequestError): "Raised when an existing pool gets a request for a foreign host." - def __init__(self, original_host, new_url, retries=3): - HTTPError.__init__(self, - "Connection pool with host '%s' tried to open a foreign host: %s" % - (original_host, new_url)) - self.original_host = original_host - self.new_url = new_url + def __init__(self, pool, url, retries=3): + message = "Tried to open a foreign host with url: %s" % url + RequestError.__init__(self, pool, url, message) self.retries = retries -class EmptyPoolError(HTTPError): +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`. + """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" + pass + + +class EmptyPoolError(PoolError): "Raised when a pool runs out of connections and no more are allowed." pass + + +class ClosedPoolError(PoolError): + "Raised when a request enters a pool after the pool has been closed." + pass + + +class LocationParseError(ValueError, HTTPError): + "Raised when get_host or similar fails to parse the URL input." + + def __init__(self, location): + message = "Failed to parse: %s" % location + HTTPError.__init__(self, message) + + self.location = location diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py new file mode 100644 index 0000000..ed01765 --- /dev/null +++ b/requests/packages/urllib3/fields.py @@ -0,0 +1,177 @@ +# urllib3/fields.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetimes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except UnicodeEncodeError: + pass + else: + return result + if not six.PY3: # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from parameter + of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) + tuple where the MIME type is optional. For example: :: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as + `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 2ffea8b..4575582 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -1,71 +1,101 @@ # urllib3/filepost.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import codecs -import mimetools import mimetypes -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO # pylint: disable-msg=W0404 +from uuid import uuid4 +from io import BytesIO +from .packages import six +from .packages.six import b +from .fields import RequestField writer = codecs.lookup('utf-8')[3] -def get_content_type(filename): - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def choose_boundary(): + """ + Our embarassingly-simple replacement for mimetools.choose_boundary. + """ + return uuid4().hex + + +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) + + +def iter_fields(fields): + """ + Iterate over fields. + + .. deprecated :: + + The addition of `~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + `~urllib3.fields.RequestField` objects, instead. + + Supports list of (k, v) tuples and dicts. + + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) def encode_multipart_formdata(fields, boundary=None): """ - Encode a dictionary of ``fields`` using the multipart/form-data mime format. + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields. The key is treated as the field name, and the - value as the body of the form-data. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section. + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). :param boundary: If not specified, then a random boundary will be generated using :func:`mimetools.choose_boundary`. """ - body = StringIO() + body = BytesIO() if boundary is None: - boundary = mimetools.choose_boundary() + boundary = choose_boundary() - for fieldname, value in fields.iteritems(): - body.write('--%s\r\n' % (boundary)) + for field in iter_field_objects(fields): + body.write(b('--%s\r\n' % (boundary))) - if isinstance(value, tuple): - filename, data = value - writer(body).write('Content-Disposition: form-data; name="%s"; ' - 'filename="%s"\r\n' % (fieldname, filename)) - body.write('Content-Type: %s\r\n\r\n' % - (get_content_type(filename))) - else: - data = value - writer(body).write('Content-Disposition: form-data; name="%s"\r\n' - % (fieldname)) - body.write('Content-Type: text/plain\r\n\r\n') + writer(body).write(field.render_headers()) + data = field.data if isinstance(data, int): data = str(data) # Backwards compatibility - if isinstance(data, unicode): + if isinstance(data, six.text_type): writer(body).write(data) else: body.write(data) - body.write('\r\n') + body.write(b'\r\n') - body.write('--%s--\r\n' % (boundary)) + body.write(b('--%s--\r\n' % (boundary))) - content_type = 'multipart/form-data; boundary=%s' % boundary + content_type = str('multipart/form-data; boundary=%s' % boundary) return body.getvalue(), content_type diff --git a/requests/packages/urllib3/packages/ordered_dict.py b/requests/packages/urllib3/packages/ordered_dict.py new file mode 100644 index 0000000..7f8ee15 --- /dev/null +++ b/requests/packages/urllib3/packages/ordered_dict.py @@ -0,0 +1,260 @@ +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# Copyright 2009 Raymond Hettinger, released under the MIT License. +# http://code.activestate.com/recipes/576693/ + +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/requests/packages/urllib3/packages/six.py b/requests/packages/urllib3/packages/six.py new file mode 100644 index 0000000..27d8011 --- /dev/null +++ b/requests/packages/urllib3/packages/six.py @@ -0,0 +1,385 @@ +"""Utilities for writing code that runs on Python 2 and 3""" + +#Copyright (c) 2010-2011 Benjamin Peterson + +#Permission is hereby granted, free of charge, to any person obtaining a copy of +#this software and associated documentation files (the "Software"), to deal in +#the Software without restriction, including without limitation the rights to +#use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +#the Software, and to permit persons to whom the Software is furnished to do so, +#subject to the following conditions: + +#The above copyright notice and this permission notice shall be included in all +#copies or substantial portions of the Software. + +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +#FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +#COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +#IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +#CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import operator +import sys +import types + +__author__ = "Benjamin Peterson <benjamin@python.org>" +__version__ = "1.2.0" # Revision 41c74fef2ded + + +# True if we are running on Python 3. +PY3 = sys.version_info[0] == 3 + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) + # This is a bit ugly, but it avoids running this again. + delattr(tp, self.name) + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + + +class _MovedItems(types.ModuleType): + """Lazy loading of moved objects""" + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("reload_module", "__builtin__", "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("winreg", "_winreg"), +] +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) +del attr + +moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_code = "__code__" + _func_defaults = "__defaults__" + + _iterkeys = "keys" + _itervalues = "values" + _iteritems = "items" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_code = "func_code" + _func_defaults = "func_defaults" + + _iterkeys = "iterkeys" + _itervalues = "itervalues" + _iteritems = "iteritems" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +if PY3: + def get_unbound_function(unbound): + return unbound + + Iterator = object + + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) +else: + def get_unbound_function(unbound): + return unbound.im_func + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) + + +def iterkeys(d): + """Return an iterator over the keys of a dictionary.""" + return iter(getattr(d, _iterkeys)()) + +def itervalues(d): + """Return an iterator over the values of a dictionary.""" + return iter(getattr(d, _itervalues)()) + +def iteritems(d): + """Return an iterator over the (key, value) pairs of a dictionary.""" + return iter(getattr(d, _iteritems)()) + + +if PY3: + def b(s): + return s.encode("latin-1") + def u(s): + return s + if sys.version_info[1] <= 1: + def int2byte(i): + return bytes((i,)) + else: + # This is about 2x faster than the implementation above on 3.2+ + int2byte = operator.methodcaller("to_bytes", 1, "big") + import io + StringIO = io.StringIO + BytesIO = io.BytesIO +else: + def b(s): + return s + def u(s): + return unicode(s, "unicode_escape") + int2byte = chr + import StringIO + StringIO = BytesIO = StringIO.StringIO +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +if PY3: + import builtins + exec_ = getattr(builtins, "exec") + + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + + print_ = getattr(builtins, "print") + del builtins + +else: + def exec_(code, globs=None, locs=None): + """Execute code in a namespace.""" + if globs is None: + frame = sys._getframe(1) + globs = frame.f_globals + if locs is None: + locs = frame.f_locals + del frame + elif locs is None: + locs = globs + exec("""exec code in globs, locs""") + + + exec_("""def reraise(tp, value, tb=None): + raise tp, value, tb +""") + + + def print_(*args, **kwargs): + """The new-style print function.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + def write(data): + if not isinstance(data, basestring): + data = str(data) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) + +_add_doc(reraise, """Reraise an exception.""") + + +def with_metaclass(meta, base=object): + """Create a base class with a metaclass.""" + return meta("NewBase", (base,), {}) diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 9560b04..2d61ac2 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,23 +7,60 @@ __version__ = '3.2.2' class CertificateError(ValueError): pass -def _dnsname_to_pat(dn): +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ pats = [] - for frag in dn.split(r'.'): - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + if not dn: + return False + + parts = dn.split(r'.') + leftmost = parts[0] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survery of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in parts[1:]: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + def match_hostname(cert, hostname): """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. CertificateError is raised on failure. On success, the function returns nothing. @@ -34,7 +71,7 @@ def match_hostname(cert, hostname): san = cert.get('subjectAltName', ()) for key, value in san: if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if not dnsnames: @@ -45,7 +82,7 @@ def match_hostname(cert, hostname): # XXX according to RFC 2818, the most specific Common Name # must be used. if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if len(dnsnames) > 1: diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index 482ee4a..e7f8667 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -1,16 +1,21 @@ # urllib3/poolmanager.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import logging +try: # Python 3 + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import get_host, connection_from_url -from .exceptions import HostChangedError +from .connectionpool import port_by_scheme from .request import RequestMethods +from .util import parse_url __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -21,13 +26,11 @@ pool_classes_by_scheme = { 'https': HTTPSConnectionPool, } -port_by_scheme = { - 'http': 80, - 'https': 443, -} - log = logging.getLogger(__name__) +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version') + class PoolManager(RequestMethods): """ @@ -35,8 +38,12 @@ class PoolManager(RequestMethods): necessary connection pools for you. :param num_pools: - Number of connection pools to cache before discarding the least recently - used pool. + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. :param \**connection_pool_kw: Additional parameters are used to create fresh @@ -44,42 +51,73 @@ class PoolManager(RequestMethods): Example: :: - >>> manager = PoolManager() - >>> r = manager.urlopen("http://google.com/") - >>> r = manager.urlopen("http://google.com/mail") - >>> r = manager.urlopen("http://yahoo.com/") - >>> len(r.pools) + >>> manager = PoolManager(num_pools=2) + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') + >>> len(manager.pools) 2 """ - # TODO: Make sure there are no memory leaks here. + proxy = None - def __init__(self, num_pools=10, **connection_pool_kw): + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw - self.pools = RecentlyUsedContainer(num_pools) + self.pools = RecentlyUsedContainer(num_pools, + dispose_func=lambda p: p.close()) + + def _new_pool(self, scheme, host, port): + """ + Create a new :class:`ConnectionPool` based on host, port and scheme. + + This method is used to actually create the connection pools handed out + by :meth:`connection_from_url` and companion methods. It is intended + to be overridden for customization. + """ + pool_cls = pool_classes_by_scheme[scheme] + kwargs = self.connection_pool_kw + if scheme == 'http': + kwargs = self.connection_pool_kw.copy() + for kw in SSL_KEYWORDS: + kwargs.pop(kw, None) + + return pool_cls(host, port, **kwargs) - def connection_from_host(self, host, port=80, scheme='http'): + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() + + def connection_from_host(self, host, port=None, scheme='http'): """ Get a :class:`ConnectionPool` based on the host, port, and scheme. - Note that an appropriate ``port`` value is required here to normalize - connection pools in our container most effectively. + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. """ - pool_key = (scheme, host, port) - # If the scheme, host, or port doesn't match existing open connections, - # open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool + scheme = scheme or 'http' - # Make a fresh ConnectionPool of the desired type - pool_cls = pool_classes_by_scheme[scheme] - pool = pool_cls(host, port, **self.connection_pool_kw) + port = port or port_by_scheme.get(scheme, 80) + + pool_key = (scheme, host, port) - self.pools[pool_key] = pool + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + # Make a fresh ConnectionPool of the desired type + pool = self._new_pool(scheme, host, port) + self.pools[pool_key] = pool return pool def connection_from_url(self, url): @@ -91,43 +129,131 @@ class PoolManager(RequestMethods): Additional parameters are taken from the :class:`.PoolManager` constructor. """ - scheme, host, port = get_host(url) - - port = port or port_by_scheme.get(scheme, 80) + u = parse_url(url) + return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - return self.connection_from_host(host, port=port, scheme=scheme) - - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. - ``url`` must be absolute, such that an appropriate + The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ - conn = self.connection_from_url(url) - try: - return conn.urlopen(method, url, **kw) + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw['assert_same_host'] = False + kw['redirect'] = False + if 'headers' not in kw: + kw['headers'] = self.headers + + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) - except HostChangedError, e: - kw['retries'] = e.retries # Persist retries countdown - return self.urlopen(method, e.new_url, **kw) + # RFC 2616, Section 10.3.4 + if response.status == 303: + method = 'GET' + log.info("Redirecting %s -> %s" % (url, redirect_location)) + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['redirect'] = redirect + return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): + +class ProxyManager(PoolManager): """ - Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param poxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + """ - def __init__(self, proxy_pool): - self.proxy_pool = proxy_pool + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): - def urlopen(self, method, url, **kw): + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + assert self.proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http'): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme) + + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {'Accept': '*/*'} + + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc + + if headers: + headers_.update(headers) + return headers_ + + def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." - kw['assert_same_host'] = False - return self.proxy_pool.urlopen(method, url, **kw) + u = parse_url(url) + + if u.scheme == "http": + # It's too late to set proxy headers on per-request basis for + # tunnelled HTTPS connections, should use + # constructor's proxy_headers instead. + kw['headers'] = self._set_proxy_headers(url, kw.get('headers', + self.headers)) + kw['headers'].update(self.proxy_headers) + + return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): - proxy_pool = connection_from_url(url, **pool_kw) - return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index a7e0b5d..66a9a0e 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -1,11 +1,13 @@ # urllib3/request.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php - -from urllib import urlencode +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from .filepost import encode_multipart_formdata @@ -28,21 +30,29 @@ class RequestMethods(object): in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are - encoded in the *body* of the request using multipart or www-orm-urlencoded + encoded in the *body* of the request using multipart or www-form-urlencoded (such as for POST, PUT, PATCH). :meth:`.request` is for making any kind of request, it will look up the appropriate encoding format and use one of the above two methods to make the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) + def __init__(self, headers=None): + self.headers = headers or {} + def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -95,13 +105,16 @@ class RequestMethods(object): such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND - key/filetuple. A filetuple is a (filename, data) tuple. For example: :: + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example: :: fields = { 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), 'realfile': ('barfile.txt', open('realfile').read()), - 'nonamefile': ('contents of nonamefile field'), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', } When uploading a file, providing a filename (the first parameter of the @@ -119,27 +132,11 @@ class RequestMethods(object): body, content_type = (urlencode(fields or {}), 'application/x-www-form-urlencoded') - headers = headers or {} - headers.update({'Content-Type': content_type}) - - return self.urlopen(method, url, body=body, headers=headers, - **urlopen_kw) + if headers is None: + headers = self.headers - # Deprecated: + headers_ = {'Content-Type': content_type} + headers_.update(headers) - def get_url(self, url, fields=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_url('GET', url, fields=fields, - **urlopen_kw) - - def post_url(self, url, fields=None, headers=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_body('POST', url, fields=fields, - headers=headers, - **urlopen_kw) + return self.urlopen(method, url, body=body, headers=headers_, + **urlopen_kw) diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index ee2ff66..4efff5a 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -1,39 +1,56 @@ # urllib3/response.py -# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import gzip + import logging import zlib +import io +from .exceptions import DecodeError +from .packages.six import string_types as basestring, binary_type +from .util import is_fp_closed -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO # pylint: disable-msg=W0404 +log = logging.getLogger(__name__) -from .exceptions import HTTPError +class DeflateDecoder(object): -log = logging.getLogger(__name__) + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + def __getattr__(self, name): + return getattr(self._obj, name) -def decode_gzip(data): - gzipper = gzip.GzipFile(fileobj=StringIO(data)) - return gzipper.read() + def decompress(self, data): + if not self._first_try: + return self._obj.decompress(data) + self._data += data + try: + return self._obj.decompress(data) + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None -def decode_deflate(data): - try: - return zlib.decompress(data) - except zlib.error: - return zlib.decompress(data, -zlib.MAX_WBITS) +def _get_decoder(mode): + if mode == 'gzip': + return zlib.decompressobj(16 + zlib.MAX_WBITS) -class HTTPResponse(object): + return DeflateDecoder() + + +class HTTPResponse(io.IOBase): """ HTTP Response container. @@ -56,10 +73,8 @@ class HTTPResponse(object): otherwise unused. """ - CONTENT_DECODERS = { - 'gzip': decode_gzip, - 'deflate': decode_deflate, - } + CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -69,9 +84,10 @@ class HTTPResponse(object): self.version = version self.reason = reason self.strict = strict + self.decode_content = decode_content - self._decode_content = decode_content - self._body = None + self._decoder = None + self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response @@ -81,7 +97,7 @@ class HTTPResponse(object): if hasattr(body, 'read'): self._fp = body - if preload_content: + if preload_content and not self._body: self._body = self.read(decode_content=decode_content) def get_redirect_location(self): @@ -92,7 +108,7 @@ class HTTPResponse(object): code and valid location. ``None`` if redirect status and no location. ``False`` if not a redirect status code. """ - if self.status in [301, 302, 303, 307]: + if self.status in self.REDIRECT_STATUSES: return self.headers.get('location') return False @@ -119,13 +135,13 @@ class HTTPResponse(object): parameters: ``decode_content`` and ``cache_content``. :param amt: - How much of the content to read. If specified, decoding and caching - is skipped because we can't decode partial content nor does it make - sense to cache partial content as the full response. + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. :param decode_content: If True, will attempt to decode the body based on the - 'content-encoding' header. (Overridden if ``amt`` is set.) + 'content-encoding' header. :param cache_content: If True, will save the returned data such that the same result is @@ -134,29 +150,51 @@ class HTTPResponse(object): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - content_encoding = self.headers.get('content-encoding') - decoder = self.CONTENT_DECODERS.get(content_encoding) + # Note: content-encoding value should be case-insensitive, per RFC 2616 + # Section 3.5 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) if decode_content is None: - decode_content = self._decode_content + decode_content = self.decode_content - data = self._fp and self._fp.read(amt) - - try: - - if amt: - return data + if self._fp is None: + return - if not decode_content or not decoder: - if cache_content: - self._body = data + flush_decoder = False - return data + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() + flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do not + # properly close the connection in all cases. There is no harm + # in redundantly calling close. + self._fp.close() + flush_decoder = True try: - data = decoder(data) - except IOError: - raise HTTPError("Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding) + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e) + + if flush_decoder and decode_content and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() if cache_content: self._body = data @@ -164,10 +202,32 @@ class HTTPResponse(object): return data finally: - if self._original_response and self._original_response.isclosed(): self.release_conn() + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -178,12 +238,26 @@ class HTTPResponse(object): with ``original_response=r``. """ + # Normalize headers between different versions of Python + headers = {} + for k, v in r.getheaders(): + # Python 3: Header keys are returned capitalised + k = k.lower() + + has_value = headers.get(k) + if has_value: # Python 3: Repeating header keys are unmerged. + v = ', '.join([has_value, v]) + + headers[k] = v + + # HTTPResponse objects in Python 3 don't have a .strict attribute + strict = getattr(r, 'strict', 0) return ResponseCls(body=r, - headers=dict(r.getheaders()), + headers=headers, status=r.status, version=r.version, reason=r.reason, - strict=r.strict, + strict=strict, original_response=r, **response_kw) @@ -193,3 +267,35 @@ class HTTPResponse(object): def getheader(self, name, default=None): return self.headers.get(name, default) + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + @property + def closed(self): + if self._fp is None: + return True + elif hasattr(self._fp, 'closed'): + return self._fp.closed + elif hasattr(self._fp, 'isclosed'): # Python 2 + return self._fp.isclosed() + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError("The file-like object this HTTPResponse is wrapped " + "around has no file descriptor") + + def flush(self): + if self._fp is not None and hasattr(self._fp, 'flush'): + return self._fp.flush() + + def readable(self): + return True diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py new file mode 100644 index 0000000..266c9ed --- /dev/null +++ b/requests/packages/urllib3/util.py @@ -0,0 +1,626 @@ +# urllib3/util.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +from base64 import b64encode +from binascii import hexlify, unhexlify +from collections import namedtuple +from hashlib import md5, sha1 +from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT +import time + +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + +from .packages import six +from .exceptions import LocationParseError, SSLError, TimeoutStateError + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. + None will set an infinite timeout. + + :type read: integer, float, or None + + :param total: + The maximum amount of time to wait for an HTTP request to connect and + return. This combines the connect and read timeouts into one. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, not + the total amount of time for the request to return a complete response. + As an example, you may want a request to return within 7 seconds or + fail, so you set the ``total`` timeout to 7 seconds. If the server + sends one byte to you every 5 seconds, the request will **not** trigger + time out. This case is admittedly rare. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, connect=_Default, read=_Default, total=None): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. + + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read + + +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example: :: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + auth, url = url.split('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + + port = int(port) + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + return headers + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if not sock: # Platform-specific: AppEngine + return False + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except SocketError: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + + digest_length, rest = divmod(len(fingerprint), 2) + + if rest or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. HTTPResponse) + return obj.fp is None + + return obj.closed + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) |
