Index: Lib/httplib.py
===================================================================
--- Lib/httplib.py (revision 88454)
+++ Lib/httplib.py (working copy)
@@ -695,7 +695,8 @@
self.strict = strict
def set_tunnel(self, host, port=None, headers=None):
- """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
+ """ Sets up the host and the port that the HTTP CONNECT will tunnel to.
+ self.host, self.port are the host and port of the proxy server.
The headers argument should be a mapping of extra HTTP headers
to send with the CONNECT request.
@@ -729,25 +730,18 @@
def _tunnel(self):
self._set_hostport(self._tunnel_host, self._tunnel_port)
- self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
+ connect = ["CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)]
for header, value in self._tunnel_headers.iteritems():
- self.send("%s: %s\r\n" % (header, value))
- self.send("\r\n")
+ connect.append("%s: %s\r\n" % (header, value))
+ connect.append("\r\n")
+ self.send("".join(connect))
response = self.response_class(self.sock, strict = self.strict,
method = self._method)
- (version, code, message) = response._read_status()
+ response.begin()
- if code != 200:
- self.close()
- raise socket.error("Tunnel connection failed: %d %s" % (code,
- message.strip()))
- while True:
- line = response.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("header line")
- if line == '\r\n': break
+ if response.status != 200:
+ raise ProxyTunnelError(response)
-
def connect(self):
"""Connect to the host and port specified in __init__."""
self.sock = socket.create_connection((self.host,self.port),
@@ -775,7 +769,8 @@
raise NotConnected()
if self.debuglevel > 0:
- print "send:", repr(data)
+ for line in data.split('\r\n'):
+ print "send:", repr(line)
blocksize = 8192
if hasattr(data,'read') and not isinstance(data, array):
if self.debuglevel > 0: print "sendIng a read()able"
@@ -823,7 +818,6 @@
`skip_accept_encoding' if True does not add automatically an
'Accept-Encoding:' header
"""
-
# if a prior response has been completed, then forget about it.
if self.__response and self.__response.isclosed():
self.__response = None
@@ -1077,7 +1071,6 @@
def connect(self, host=None, port=None):
"Accept arguments to set the host/port, since the superclass doesn't."
-
if host is not None:
self._conn._set_hostport(host, port)
self._conn.connect()
@@ -1149,7 +1142,6 @@
def connect(self):
"Connect to a host on a given (SSL) port."
-
sock = socket.create_connection((self.host, self.port),
self.timeout, self.source_address)
if self._tunnel_host:
@@ -1197,6 +1189,14 @@
# or define self.args. Otherwise, str() will fail.
pass
+class ProxyTunnelError(HTTPException):
+ def __init__(self, response):
+ self.response = response
+
+ def __str__(self):
+ return "ProxyTunnelError(HTTPResponse(code=%d, reason=%s))" % (
+ self.response.status, self.response.reason)
+
class NotConnected(HTTPException):
pass
Index: Lib/urllib2.py
===================================================================
--- Lib/urllib2.py (revision 88454)
+++ Lib/urllib2.py (working copy)
@@ -185,6 +185,10 @@
return host.lower()
class Request:
+ HTTP_DIRECT = 1 # HTTP connection direct to host
+ HTTPS_DIRECT = 2 # HTTPS connection direct to host
+ HTTP_PROXY = 3 # HTTP connect to proxy
+ HTTPS_PROXY = 4 # HTTP connect to proxy tunnelling HTTPS to host
def __init__(self, url, data=None, headers={},
origin_req_host=None, unverifiable=False):
@@ -195,7 +199,7 @@
# self.__r_type is what's left after doing the splittype
self.host = None
self.port = None
- self._tunnel_host = None
+ self.__tunnel_host = None
self.data = data
self.headers = {}
for key, value in headers.items():
@@ -206,18 +210,73 @@
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
- def __getattr__(self, attr):
- # XXX this is a fallback mechanism to guard against these
- # methods getting called in a non-standard order. this may be
- # too complicated and/or unnecessary.
- # XXX should the __r_XXX attributes be public?
- if attr[:12] == '_Request__r_':
- name = attr[12:]
- if hasattr(Request, 'get_' + name):
- getattr(self, 'get_' + name)()
- return getattr(self, attr)
- raise AttributeError, attr
+ self.get_type()
+ self.get_host()
+ if self.type == 'http':
+ self.connection_type = self.HTTP_DIRECT
+ else:
+ self.connection_type = self.HTTPS_DIRECT
+
+ def set_proxy(self, host, type):
+ assert type == 'http', 'HTTP Proxies are always http'
+
+ if self.connection_type == self.HTTP_DIRECT:
+ self.connection_type = self.HTTP_PROXY
+ self.__r_host = self.__original
+ self.host = host
+
+ elif self.connection_type == self.HTTPS_DIRECT:
+ self.connection_type = self.HTTPS_PROXY
+ self.__tunnel_host = self.host
+ self.host = host
+
+ else:
+ assert self.host == host, 'Attempt to change proxy settings'
+
+ def has_https_proxy( self ):
+ return self.connection_type ==self.HTTPS_PROXY
+
+ def has_http_proxy( self ):
+ return self.connection_type == self.HTTP_PROXY
+
+ def has_proxy(self):
+ return self.connection_type in (self.HTTP_PROXY, self.HTTPS_PROXY)
+
+ def get_tunnel_host(self):
+ return self.__tunnel_host
+
+ def get_origin_passwd_lookup_uri(self):
+ # always use the original url in full
+ return self.get_full_url()
+
+ def get_proxy_passwd_lookup_uri(self):
+ # the proxy address is in host now
+ return self.get_host()
+
+ def get_origin_auth_uri(self):
+ # called by get_authorization
+ # return the selector (the /X path part of the URL) on the origin server
+ if self.connection_type in (self.HTTP_PROXY, self.HTTPS_PROXY):
+ return self.__origin_selector
+ else:
+ # HTTP_DIRECT and HTTPS_DIRECT
+ return self.__r_host
+
+ def get_proxy_auth_uri( self ):
+ # called by get_authorization
+ # return the uri that is used for digest auth
+ if self.connection_type in (self.HTTP_DIRECT, self.HTTPS_DIRECT):
+ return self.__r_host
+
+ elif self.connection_type == self.HTTP_PROXY:
+ # HTTP_PROXY: use the original request URL
+ return self.__original
+
+ elif self.connection_type == self.HTTPS_PROXY:
+ # HTTPS_PROXY: self.host holds the proxy address here
+ return self.host
+
def get_method(self):
if self.has_data():
return "POST"
@@ -225,6 +284,8 @@
return "GET"
# XXX these helper methods are lame
+ # XXX no, these methods allow for debugging
+ # problems with the Request object
def add_data(self, data):
self.data = data
@@ -248,6 +309,7 @@
def get_host(self):
if self.host is None:
self.host, self.__r_host = splithost(self.__r_type)
+ self.__origin_selector = self.__r_host
if self.host:
self.host = unquote(self.host)
return self.host
@@ -255,18 +317,9 @@
def get_selector(self):
return self.__r_host
- def set_proxy(self, host, type):
- if self.type == 'https' and not self._tunnel_host:
- self._tunnel_host = self.host
- else:
- self.type = type
- self.__r_host = self.__original
+ def get_origin_selector(self):
+ return self.__origin_selector
- self.host = host
-
- def has_proxy(self):
- return self.__r_host == self.__original
-
def get_origin_req_host(self):
return self.origin_req_host
@@ -307,6 +360,21 @@
self.process_response = {}
self.process_request = {}
+ self._debuglevel = 0
+
+ def set_debuglevel( self, debuglevel ):
+ # set the debuglevel of all of the handlers
+ # by calling their set_debuglevel or set_http_debuglevel
+ # methods.
+ self._debuglevel = debuglevel
+ print 'debug: OpenerDirector.set_debuglevel %r' % (self._debuglevel,)
+
+ for handler in self.handlers:
+ if hasattr(handler, 'set_debuglevel'):
+ handler.set_debuglevel(debuglevel)
+ elif hasattr(handler, 'set_http_debuglevel'):
+ handler.set_http_debuglevel(debuglevel)
+
def add_handler(self, handler):
if not hasattr(handler, "add_parent"):
raise TypeError("expected BaseHandler instance, got %r" %
@@ -362,14 +430,28 @@
# Handlers raise an exception if no one else should try to handle
# the request, or return None if they can't but another handler
# could. Otherwise, they return the response.
+ if self._debuglevel >= 2:
+ print 'debug: _call_chain( kind %r, meth_name %r, args %r, chain %r )' % (kind, meth_name, args, chain)
+
handlers = chain.get(kind, ())
for handler in handlers:
func = getattr(handler, meth_name)
+ if self._debuglevel >= 2:
+ print 'debug: _call_chain handler %r' % (handler,)
+ print 'debug: _call_chain func %r' % (func,)
+
result = func(*args)
+
+ if self._debuglevel >= 2:
+ print 'debug: _call_chain result %r' % (result,)
+
if result is not None:
return result
+ if self._debuglevel >= 2:
+ print 'debug: _call_chain no handler matched'
+
def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
# accept a URL or a Request object
if isinstance(fullurl, basestring):
@@ -414,6 +496,9 @@
'unknown_open', req)
def error(self, proto, *args):
+ if self._debuglevel >= 2:
+ print 'debug: OpenerDirector.error proto %r args %r' % (proto, args)
+
if proto in ('http', 'https'):
# XXX http[s] protocols are special-cased
dict = self.handle_error['http'] # https is not different than http
@@ -921,11 +1006,17 @@
self.retried = 0
self.nonce_count = 0
self.last_nonce = None
+ self._debuglevel = 0
+ def set_debuglevel(self, debuglevel):
+ self._debuglevel = debuglevel
+
def reset_retry_count(self):
self.retried = 0
def http_error_auth_reqed(self, auth_header, host, req, headers):
+ proxy_auth_reqed = auth_header=='proxy-authenticate'
+
authreq = headers.get(auth_header, None)
if self.retried > 5:
# Don't fail endlessly - if we failed once, we'll probably
@@ -933,19 +1024,24 @@
# prompting for the information. Crap. This isn't great
# but it's better than the current 'repeat until recursion
# depth exceeded' approach