Scrapy 下载器中间件（downloader middlewares）
from scrapy.settings import default_settings
# 以下条目取自 default_settings.DOWNLOADER_MIDDLEWARES_BASE：内置下载器中间件及其顺序值（数值越小越靠近引擎，越大越靠近下载器）
'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware': 100,
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 300,
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware': 350,
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware': 400,
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 500,
'scrapy.downloadermiddlewares.retry.RetryMiddleware': 550,
'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware': 560,
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware': 580,
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 590,
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware': 600,
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': 700,
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 750,
'scrapy.downloadermiddlewares.stats.DownloaderStats': 850,
'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware': 900,
from scrapy.downloadermiddlewares.robotstxt import RobotsTxtMiddleware
from scrapy.downloadermiddlewares.httpauth import HttpAuthMiddleware
from scrapy.downloadermiddlewares.downloadtimeout import DownloadTimeoutMiddleware
from scrapy.downloadermiddlewares.defaultheaders import DefaultHeadersMiddleware
from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware
from scrapy.downloadermiddlewares.retry import RetryMiddleware
from scrapy.downloadermiddlewares.ajaxcrawl import AjaxCrawlMiddleware
from scrapy.downloadermiddlewares.redirect import MetaRefreshMiddleware
from scrapy.downloadermiddlewares.httpcompression import HttpCompressionMiddleware
from scrapy.downloadermiddlewares.redirect import RedirectMiddleware
from scrapy.downloadermiddlewares.cookies import CookiesMiddleware
from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
from scrapy.downloadermiddlewares.stats import DownloaderStats
from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
参数
# settings
USER_AGENT
ROBOTSTXT_OBEY {bool}
DOWNLOAD_TIMEOUT {float}
DEFAULT_REQUEST_HEADERS
RETRY_ENABLED {bool}
RETRY_TIMES {int}
RETRY_HTTP_CODES
RETRY_PRIORITY_ADJUST
AJAXCRAWL_ENABLED {bool}
AJAXCRAWL_MAXSIZE {int} 32768
REDIRECT_MAX_METAREFRESH_DELAY {int} 已弃用，请改用 METAREFRESH_MAXDELAY
METAREFRESH_MAXDELAY {int}
COMPRESSION_ENABLED {bool}
COOKIES_ENABLED {bool}
COOKIES_DEBUG {bool}
HTTPPROXY_ENABLED
HTTPPROXY_AUTH_ENCODING
DOWNLOADER_STATS {bool}
HTTPCACHE_ENABLED {bool}
HTTPCACHE_POLICY
HTTPCACHE_STORAGE
HTTPCACHE_IGNORE_MISSING
CONCURRENT_REQUESTS {int} 下载器同时处理的最大并发请求数
DOWNLOAD_DELAY {float} 对同一网站连续两次请求之间的最小等待时间（秒）
HTTPERROR_ALLOWED_CODES
# request
dont_filter {bool}
# request.meta
dont_obey_robotstxt {bool}
download_timeout {float}
dont_retry {bool} False
retry_times {int} 0
max_retry_times
ajax_crawlable {bool}
dont_redirect {bool} False
dont_merge_cookies {bool} False
cookiejar
proxy
dont_cache {bool} False
# request.headers
User-Agent
Cookie
Proxy-Authorization
# response.headers
Set-Cookie
# spider
http_user {str}
http_pass {str}
download_timeout {float}
user_agent
handle_httpstatus_list []