Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit db26be2

Browse files
committed
下载器抽取出来
1 parent 70507b7 commit db26be2

File tree

6 files changed

+68
-35
lines changed

6 files changed

+68
-35
lines changed
Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
from feapder.network.response import Response
2-
import abc
3-
4-
5-
class Downloader:
6-
@abc.abstractmethod
7-
def download(self, method, url, **kwargs) -> Response:
8-
raise NotImplementedError
1+
from .base import Downloader
2+
from ._requests import RequestsDownloader
3+
from ._requests import RequestsSessionDownloader

feapder/network/downloader/_requests.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,35 @@
88
99
"""
1010

11+
import requests
12+
from requests.adapters import HTTPAdapter
1113

12-
class _Requests:
13-
pass
14+
from feapder.network.downloader import Downloader
15+
from feapder.network.response import Response
16+
17+
18+
class RequestsDownloader(Downloader):
    """Downloader that issues every request through ``requests.request``."""

    def download(self, method, url, **kwargs) -> Response:
        """Send one request and return it wrapped in ``Response``.

        Args:
            method: HTTP method name, forwarded to ``requests.request``.
            url: Target URL, forwarded to ``requests.request``.
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``requests.request``.

        Returns:
            The ``Response`` built from the object ``requests.request`` returned.
        """
        raw_response = requests.request(method, url, **kwargs)
        return Response(raw_response)
23+
24+
25+
class RequestsSessionDownloader(Downloader):
    """Downloader that sends every request through one shared ``requests.Session``."""

    # Lazily created session; stored on the class so that all instances of the
    # same class reuse it.
    session = None

    @property
    def _session(self):
        """Return the shared session, creating and configuring it on first use."""
        cls = self.__class__
        if not cls.session:
            new_session = requests.Session()
            # pool_connections: number of urllib3 connection pools to cache.
            # pool_maxsize: maximum number of connections kept in a pool.
            http_adapter = HTTPAdapter(pool_connections=1000, pool_maxsize=1000)
            # A session uses the adapter whose mounted prefix matches the start
            # of the URL, trying longer prefixes first. A new Session already
            # has "http://" and "https://" mounted, so an adapter mounted on the
            # shorter prefix "http" would never be selected. Mount on the full
            # scheme prefixes so this adapter replaces the defaults.
            for prefix in ("http://", "https://"):
                new_session.mount(prefix, http_adapter)
            # Publish only after configuration is complete.
            cls.session = new_session

        return cls.session

    def download(self, method, url, **kwargs) -> Response:
        """Send one request through the shared session.

        Args:
            method: HTTP method name, forwarded to ``Session.request``.
            url: Target URL, forwarded to ``Session.request``.
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``Session.request``.

        Returns:
            The ``Response`` built from the object ``Session.request`` returned.
        """
        raw_response = self._session.request(method, url, **kwargs)
        return Response(raw_response)

feapder/network/downloader/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import abc
2+
3+
from feapder.network.response import Response
4+
5+
6+
class Downloader(abc.ABC):
    """Abstract base class for pluggable downloaders.

    Deriving from ``abc.ABC`` is what makes ``@abc.abstractmethod`` effective:
    a subclass that does not override ``download`` cannot be instantiated.
    """

    @abc.abstractmethod
    def download(self, method, url, **kwargs) -> "Response":
        """Perform one request and return the wrapped response.

        Args:
            method: HTTP method name supplied by the caller.
            url: Target URL.
            **kwargs: Extra options for the concrete implementation.

        Returns:
            A ``Response`` object (produced by concrete subclasses).

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

feapder/network/request.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,17 @@
88
99
"""
1010

11+
import importlib
12+
1113
import requests
12-
from requests.adapters import HTTPAdapter
1314
from requests.cookies import RequestsCookieJar
1415
from requests.packages.urllib3.exceptions import InsecureRequestWarning
1516

1617
import feapder.setting as setting
1718
import feapder.utils.tools as tools
1819
from feapder.db.redisdb import RedisDB
1920
from feapder.network import user_agent
21+
from feapder.network.downloader import Downloader
2022
from feapder.network.proxy_pool import ProxyPool
2123
from feapder.network.response import Response
2224
from feapder.utils.log import log
@@ -26,8 +28,13 @@
2628
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
2729

2830

31+
def import_cls(cls_info) -> "Downloader":
    """Instantiate the class named by a dotted import path.

    Args:
        cls_info: Dotted path of the form ``"package.module.ClassName"``.

    Returns:
        A new instance of the named class, created with no arguments.

    Raises:
        ValueError: If ``cls_info`` contains no ``"."`` separating the module
            path from the class name.
        ImportError: If the module part cannot be imported.
        AttributeError: If the module has no attribute with the class name.
    """
    module_path, separator, class_name = cls_info.rpartition(".")
    if not separator:
        raise ValueError(
            "expected a dotted path like 'package.module.ClassName', got %r"
            % (cls_info,)
        )
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls()
35+
36+
2937
class Request(object):
30-
session = None
3138
webdriver_pool: WebDriverPool = None
3239
user_agent_pool = user_agent
3340
proxies_pool: ProxyPool = None
@@ -36,8 +43,9 @@ class Request(object):
3643
cached_redis_key = None # 缓存response的文件文件夹 response_cached:cached_redis_key:md5
3744
cached_expire_time = 1200 # 缓存过期时间
3845

39-
local_filepath = None
40-
oss_handler = None
46+
# 下载器
47+
downloader = import_cls(setting.DOWNLOADER)
48+
session_downloader = import_cls(setting.SESSION_DOWNLOADER)
4149

4250
__REQUEST_ATTRS__ = {
4351
# 'method', 'url', 必须传递 不加入**kwargs中
@@ -175,20 +183,6 @@ def __setattr__(self, key, value):
175183
def __lt__(self, other):
176184
return self.priority < other.priority
177185

178-
@property
179-
def _session(self):
180-
use_session = (
181-
setting.USE_SESSION if self.use_session is None else self.use_session
182-
) # self.use_session 优先级高
183-
if use_session and not self.__class__.session:
184-
self.__class__.session = requests.Session()
185-
# pool_connections – 缓存的 urllib3 连接池个数 pool_maxsize – 连接池中保存的最大连接数
186-
http_adapter = HTTPAdapter(pool_connections=1000, pool_maxsize=1000)
187-
# 任何使用该session会话的 HTTP 请求,只要其 URL 是以给定的前缀开头,该传输适配器就会被使用到。
188-
self.__class__.session.mount("http", http_adapter)
189-
190-
return self.__class__.session
191-
192186
@property
193187
def _webdriver_pool(self):
194188
if not self.__class__.webdriver_pool:
@@ -392,11 +386,13 @@ def get_response(self, save_cached=False):
392386
raise e
393387

394388
elif use_session:
395-
response = self._session.request(method, self.url, **self.requests_kwargs)
396-
response = Response(response)
389+
response = self.session_downloader.download(
390+
method, self.url, **self.requests_kwargs
391+
)
397392
else:
398-
response = requests.request(method, self.url, **self.requests_kwargs)
399-
response = Response(response)
393+
response = self.downloader.download(
394+
method, self.url, **self.requests_kwargs
395+
)
400396

401397
if save_cached:
402398
self.save_cached(response, expire_time=self.__class__.cached_expire_time)

feapder/setting.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@
112112
# requests 使用session
113113
USE_SESSION = False
114114

115+
# 下载
116+
DOWNLOADER = "feapder.network.downloader.RequestsDownloader"
117+
SESSION_DOWNLOADER = "feapder.network.downloader.RequestsSessionDownloader"
118+
115119
# 去重
116120
ITEM_FILTER_ENABLE = False # item 去重
117121
ITEM_FILTER_SETTING = dict(

tests/air-spider/test_air_spider.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212

1313

1414
class TestAirSpider(feapder.AirSpider):
15-
# __custom_setting__ = dict(
16-
# LOG_LEVEL = "INFO"
17-
# )
15+
__custom_setting__ = dict(
16+
USE_SESSION = True
17+
)
1818

1919
def start_callback(self):
2020
print("爬虫开始")

0 commit comments

Comments
 (0)