Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ae507ed

Browse files
committed
version 1.8.6b1, support using the stop_spider method to stop the spider
1 parent 7ac96ce commit ae507ed

File tree

6 files changed

+19
-7
lines changed

6 files changed

+19
-7
lines changed

feapder/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.8.5
1+
1.8.6-beta1

feapder/core/scheduler.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ def __init__(
155155
# 重置丢失的任务
156156
self.reset_task()
157157

158+
self._stop = False
159+
158160
def init_metrics(self):
159161
"""
160162
初始化打点系统
@@ -176,7 +178,7 @@ def run(self):
176178

177179
while True:
178180
try:
179-
if self.all_thread_is_done():
181+
if self._stop or self.all_thread_is_done():
180182
if not self._is_notify_end:
181183
self.spider_end() # 跑完一轮
182184
self._is_notify_end = True
@@ -586,3 +588,6 @@ def reset_task(self, heartbeat_interval=10):
586588
lose_count = len(datas)
587589
if lose_count:
588590
log.info("重置丢失任务完毕,共{}条".format(len(datas)))
591+
592+
def stop_spider(self):
593+
self._stop = True

feapder/core/spiders/air_spider.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def __init__(self, thread_count=None):
4646
db=self._memory_db, dedup_name=self.name
4747
)
4848

49+
self._stop = False
4950
metrics.init(**setting.METRICS_OTHER_ARGS)
5051

5152
def distribute_task(self):
@@ -97,7 +98,7 @@ def run(self):
9798

9899
while True:
99100
try:
100-
if self.all_thread_is_done():
101+
if self._stop or self.all_thread_is_done():
101102
# 停止 parser_controls
102103
for parser_control in self._parser_controls:
103104
parser_control.stop()
@@ -108,7 +109,10 @@ def run(self):
108109
# 关闭webdirver
109110
Request.render_downloader and Request.render_downloader.close_all()
110111

111-
log.info("无任务,爬虫结束")
112+
if self._stop:
113+
log.info("爬虫被停止")
114+
else:
115+
log.info("无任务,爬虫结束")
112116
break
113117

114118
except Exception as e:
@@ -130,3 +134,6 @@ def join(self, timeout=None):
130134
return
131135

132136
super().join()
137+
138+
def stop_spider(self):
139+
self._stop = True

feapder/core/spiders/batch_spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,7 @@ def run(self):
10021002

10031003
while True:
10041004
try:
1005-
if (
1005+
if self._stop or (
10061006
self.task_is_done() and self.all_thread_is_done()
10071007
): # redis全部的任务已经做完 并且mysql中的任务已经做完(检查各个线程all_thread_is_done,防止任务没做完,就更新任务状态,导致程序结束的情况)
10081008
if not self._is_notify_end:

feapder/core/spiders/spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def run(self):
184184

185185
while True:
186186
try:
187-
if self.all_thread_is_done():
187+
if self._stop or self.all_thread_is_done():
188188
if not self._is_notify_end:
189189
self.spider_end() # 跑完一轮
190190
self._is_notify_end = True

feapder/core/spiders/task_spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ def run(self):
516516

517517
while True:
518518
try:
519-
if (
519+
if self._stop or (
520520
self.all_thread_is_done()
521521
and self.task_is_done()
522522
and self.related_spider_is_done()

0 commit comments

Comments
 (0)