Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 9c2db0f

Browse files
committed
自动抓取,必须是最近3个月内的文章
1 parent c19366c commit 9c2db0f

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

websites/code/studygolang/src/service/article.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ import (
1212
"regexp"
1313
"strconv"
1414
"strings"
15+
"time"
1516

16-
"github.com/PuerkitoBio/goquery"
1717
"logger"
1818
"model"
1919
"util"
20+
21+
"github.com/PuerkitoBio/goquery"
2022
)
2123

2224
var domainPatch = map[string]string{
@@ -140,7 +142,7 @@ func ParseArticle(articleUrl string, auto bool) (*model.Article, error) {
140142
if rule.PubDate != "" {
141143
pubDate = strings.TrimSpace(doc.Find(rule.PubDate).First().Text())
142144

143-
// sochina patch
145+
// oschina patch
144146
re := regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}")
145147
submatches := re.FindStringSubmatch(pubDate)
146148
if len(submatches) > 0 {
@@ -150,6 +152,15 @@ func ParseArticle(articleUrl string, auto bool) (*model.Article, error) {
150152

151153
if pubDate == "" {
152154
pubDate = util.TimeNow()
155+
} else {
156+
// YYYY-MM-dd HH:mm
157+
if len(pubDate) == 16 && auto {
158+
// 三个月之前不入库
159+
pubTime := time.ParseInLocation("2006-01-02 15:04", pubDate, time.Local)
160+
if pubTime.Add(3 * 30 * 86400 * time.Second).Before(time.Now()) {
161+
return nil, errors.New("article is old!")
162+
}
163+
}
153164
}
154165

155166
article := model.NewArticle()

0 commit comments

Comments
 (0)