Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 29fd64b

Browse files
committed
自动抓取文章,url去除 多余参数
1 parent c6eab42 commit 29fd64b

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

src/logic/article.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ func (self ArticleLogic) ParseArticle(ctx context.Context, articleUrl string, au
4242
articleUrl = "http://" + articleUrl
4343
}
4444

45+
articleUrl = self.cleanUrl(articleUrl, auto)
46+
4547
tmpArticle := &model.Article{}
4648
_, err := MasterDB.Where("url=?", articleUrl).Get(tmpArticle)
4749
if err != nil || tmpArticle.Id != 0 {
@@ -210,6 +212,22 @@ func (self ArticleLogic) ParseArticle(ctx context.Context, articleUrl string, au
210212
return article, nil
211213
}
212214

215+
func (ArticleLogic) cleanUrl(articleUrl string, auto bool) string {
216+
pos := strings.LastIndex(articleUrl, "#")
217+
if pos > 0 {
218+
articleUrl = articleUrl[:pos]
219+
}
220+
// 过滤多余的参数,避免加一个参数就是一个新文章,但实际上是同一篇
221+
if auto {
222+
pos = strings.Index(articleUrl, "?")
223+
if pos > 0 {
224+
articleUrl = articleUrl[:pos]
225+
}
226+
}
227+
228+
return articleUrl
229+
}
230+
213231
func (ArticleLogic) convertByExt(extMap map[string]string, article *model.Article) error {
214232
var err error
215233
if css, ok := extMap["css"]; ok {

0 commit comments

Comments
 (0)