Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 676fb4a

Browse files
committed
索引构建搭架子
1 parent c4ff125 commit 676fb4a

File tree

9 files changed

+249
-9
lines changed

9 files changed

+249
-9
lines changed

websites/code/studygolang/conf/config.json.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
"qiniu_secret_key": "xxxxxxxxxxxxxx",
1818
"qiniu_bucket_name": "xxxxxxxxx",
1919

20+
"indexing_url": "",
21+
2022
"crawl_host": ":7070",
2123
"crawl_spec": "0 0 */1 * * ?"
2224
}

websites/code/studygolang/install

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ gofmt -w src
1919

2020
go install server/studygolang
2121
go install server/crawlarticle
22+
go install server/indexer
2223

2324
export GOPATH="$OLDGOPATH"
2425
export PATH="$OLDPATH"

websites/code/studygolang/install.bat

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ gofmt -w src
1818
:: -tags "debug" 表示测试
1919
go install -tags "debug" server/studygolang
2020
go install -tags "debug" server/crawlarticle
21+
go install -tags "debug" server/indexer
2122

2223
set GOPATH=%OLDGOPATH%
2324

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright 2014 The StudyGolang Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// http://studygolang.com
5+
// Author:polaris [email protected]
6+
7+
package model
8+
9+
import (
10+
"fmt"
11+
"regexp"
12+
"strings"
13+
)
14+
15+
// Document is the flat record handed to Solr for indexing.
type Document struct {
	// Id is the document key. NewDocument builds it by concatenating the
	// object type and the object id.
	Id string `json:"id"`
	// Objid is the id of the source object.
	Objid int `json:"objid"`
	// Objtype is the type constant of the source object (e.g. TYPE_ARTICLE).
	Objtype int `json:"objtype"`
	// Title is the title of the source object.
	Title string `json:"title"`
	// Author is the author's display name.
	Author string `json:"author"`
	// PubTime is the publication time, carried as a string.
	PubTime string `json:"pub_time"`
	// Content is the body text of the source object.
	Content string `json:"content"`
	// Tags holds the tags of the source object as a single string.
	Tags string `json:"tags"`
	// Viewnum is the view counter.
	Viewnum int `json:"viewnum"`
	// Cmtnum is the comment counter.
	Cmtnum int `json:"cmtnum"`
	// Likenum is the like counter.
	Likenum int `json:"likenum"`
}
29+
30+
func NewDocument(object interface{}, objectExt interface{}) *Document {
31+
var document *Document
32+
switch objdoc := object.(type) {
33+
case *Topic:
34+
case *Article:
35+
document = &Document{
36+
Id: fmt.Sprintf("%d%d", TYPE_ARTICLE, objdoc.Id),
37+
Objid: objdoc.Id,
38+
Objtype: TYPE_ARTICLE,
39+
Title: filterTxt(objdoc.Title),
40+
Author: objdoc.AuthorTxt,
41+
PubTime: objdoc.PubDate,
42+
Content: filterTxt(objdoc.Txt),
43+
Tags: objdoc.Tags,
44+
Viewnum: objdoc.Viewnum,
45+
Cmtnum: objdoc.Cmtnum,
46+
Likenum: objdoc.Likenum,
47+
}
48+
case *Resource:
49+
case *Wiki:
50+
}
51+
52+
return document
53+
}
54+
55+
// re matches a run of CR, LF, tab, vertical tab or space characters.
var re = regexp.MustCompile("[\r\n\t\v ]+")

// filterTxt pre-processes text before it is indexed.
//
// It strips the leading markers "原", "荐", "顶" and "转" (presumably label
// badges picked up together with the text; verify against the crawler),
// trimming surrounding whitespace after each step, and then collapses every
// whitespace run into a single space.
func filterTxt(txt string) string {
	// Each marker is tried exactly once, in this order, so a marker that only
	// becomes the prefix after a later one was removed is left in place.
	for _, marker := range [...]string{"原", "荐", "顶", "转"} {
		txt = strings.TrimSpace(strings.TrimPrefix(txt, marker))
	}

	return re.ReplaceAllLiteralString(txt, " ")
}
66+
67+
type AddCommand struct {
68+
Doc *Document `json:"doc"`
69+
Boost float64 `json:"boost,omitempty"`
70+
Overwrite bool `json:"overwrite"`
71+
CommitWithin int `json:"commitWithin,omitempty"`
72+
}
73+
74+
func NewDefaultArgsAddCommand(doc *Document) *AddCommand {
75+
return NewAddCommand(doc, 0.0, true, 0)
76+
}
77+
78+
func NewAddCommand(doc *Document, boost float64, overwrite bool, commitWithin int) *AddCommand {
79+
return &AddCommand{
80+
Doc: doc,
81+
Boost: boost,
82+
Overwrite: overwrite,
83+
CommitWithin: commitWithin,
84+
}
85+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// Copyright 2014 The StudyGolang Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// http://studygolang.com
5+
// Author:polaris [email protected]
6+
7+
package main
8+
9+
import (
10+
"math/rand"
11+
"runtime"
12+
"time"
13+
//"path/filepath"
14+
15+
"github.com/robfig/cron"
16+
"logger"
17+
//"process"
18+
"service"
19+
)
20+
21+
// init seeds the global random source from the current time and lets the
// runtime use as many OS threads as there are CPUs.
func init() {
	// Seed the random number generator.
	rand.Seed(time.Now().Unix())

	runtime.GOMAXPROCS(runtime.NumCPU())
}
26+
27+
func main() {
28+
29+
c := cron.New()
30+
// 构建 solr 需要的索引数据
31+
// 一天一次全量
32+
c.AddFunc("*/20 * * * * *", func() {
33+
logger.Infoln("indexing start...")
34+
35+
start := time.Now()
36+
defer func() {
37+
logger.Infoln("indexing spend time:", time.Now().Sub(start))
38+
}()
39+
40+
service.Indexing(true)
41+
})
42+
43+
c.Start()
44+
45+
select {}
46+
}
47+
48+
// SavePid is meant to save the process ID to the configured pid file.
// It is currently a no-op: the implementation is disabled.
func SavePid() {
	// Disabled implementation, kept for reference:
	//
	//	pidFile := Config["pid"]
	//	if !filepath.IsAbs(Config["pid"]) {
	//		pidFile = ROOT + "/" + pidFile
	//	}
	//	// TODO: the error is not handled
	//	process.SavePidTo(pidFile)
}

websites/code/studygolang/src/server/studygolang/background.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,6 @@ func ServeBackGround() {
3333
// 两分钟刷一次浏览数(TODO:重启丢失问题?信号控制重启?)
3434
c.AddFunc("@every 2m", service.Views.Flush)
3535

36-
// 构建 solr 需要的索引数据
37-
// 一天一次全量
38-
c.AddFunc("0 20 0 * * *", func() {
39-
40-
})
41-
4236
c.Start()
4337
}
4438

websites/code/studygolang/src/service/article.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var domainPatch = map[string]string{
2626

2727
// 获取url对应的文章并根据规则进行解析
2828
func ParseArticle(articleUrl string, auto bool) (*model.Article, error) {
29+
articleUrl = strings.TrimSpace(articleUrl)
2930
if !strings.HasPrefix(articleUrl, "http") {
3031
articleUrl = "http://" + articleUrl
3132
}

websites/code/studygolang/src/service/searcher.go

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,36 @@
77
package service
88

99
import (
10+
"encoding/json"
11+
"errors"
12+
"net/http"
13+
"strconv"
14+
15+
"config"
1016
"logger"
1117
"model"
18+
"util"
1219
)
1320

21+
const MaxRows = 100
22+
1423
// 准备索引数据,post 给 solr
1524
// isAll: 是否全量
1625
func Indexing(isAll bool) {
17-
26+
IndexingArticle(isAll)
1827
}
1928

2029
// 索引博文
2130
func IndexingArticle(isAll bool) {
22-
article := model.NewArticle()
31+
solrClient := NewSolrClient()
2332

33+
articleObj := model.NewArticle()
34+
35+
limit := strconv.Itoa(MaxRows)
2436
if isAll {
2537
id := 0
2638
for {
27-
articleList, err := article.Where("id>?", id).FindAll()
39+
articleList, err := articleObj.Where("id>? AND status!=?", id, model.StatusOffline).Limit(limit).FindAll()
2840
if err != nil {
2941
logger.Errorln("IndexingArticle error:", err)
3042
break
@@ -34,6 +46,80 @@ func IndexingArticle(isAll bool) {
3446
break
3547
}
3648

49+
for _, article := range articleList {
50+
if id < article.Id {
51+
id = article.Id
52+
}
53+
54+
document := model.NewDocument(article, nil)
55+
addCommand := model.NewDefaultArgsAddCommand(document)
56+
57+
solrClient.Push(addCommand)
58+
}
59+
60+
solrClient.Post()
61+
}
62+
}
63+
}
64+
65+
type SolrClient struct {
66+
addCommands []*model.AddCommand
67+
}
68+
69+
func NewSolrClient() *SolrClient {
70+
return &SolrClient{
71+
addCommands: make([]*model.AddCommand, 0, MaxRows),
72+
}
73+
}
74+
75+
func (this *SolrClient) Push(addCommand *model.AddCommand) {
76+
this.addCommands = append(this.addCommands, addCommand)
77+
}
78+
79+
func (this *SolrClient) Post() error {
80+
stringBuilder := util.NewBuffer().Append("{")
81+
82+
needComma := false
83+
for _, addCommand := range this.addCommands {
84+
commandJson, err := json.Marshal(addCommand)
85+
if err != nil {
86+
continue
87+
}
88+
89+
if stringBuilder.Len() == 1 {
90+
needComma = false
91+
} else {
92+
needComma = true
3793
}
94+
95+
if needComma {
96+
stringBuilder.Append(",")
97+
}
98+
99+
stringBuilder.Append(`"add":`).AppendBytes(commandJson)
38100
}
101+
102+
if stringBuilder.Len() == 1 {
103+
logger.Errorln("post docs:no right addcommand")
104+
return errors.New("no right addcommand")
105+
}
106+
107+
stringBuilder.Append("}")
108+
109+
resp, err := http.Post(config.Config["indexing_url"], "application/json", stringBuilder)
110+
if err != nil {
111+
logger.Errorln("post error:", err)
112+
return err
113+
}
114+
115+
defer resp.Body.Close()
116+
117+
var result map[string]interface{}
118+
err = json.NewDecoder(resp.Body).Decode(&result)
119+
if err != nil {
120+
logger.Errorln("parse response error:", err)
121+
return err
122+
}
123+
124+
return nil
39125
}

websites/code/studygolang/src/util/buffer.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,15 @@ func (this *Buffer) Append(s string) *Buffer {
3434
func (this *Buffer) AppendInt(i int) *Buffer {
3535
return this.Append(strconv.Itoa(i))
3636
}
37+
38+
func (this *Buffer) AppendBytes(p []byte) *Buffer {
39+
defer func() {
40+
if err := recover(); err != nil {
41+
log.Println("*****内存不够了!******")
42+
}
43+
}()
44+
45+
this.Buffer.Write(p)
46+
47+
return this
48+
}

0 commit comments

Comments
 (0)