@@ -11,6 +11,7 @@ import (
11
11
"errors"
12
12
"math/rand"
13
13
"regexp"
14
+ "strconv"
14
15
"strings"
15
16
"time"
16
17
@@ -22,7 +23,7 @@ import (
22
23
23
24
// URL pieces used by the Reddit crawler in this file.
const (
24
25
// Reddit is the site's base URL; it is prepended to the relative URL of
// "self" posts before the resource is looked up.
Reddit = "http://www.reddit.com"
25
// RedditGolang is a path under the golang subreddit; presumably it is
// appended to Reddit to build the listing URL - confirm against the caller.
// NOTE(review): the new value ends with a space inside the quotes - confirm
// whether that is intentional or an artifact of how this diff was extracted.
- RedditGolang = "/r/golang"
26
+ RedditGolang = "/r/golang/new/ "
26
27
)
27
28
28
29
// Fetch the articles at the given url and parse them according to the rules
@@ -44,14 +45,27 @@ func ParseReddit(redditUrl string) error {
44
45
return err
45
46
}
46
47
47
- doc .Find ("#siteTable .link" ).Each (func (i int , contentSelection * goquery.Selection ) {
48
+ /*
49
+ doc.Find("#siteTable .link").Each(func(i int, contentSelection *goquery.Selection) {
48
50
49
- err = dealRedditOneResource (contentSelection )
51
+ err = dealRedditOneResource(contentSelection)
52
+
53
+ if err != nil {
54
+ logger.Errorln(err)
55
+ }
56
+ })
57
+ */
58
+
59
+ // 最后面的先入库处理
60
+ resourcesSelection := doc .Find ("#siteTable .link" )
61
+
62
+ for i := resourcesSelection .Length () - 1 ; i >= 0 ; i -- {
63
+ err = dealRedditOneResource (goquery .NewDocumentFromNode (resourcesSelection .Get (i )).Selection )
50
64
51
65
if err != nil {
52
66
logger .Errorln (err )
53
67
}
54
- })
68
+ }
55
69
56
70
return err
57
71
}
@@ -74,19 +88,25 @@ func dealRedditOneResource(contentSelection *goquery.Selection) error {
74
88
return errors .New ("resource url is empty" )
75
89
}
76
90
91
+ isReddit := false
92
+
77
93
resource := model .NewResource ()
78
94
// Reddit 自身的内容
79
95
if contentSelection .HasClass ("self" ) {
96
+ isReddit = true
80
97
resourceUrl = Reddit + resourceUrl
81
98
}
82
99
83
100
err := resource .Where ("url=?" , resourceUrl ).Find ("id" )
84
101
// 已经存在
85
102
if resource .Id != 0 {
86
- return errors .New ("url" + resourceUrl + "has exists!" )
103
+ // 如果是 reddit 本身的,可以更新评论信息
104
+ if ! isReddit {
105
+ return errors .New ("url" + resourceUrl + "has exists!" )
106
+ }
87
107
}
88
108
89
- if contentSelection . HasClass ( "self" ) {
109
+ if isReddit {
90
110
91
111
resource .Form = model .ContentForm
92
112
@@ -149,18 +169,24 @@ func dealRedditOneResource(contentSelection *goquery.Selection) error {
149
169
}
150
170
resource .Ctime = ctime
151
171
152
- var id int64
153
- id , err = resource .Insert ()
172
+ if resource .Id == 0 {
173
+ var id int64
174
+ id , err = resource .Insert ()
154
175
155
- if err != nil {
156
- return errors .New ("insert into Resource error:" + err .Error ())
157
- }
176
+ if err != nil {
177
+ return errors .New ("insert into Resource error:" + err .Error ())
178
+ }
158
179
159
- // 存扩展信息
160
- resourceEx := model .NewResourceEx ()
161
- resourceEx .Id = int (id )
162
- if _ , err = resourceEx .Insert (); err != nil {
163
- return errors .New ("insert into ResourceEx error:" + err .Error ())
180
+ // 存扩展信息
181
+ resourceEx := model .NewResourceEx ()
182
+ resourceEx .Id = int (id )
183
+ if _ , err = resourceEx .Insert (); err != nil {
184
+ return errors .New ("insert into ResourceEx error:" + err .Error ())
185
+ }
186
+ } else {
187
+ if err = resource .Persist (resource ); err != nil {
188
+ return errors .New ("persist resource:" + strconv .Itoa (resource .Id ) + " error:" + err .Error ())
189
+ }
164
190
}
165
191
166
192
return nil
0 commit comments