@@ -8,14 +8,18 @@ package logic
8
8
9
9
import (
10
10
"errors"
11
+ "math/rand"
11
12
"model"
12
13
"net/url"
13
14
"strconv"
14
15
"strings"
15
16
"time"
17
+ "util"
16
18
17
19
. "db"
18
20
21
+ "github.com/PuerkitoBio/goquery"
22
+ "github.com/lunny/html2md"
19
23
"github.com/polaris1119/logger"
20
24
"golang.org/x/net/context"
21
25
)
@@ -212,6 +216,183 @@ func (ProjectLogic) getOwner(ctx context.Context, id int) int {
212
216
return user .Uid
213
217
}
214
218
219
+ // ParseProjectList 解析其他网站的开源项目
220
+ func (self ProjectLogic ) ParseProjectList (pUrl string ) error {
221
+ pUrl = strings .TrimSpace (pUrl )
222
+ if ! strings .HasPrefix (pUrl , "http" ) {
223
+ pUrl = "http://" + pUrl
224
+ }
225
+
226
+ var (
227
+ doc * goquery.Document
228
+ err error
229
+ )
230
+
231
+ if doc , err = goquery .NewDocument (pUrl ); err != nil {
232
+ logger .Errorln ("goquery opensource project newdocument error:" , err )
233
+ return err
234
+ }
235
+
236
+ // 最后面的先入库处理
237
+ projectsSelection := doc .Find (".ProjectList .List li" )
238
+
239
+ for i := projectsSelection .Length () - 1 ; i >= 0 ; i -- {
240
+
241
+ contentSelection := goquery .NewDocumentFromNode (projectsSelection .Get (i )).Selection
242
+ projectUrl , ok := contentSelection .Find ("h3 a" ).Attr ("href" )
243
+
244
+ if ! ok || projectUrl == "" {
245
+ continue
246
+ }
247
+ err = self .ParseOneProject (projectUrl )
248
+
249
+ if err != nil {
250
+ logger .Errorln (err )
251
+ }
252
+ }
253
+
254
+ return err
255
+ }
256
+
257
const (
	// OsChinaDomain is the scheme and host that site-relative oschina links
	// are resolved against.
	OsChinaDomain = "http://www.oschina.net"

	// ProjectLogoPrefix is the prefix handed to the uploader when an
	// open-source project's logo is transferred.
	ProjectLogoPrefix = "plogo"
)

// PresetUsernames lists the usernames that an imported project may be
// attributed to.
var PresetUsernames = []string{
	"polaris",
	"blov",
	"agolangf",
	"xuanbao",
}
263
+
264
+ // ParseOneProject 处理单个 project
265
+ func (ProjectLogic ) ParseOneProject (projectUrl string ) error {
266
+ if ! strings .HasPrefix (projectUrl , "http" ) {
267
+ projectUrl = OsChinaDomain + projectUrl
268
+ }
269
+
270
+ var (
271
+ doc * goquery.Document
272
+ err error
273
+ )
274
+
275
+ // 加上 ?fromerr=xfwefs,否则页面有 js 重定向
276
+ if doc , err = goquery .NewDocument (projectUrl + "?fromerr=xfwefs" ); err != nil {
277
+ return errors .New ("goquery fetch " + projectUrl + " error:" + err .Error ())
278
+ }
279
+
280
+ // 标题
281
+ category := strings .TrimSpace (doc .Find (".Project .name" ).Text ())
282
+ name := strings .TrimSpace (doc .Find (".Project .name u" ).Text ())
283
+ if category == "" && name == "" {
284
+ return errors .New ("projectUrl:" + projectUrl + " category and name are empty" )
285
+ }
286
+
287
+ tmpIndex := strings .LastIndex (category , name )
288
+ if tmpIndex != - 1 {
289
+ category = category [:tmpIndex ]
290
+ }
291
+
292
+ // uri
293
+ uri := projectUrl [strings .LastIndex (projectUrl , "/" )+ 1 :]
294
+
295
+ project := & model.OpenProject {}
296
+
297
+ _ , err = MasterDB .Where ("uri=?" , uri ).Get (project )
298
+ // 已经存在
299
+ if project .Id != 0 {
300
+ return errors .New ("url" + projectUrl + "has exists!" )
301
+ }
302
+
303
+ logoSelection := doc .Find (".Project .PN img" )
304
+ if logoSelection .AttrOr ("title" , "" ) != "" {
305
+ project .Logo = logoSelection .AttrOr ("src" , "" )
306
+
307
+ if ! strings .HasPrefix (project .Logo , "http" ) {
308
+ project .Logo = OsChinaDomain + project .Logo
309
+ }
310
+
311
+ project .Logo , err = DefaultUploader .TransferUrl (nil , project .Logo , ProjectLogoPrefix )
312
+ if err != nil {
313
+ logger .Errorln ("project logo upload error:" , err )
314
+ }
315
+ }
316
+
317
+ // 获取项目相关链接
318
+ doc .Find ("#Body .urls li" ).Each (func (i int , liSelection * goquery.Selection ) {
319
+ aSelection := liSelection .Find ("a" )
320
+ uri := util .FetchRealUrl (OsChinaDomain + aSelection .AttrOr ("href" , "" ))
321
+ switch aSelection .Text () {
322
+ case "软件首页" :
323
+ project .Home = uri
324
+ case "软件文档" :
325
+ project .Doc = uri
326
+ case "软件下载" :
327
+ project .Download = uri
328
+ }
329
+ })
330
+
331
+ ctime := time .Now ()
332
+ doc .Find ("#Body .attrs li" ).Each (func (i int , liSelection * goquery.Selection ) {
333
+ aSelection := liSelection .Find ("a" )
334
+ txt := aSelection .Text ()
335
+ if i == 0 {
336
+ project .Licence = txt
337
+ if txt == "未知" {
338
+ project .Licence = "其他"
339
+ }
340
+ } else if i == 1 {
341
+ project .Lang = txt
342
+ } else if i == 2 {
343
+ project .Os = txt
344
+ } else if i == 3 {
345
+ dtime , err := time .ParseInLocation ("2006年01月02日" , aSelection .Last ().Text (), time .Local )
346
+ if err != nil {
347
+ logger .Errorln ("parse ctime error:" , err )
348
+ } else {
349
+ ctime = dtime .Local ()
350
+ }
351
+ }
352
+ })
353
+
354
+ project .Name = name
355
+ project .Category = category
356
+ project .Uri = uri
357
+ project .Repo = strings .TrimSpace (doc .Find ("#Body .github-widget" ).AttrOr ("data-repo" , "" ))
358
+ project .Src = "https://github.com/" + project .Repo
359
+
360
+ pos := strings .Index (project .Repo , "/" )
361
+ if pos > - 1 {
362
+ project .Author = project .Repo [:pos ]
363
+ } else {
364
+ project .Author = "网友"
365
+ }
366
+
367
+ if project .Doc == "" {
368
+ // TODO:暂时认为一定是 Go 语言
369
+ project .Doc = "https://godoc.org/" + project .Src [8 :]
370
+ }
371
+
372
+ desc := ""
373
+ doc .Find ("#Body .detail" ).Find ("p" ).NextAll ().Each (func (i int , domSelection * goquery.Selection ) {
374
+ doc .FindSelection (domSelection ).WrapHtml (`<div id="tmp` + strconv .Itoa (i ) + `"></div>` )
375
+ domHtml , _ := doc .Find ("#tmp" + strconv .Itoa (i )).Html ()
376
+ if domSelection .Is ("pre" ) {
377
+ desc += domHtml + "\n \n "
378
+ } else {
379
+ desc += html2md .Convert (domHtml ) + "\n \n "
380
+ }
381
+ })
382
+
383
+ project .Desc = strings .TrimSpace (desc )
384
+ project .Username = PresetUsernames [rand .Intn (4 )]
385
+ project .Status = model .ProjectStatusOnline
386
+ project .Ctime = model .OftenTime (ctime )
387
+
388
+ _ , err = MasterDB .Insert (project )
389
+ if err != nil {
390
+ return errors .New ("insert into open project error:" + err .Error ())
391
+ }
392
+
393
+ return nil
394
+ }
395
+
215
396
// ProjectComment is the type for project comments; it has no fields.
type ProjectComment struct{}
217
398
0 commit comments