Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
language: go
sudo: false
go:
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- tip
script:
- go get -u github.com/golang/lint/golint
- go get -u golang.org/x/lint/golint
- OUT="$(go get -a)"; test -z "$OUT" || (echo "$OUT" && return 1)
- OUT="$(gofmt -l -d ./)"; test -z "$OUT" || (echo "$OUT" && return 1)
- OUT="$(golint ./...)"; test -z "$OUT" || (echo "$OUT" && return 1)
Expand Down
7 changes: 1 addition & 6 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -970,12 +970,7 @@ func (c *Collector) handleOnXML(resp *Response) error {
return err
}
if e := htmlquery.FindOne(doc, "//base/@href"); e != nil {
for _, a := range e.Attr {
if a.Key == "href" {
resp.Request.baseURL, _ = url.Parse(a.Val)
break
}
}
resp.Request.baseURL, _ = url.Parse(e.FirstChild.Data)
}

for _, cc := range c.xmlCallbacks {
Expand Down
2 changes: 1 addition & 1 deletion colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ func TestBaseTag(t *testing.T) {

c2 := NewCollector()
c2.OnXML("//a/@href", func(e *XMLElement) {
u := e.Request.AbsoluteURL(e.Attr("href"))
u := e.Request.AbsoluteURL(e.Text)
if u != "http://xy.com/z" {
t.Error("Invalid <base /> tag handling in OnXML: expected https://xy.com/z, got " + u)
}
Expand Down
21 changes: 21 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module github.com/go-colly/colly

require (
github.com/PuerkitoBio/goquery v1.4.1
github.com/andybalholm/cascadia v1.0.0 // indirect
github.com/antchfx/htmlquery v0.0.0-20180925020018-98389addba3d
github.com/antchfx/xmlquery v0.0.0-20181024140136-98cdbc3221ed
github.com/antchfx/xpath v0.0.0-20180922041825-3de91f3991a1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/gobwas/glob v0.2.3
github.com/gocolly/colly v1.1.0
github.com/jawher/mow.cli v1.0.4
github.com/kennygrant/sanitize v1.2.4
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca
github.com/stretchr/testify v1.2.2 // indirect
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea
golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f // indirect
google.golang.org/appengine v1.2.0
)
40 changes: 40 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
github.com/PuerkitoBio/goquery v1.4.1 h1:smcIRGdYm/w7JSbcdeLHEMzxmsBQvl8lhf0dSw2nzMI=
github.com/PuerkitoBio/goquery v1.4.1/go.mod h1:T9ezsOHcCrDCgA8aF1Cqr3sSYbO/xgdy8/R/XiIMAhA=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/antchfx/htmlquery v0.0.0-20180925020018-98389addba3d h1:BzVCjAnYYfpdD01lzVKlZ8mCJu+l4IdGGHjye+R8qgQ=
github.com/antchfx/htmlquery v0.0.0-20180925020018-98389addba3d/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
github.com/antchfx/xmlquery v0.0.0-20181024140136-98cdbc3221ed h1:RaoFHh2ENW0KnEWqTVLHWQ1KB2WMbEduM51FQNKpZBs=
github.com/antchfx/xmlquery v0.0.0-20181024140136-98cdbc3221ed/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
github.com/antchfx/xpath v0.0.0-20180922041825-3de91f3991a1 h1:nt4RMjvM9pJ5HR7WkCeAnDz1pgHbLT2vxICalUTWcJU=
github.com/antchfx/xpath v0.0.0-20180922041825-3de91f3991a1/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gocolly/colly v1.1.0 h1:B1M8NzjFpuhagut8f2ILUDlWMag+nTx+PWEmPy7RhrE=
github.com/gocolly/colly v1.1.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/jawher/mow.cli v1.0.4 h1:hKjm95J7foZ2ngT8tGb15Aq9rj751R7IUDjG+5e3cGA=
github.com/jawher/mow.cli v1.0.4/go.mod h1:5hQj2V8g+qYmLUVWqu4Wuja1pI57M83EChYLVZ0sMKk=
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI=
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 h1:x6rhz8Y9CjbgQkccRGmELH6K+LJj7tOoh3XWeC1yaQM=
golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
google.golang.org/appengine v1.2.0 h1:S0iUepdCWODXRvtE+gcRDd15L+k+k1AiHlMiMjefH24=
google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=