From 6bfbce0fb5c8a41fad8c860c5dc96c730ed0e6da Mon Sep 17 00:00:00 2001 From: ysung6 Date: Mon, 31 Aug 2020 02:15:38 +0900 Subject: [PATCH] update coursera example --- .../coursera_courses/coursera_courses.go | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/_examples/coursera_courses/coursera_courses.go b/_examples/coursera_courses/coursera_courses.go index 08b8e1ebc..a6a679ce7 100644 --- a/_examples/coursera_courses/coursera_courses.go +++ b/_examples/coursera_courses/coursera_courses.go @@ -18,11 +18,18 @@ type Course struct { URL string Language string Commitment string - HowToPass string Rating string } func main() { + fName := "courses.json" + file, err := os.Create(fName) + if err != nil { + log.Fatalf("Cannot create file %q: %s\n", fName, err) + return + } + defer file.Close() + // Instantiate default collector c := colly.NewCollector( // Visit only domains: coursera.org, www.coursera.org @@ -38,7 +45,7 @@ func main() { courses := make([]Course, 0, 200) - // On every a element which has href attribute call callback + // On every element which has "href" attribute call callback c.OnHTML("a[href]", func(e *colly.HTMLElement) { // If attribute class is this long string return from callback // As this a is irrelevant @@ -59,8 +66,8 @@ func main() { log.Println("visiting", r.URL.String()) }) - // On every a HTML element which has name attribute call callback - c.OnHTML(`a[name]`, func(e *colly.HTMLElement) { + // On every element with collection-product-card class call callback + c.OnHTML(`a.collection-product-card`, func(e *colly.HTMLElement) { // Activate detailCollector if the link contains "coursera.org/learn" courseURL := e.Request.AbsoluteURL(e.Attr("href")) if strings.Index(courseURL, "coursera.org/learn") != -1 { @@ -71,7 +78,7 @@ func main() { // Extract details of the course detailCollector.OnHTML(`div[id=rendered-content]`, func(e *colly.HTMLElement) { log.Println("Course found", e.Request.URL) - title := 
e.ChildText(".course-title") + title := e.ChildText(".banner-title") if title == "" { log.Println("No title found", e.Request.URL) } @@ -79,22 +86,23 @@ func main() { Title: title, URL: e.Request.URL.String(), Description: e.ChildText("div.content"), - Creator: e.ChildText("div.creator-names > span"), + Creator: e.ChildText("li.banner-instructor-info > a > div > div > span"), + Rating: e.ChildText("span.number-rating"), } - // Iterate over rows of the table which contains different information - // about the course - e.ForEach("table.basic-info-table tr", func(_ int, el *colly.HTMLElement) { - switch el.ChildText("td:first-child") { - case "Language": - course.Language = el.ChildText("td:nth-child(2)") + // Iterate over div components and add details to course + e.ForEach(".AboutCourse .ProductGlance > div", func(_ int, el *colly.HTMLElement) { + svgTitle := strings.Split(el.ChildText("div:nth-child(1) svg title"), " ") + lastWord := svgTitle[len(svgTitle)-1] + switch lastWord { + // svg Title: Available Languages + case "languages": + course.Language = el.ChildText("div:nth-child(2) > div:nth-child(1)") + // svg Title: Mixed/Beginner/Intermediate/Advanced Level case "Level": - course.Level = el.ChildText("td:nth-child(2)") - case "Commitment": - course.Commitment = el.ChildText("td:nth-child(2)") - case "How To Pass": - course.HowToPass = el.ChildText("td:nth-child(2)") - case "User Ratings": - course.Rating = el.ChildText("td:nth-child(2) div:nth-of-type(2)") + course.Level = el.ChildText("div:nth-child(2) > div:nth-child(1)") + // svg Title: Hours to complete + case "complete": + course.Commitment = el.ChildText("div:nth-child(2) > div:nth-child(1)") } }) courses = append(courses, course) @@ -103,7 +111,7 @@ func main() { // Start scraping on http://coursera.com/browse c.Visit("https://coursera.org/browse") - enc := json.NewEncoder(os.Stdout) + enc := json.NewEncoder(file) enc.SetIndent("", " ") // Dump json to the standard output