Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions chalicelib/api/broadcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def send_announcement():


@broadcast_api.route("/test-email", methods=["GET", "POST"], cors=True)
@auth(broadcast_api, roles=[Roles.EBOARD])
def test_email():
# PLEASE REPLACE WITH YOUR OWN EMAIL FOR TESTING
html = broadcast_service.generate_newsletter_content()
Expand Down
106 changes: 77 additions & 29 deletions chalicelib/services/JobPostingService.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,27 @@
from datetime import datetime, timedelta
from typing import List, Dict
import re
import functools

def call_on_exit(method_name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO the decorator feels unnecessary and over-engineered - unless we know we're going to have more jobs/tasks that use the webdriver, I feel that just having a simple exit/kill method would be better. Regardless, if it works, no issues at all!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might add a "recent news" section where we could scrape articles from the web and provide a summary of what's happening that week. Do you think it's a good idea?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah let's leave it for now - we can always iterate to change it later!

"""
Decorator to call a method (by name) on self when the decorated method exits (returns or raises).
"""
def decorator(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
finally:
getattr(self, method_name)()
return wrapper
return decorator

class JobPostingService:
def __init__(self):
self.driver = self._create_webdriver()
self.driver = None
self.gs = GoogleSheetsModule()

def _create_webdriver(self, *chrome_args: str) -> webdriver.Chrome:
"""
Configures and creates a headless Chrome webdriver with optional additional command-line options.
Expand All @@ -25,6 +40,10 @@ def _create_webdriver(self, *chrome_args: str) -> webdriver.Chrome:
Returns:
webdriver.Chrome: A configured instance of the Chrome webdriver.
"""
# Return existing driver if possible
if self.driver is not None:
return self.driver

options = webdriver.ChromeOptions()
# Use default options if no extra arguments are provided
# "--headless": Run Chrome in headless mode
Expand All @@ -34,11 +53,12 @@ def _create_webdriver(self, *chrome_args: str) -> webdriver.Chrome:
args_to_use = chrome_args if chrome_args else default_args
for arg in args_to_use:
options.add_argument(arg)

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)
return driver

@call_on_exit("_close_driver")
def get_jobs(self, urlStr) -> List:
"""
Fetches job postings from the given URL and returns a list of job details.
Expand All @@ -47,7 +67,7 @@ def get_jobs(self, urlStr) -> List:
and processes each row of the table body to extract job details such as the company name,
job role, application link, and posting date. The extraction stops once a job posting date
is determined to be more than one week old.

**Due to this function's reliance on specific Github repo README formatting, this function
will need to be updated/abstracted to handle changes to the repos in the future.

Expand All @@ -58,26 +78,33 @@ def get_jobs(self, urlStr) -> List:
List[dict]: A list of dictionaries, each containing keys 'company', 'role', 'link',
and 'date' for the individual job postings.
"""
self.driver.get(urlStr)

table = self.driver.find_element(By.TAG_NAME, "table")
# Open fresh driver (or an existing one)
driver = self._get_driver()
driver.get(urlStr)

table = driver.find_element(By.TAG_NAME, "table")
body = table.find_element(By.TAG_NAME, "tbody")
rows = body.find_elements(By.TAG_NAME, "tr")

prevRowCompany = ""
jobs = []
for row in rows:
cols = row.find_elements(By.XPATH, "td")
date = cols[-1].text
if (self.is_more_than_one_week_ago(date)):
break

company = ""
role = ""
link = ""

try:
company = cols[0].find_element(By.TAG_NAME, "strong").find_element(By.TAG_NAME, "a").text
company = (
cols[0]
.find_element(By.TAG_NAME, "strong")
.find_element(By.TAG_NAME, "a")
.text
)
prevRowCompany = company
except NoSuchElementException:
try:
Expand All @@ -88,23 +115,29 @@ def get_jobs(self, urlStr) -> List:
prevRowCompany = company
except Exception:
company = "N/A"



try:
role = cols[1].find_element(By.TAG_NAME, "strong").find_element(By.TAG_NAME, "a").text
link = cols[1].find_element(By.TAG_NAME, "strong").find_element(By.TAG_NAME, "a").get_attribute("href")
role = (
cols[1]
.find_element(By.TAG_NAME, "strong")
.find_element(By.TAG_NAME, "a")
.text
)
link = (
cols[1]
.find_element(By.TAG_NAME, "strong")
.find_element(By.TAG_NAME, "a")
.get_attribute("href")
)
except NoSuchElementException:
role = cols[1].text

if link == "":
link = cols[3].find_element(By.TAG_NAME, "a").get_attribute("href")

newJob = {"company": company,
"role": role,
"link": link,
"date": date}

newJob = {"company": company, "role": role, "link": link, "date": date}
jobs.append(newJob)

return jobs

def get_finance_jobs(self) -> List[Dict]:
Expand All @@ -113,7 +146,7 @@ def get_finance_jobs(self) -> List[Dict]:

This method utilizes the GoogleSheetsModule to fetch cell data from a predetermined Google Sheets
document. It processes the data by separating header information from job rows, identifying the required
columns (Company, Opportunity, Link, Deadline), and extracting relevant details. The method utilizes
columns (Company, Opportunity, Link, Deadline), and extracting relevant details. The method utilizes
the _convert_serial_to_date() helper function to convert serial date formats into a human-readable string format.

Returns:
Expand Down Expand Up @@ -143,8 +176,10 @@ def get_finance_jobs(self) -> List[Dict]:
company_idx = headers.index("Company")
opp_idx = headers.index("Opportunity")
link_idx = headers.index("Link")

deadline_idxs = [i for i, header in enumerate(headers) if header == "Deadline"]

deadline_idxs = [
i for i, header in enumerate(headers) if header == "Deadline"
]
if len(deadline_idxs) > 1:
deadline_idx = deadline_idxs[1]
else:
Expand Down Expand Up @@ -178,21 +213,21 @@ def get_finance_jobs(self) -> List[Dict]:

raw_date = row[deadline_idx] if deadline_idx < len(row) else "N/A"
date_str = self._convert_serial_to_date(raw_date)

job = {
"company": row[company_idx] if company_idx < len(row) else "N/A",
"role": row[opp_idx] if opp_idx < len(row) else "N/A",
"link": hyperlink_url,
"date": date_str
"date": date_str,
}
job_listings.append(job)

return job_listings

except Exception as e:
print(e)
return []

def _convert_serial_to_date(self, raw_date) -> str:
"""
Converts a serial date (as returned by Google Sheets when using FORMULA mode)
Expand Down Expand Up @@ -247,4 +282,17 @@ def is_more_than_one_week_ago(self, dateStr: str) -> bool:
print(f"Error parsing date in alternative format: {e}")
return False

def _get_driver(self):
    """
    Return the WebDriver held by this service, creating one on first use.

    Returns:
        The driver stored on ``self.driver``; when none is stored yet, a new
        one is obtained from ``_create_webdriver()`` and stored first.
    """
    current = self.driver
    if current is not None:
        # A driver is already stored on the instance: reuse it.
        return current

    self.driver = self._create_webdriver()
    return self.driver

def _close_driver(self):
    """
    Quit the active WebDriver, if any, and clear the stored reference.

    The reference on ``self.driver`` is cleared even when ``quit()`` fails,
    so a later ``_get_driver()`` call creates a fresh driver instead of
    reusing one that is in an unknown state.

    Raises:
        RuntimeError: If the driver's ``quit()`` raises; the original
            exception is chained as ``__cause__``.
    """
    driver = self.driver
    if driver is None:
        # Nothing to close.
        return

    # Drop the reference before quitting so a failing quit() cannot leave
    # a dead driver cached on the instance.
    self.driver = None
    try:
        driver.quit()
    except Exception as e:
        raise RuntimeError(f"Failed to close WebDriver: {e}") from e

# Module-level JobPostingService instance, constructed when this module is imported.
job_posting_service = JobPostingService()
3 changes: 3 additions & 0 deletions tests/services/test_job_posting_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ def test_get_jobs(job_service):
- Processes rows until a row with a date older than one week is encountered.
- Uses nested element lookups where available and falls back to previous row's company or plain text.
"""
# Patch _close_driver to do nothing for this test
job_service._close_driver = lambda: None

test_url = "http://example.com/jobs"
jobs = job_service.get_jobs(test_url)

Expand Down