Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ pip install -r requirements.txt

For conda:
```bash
conda install environment.yml
conda env create -f environment.yml
```

If you plan to use the pre-built Docker images, install Docker following these [instructions](https://docs.docker.com/get-docker/).
Expand Down
Binary file modified data/data-review-tool/article-relevance-output.parquet
Binary file not shown.
23 changes: 23 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
version: "3.9"
services:
# Service that builds and runs the article relevance prediction image.
article-relevance-prediction:
  image: metaextractor-article-relevance:v0.0.1
  build:
    dockerfile: ./docker/article-relevance/Dockerfile
    context: .
  # Settings handed to the container as environment variables
  # (presumably consumed by run-prediction.sh - confirm against the script).
  # Entries with nothing after "=" are passed as empty strings.
  environment:
    - N_RECENT=10
    - MIN_DATE=
    - MAX_DATE=
    - TERM=
    - AUTO_MIN_DATE=False
    - AUTO_CHECK_DUP=False
    # Must live inside the /output volume mounted below; the previous value
    # (/outputs/) pointed at a directory that is not mounted, so results
    # would not have reached ./data/article-relevance/outputs on the host.
    - OUTPUT_PATH=/output/
    - SEND_XDD=False
    # Located inside the /raw volume mounted below.
    - DOI_FILE_PATH=/raw/gdd_api_return.json
    # Located inside the /models volume mounted below.
    - MODEL_PATH=/models/logistic_regression_model.joblib

  # Host directories (left) bind-mounted to container paths (right).
  volumes:
    - ./data/article-relevance/outputs:/output
    - ./data/article-relevance/processed/prediction_parquet:/parquet
    - ./data/article-relevance/raw:/raw
    - ./models/article-relevance:/models

data-review-tool:
image: metaextractor-data-review-tool:v0.0.1
build:
Expand Down
10 changes: 6 additions & 4 deletions docker/article-relevance/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Base image: official Python 3.10
FROM python:3.10

# Container-side paths exported to the environment; they correspond to the
# /raw and /parquet directories created later in this Dockerfile
ENV DOI_PATH="/raw" \
    PARQUET_PATH="/parquet"

# Run all following instructions relative to /app/
WORKDIR /app/

Expand All @@ -13,9 +15,6 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy the repository's src folder into the container as ./src
# (only src is copied here, not the whole repository)
COPY src ./src

# Copy the model folder into the container
# NOTE(review): /models is also declared as a volume later in this file, so
# this baked-in copy under ./models/article-relevance may be redundant - confirm
COPY models/article-relevance ./models/article-relevance

# Copy the shell script used as the container entry point into the
# current working directory
COPY docker/article-relevance/run-prediction.sh .

Expand All @@ -24,7 +23,10 @@ RUN chmod +x run-prediction.sh

# Create the mount-point directories in a single layer and declare them as
# volumes (VOLUME only declares mount points; the actual mounts are supplied
# at run time, e.g. by docker-compose or `docker run -v`)
RUN mkdir -p /output /parquet /raw /models
VOLUME ["/output", "/parquet", "/raw", "/models"]

# Set the entry point for the Docker container
ENTRYPOINT ["./run-prediction.sh"]
12 changes: 11 additions & 1 deletion src/article_relevance/relevance_prediction_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,19 @@ def main():
opt = docopt(__doc__)

doi_list_file_path = opt["--doi_file_path"]
model_path = opt['--model_path']
output_path = opt['--output_path']
send_xdd = opt['--send_xdd']

# # /models directory is a mounted volume, containing the model object
# models = os.listdir("/models")
# models = [f for f in models if f.endswith(".joblib")]

# if models:
# model_path = os.path.join("/models", models[0])
# else:
# model_path = ""

model_path = opt['--model_path']

metadata_df = crossref_extract(doi_list_file_path)

Expand Down