Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
36b6d03
Finished edits up to 2.2
shaunhutch Jun 26, 2023
83f62d0
Updated as per Florencia's comments
shaunhutch Jun 26, 2023
5c6bd04
Merge branch '79-general-simons-report-fixes' into 68-make-mds-report…
shaunhutch Jun 26, 2023
6b9f842
Fixed gridlines
shaunhutch Jun 26, 2023
b50e6fa
feat: added refernce
tieandrews Jun 27, 2023
2b5f019
feat: fixed section 2.2
tieandrews Jun 27, 2023
4575c0c
feat: added robert v3 train metrics
tieandrews Jun 27, 2023
130ad59
docs: remove diagram legend
tieandrews Jun 27, 2023
8285991
Merge branch '79-general-simons-report-fixes' of github.com:NeotomaDB…
tieandrews Jun 27, 2023
7017adf
Adding Final MDS Report
shaunhutch Jun 27, 2023
466bcff
Update S-LSTM ref
brabbit61 Jun 27, 2023
46a3054
Merge branch '79-general-simons-report-fixes' into 68-make-mds-report…
shaunhutch Jun 27, 2023
aa435c6
Updated report
shaunhutch Jun 27, 2023
98d62af
Updated report
shaunhutch Jun 27, 2023
c6ef186
Updated references
shaunhutch Jun 27, 2023
c75d91b
Changed folder name
shaunhutch Jun 27, 2023
7fcd574
Minor updates to the report
brabbit61 Jun 28, 2023
2314816
Merge branch 'dev' into 79-general-simons-report-fixes
shaunhutch Jun 28, 2023
89829cc
Updated tense issues
shaunhutch Jun 28, 2023
c56fee6
Tense updates
shaunhutch Jun 28, 2023
44aa8da
Minor changes
shaunhutch Jun 28, 2023
9100c5e
Minor corrections
shaunhutch Jun 28, 2023
b15624b
Latest changes
shaunhutch Jun 28, 2023
2a1cbd2
report: compressed entity-extraction
tieandrews Jun 28, 2023
14858bb
report: finals cuts
tieandrews Jun 28, 2023
2482c55
Final Updates
shaunhutch Jun 28, 2023
882f523
Merge branch 'dev' into 68-make-mds-report-and-cut-down-report
shaunhutch Jun 28, 2023
ad0b119
Merge branch '79-general-simons-report-fixes' into 68-make-mds-report…
shaunhutch Jun 28, 2023
d44a050
Add training metric for spacy
brabbit61 Jun 28, 2023
cd544b1
docs: updated model results for report
tieandrews Jun 28, 2023
31028f8
feat: added notebook for detailed ner results
tieandrews Jun 28, 2023
8194129
final edits on mds report
tieandrews Jun 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,589 changes: 1,589 additions & 0 deletions notebooks/2.2-NER-model-comparison.ipynb

Large diffs are not rendered by default.

179 changes: 179 additions & 0 deletions reports/final_mds/assets/references.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
% Goring et al. (2021): model workflow for GeoDeepDive (ice-rafted debris). Identified by DOI only.
@article{Goring,
  author = {Goring, Simon and Marsicek, Jeremiah and Ye, Shan and Williams, John and Meyers, Stephen and Peters, Shanan and Quinn, Daven and Schaen, Allen and Singer, Brad and Marcott, Shaun},
  title  = {A Model Workflow for {GeoDeepDive}: Locating {Pliocene} and {Pleistocene} Ice-Rafted Debris},
  year   = {2021},
  month  = jul,
  doi    = {10.31223/X54312}
}

% Williams et al. (2018): the Neotoma Paleoecology Database paper in Quaternary Research.
@article{NeotomaDB,
  author  = {Williams, J. W. and Grimm, E. G. and Blois, J. and Charles, D. F. and Davis, E. and Goring, S. J. and Graham, R. and Smith, A. J. and Anderson, M. and Arroyo-Cabrales, J. and Ashworth, A. C. and Betancourt, J. L. and Bills, B. W. and Booth, R. K. and Buckland, P. and Curry, B. and Giesecke, T. and Hausmann, S. and Jackson, S. T. and Latorre, C. and Nichols, J. and Purdum, T. and Roth, R. E. and Stryker, M. and Takahara, H.},
  title   = {The {Neotoma} {Paleoecology} {Database}: A multi-proxy, international community-curated data resource},
  journal = {Quaternary Research},
  volume  = {89},
  pages   = {156--177},
  year    = {2018}
}

% xDD API (web service); the retrieval date is recorded in the note field.
@misc{geodeepdive,
  author       = {Peters, S. E. and Ross, I. A. and Rekatsinas, T. and Livny, M.},
  title        = {{xDD} {API}},
  year         = {2021},
  howpublished = {JSON},
  note         = {Retrieved on May 9, 2023 from https://xdd.wisc.edu},
  license      = {Apache License, Version 2.0},
  url          = {https://geodeepdive.org}
}

% Crossref REST API (web resource); the access date is recorded in the note field.
@misc{crossref,
  author = {{Crossref}},
  title  = {Crossref REST API},
  year   = {2023},
  note   = {[Accessed May 9, 2023]},
  url    = {https://www.crossref.org/services/metadata-delivery/rest-api/}
}

% Alex et al. (2022): RAFT few-shot text classification benchmark (arXiv 2109.14076).
@misc{alex2022raft,
  author        = {Alex, Neel and Lifland, Eli and Tunstall, Lewis and Thakur, Abhishek and Maham, Pegah and Riedel, C. Jess and Hine, Emmie and Ashurst, Carolyn and Sedille, Paul and Carlier, Alexis and Noetel, Michael and Stuhlmüller, Andreas},
  title         = {{RAFT}: A Real-World Few-Shot Text Classification Benchmark},
  year          = {2022},
  eprint        = {2109.14076},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

% spaCy entity recognizer (software, version 3.5), published by Explosion.
@software{spacy,
  title     = {{spaCy} {NER}},
  publisher = {Explosion},
  version   = {3.5},
  date      = {2023-05-09},
  url       = {https://spacy.io/api/entityrecognizer}
}

% HuggingFace (software, version 4.29.1).
@software{huggingface,
  title   = {{HuggingFace}},
  version = {4.29.1},
  date    = {2023-05-09},
  url     = {https://huggingface.co/}
}

% Conneau et al. (2020): cross-lingual representation learning at scale (arXiv 1911.02116).
@misc{conneau2020unsupervised,
  author        = {Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzmán, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
  title         = {Unsupervised Cross-lingual Representation Learning at Scale},
  year          = {2020},
  eprint        = {1911.02116},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

% Weischedel et al. (2013): OntoNotes Release 5.0 (LDC catalog entry LDC2013T19).
@misc{ontonotes,
  author = {Weischedel, Ralph and Palmer, Martha and Marcus, Mitchell and Hovy, Eduard and Pradhan, Sameer and Ramshaw, Lance and Xue, Nianwen and Taylor, Ann and Kaufman, Jeff and Franchini, Michelle and El-Bachouti, Mohammed and Belvin, Robert and Houston, Ann},
  title  = {{OntoNotes} Release 5.0},
  year   = {2013},
  doi    = {10.35111/xmhb-2b84},
  url    = {https://catalog.ldc.upenn.edu/LDC2013T19}
}

% Label Studio data labeling software (version 1.7.3).
@software{LabelStudio,
  title   = {{Label Studio}: Data labeling software},
  version = {1.7.3},
  date    = {2023-05-09},
  note    = {Open source software available from https://github.com/heartexlabs/label-studio},
  url     = {https://github.com/heartexlabs/label-studio}
}

% Tran Thanh et al. (2019): article classification with NLP and machine learning.
% NOTE(review): no journal or booktitle is recorded for this entry; confirm the venue against the DOI.
@article{inproceedings,
  author = {Tran Thanh, Dien and Loc, Bui and Thai-Nghe, Nguyen},
  title  = {Article Classification using Natural Language Processing and Machine Learning},
  year   = {2019},
  month  = nov,
  pages  = {78--84},
  doi    = {10.1109/ACOMP.2019.00019}
}

% Lample et al. (2016): neural architectures for named entity recognition (NAACL-HLT 2016 proceedings).
@inproceedings{S-LSTM,
  author    = {Lample, Guillaume and Ballesteros, Miguel and Subramanian, Sandeep and Kawakami, Kazuya and Dyer, Chris},
  title     = {Neural Architectures for Named Entity Recognition},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2016},
  month     = mar,
  doi       = {10.18653/v1/N16-1030},
}

% Weber et al. (2020): supervised classification of research-data metadata by field of study.
@article{unsupervised,
  author  = {Weber, Tobias and Kranzlmüller, Dieter and Fromm, Michael and de Sousa, Nelson Tavares},
  title   = {Using supervised learning to classify metadata of research data by field of study},
  journal = {Quantitative Science Studies},
  volume  = {1},
  number  = {2},
  pages   = {525--550},
  year    = {2020},
  month   = jun,
  issn    = {2641-3337},
  doi     = {10.1162/qss_a_00049},
  url     = {https://doi.org/10.1162/qss\_a\_00049},
  eprint  = {https://direct.mit.edu/qss/article-pdf/1/2/525/1885831/qss\_a\_00049.pdf},
}
% Chinchor and Sundheim (1993): MUC-5 evaluation metrics.
@inproceedings{chinchor-sundheim-1993-muc,
  author    = {Chinchor, Nancy and Sundheim, Beth},
  title     = {{MUC}-5 Evaluation Metrics},
  booktitle = {Fifth Message Understanding Conference ({MUC}-5): Proceedings of a Conference Held in Baltimore, {M}aryland, August 25-27, 1993},
  year      = {1993},
  url       = {https://aclanthology.org/M93-1007}
}

% Dash (software, version 2.10.0), published by Plotly.
@software{dash,
  title     = {Dash},
  publisher = {Plotly},
  version   = {2.10.0},
  date      = {2023-05-25},
  url       = {https://dash.plotly.com}
}
% Chang et al. (2023): the shiny R package manual (package version given in the note field).
@manual{shiny,
  author = {Winston Chang and Joe Cheng and JJ Allaire and Carson Sievert and Barret Schloerke and Yihui Xie and Jeff Allen and Jonathan McPherson and Alan Dipert and Barbara Borges},
  title  = {{shiny}: Web Application Framework for {R}},
  year   = {2023},
  note   = {R package version 1.7.4.9002},
  url    = {https://shiny.rstudio.com/},
}

% Borealis AI (2023): research-blog tutorial on training transformers.
@misc{borealisai2023tutorial,
  author = {{Borealis AI}},
  title  = {Tutorial 17: Transformers {III} - Training},
  year   = {2023},
  url    = {https://www.borealisai.com/research-blogs/tutorial-17-transformers-iii-training/}
}
% NOTE: a second, identical definition of the "crossref" key was removed here; the entry is already defined earlier in this file.

% Popel and Bojar (2018): training tips for the Transformer model (arXiv 1804.00247, DBLP record).
@article{transformer-train-tips,
  author     = {Popel, Martin and Bojar, Ondrej},
  title      = {Training Tips for the Transformer Model},
  journal    = {CoRR},
  volume     = {abs/1804.00247},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1804.00247},
  url        = {http://arxiv.org/abs/1804.00247},
  timestamp  = {Mon, 13 Aug 2018 16:47:13 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-00247.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Wang (2020): pre-training models applied to named entity recognition (IHMSC 2020 conference paper).
@inproceedings{roberta-ner-wang,
  author    = {Wang, Yu},
  title     = {Application of Pre-training Models in Named Entity Recognition},
  booktitle = {2020 12th International Conference on Intelligent Human-Machine Systems and Cybernetics (IHMSC)},
  year      = {2020},
  doi       = {10.1109/IHMSC49165.2020.00013}
}
Binary file not shown.
Loading