Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
502 changes: 251 additions & 251 deletions docs/prebuilt-indexes.md

Large diffs are not rendered by default.

154 changes: 77 additions & 77 deletions docs/reproduce/msmarco-v1-passage.html

Large diffs are not rendered by default.

1,048 changes: 505 additions & 543 deletions src/main/java/io/anserini/index/IndexInfo.java

Large diffs are not rendered by default.

44 changes: 33 additions & 11 deletions src/main/resources/reproduce/msmarco-v1-doc.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
conditions:
- name: bm25
- name: bm25-doc-default
display: "BM25 (k1=0.9, b=0.4)"
display_html: "BM25 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand All @@ -21,7 +21,7 @@ conditions:
- AP@100: 0.3793
nDCG@10: 0.5286
R@1K: 0.8085
- name: bm25-doc2query
- name: bm25-d2q-t5-doc-default
display: "BM25 (k1=0.9, b=0.4) with doc2query-T5 expansions"
display_html: "BM25 with doc2query-T5 expansions (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand All @@ -43,7 +43,7 @@ conditions:
- AP@100: 0.4230
nDCG@10: 0.5885
R@1K: 0.8403
- name: bm25-segmented
- name: bm25-doc-segmented-default
display: "BM25 segmented (k1=0.9, b=0.4)"
display_html: "BM25 segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand All @@ -65,7 +65,7 @@ conditions:
- AP@100: 0.3586
nDCG@10: 0.5281
R@1K: 0.7755
- name: bm25-segmented-doc2query
- name: bm25-d2q-t5-doc-segmented-default
display: "BM25 segmented (k1=0.9, b=0.4) with doc2query-T5 expansions"
display_html: "BM25 segmented with doc2query-T5 expansions (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand All @@ -87,9 +87,9 @@ conditions:
- AP@100: 0.4150
nDCG@10: 0.5957
R@1K: 0.8046
- name: unicoil-noexp-cached
display: "uniCOIL (no expansions)"
display_html: "uniCOIL (no expansions)"
- name: unicoil-noexp.cached
display: "uniCOIL (no expansions) using cached queries"
display_html: "uniCOIL (no expansions) using cached queries"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v1-doc-segmented.unicoil-noexp -topics $topics -output $output -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000
topics:
Expand All @@ -109,9 +109,9 @@ conditions:
- AP@100: 0.3698
nDCG@10: 0.5893
R@1K: 0.7623
- name: unicoil-cached
display: "uniCOIL (with doc2query-T5 expansions)"
display_html: "uniCOIL (with doc2query-T5 expansions)"
- name: unicoil.cached
display: "uniCOIL (with doc2query-T5 expansions) using cached queries"
display_html: "uniCOIL (with doc2query-T5 expansions) using cached queries"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v1-doc-segmented.unicoil -topics $topics -output $output -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000
topics:
Expand All @@ -130,4 +130,26 @@ conditions:
scores:
- AP@100: 0.3882
nDCG@10: 0.6033
R@1K: 0.7869
R@1K: 0.7869
- name: unicoil.onnx
display: "uniCOIL (with doc2query-T5 expansions) using ONNX"
display_html: "uniCOIL (with doc2query-T5 expansions) using ONNX"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v1-doc-segmented.unicoil -topics $topics -output $output -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000 -encoder UniCoil
topics:
- topic_key: msmarco-doc.dev
eval_key: msmarco-doc.dev
scores:
- MRR@100: 0.3531
- topic_key: dl19-doc
eval_key: dl19-doc
scores:
- AP@100: 0.2789
nDCG@10: 0.6396
R@1K: 0.6652
- topic_key: dl20
eval_key: dl20-doc
scores:
- AP@100: 0.3882
nDCG@10: 0.6033
R@1K: 0.7869
16 changes: 8 additions & 8 deletions src/main/resources/reproduce/msmarco-v1-passage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ conditions:
- MAP: 0.2856
nDCG@10: 0.4796
R@1K: 0.7863
- name: splade-pp-ed.cached_q
- name: splade-pp-ed.cached
display: "SPLADE++ EnsembleDistil (cached queries)"
display_html: "SPLADE++ EnsembleDistil (cached queries)"
display_row: ""
Expand Down Expand Up @@ -68,7 +68,7 @@ conditions:
- MAP: 0.4999
nDCG@10: 0.7197
R@1K: 0.8998
- name: splade-v3.cached_q
- name: splade-v3.cached
display: "SPLADE-v3 (cached queries)"
display_html: "SPLADE-v3 (cached queries)"
display_row: ""
Expand Down Expand Up @@ -114,7 +114,7 @@ conditions:
- MAP: 0.5402
nDCG@10: 0.7522
R@1K: 0.9039
- name: cosdpr-distil.hnsw.cached_q
- name: cosdpr-distil.hnsw.cached
display: "cosDPR-distil w/ HNSW fp32 (cached queries)"
display_html: "cosDPR-distil w/ HNSW fp32 (cached queries)"
display_row: ""
Expand Down Expand Up @@ -160,7 +160,7 @@ conditions:
- MAP: 0.4876
nDCG@10: 0.7025
R@1K: 0.8540
- name: cosdpr-distil.hnsw-int8.cached_q
- name: cosdpr-distil.hnsw-int8.cached
display: "cosDPR-distil w/ HNSW int8 (cached queries)"
display_html: "cosDPR-distil w/ HNSW int8 (cached queries)"
display_row: ""
Expand Down Expand Up @@ -206,7 +206,7 @@ conditions:
- MAP: 0.4871
nDCG@10: 0.6996
R@1K: 0.8538
- name: bge-base-en-v1.5.hnsw.cached_q
- name: bge-base-en-v1.5.hnsw.cached
display: "bge-base-en-v1.5 w/ HNSW fp32 (cached queries)"
display_html: "bge-base-en-v1.5 w/ HNSW fp32 (cached queries)"
display_row: ""
Expand Down Expand Up @@ -252,7 +252,7 @@ conditions:
- MAP: 0.4626
nDCG@10: 0.6768
R@1K: 0.8526
- name: bge-base-en-v1.5.hnsw-int8.cached_q
- name: bge-base-en-v1.5.hnsw-int8.cached
display: "bge-base-en-v1.5 w/ HNSW int8 (cached queries)"
display_html: "bge-base-en-v1.5 w/ HNSW int8 (cached queries)"
display_row: ""
Expand Down Expand Up @@ -298,7 +298,7 @@ conditions:
- MAP: 0.4596
nDCG@10: 0.6767
R@1K: 0.8468
- name: cohere-embed-english-v3.0.hnsw.cached_q
- name: cohere-embed-english-v3.0.hnsw.cached
display: "cohere-embed-english-v3.0 w/ HNSW fp32 (cached queries)"
display_html: "cohere-embed-english-v3.0 w/ HNSW fp32 (cached queries)"
display_row: ""
Expand All @@ -321,7 +321,7 @@ conditions:
- MAP: 0.5068
nDCG@10: 0.7245
R@1K: 0.8682
- name: cohere-embed-english-v3.0.hnsw-int8.cached_q
- name: cohere-embed-english-v3.0.hnsw-int8.cached
display: "cohere-embed-english-v3.0 w/ HNSW int8 (cached queries)"
display_html: "cohere-embed-english-v3.0 w/ HNSW int8 (cached queries)"
display_row: ""
Expand Down
60 changes: 49 additions & 11 deletions src/main/resources/reproduce/msmarco-v2-doc.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
conditions:
- name: bm25
- name: bm25-doc-default
display: "BM25 (k1=0.9, b=0.4)"
display_html: "BM25 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand Down Expand Up @@ -37,7 +37,7 @@ conditions:
nDCG@10: 0.2946
R@100: 0.2422
R@1K: 0.5262
- name: bm25-doc2query
- name: bm25-d2q-t5-doc-default
display: "BM25 (k1=0.9, b=0.4) with doc2query-T5 expansions"
display_html: "BM25 with doc2query-T5 expansions (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand Down Expand Up @@ -75,7 +75,7 @@ conditions:
nDCG@10: 0.3511
R@100: 0.2773
R@1K: 0.5549
- name: bm25-segmented
- name: bm25-doc-segmented-default
display: "BM25 segmented (k1=0.9, b=0.4)"
display_html: "BM25 segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand Down Expand Up @@ -113,7 +113,7 @@ conditions:
nDCG@10: 0.3405
R@100: 0.2884
R@1K: 0.5662
- name: bm25-segmented-doc2query
- name: bm25-d2q-t5-doc-segmented-default
display: "BM25 segmented (k1=0.9, b=0.4) with doc2query-T5 expansions"
display_html: "BM25 segmented with doc2query-T5 expansions (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
Expand Down Expand Up @@ -151,9 +151,9 @@ conditions:
nDCG@10: 0.3612
R@100: 0.3078
R@1K: 0.5967
- name: unicoil-noexp-0shot-cached
display: "uniCOIL (noexp) zero-shot"
display_html: "uniCOIL (noexp) zero-shot"
- name: unicoil-noexp-0shot.cached
display: "uniCOIL (noexp) zero-shot using cached queries"
display_html: "uniCOIL (noexp) zero-shot using cached queries"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2-doc-segmented.unicoil-noexp-0shot -topics $topics -output $output -parallelism 16 -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000
topics:
Expand Down Expand Up @@ -189,9 +189,9 @@ conditions:
nDCG@10: 0.3898
R@100: 0.2949
R@1K: 0.5462
- name: unicoil-0shot-cached
display: "uniCOIL (with doc2query-T5) zero-shot"
display_html: "uniCOIL (with doc2query-T5) zero-shot"
- name: unicoil-0shot.cached
display: "uniCOIL (with doc2query-T5) zero-shot using cached queries"
display_html: "uniCOIL (with doc2query-T5) zero-shot using cached queries"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2-doc-segmented.unicoil-0shot -topics $topics -output $output -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000
topics:
Expand Down Expand Up @@ -226,4 +226,42 @@ conditions:
MRR@100: 0.7793
nDCG@10: 0.4149
R@100: 0.3101
R@1K: 0.5753
R@1K: 0.5753
- name: unicoil-0shot.onnx
display: "uniCOIL (with doc2query-T5) zero-shot using ONNX"
display_html: "uniCOIL (with doc2query-T5) zero-shot using ONNX"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2-doc-segmented.unicoil-0shot -topics $topics -output $output -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000 -encoder UniCoil
topics:
- topic_key: msmarco-v2-doc.dev
eval_key: msmarco-v2-doc.dev
scores:
- MRR@100: 0.2419
- topic_key: msmarco-v2-doc.dev2
eval_key: msmarco-v2-doc.dev2
scores:
- MRR@100: 0.2445
- topic_key: dl21
eval_key: dl21-doc
scores:
- MAP@100: 0.2718
MRR@100: 0.9684
nDCG@10: 0.6783
R@100: 0.3700
R@1K: 0.7069
- topic_key: dl22
eval_key: dl22-doc
scores:
- MAP@100: 0.1400
MRR@100: 0.8170
nDCG@10: 0.4451
R@100: 0.2656
R@1K: 0.5235
- topic_key: dl23
eval_key: dl23-doc
scores:
- MAP@100: 0.1554
MRR@100: 0.7793
nDCG@10: 0.4150
R@100: 0.3101
R@1K: 0.5753
12 changes: 6 additions & 6 deletions src/main/resources/reproduce/msmarco-v2-passage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ conditions:
nDCG@10: 0.2627
R@100: 0.2329
R@1K: 0.4346
- name: unicoil-noexp-0shot
display: "Unicoil (no expansion, 0-shot)"
display_html: "Unicoil (no expansion, 0-shot)"
- name: unicoil-noexp-0shot.cached
display: "uniCOIL (no expansion) zero-shot using cached queries"
display_html: "uniCOIL (no expansion) zero-shot using cached queries"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2-passage.unicoil-noexp-0shot -topics $topics -output $output -hits 1000 -impact -pretokenized
topics:
Expand Down Expand Up @@ -75,9 +75,9 @@ conditions:
nDCG@10: 0.3262
R@100: 0.2761
R@1K: 0.5070
- name: unicoil-0shot
display: "Unicoil (with doc2query-T5 expansions) zero shot"
display_html: "Unicoil (with doc2query-T5 expansions) zero shot"
- name: unicoil-0shot.cached
display: "uniCOIL (with doc2query-T5 expansions) zero-shot using cached queries"
display_html: "uniCOIL (with doc2query-T5 expansions) zero-shot using cached queries"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2-passage.unicoil-0shot -topics $topics -output $output -hits 1000 -impact -pretokenized
topics:
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/reproduce/msmarco-v2.1-doc-segmented.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ conditions:
display: "SPLADE-v3 (cached queries)"
display_html: "SPLADE-v3 (cached queries)"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc-segmented-splade-v3 -topics $topics -output $output -impact -pretokenized -removeQuery -hits 1000
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc-segmented.splade-v3 -topics $topics -output $output -impact -pretokenized -removeQuery -hits 1000
topics:
- topic_key: rag24.test.splade-v3
eval_key: rag24.test-umbrela-all
Expand All @@ -399,7 +399,7 @@ conditions:
display: "SPLADE-v3 (ONNX)"
display_html: "SPLADE-v3 (ONNX)"
display_row: ""
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc-segmented-splade-v3 -topics $topics -output $output -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3
command: java -cp $fatjar --add-modules jdk.incubator.vector io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc-segmented.splade-v3 -topics $topics -output $output -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3
topics:
- topic_key: rag24.test
eval_key: rag24.test-umbrela-all
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,13 @@ This means that the page is updated with every (successful) build.
for (IndexType type : grouped.keySet()) {
String typeHeading = "";
if (type == IndexType.SPARSE_INVERTED) {
typeHeading = "Lucene Standard Inverted Indexes";
typeHeading = "Lucene Inverted Indexes";
} else if (type == IndexType.SPARSE_IMPACT) {
typeHeading = "Lucene Impact Indexes";
} else if (type == IndexType.DENSE_HNSW) {
typeHeading = "Lucene HNSW Indexes";
} else if (type == IndexType.DENSE_FLAT) {
typeHeading = "Lucene Flat Indexes";
typeHeading = "Lucene Flat Vector Indexes";
}

md.append("### ").append(typeHeading).append("\n");
Expand Down
14 changes: 7 additions & 7 deletions src/test/java/io/anserini/doc/GenerateReproductionDocsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,18 @@ public class GenerateReproductionDocsTest {
public final static String COLLECTION = "msmarco-v1-passage";
public final static String[] MODELS = {
"bm25",
"splade-pp-ed.cached_q",
"splade-pp-ed.cached",
"splade-pp-ed.onnx",
"cosdpr-distil.hnsw.cached_q",
"cosdpr-distil.hnsw.cached",
"cosdpr-distil.hnsw.onnx",
"cosdpr-distil.hnsw-int8.cached_q",
"cosdpr-distil.hnsw-int8.cached",
"cosdpr-distil.hnsw-int8.onnx",
"bge-base-en-v1.5.hnsw.cached_q",
"bge-base-en-v1.5.hnsw.cached",
"bge-base-en-v1.5.hnsw.onnx",
"bge-base-en-v1.5.hnsw-int8.cached_q",
"bge-base-en-v1.5.hnsw-int8.cached",
"bge-base-en-v1.5.hnsw-int8.onnx",
"cohere-embed-english-v3.0.hnsw.cached_q",
"cohere-embed-english-v3.0.hnsw-int8.cached_q"
"cohere-embed-english-v3.0.hnsw.cached",
"cohere-embed-english-v3.0.hnsw-int8.cached"
};

public static String findMsMarcoTableTopicSetKeyV1(String topicKey) {
Expand Down