
Commit 5029545

chore
1 parent 1954d1e commit 5029545

File tree: 65 files changed, +1705 −189 lines changed


README.md
Lines changed: 35 additions & 0 deletions

@@ -3,3 +3,38 @@
 poetry install
 <!-- poetry run pip install --upgrade --force-reinstall --no-deps "apache-beam[gcp]" "multiprocess==0.70.14" -->
 poetry run pip install --upgrade --force-reinstall --no-deps "apache-beam[gcp]" "multiprocess==0.70.14" "dill==0.3.1.1"
+
+
+
+| base models | batch size | lr | JSICK (val) | JSICK (test) | JSTS (train) | JSTS (val) |
+| --- | :---: | :---: | :---: | :---: | :---: | :---: |
+| [cl-tohoku/bert-base-japanese-v2](https://huggingface.co/cl-tohoku/bert-base-japanese-v2) | | | | | | |
+| [cl-tohoku/bert-base-japanese-char-v2](https://huggingface.co/cl-tohoku/bert-base-japanese-char-v2) | | | | | | |
+| [cl-tohoku/bert-base-japanese](https://huggingface.co/cl-tohoku/bert-base-japanese) | | | | | | |
+| [cl-tohoku/bert-base-japanese-whole-word-masking](https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking) | | | | | | |
+| [cl-tohoku/bert-base-japanese-char](https://huggingface.co/cl-tohoku/bert-base-japanese-char) | | | | | | |
+| [ku-nlp/roberta-base-japanese-char-wwm](https://huggingface.co/ku-nlp/roberta-base-japanese-char-wwm) | | | | | | |
+| [studio-ousia/luke-japanese-base-lite](https://huggingface.co/studio-ousia/luke-japanese-base-lite) | | | | | | |
+| | | | | | | |
+| [ku-nlp/deberta-v2-base-japanese](https://huggingface.co/ku-nlp/deberta-v2-base-japanese) | | | | | | |
+| [nlp-waseda/roberta-base-japanese](https://huggingface.co/nlp-waseda/roberta-base-japanese) | | | | | | |
+| [megagonlabs/roberta-long-japanese](https://huggingface.co/megagonlabs/roberta-long-japanese) | | | | | | |
+| | | | | | | |
+| [bert-base-multilingual-cased](https://huggingface.co/bert-base-multilingual-cased) | | | | | | |
+| [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) | | | | | | |
+| [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) | | | | | | |
+| [studio-ousia/mluke-base-lite](https://huggingface.co/studio-ousia/mluke-base-lite) | | | | | | |
+
+
+
+| large models | batch size | lr | JSICK (val) | JSICK (test) | JSTS (train) | JSTS (val) |
+| --- | :---: | :---: | :---: | :---: | :---: | :---: |
+| [cl-tohoku/bert-large-japanese](https://huggingface.co/cl-tohoku/bert-large-japanese) | | | | | | |
+| [ku-nlp/roberta-large-japanese-char-wwm](https://huggingface.co/ku-nlp/roberta-large-japanese-char-wwm) | | | | | | |
+| [studio-ousia/luke-japanese-large-lite](https://huggingface.co/studio-ousia/luke-japanese-large-lite) | | | | | | |
+| | | | | | | |
+| [nlp-waseda/roberta-large-japanese](https://huggingface.co/nlp-waseda/roberta-large-japanese) | | | | | | |
+| [ku-nlp/deberta-v2-large-japanese](https://huggingface.co/ku-nlp/deberta-v2-large-japanese) | | | | | | |
+| | | | | | | |
+| [xlm-roberta-large](https://huggingface.co/xlm-roberta-large) | | | | | | |
+| [studio-ousia/mluke-large-lite](https://huggingface.co/studio-ousia/mluke-large-lite) | | | | | | |
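
Each empty cell in these tables corresponds to one (model, batch size, lr) training configuration, and each cell is produced by a single invocation of the supervised training script that the sweep scripts below iterate over. A minimal sketch of one such run, with placeholder model and hyperparameter values; the flags mirror scripts/10/*.sh in this commit:

    # Hypothetical single run for one table cell. The model name, batch size,
    # and learning rate here are placeholders; the sweep scripts below loop
    # over all combinations.
    poetry run python src/train_sup.py \
        --dataset_name jsnli+nu-snli \
        --model_name cl-tohoku/bert-base-japanese-v2 \
        --batch_size 64 \
        --lr 3e-5 \
        --device cuda:0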

scripts/10/0.sh
Lines changed: 18 additions & 8 deletions

@@ -1,15 +1,25 @@
 device="cuda:0"
-model_name="cl-tohoku/bert-large-japanese"
 
 for i in 0 1 2; do
-    for batch_size in 32 64 128 256 512; do
+    for model_name in studio-ousia/luke-japanese-large-lite studio-ousia/luke-japanese-base-lite; do
         for lr in 1e-5 3e-5 5e-5; do
-            poetry run python src/train_unsup.py \
-                --dataset_name wiki40b \
-                --model_name $model_name \
-                --batch_size $batch_size \
-                --lr $lr \
-                --device $device
+            for batch_size in 512; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --gradient_checkpointing \
+                    --device $device
+            done
+            for batch_size in 256 128 64; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+            done
         done
     done
 done
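
The rewritten scripts duplicate the inner block so that only the batch-size-512 runs pass --gradient_checkpointing (trading extra compute for lower memory), while smaller batch sizes run without it. A sketch of the same inner loop factored to toggle the flag instead; this is an equivalent alternative, not what the commit does, and it assumes the same $model_name, $lr, and $device variables from the surrounding loops:

    # Equivalent sketch: enable gradient checkpointing only for the largest
    # batch size rather than duplicating the invocation block.
    for batch_size in 512 256 128 64; do
        extra=""
        if [ "$batch_size" -eq 512 ]; then
            extra="--gradient_checkpointing"
        fi
        poetry run python src/train_sup.py \
            --dataset_name jsnli+nu-snli \
            --model_name $model_name \
            --batch_size $batch_size \
            --lr $lr \
            $extra \
            --device $device
    done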

scripts/10/1.sh
Lines changed: 18 additions & 8 deletions

@@ -1,15 +1,25 @@
 device="cuda:1"
-model_name="studio-ousia/luke-japanese-large-lite"
 
 for i in 0 1 2; do
-    for batch_size in 32 64 128 256 512; do
+    for model_name in cl-tohoku/bert-large-japanese ku-nlp/roberta-large-japanese-char-wwm; do
         for lr in 1e-5 3e-5 5e-5; do
-            poetry run python src/train_unsup.py \
-                --dataset_name wiki40b \
-                --model_name $model_name \
-                --batch_size $batch_size \
-                --lr $lr \
-                --device $device
+            for batch_size in 512; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --gradient_checkpointing \
+                    --device $device
+            done
+            for batch_size in 256 128 64; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+            done
         done
     done
 done

scripts/10/2.sh
Lines changed: 20 additions & 8 deletions

@@ -1,15 +1,27 @@
 device="cuda:2"
-model_name="cl-tohoku/bert-base-japanese-v2"
 
 for i in 0 1 2; do
-    for batch_size in 32 64 128 256 512; do
+    for model_name in nlp-waseda/roberta-large-japanese ku-nlp/deberta-v2-large-japanese; do
         for lr in 1e-5 3e-5 5e-5; do
-            poetry run python src/train_unsup.py \
-                --dataset_name wiki40b \
-                --model_name $model_name \
-                --batch_size $batch_size \
-                --lr $lr \
-                --device $device
+            for batch_size in 512; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --use_jumanpp \
+                    --gradient_checkpointing \
+                    --device $device
+            done
+            for batch_size in 256 128 64; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --use_jumanpp \
+                    --device $device
+            done
         done
     done
 done

scripts/10/3.sh
Lines changed: 18 additions & 8 deletions

@@ -1,15 +1,25 @@
 device="cuda:3"
-model_name="ku-nlp/deberta-v2-large-japanese"
 
 for i in 0 1 2; do
-    for batch_size in 32 64 128 256 512; do
+    for model_name in xlm-roberta-large studio-ousia/mluke-large-lite; do
         for lr in 1e-5 3e-5 5e-5; do
-            poetry run python src/train_unsup.py \
-                --dataset_name wiki40b \
-                --model_name $model_name \
-                --batch_size $batch_size \
-                --lr $lr \
-                --device $device
+            for batch_size in 512; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --gradient_checkpointing \
+                    --device $device
+            done
+            for batch_size in 256 128 64; do
+                poetry run python src/train_sup.py \
+                    --dataset_name jsnli+nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+            done
         done
     done
 done

scripts/10/prev/2023-03-10/0.sh
Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+device="cuda:0"
+
+for i in 0 1 2; do
+    for model_name in cl-tohoku/bert-base-japanese-v2 cl-tohoku/bert-base-japanese-char-v2 cl-tohoku/bert-base-japanese cl-tohoku/bert-base-japanese-whole-word-masking; do
+        for batch_size in 64 128 256 512; do
+            for lr in 1e-5 3e-5 5e-5; do
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli+mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+            done
+        done
+    done
+done

scripts/10/prev/2023-03-10/1.sh
Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+device="cuda:1"
+
+for i in 0 1 2; do
+    for model_name in cl-tohoku/bert-large-japanese ku-nlp/roberta-large-japanese-char-wwm; do
+        for batch_size in 64 128 256 512; do
+            for lr in 1e-5 3e-5 5e-5; do
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli+mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+            done
+        done
+    done
+done

scripts/10/prev/2023-03-10/2.sh
Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
+device="cuda:2"
+
+for i in 0 1 2; do
+    for model_name in nlp-waseda/roberta-large-japanese ku-nlp/deberta-v2-large-japanese; do
+        for batch_size in 64 128 256 512; do
+            for lr in 1e-5 3e-5 5e-5; do
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --use_jumanpp \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --use_jumanpp \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli+mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --use_jumanpp \
+                    --device $device
+            done
+        done
+    done
+done

scripts/10/prev/2023-03-10/3.sh
Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+device="cuda:3"
+
+for i in 0 1 2; do
+    for model_name in xlm-roberta-large studio-ousia/mluke-large-lite; do
+        for batch_size in 64 128 256 512; do
+            for lr in 1e-5 3e-5 5e-5; do
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli+mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --device $device
+            done
+        done
+    done
+done

scripts/10/prev/2023-03-11/0.sh
Lines changed: 25 additions & 0 deletions

@@ -0,0 +1,25 @@
+device="cuda:0"
+
+for batch_size in 1024; do
+    for i in 0 1; do
+        for model_name in cl-tohoku/bert-large-japanese ku-nlp/roberta-large-japanese-char-wwm; do
+            for lr in 1e-5 3e-5 5e-5; do
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --gradient_checkpointing \
+                    --device $device
+
+                poetry run python src/train_sup.py \
+                    --dataset_name nu-snli+mnli \
+                    --model_name $model_name \
+                    --batch_size $batch_size \
+                    --lr $lr \
+                    --gradient_checkpointing \
+                    --device $device
+            done
+        done
+    done
+done
