
Commit ef2cd69

scripts : add pod-llama.sh
1 parent 6c32d8c commit ef2cd69

1 file changed: +213 -0 lines changed

scripts/pod-llama.sh

#!/bin/bash
#
# Use this script only on fresh pods (runpod.io)!
# Otherwise, it can break your environment!
#

if [ -z "$1" ]; then
    echo "Usage: $0 <data>"
    echo " 0: no models"
    echo " 1: tinyllama-1b"
    echo " 2: codellama-7b"
    echo " 3: codellama-13b"
    echo " 4: codellama-34b"
    echo " 5: codellama-7b-instruct"
    echo " 6: codellama-13b-instruct"
    echo " 7: codellama-34b-instruct"

    exit 1
fi
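
# Example invocation (hypothetical): "./pod-llama.sh 0" only builds llama.cpp,
# while "./pod-llama.sh 1" also downloads, converts, quantizes and benchmarks
# the tinyllama-1b model, per the usage message above.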

set -x

# setup deps
apt-get update
apt-get install -y git-lfs cmake cmake-curses-gui vim ruby
git-lfs install

if [ ! -d "/workspace" ]; then
    ln -sfn $(pwd) /workspace
fi

# download data
cd /workspace

# this is useful to git clone repos without doubling the disk size due to .git
git clone https://github.com/iboB/git-lfs-download
ln -sfn /workspace/git-lfs-download/git-lfs-download /usr/local/bin/git-lfs-download
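
# note: git-lfs-download is used below as "git-lfs-download <repo-url> [--without <pattern>]"
# to fetch model repos from Hugging Face without the extra .git copy mentioned above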

# llama.cpp
cd /workspace
git clone https://github.com/ggerganov/llama.cpp

cd llama.cpp

LLAMA_CUBLAS=1 make -j

ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3 ./models/tinyllama-1b
ln -sfn /workspace/CodeLlama-7b-hf ./models/codellama-7b
ln -sfn /workspace/CodeLlama-13b-hf ./models/codellama-13b
ln -sfn /workspace/CodeLlama-34b-hf ./models/codellama-34b
ln -sfn /workspace/CodeLlama-7b-Instruct-hf ./models/codellama-7b-instruct
ln -sfn /workspace/CodeLlama-13b-Instruct-hf ./models/codellama-13b-instruct
ln -sfn /workspace/CodeLlama-34b-Instruct-hf ./models/codellama-34b-instruct

pip install -r requirements.txt
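
# note: the symlinks above let convert.py and ./quantize below address the downloaded
# checkpoints via ./models/<name>; requirements.txt installs the Python dependencies
# that convert.py needs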

# cmake
cd /workspace/llama.cpp

mkdir build-cublas
cd build-cublas

cmake -DLLAMA_CUBLAS=1 ../
make -j
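
# note: this CMake build keeps its binaries under ./build-cublas/bin (used later for
# ./bin/perplexity), while the Makefile build above produced ./quantize, ./batched,
# ./batched-bench and ./parallel in the repository root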
if [ "$1" -eq "0" ]; then
67+
exit 0
68+
fi
69+
70+
# more models
71+
if [ "$1" -eq "1" ]; then
72+
cd /workspace
73+
74+
git-lfs-download https://huggingface.co/PY007/TinyLlama-1.1B-Chat-v0.3
75+
76+
cd /workspace/llama.cpp
77+
78+
python3 convert.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16
79+
80+
./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0
81+
./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k
82+
./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q8_0.gguf q8_0
83+
fi
84+
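
# optional, hypothetical sanity check of a quantized model (not run by this script):
#   ./main -m ./models/tinyllama-1b/ggml-model-q4_0.gguf -p "Hello, my name is" -n 32 -ngl 99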

if [ "$1" -eq "2" ]; then
    cd /workspace

    git-lfs-download https://huggingface.co/codellama/CodeLlama-7b-hf --without *safetensors*
    rm -v ./CodeLlama-7b-hf/*safetensors*

    cd /workspace/llama.cpp

    python3 convert.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16

    ./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0
    ./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k
    ./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q8_0.gguf q8_0
fi

if [ "$1" -eq "3" ]; then
    cd /workspace

    git-lfs-download https://huggingface.co/codellama/CodeLlama-13b-hf --without *safetensors*
    rm -v ./CodeLlama-13b-hf/*safetensors*

    cd /workspace/llama.cpp

    python3 convert.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16

    ./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0
    ./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k
    ./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q8_0.gguf q8_0
fi

if [ "$1" -eq "4" ]; then
    cd /workspace

    git-lfs-download https://huggingface.co/codellama/CodeLlama-34b-hf --without *safetensors*
    rm -v ./CodeLlama-34b-hf/*safetensors*

    cd /workspace/llama.cpp

    python3 convert.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16

    ./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0
    ./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k
    ./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q8_0.gguf q8_0
fi

if [ "$1" -eq "5" ]; then
    cd /workspace

    git-lfs-download https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf --without *safetensors*
    rm -v ./CodeLlama-7b-Instruct-hf/*safetensors*

    cd /workspace/llama.cpp

    python3 convert.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16

    ./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0
    ./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k
    ./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q8_0.gguf q8_0
fi

if [ "$1" -eq "6" ]; then
    cd /workspace

    git-lfs-download https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf --without *safetensors*
    rm -v ./CodeLlama-13b-Instruct-hf/*safetensors*

    cd /workspace/llama.cpp

    python3 convert.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16

    ./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0
    ./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k
    ./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q8_0.gguf q8_0
fi

if [ "$1" -eq "7" ]; then
    cd /workspace

    git-lfs-download https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf --without *safetensors*
    rm -v ./CodeLlama-34b-Instruct-hf/*safetensors*

    cd /workspace/llama.cpp

    python3 convert.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16

    ./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0
    ./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k
    ./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q8_0.gguf q8_0
fi

if [ "$1" -eq "1" ]; then
    # perf + perplexity
    cd /workspace/llama.cpp/build-cublas

    make -j && ../scripts/run-all-perf.sh tinyllama-1b "f16" "-ngl 99 -t 1 -p 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128,256,512,1024,2048 -n 128"

    ../scripts/get-wikitext-2.sh
    unzip wikitext-2-raw-v1.zip

    make -j && ./bin/perplexity -m ../models/tinyllama-1b/ggml-model-f16.gguf -f ./wikitext-2-raw/wiki.test.raw -ngl 100 --chunks 32

    # batched
    cd /workspace/llama.cpp

    LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999

    # batched-bench
    cd /workspace/llama.cpp

    LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32

    # parallel
    cd /workspace/llama.cpp

    LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb

fi
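
# optional: the same perplexity command can be pointed at a quantized variant produced
# above (not run by this script), e.g. from build-cublas:
#   ./bin/perplexity -m ../models/tinyllama-1b/ggml-model-q4_0.gguf -f ./wikitext-2-raw/wiki.test.raw -ngl 100 --chunks 32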

# speculative
#if [ "$1" -eq "7" ]; then
#    cd /workspace/llama.cpp
#
#    LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
#fi
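
# note: in the commented speculative run above, -m selects the large target model
# (codellama-34b-instruct, f16) and -md the smaller draft model
# (codellama-7b-instruct, q4_0) that proposes tokens for speculative decoding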

# more benches
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
