File tree 4 files changed +24
-7
lines changed
4 files changed +24
-7
lines changed Original file line number Diff line number Diff line change
1
+ # [Code Llama](https://huggingface.co/collections/meta-llama/code-llama-family-661da32d0a9d678b6f55b933)
+
+ | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+ |:----------:|:----------:|:----------:|:----------:|
+ | [**`7b-hf`**](https://huggingface.co/meta-llama/CodeLlama-7b-hf) | 1x a40 | - tokens/s | - tokens/s |
+ | [`7b-Instruct-hf`](https://huggingface.co/meta-llama/CodeLlama-7b-Instruct-hf) | 1x a40 | - tokens/s | - tokens/s |
+ | [`13b-hf`](https://huggingface.co/meta-llama/CodeLlama-13b-hf) | 1x a40 | - tokens/s | - tokens/s |
+ | [`13b-Instruct-hf`](https://huggingface.co/meta-llama/CodeLlama-13b-Instruct-hf) | 1x a40 | - tokens/s | - tokens/s |
+ | [`34b-hf`](https://huggingface.co/meta-llama/CodeLlama-34b-hf) | 2x a40 | - tokens/s | - tokens/s |
+ | [`34b-Instruct-hf`](https://huggingface.co/meta-llama/CodeLlama-34b-Instruct-hf) | 2x a40 | - tokens/s | - tokens/s |
+ | [`70b-hf`](https://huggingface.co/meta-llama/CodeLlama-70b-hf) | 4x a40 | - tokens/s | - tokens/s |
+ | [`70b-Instruct-hf`](https://huggingface.co/meta-llama/CodeLlama-70b-Instruct-hf) | 4x a40 | - tokens/s | - tokens/s |
Original file line number Diff line number Diff line change
1
# Environment configuration for serving the CodeLlama 7b-hf variant.
# All values are exported so they are visible to child processes of the
# launch script that sources this file.

# Model family and variant; together they select the Hugging Face repo
# (e.g. meta-llama/CodeLlama-7b-hf). No embedded whitespace is allowed —
# the scraped diff showed a spurious leading space inside the quotes,
# which would produce a wrong model path.
export MODEL_NAME="CodeLlama"
export MODEL_VARIANT="7b-hf"

# Cluster resource allocation: one node with one GPU, matching the
# "1x a40" suggestion for 7b-hf in the accompanying variants table.
export NUM_NODES=1
export NUM_GPUS=1

# Upper bound on the number of log probabilities vLLM may return per token.
# NOTE(review): 32000 presumably matches the model's vocabulary size — confirm.
export VLLM_MAX_LOGPROBS=32000
Original file line number Diff line number Diff line change 2
2
| Variant | Suggested resource allocation |
|:----------:|:----------:|
- | [**`7b`**](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 1x a40 |
- | [`7b-chat`](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) | 1x a40 |
- | [`13b`](https://huggingface.co/meta-llama/Llama-2-13b-hf) | 1x a40 |
- | [`13b-chat`](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) | 1x a40 |
- | [`70b`](https://huggingface.co/meta-llama/Llama-2-70b-hf) | 4x a40 |
- | [`70b-chat`](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4x a40 |
+ | [**`7b-hf`**](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 1x a40 |
+ | [`7b-chat-hf`](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) | 1x a40 |
+ | [`13b-hf`](https://huggingface.co/meta-llama/Llama-2-13b-hf) | 1x a40 |
+ | [`13b-chat-hf`](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) | 1x a40 |
+ | [`70b-hf`](https://huggingface.co/meta-llama/Llama-2-70b-hf) | 4x a40 |
+ | [`70b-chat-hf`](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4x a40 |
Original file line number Diff line number Diff line change 1
[tool.poetry]
name = "vector-inference"
- version = "0.2.0"
+ version = "0.2.1"
description = "Efficient LLM inference on Slurm clusters using vLLM."
authors = ["XkunW <marshall.wang@vectorinstitute.ai>"]
license = "MIT license"
You can’t perform that action at this time.
0 commit comments