yentinglin
committed on
Commit
•
a2ab71e
1
Parent(s):
2413fcb
Update README.md
Browse files
README.md
CHANGED
@@ -214,7 +214,7 @@ class EosListStoppingCriteria(StoppingCriteria):
|
|
214 |
return self.eos_sequence in last_ids
|
215 |
|
216 |
# Initialize the model with automatic device mapping
|
217 |
-
llm = pipeline("text-generation", model="yentinglin/Llama-3-Taiwan-70B-Instruct
|
218 |
tokenizer = llm.tokenizer
|
219 |
|
220 |
# Define a conversation example
|
@@ -258,7 +258,7 @@ docker run \
|
|
258 |
-p "${PORT}:8000" \
|
259 |
--ipc=host \
|
260 |
vllm/vllm-openai:v0.4.0.post1 \
|
261 |
-
--model "yentinglin/Llama-3-Taiwan-70B-Instruct
|
262 |
-tp "${NUM_GPUS}"
|
263 |
```
|
264 |
|
@@ -277,7 +277,7 @@ client = OpenAI(
|
|
277 |
)
|
278 |
|
279 |
chat_response = client.chat.completions.create(
|
280 |
-
model="yentinglin/Llama-3-Taiwan-70B-Instruct
|
281 |
messages=[
|
282 |
{"role": "system", "content": "You are a helpful assistant."},
|
283 |
{"role": "user", "content": "Tell me a joke."},
|
|
|
214 |
return self.eos_sequence in last_ids
|
215 |
|
216 |
# Initialize the model with automatic device mapping
|
217 |
+
llm = pipeline("text-generation", model="yentinglin/Llama-3-Taiwan-70B-Instruct", device_map="auto")
|
218 |
tokenizer = llm.tokenizer
|
219 |
|
220 |
# Define a conversation example
|
|
|
258 |
-p "${PORT}:8000" \
|
259 |
--ipc=host \
|
260 |
vllm/vllm-openai:v0.4.0.post1 \
|
261 |
+
--model "yentinglin/Llama-3-Taiwan-70B-Instruct" \
|
262 |
-tp "${NUM_GPUS}"
|
263 |
```
|
264 |
|
|
|
277 |
)
|
278 |
|
279 |
chat_response = client.chat.completions.create(
|
280 |
+
model="yentinglin/Llama-3-Taiwan-70B-Instruct",
|
281 |
messages=[
|
282 |
{"role": "system", "content": "You are a helpful assistant."},
|
283 |
{"role": "user", "content": "Tell me a joke."},
|