Wi-zz committed
Commit 4bfda25 • Parent: 7033d9d

Upload app.py with huggingface_hub

Files changed (1):
  1. app.py +3 -4
app.py CHANGED

@@ -28,7 +28,7 @@ from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedToke
 from typing import List, Union
 
 # Constants
-CLIP_PATH = "OpenGVLab/InternViT-300M-448px"
+CLIP_PATH = "google/siglip-so400m-patch14-384"
 VLM_PROMPT = "A descriptive caption for this image:\n"
 MODEL_PATH = "unsloth/Meta-Llama-3.1-8B-bnb-4bit"
 CHECKPOINT_PATH = Path("wpkklhc6")
@@ -49,8 +49,8 @@ class ImageAdapter(nn.Module):
 
 def load_models():
     print("Loading CLIP 📎")
-    clip_processor = AutoProcessor.from_pretrained(CLIP_PATH, trust_remote_code=True)
-    clip_model = AutoModel.from_pretrained(CLIP_PATH, trust_remote_code=True).vision_model.eval().requires_grad_(False).to("cuda")
+    clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
+    clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model.eval().requires_grad_(False).to("cuda")
 
     print("Loading tokenizer 🪙")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
@@ -66,7 +66,6 @@ def load_models():
 
     return clip_processor, clip_model, tokenizer, text_model, image_adapter
 
-
 @torch.no_grad()
 def stream_chat(input_images: List[Image.Image], batch_size: int, pbar: tqdm, models: tuple) -> List[str]:
     clip_processor, clip_model, tokenizer, text_model, image_adapter = models
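
For context on what the change buys: InternViT repositories rely on custom modeling code, which is why the old calls passed trust_remote_code=True, whereas SigLIP is implemented natively in transformers (since v4.37), so the flag can be dropped. Below is a minimal, self-contained sketch of the loading path as it stands after this commit; the dummy image and the printed shape check are illustrative additions, not part of app.py.

# Minimal sketch of the post-commit loading path (assumes transformers>=4.37,
# torch, Pillow, and a CUDA device; the dummy image is illustrative only).
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

CLIP_PATH = "google/siglip-so400m-patch14-384"

# SigLIP ships with transformers itself, so trust_remote_code=True
# (needed for InternViT's custom code) is no longer required.
clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
clip_model = (
    AutoModel.from_pretrained(CLIP_PATH)
    .vision_model                     # keep only the vision tower, as app.py does
    .eval()
    .requires_grad_(False)
    .to("cuda")
)

# Encode one image into the patch-level hidden states that an adapter
# (such as app.py's ImageAdapter) would consume downstream.
image = Image.new("RGB", (384, 384))  # stand-in for a real input image
pixel_values = clip_processor(images=image, return_tensors="pt").pixel_values.to("cuda")
with torch.no_grad():
    features = clip_model(pixel_values=pixel_values).last_hidden_state
print(features.shape)  # torch.Size([1, 729, 1152]) for this checkpoint

Note that swapping the vision tower changes the token count and feature width the downstream ImageAdapter sees, so the bundled adapter checkpoint (wpkklhc6) presumably corresponds to the new backbone.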