update to chat-v1.1

Files changed (10) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "cogvlm-chat",
   "architectures": [
     "CogVLMForCausalLM"
   ],

 {
+  "_name_or_path": "cogvlm-chat-v1.1",
   "architectures": [
     "CogVLMForCausalLM"
   ],

model-00001-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33c8a1fac87166147265084f0105715e2c7e8200d9114c4354fd32549b0b7a3e
 size 4938885184

 version https://git-lfs.github.com/spec/v1
+oid sha256:e29f6ec471ca55789ab14947b527729b9c30313ceb1e7726590b85f9f6406cca
 size 4938885184

model-00002-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be8b2647278351fad70022f1cad3ee77e3540bbaf845c7c58f4351b09ecfe6e1
 size 4947290688

 version https://git-lfs.github.com/spec/v1
+oid sha256:e82356882701b1a778408f31e676d17c2aff799c543e8596ed74bc805b4a1213
 size 4947290688

model-00003-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d74b96de273d26f263dd485858ad4a9488cedeb8e5e2c4b38485f504e472f1e3
 size 4947307592

 version https://git-lfs.github.com/spec/v1
+oid sha256:04096f84f42798d0c89319ff8254995a2a3512c16ec88dfd078ce421867d92ec
 size 4947307592

model-00004-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41d8faae19f310bb1210dc5435adcb7ab0614500d04cdc9e3e8ed53e083449e1
 size 4991331080

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b42af0bb16647959b3e55def4b3c66ab8c3a25fd948a5245c81d070f2b4313d
 size 4991331080

model-00005-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd848d623b8bb6493f67bf2a05fbabd4971ab6cb65d7061664c19273a5532c76
 size 4991331088

 version https://git-lfs.github.com/spec/v1
+oid sha256:38c07825790e055dd169376479994a58a4f59775ba7cf31d5ca25d8a465e7b0c
 size 4991331088

model-00006-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16a0c1faf56687643e7bf10418ca64879625d6f9eccebcafecfae20da2f3f6bb
 size 4970162920

 version https://git-lfs.github.com/spec/v1
+oid sha256:d01880ca5677e69a5f8632f9dda62814f0c549b5a40d4f7e136065e5d64c1a7d
 size 4970162920

model-00007-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49595dbcce29f4368f9705c6afb0acae4fe86bbb71ac0cffd123dd1daefa2f50
 size 4960543792

 version https://git-lfs.github.com/spec/v1
+oid sha256:e70b0e10d2ac8800e69e514b6a9b04ac28cd7db43985ce62daa4e0e639b4e5ba
 size 4960543792

model-00008-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fde07fd1de183c38dbbb8f6265e97776361395d7a9da4342b2dea63f9aee7bb9
 size 532677104

 version https://git-lfs.github.com/spec/v1
+oid sha256:a756381ef65b92af7f1fb97da3d59cb04586080982de86d76805299898223294
 size 532677104

modeling_cogvlm.py CHANGED Viewed

@@ -538,25 +538,23 @@ class CogVLMModel(CogVLMPreTrainedModel):
         return combined_attention_mask
-def chat_history_to_prompt(history, query):
-    prompt = " [INST] "
-    for i, (old_query, response) in enumerate(history):
-        prompt += old_query + " [/INST] " + response + " [INST] "
-    prompt += query + " [/INST] "
-    return prompt
-def base_history_to_prompt(history, query):
-    prompt = query
     return prompt
-_history_to_prompt = {
-    "base": base_history_to_prompt,
-    "chat": chat_history_to_prompt
-}
 class CogVLMForCausalLM(CogVLMPreTrainedModel):
     _auto_class = "AutoModelForCausalLM"
@@ -744,14 +742,14 @@ class CogVLMForCausalLM(CogVLMPreTrainedModel):
             query: str,
             history: Optional[List[Tuple[str, str]]] = None,
             images: Optional[List["PIL.Image"]] = None,
-            template_version: Optional[Literal["base", "chat"]] = None,
     ):
         image_size: int = self.config.vision_config['image_size']
         patch_size: int = self.config.vision_config['patch_size']
         template_version = template_version or self.config.template_version
         assert images is None or len(images) <= 1, f"not support multi images by now."
         history = history or []
-        text = _history_to_prompt[template_version](history, query)
         input_ids = [tokenizer.bos_token_id]
         token_type_ids = [LANGUAGE_TOKEN_TYPE]

         return combined_attention_mask
+def _history_to_prompt(signal_type, history, query):
+    if signal_type == 'base':
+        return query
+    elif signal_type == 'vqa':
+        answer_format = 'Short answer:'
+    elif signal_type == 'chat':
+        answer_format = 'Answer:'
+    else:
+        assert False, f"Unknown signal type {signal_type}"
+    prompt = ''
+    for i, (old_query, response) in enumerate(history):
+        prompt += 'Question: ' + old_query + " {} ".format(answer_format) + response + "\n"
+    prompt += 'Question: {} {}'.format(query, answer_format)
     return prompt
 class CogVLMForCausalLM(CogVLMPreTrainedModel):
     _auto_class = "AutoModelForCausalLM"
             query: str,
             history: Optional[List[Tuple[str, str]]] = None,
             images: Optional[List["PIL.Image"]] = None,
+            template_version: Optional[Literal["base", "chat", "vqa"]] = None,
     ):
         image_size: int = self.config.vision_config['image_size']
         patch_size: int = self.config.vision_config['patch_size']
         template_version = template_version or self.config.template_version
         assert images is None or len(images) <= 1, f"not support multi images by now."
         history = history or []
+        text = _history_to_prompt(template_version, history, query)
         input_ids = [tokenizer.bos_token_id]
         token_type_ids = [LANGUAGE_TOKEN_TYPE]