Update README.md
Browse files
README.md
CHANGED
@@ -116,6 +116,7 @@ from Phi_3V_MoE.moe_phi3_v import Phi3VForCausalLMMoE, Phi3VForCausalLMMoEConfig
|
|
116 |
|
117 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
118 |
|
|
|
119 |
model_name_1 = f"lamm-mit/Cephalo-Phi-3-vision-128k-4b-beta"
|
120 |
model_1 = AutoModelForCausalLM.from_pretrained(
|
121 |
model_name_1,
|
@@ -123,6 +124,7 @@ model_1 = AutoModelForCausalLM.from_pretrained(
|
|
123 |
|
124 |
).to(device)
|
125 |
|
|
|
126 |
model_name_2 = f"microsoft/Phi-3-vision-128k-instruct"
|
127 |
model_2 = AutoModelForCausalLM.from_pretrained(
|
128 |
model_name_2,
|
@@ -130,15 +132,15 @@ model_2 = AutoModelForCausalLM.from_pretrained(
|
|
130 |
|
131 |
).to(device)
|
132 |
|
133 |
-
|
134 |
-
|
135 |
model_3 = AutoModelForCausalLM.from_pretrained(
|
136 |
model_name_3,
|
137 |
trust_remote_code=True, torch_dtype=torch.bfloat16,
|
138 |
|
139 |
).to(device)
|
140 |
|
141 |
-
dtype = torch.bfloat16 # Desired dtype for new layers
|
142 |
|
143 |
# Initialize the models
|
144 |
base_model = copy.deepcopy(model_2) # Your base model
|
|
|
116 |
|
117 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
118 |
|
119 |
+
# Model specialized in bio-inspired/mechanics and materials
|
120 |
model_name_1 = f"lamm-mit/Cephalo-Phi-3-vision-128k-4b-beta"
|
121 |
model_1 = AutoModelForCausalLM.from_pretrained(
|
122 |
model_name_1,
|
|
|
124 |
|
125 |
).to(device)
|
126 |
|
127 |
+
# Original Phi-3-vision instruct model from Microsoft
|
128 |
model_name_2 = f"microsoft/Phi-3-vision-128k-instruct"
|
129 |
model_2 = AutoModelForCausalLM.from_pretrained(
|
130 |
model_name_2,
|
|
|
132 |
|
133 |
).to(device)
|
134 |
|
135 |
+
# Model trained to convert images into LaTeX formulas
|
136 |
+
model_name_3 = f"lamm-mit/Cephalo-LaTeX-Phi-3-vision-128k-4b-alpha"
|
137 |
model_3 = AutoModelForCausalLM.from_pretrained(
|
138 |
model_name_3,
|
139 |
trust_remote_code=True, torch_dtype=torch.bfloat16,
|
140 |
|
141 |
).to(device)
|
142 |
|
143 |
+
dtype = torch.bfloat16 # Desired dtype for new layers in MoE model
|
144 |
|
145 |
# Initialize the models
|
146 |
base_model = copy.deepcopy(model_2) # Your base model
|