Spaces:
Paused
Paused
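Added 4-bit support: when the eval request's precision is '4bit', halve the parameter count used to choose the GPU instance size and type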
Browse files
main_backend_lighteval.py
CHANGED
@@ -52,16 +52,20 @@ def run_auto_eval():
|
|
52 |
eval_request = eval_requests[0]
|
53 |
pp.pprint(eval_request)
|
54 |
|
|
|
|
|
|
|
|
|
55 |
# For GPU
|
56 |
-
if not eval_request or
|
57 |
raise ValueError("Couldn't detect number of params, please make sure the metadata is available")
|
58 |
-
elif
|
59 |
instance_size, instance_type, cap = "x1", "nvidia-t4", 40
|
60 |
-
elif
|
61 |
instance_size, instance_type, cap = "x1", "nvidia-a10g", 40
|
62 |
-
elif
|
63 |
instance_size, instance_type, cap = "x4", "nvidia-a10g", 20
|
64 |
-
elif
|
65 |
instance_size, instance_type, cap = "x2", "nvidia-a100", 5
|
66 |
else:
|
67 |
set_eval_request(
|
|
|
52 |
eval_request = eval_requests[0]
|
53 |
pp.pprint(eval_request)
|
54 |
|
55 |
+
params_size = eval_request.params
|
56 |
+
if eval_request.precision == '4bit':
|
57 |
+
params_size //= 2
|
58 |
+
|
59 |
# For GPU
|
60 |
+
if not eval_request or params_size < 0:
|
61 |
raise ValueError("Couldn't detect number of params, please make sure the metadata is available")
|
62 |
+
elif params_size < 4:
|
63 |
instance_size, instance_type, cap = "x1", "nvidia-t4", 40
|
64 |
+
elif params_size < 9:
|
65 |
instance_size, instance_type, cap = "x1", "nvidia-a10g", 40
|
66 |
+
elif params_size < 30:
|
67 |
instance_size, instance_type, cap = "x4", "nvidia-a10g", 20
|
68 |
+
elif params_size < 45:
|
69 |
instance_size, instance_type, cap = "x2", "nvidia-a100", 5
|
70 |
else:
|
71 |
set_eval_request(
|