Commit 141e879
Parent(s): a854397
add custom handler

Files changed:
- README.md +145 -72
- optimize_model.ipynb +46 -4
README.md
CHANGED

The README is rewritten: the previous custom sentence-embeddings handler example (a `pipeline.py` applying mean pooling and normalization to token embeddings) is replaced by the question-answering documentation below. The updated content:
library_name: generic
---

# Optimized and Quantized [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2) with a custom handler.py

This repository implements a `custom` handler for `question-answering` for 🤗 Inference Endpoints, with accelerated inference using [🤗 Optimum](https://huggingface.co/docs/optimum/index). The code for the customized handler is in [handler.py](https://huggingface.co/philschmid/roberta-base-squad2-optimized/blob/main/handler.py).

Below we also describe how we converted & optimized the model, based on the [Accelerate Transformers with Hugging Face Optimum](https://huggingface.co/blog/optimum-inference) blog post. You can also check out the [notebook](https://huggingface.co/philschmid/roberta-base-squad2-optimized/blob/main/optimize_model.ipynb).

### Expected request payload

```json
{
  "inputs": {
    "question": "As what is Philipp working?",
    "context": "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value."
  }
}
```

```python
import requests as r

ENDPOINT_URL = ""
HF_TOKEN = ""


def predict(question: str = None, context: str = None):
    payload = {"inputs": {"question": question, "context": context}}
    response = r.post(
        ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json=payload
    )
    return response.json()


prediction = predict(
    question="As what is Philipp working?",
    context="Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science."
)
```

expected output:

```python
{
    'score': 0.4749588668346405,
    'start': 88,
    'end': 102,
    'answer': 'Technical Lead'
}
```
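
The `start` and `end` values are character offsets into the submitted context, so the answer span can be recovered by slicing. A small sketch using the context string from the `predict` call above:

```python
# the context string passed to predict() above
context = "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science."

prediction = {"score": 0.4749588668346405, "start": 88, "end": 102, "answer": "Technical Lead"}

# slicing the context with the returned offsets recovers the answer span
assert context[prediction["start"]:prediction["end"]] == prediction["answer"]
```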

# Convert & Optimize model with Optimum

Steps:
1. [Convert model to ONNX](#1-convert-model-to-onnx)
2. [Optimize & quantize model with Optimum](#2-optimize--quantize-model-with-optimum)
3. [Create Custom Handler for Inference Endpoints](#3-create-custom-handler-for-inference-endpoints)
4. [Test Custom Handler Locally](#4-test-custom-handler-locally)
5. [Push to repository and create Inference Endpoint](#5-push-to-repository-and-create-inference-endpoint)

Helpful links:
* [Accelerate Transformers with Hugging Face Optimum](https://huggingface.co/blog/optimum-inference)
* [Optimizing Transformers for GPUs with Optimum](https://www.philschmid.de/optimizing-transformers-with-optimum-gpu)
* [Optimum Documentation](https://huggingface.co/docs/optimum/onnxruntime/modeling_ort)
* [Create Custom Handler Endpoints](https://link-to-docs)

## Setup & Installation

```python
%%writefile requirements.txt
optimum[onnxruntime]==1.4.0
mkl-include
mkl
```

```python
!pip install -r requirements.txt
```

## 0. Baseline Performance

```python
from transformers import pipeline

qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
```

Okay, let's test the performance (latency) with a sequence length of 128.

```python
context = "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value."
question = "As what is Philipp working?"

payload = {"inputs": {"question": question, "context": context}}
```

```python
from time import perf_counter
import numpy as np

def measure_latency(pipe, payload):
    latencies = []
    # warm up
    for _ in range(10):
        _ = pipe(question=payload["inputs"]["question"], context=payload["inputs"]["context"])
    # timed run
    for _ in range(50):
        start_time = perf_counter()
        _ = pipe(question=payload["inputs"]["question"], context=payload["inputs"]["context"])
        latency = perf_counter() - start_time
        latencies.append(latency)
    # compute run statistics
    time_avg_ms = 1000 * np.mean(latencies)
    time_std_ms = 1000 * np.std(latencies)
    return f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}"

print(f"Vanilla model {measure_latency(qa, payload)}")
# Vanilla model Average latency (ms) - 64.15 +\- 2.44
```

## 1. Convert model to ONNX

```python
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer
from pathlib import Path


model_id = "deepset/roberta-base-squad2"
onnx_path = Path(".")

# load vanilla transformers and convert to onnx
model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)
```

## 2. Optimize & quantize model with Optimum

```python
from optimum.onnxruntime import ORTOptimizer, ORTQuantizer
from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig

# Create the optimizer
optimizer = ORTOptimizer.from_pretrained(model)

# Define the optimization strategy by creating the appropriate configuration
optimization_config = OptimizationConfig(optimization_level=99)  # enable all optimizations

# Optimize the model
optimizer.optimize(save_dir=onnx_path, optimization_config=optimization_config)
```

```python
# create ORTQuantizer and define quantization configuration
dynamic_quantizer = ORTQuantizer.from_pretrained(onnx_path, file_name="model_optimized.onnx")
dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

# apply the quantization configuration to the model
model_quantized_path = dynamic_quantizer.quantize(
    save_dir=onnx_path,
    quantization_config=dqconfig,
)
```
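
Before wiring the artifacts into the handler, it can help to load the quantized checkpoint back and run a quick smoke test. This is a minimal sketch, assuming the quantizer wrote `model_optimized_quantized.onnx` into the current directory next to the tokenizer files (the same file the handler below loads):

```python
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline

# load the quantized ONNX model and tokenizer from the current directory
model = ORTModelForQuestionAnswering.from_pretrained(".", file_name="model_optimized_quantized.onnx")
tokenizer = AutoTokenizer.from_pretrained(".")

# run a quick smoke test through the question-answering pipeline
qa = pipeline("question-answering", model=model, tokenizer=tokenizer)
print(qa(question="As what is Philipp working?", context="Philipp is working as a Technical Lead at Hugging Face."))
```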

## 3. Create Custom Handler for Inference Endpoints

```python
%%writefile handler.py
from typing import Dict, List, Any
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline


class EndpointHandler():
    def __init__(self, path=""):
        # load the optimized model
        self.model = ORTModelForQuestionAnswering.from_pretrained(path, file_name="model_optimized_quantized.onnx")
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # create pipeline
        self.pipeline = pipeline("question-answering", model=self.model, tokenizer=self.tokenizer)

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """
        data (:obj:):
            includes the input data and the parameters for the inference.
        Return:
            A :obj:`list`:. The list contains the answer and scores of the inference inputs
        """
        inputs = data.get("inputs", data)
        # run the model
        prediction = self.pipeline(**inputs)
        # return prediction
        return prediction
```

## 4. Test Custom Handler Locally

```python
from handler import EndpointHandler

# init handler
my_handler = EndpointHandler(path=".")

# prepare sample payload
context = "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value."
question = "As what is Philipp working?"

payload = {"inputs": {"question": question, "context": context}}

# test the handler
my_handler(payload)
```

```python
from time import perf_counter
import numpy as np

def measure_latency(handler, payload):
    latencies = []
    # warm up
    for _ in range(10):
        _ = handler(payload)
    # timed run
    for _ in range(50):
        start_time = perf_counter()
        _ = handler(payload)
        latency = perf_counter() - start_time
        latencies.append(latency)
    # compute run statistics
    time_avg_ms = 1000 * np.mean(latencies)
    time_std_ms = 1000 * np.std(latencies)
    return f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}"

print(f"Optimized & Quantized model {measure_latency(my_handler, payload)}")
# Optimized & Quantized model Average latency (ms) - 29.90 +\- 0.53
```

For comparison: `Vanilla model Average latency (ms) - 64.15 +\- 2.44`, so the optimized & quantized model is roughly 2.1x faster.

## 5. Push to repository and create Inference Endpoint

```python
# add all our new files
!git add *
# commit our files
!git commit -m "add custom handler"
# push the files to the hub
!git push
```
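
Once the repository is pushed and an Inference Endpoint has been created from it, the endpoint can be called with the same payload format shown at the top of this README. A minimal sketch with `requests`; `ENDPOINT_URL` and `HF_TOKEN` are placeholders to fill in from the Endpoints UI:

```python
import requests

ENDPOINT_URL = ""  # placeholder: URL of the created Inference Endpoint
HF_TOKEN = ""  # placeholder: a Hugging Face access token authorized for the endpoint

payload = {
    "inputs": {
        "question": "As what is Philipp working?",
        "context": "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science.",
    }
}

# send the request and print the prediction (answer, score, start/end offsets)
response = requests.post(
    ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json=payload
)
print(response.json())
```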
optimize_model.ipynb
CHANGED

The notebook is updated to record execution counts and cell outputs for the payload cell and the git-push cell, and an empty code cell is appended at the end. The changed regions of the notebook JSON:

```json
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"inputs\": {\"question\": \"As what is Philipp working?\", \"context\": \"Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value.\"}}'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "context=\"Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value.\" \n",
    "question=\"As what is Philipp working?\" \n",
```

```json
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[main a854397] add custom handler\n",
      " 14 files changed, 151227 insertions(+)\n",
      " create mode 100644 README.md\n",
      " create mode 100644 config.json\n",
      " create mode 100644 handler.py\n",
      " create mode 100644 merges.txt\n",
      " create mode 100644 model.onnx\n",
      " create mode 100644 model_optimized.onnx\n",
      " create mode 100644 model_optimized_quantized.onnx\n",
      " create mode 100644 optimize_model.ipynb\n",
      " create mode 100644 ort_config.json\n",
      " create mode 100644 requirements.txt\n",
      " create mode 100644 special_tokens_map.json\n",
      " create mode 100644 tokenizer.json\n",
      " create mode 100644 tokenizer_config.json\n",
      " create mode 100644 vocab.json\n",
      "Username for 'https://huggingface.co': ^C\n"
     ]
    }
   ],
   "source": [
    "# add all our new files\n",
    "!git add * \n",
    "# commit our files\n",
    "!git commit -m \"add custom handler\"\n",
    "# push the files to the hub\n",
    "!git push"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
```