saattrupdan
committed on
Commit
•
23ff65c
1
Parent(s):
267941c
feat: Add app
Browse files- .gitignore +1 -0
- README.md +5 -5
- app.py +50 -0
- requirements.txt +78 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.venv/
|
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
---
|
2 |
-
title: Named Entity Recognition
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license:
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Danish Named Entity Recognition
|
3 |
+
emoji: 🎯
|
4 |
+
colorFrom: orange
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.12.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: mit
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Gradio app that showcases Scandinavian NER models."""
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
from transformers import pipeline
|
5 |
+
|
6 |
+
|
7 |
+
def main() -> None:
    """Build and launch the Gradio demo for Scandinavian named entity recognition.

    Loads the ``saattrupdan/nbailab-base-ner-scandi`` token-classification
    pipeline once, wires it to a textbox -> highlighted-text interface, and
    starts the Gradio server. The call to ``demo.launch()`` is the last
    statement of the function.
    """
    # Load the model a single time; the nested `ner` callback closes over it so
    # it is reused for every request instead of being re-created per call.
    pipe = pipeline(
        task="token-classification",
        model="saattrupdan/nbailab-base-ner-scandi",
        aggregation_strategy="first",
    )

    # Clickable sample inputs shown beneath the textbox (one per language).
    examples = [
        "Hans er en professor på Københavns Universitetet i København, og han er en rigtig københavner. Hans kat, altså Hans' kat, Lisa, er supersød. Han fik købt en Mona Lisa på tilbud i Netto og gav den til sin kat, og nu er Mona Lisa'en Lisa's kæreste eje. Hans bror Peter og Hans besluttede, at Peterskirken skulle have fint besøg. Men nu har de begge Corona.",
        "Borghild handler på Bunnpris, der de har et spesialtilbud på en Mona Lisa-kopi.",
        "Johan är från Djurgården och fick Corona förra veckan.",
        "Ulf Hjalmar Ed Kristersson (føddur 29. desember 1963 í Lund) er siviløkonomur og politikari.",
        "Space Exploration Technologies Corporation, betur þekkt sem SpaceX, er bandarískur flugtækniframleiðandi og geimferðafyrirtæki með höfuðstöðvar í Hawthorne í Kaliforníu. Það var stofnað af athafnamanninum Elon Musk árið 2002.",
    ]

    # Markdown shown above the interface. The lines are kept at column 0 inside
    # the string so the table is not indented within the Markdown source.
    # The MISC example previously read "Ūjķnustu pund": the Icelandic letters
    # Þ/ó had been decoded with the wrong 8-bit code page. Restored here.
    description = """
Locate and classify named entities in Danish, Swedish, Norwegian, Icelandic and Faroese text. Write any text in the box below, and the model will predict and highlight the following entities:

| **Tag** | **Name** | **Description** |
| :------ | :------- | :-------------- |
| `PER` | Person | The name of a person (e.g., *Birgitte* and *Mohammed*) |
| `LOC` | Location | The name of a location (e.g., *Tyskland* and *Djurgården*) |
| `ORG` | Organisation | The name of an organisation (e.g., *Bunnpris* and *Landsbankinn*) |
| `MISC` | Miscellaneous | A named entity of a different kind (e.g., *Þjónustu pund* and *Mona Lisa*) |

_Also, be patient, as this demo is running on a CPU!_"""

    def ner(text: str) -> dict:
        """Run the NER pipeline on ``text`` and shape the result for Gradio.

        Args:
            text: The sentence(s) entered by the user.

        Returns:
            A dict with the original ``text`` and an ``entities`` list in which
            every pipeline result additionally carries an ``entity`` key copied
            from its ``entity_group`` value.
        """
        output: list[dict] = pipe(text)
        # Copy `entity_group` into an `entity` key for the highlighted-text
        # output. The pipeline dict is merged last, so if it already carries
        # an `entity` key that value takes precedence.
        output = [{"entity": dct["entity_group"]} | dct for dct in output]
        # Echo the predictions to stdout for inspection.
        print(output)
        return {"text": text, "entities": output}

    demo = gr.Interface(
        ner,
        gr.Textbox(placeholder="Enter sentence here..."),
        gr.HighlightedText(),
        examples=examples,
        title="Scandinavian Named Entity Recognition",
        description=description,
    )

    demo.launch()


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.3
|
2 |
+
aiosignal==1.3.1
|
3 |
+
anyio==3.6.2
|
4 |
+
async-timeout==4.0.2
|
5 |
+
attrs==22.1.0
|
6 |
+
bcrypt==4.0.1
|
7 |
+
beautifulsoup4==4.11.1
|
8 |
+
certifi==2022.9.24
|
9 |
+
cffi==1.15.1
|
10 |
+
charset-normalizer==2.1.1
|
11 |
+
click==8.1.3
|
12 |
+
contourpy==1.0.6
|
13 |
+
cryptography==38.0.4
|
14 |
+
cycler==0.11.0
|
15 |
+
fastapi==0.88.0
|
16 |
+
fasttext-wheel==0.9.2
|
17 |
+
ffmpy==0.3.0
|
18 |
+
filelock==3.8.0
|
19 |
+
fonttools==4.38.0
|
20 |
+
frozenlist==1.3.3
|
21 |
+
fsspec==2022.11.0
|
22 |
+
gdown==4.5.4
|
23 |
+
gradio==3.12.0
|
24 |
+
h11==0.12.0
|
25 |
+
httpcore==0.15.0
|
26 |
+
httpx==0.23.1
|
27 |
+
huggingface-hub==0.11.1
|
28 |
+
idna==3.4
|
29 |
+
Jinja2==3.1.2
|
30 |
+
kiwisolver==1.4.4
|
31 |
+
linkify-it-py==1.0.3
|
32 |
+
luga==0.2.6
|
33 |
+
markdown-it-py==2.1.0
|
34 |
+
MarkupSafe==2.1.1
|
35 |
+
matplotlib==3.6.2
|
36 |
+
mdit-py-plugins==0.3.1
|
37 |
+
mdurl==0.1.2
|
38 |
+
mpmath==1.3.0
|
39 |
+
multidict==6.0.2
|
40 |
+
networkx==3.1
|
41 |
+
nptyping==1.4.4
|
42 |
+
numpy==1.23.5
|
43 |
+
orjson==3.8.2
|
44 |
+
packaging==21.3
|
45 |
+
pandas==1.5.2
|
46 |
+
paramiko==2.12.0
|
47 |
+
Pillow==9.3.0
|
48 |
+
pybind11==2.10.1
|
49 |
+
pycparser==2.21
|
50 |
+
pycryptodome==3.16.0
|
51 |
+
pydantic==1.10.2
|
52 |
+
pydub==0.25.1
|
53 |
+
PyNaCl==1.5.0
|
54 |
+
pyparsing==3.0.9
|
55 |
+
PySocks==1.7.1
|
56 |
+
python-dateutil==2.8.2
|
57 |
+
python-multipart==0.0.5
|
58 |
+
pytz==2022.6
|
59 |
+
PyYAML==6.0
|
60 |
+
regex==2022.10.31
|
61 |
+
requests==2.28.1
|
62 |
+
rfc3986==1.5.0
|
63 |
+
six==1.16.0
|
64 |
+
sniffio==1.3.0
|
65 |
+
soupsieve==2.3.2.post1
|
66 |
+
starlette==0.22.0
|
67 |
+
sympy==1.11.1
|
68 |
+
tokenizers==0.13.2
|
69 |
+
torch==2.0.0
|
70 |
+
tqdm==4.64.1
|
71 |
+
transformers==4.28.1
|
72 |
+
typing_extensions==4.4.0
|
73 |
+
typish==1.9.3
|
74 |
+
uc-micro-py==1.0.1
|
75 |
+
urllib3==1.26.13
|
76 |
+
uvicorn==0.20.0
|
77 |
+
websockets==10.4
|
78 |
+
yarl==1.8.1
|