saattrupdan committed on
Commit
23ff65c
1 Parent(s): 267941c

feat: Add app

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +5 -5
  3. app.py +50 -0
  4. requirements.txt +78 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Named Entity Recognition
3
- emoji: 🐢
4
- colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 3.28.2
8
  app_file: app.py
9
  pinned: false
10
- license: openrail
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Danish Named Entity Recognition
3
+ emoji: 🎯
4
+ colorFrom: orange
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 3.12.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio app that showcases Scandinavian NER models."""
2
+
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+
6
+
7
def main():
    """Build and launch the Gradio demo for Scandinavian named entity recognition.

    Creates a ``token-classification`` pipeline for the
    ``saattrupdan/nbailab-base-ner-scandi`` model, wires it into a
    ``gr.Interface`` (text box in, highlighted text out) together with a few
    example inputs, and finally calls ``launch()`` on the interface.
    """
    tagger = pipeline(
        task="token-classification",
        model="saattrupdan/nbailab-base-ner-scandi",
        aggregation_strategy="first",
    )

    # Example inputs handed to the interface via ``examples=``.
    sample_sentences = [
        "Hans er en professor på Københavns Universitetet i København, og han er en rigtig københavner. Hans kat, altså Hans' kat, Lisa, er supersød. Han fik købt en Mona Lisa på tilbud i Netto og gav den til sin kat, og nu er Mona Lisa'en Lisa's kæreste eje. Hans bror Peter og Hans besluttede, at Peterskirken skulle have fint besøg. Men nu har de begge Corona.",
        "Borghild handler på Bunnpris, der de har et spesialtilbud på en Mona Lisa-kopi.",
        "Johan är från Djurgården och fick Corona förra veckan.",
        "Ulf Hjalmar Ed Kristersson (føddur 29. desember 1963 í Lund) er siviløkonomur og politikari.",
        "Space Exploration Technologies Corporation, betur þekkt sem SpaceX, er bandarískur flugtækniframleiðandi og geimferðafyrirtæki með höfuðstöðvar í Hawthorne í Kaliforníu. Það var stofnað af athafnamanninum Elon Musk árið 2002.",
    ]

    # Markdown shown as the interface description. The text is kept flush
    # left inside the literal so no extra leading whitespace ends up in it.
    overview = """
Locate and classify named entities in Danish, Swedish, Norwegian, Icelandic and Faroese text. Write any text in the box below, and the model will predict and highlight the following entities:

| **Tag** | **Name** | **Description** |
| :------ | :------- | :-------------- |
| `PER` | Person | The name of a person (e.g., *Birgitte* and *Mohammed*) |
| `LOC` | Location | The name of a location (e.g., *Tyskland* and *Djurgården*) |
| `ORG` | Organisation | The name of an organisation (e.g., *Bunnpris* and *Landsbankinn*) |
| `MISC` | Miscellaneous | A named entity of a different kind (e.g., *Ūjķnustu pund* and *Mona Lisa*) |

_Also, be patient, as this demo is running on a CPU!_"""

    def ner(text):
        """Run the pipeline on ``text`` and return the text with its entities.

        Returns a dict with the original ``text`` and an ``entities`` list
        holding one dict per prediction made by the pipeline.
        """
        predictions = []
        for span in tagger(text):
            # Mirror the label stored under "entity_group" to an "entity" key
            # as well (presumably the key gr.HighlightedText reads — confirm
            # against the Gradio docs). Keys already in ``span`` win on clash.
            predictions.append({"entity": span["entity_group"], **span})
        print(predictions)
        return {"text": text, "entities": predictions}

    interface = gr.Interface(
        fn=ner,
        inputs=gr.Textbox(placeholder="Enter sentence here..."),
        outputs=gr.HighlightedText(),
        examples=sample_sentences,
        title="Scandinavian Named Entity Recognition",
        description=overview,
    )

    interface.launch()


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.3.1
3
+ anyio==3.6.2
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ bcrypt==4.0.1
7
+ beautifulsoup4==4.11.1
8
+ certifi==2022.9.24
9
+ cffi==1.15.1
10
+ charset-normalizer==2.1.1
11
+ click==8.1.3
12
+ contourpy==1.0.6
13
+ cryptography==38.0.4
14
+ cycler==0.11.0
15
+ fastapi==0.88.0
16
+ fasttext-wheel==0.9.2
17
+ ffmpy==0.3.0
18
+ filelock==3.8.0
19
+ fonttools==4.38.0
20
+ frozenlist==1.3.3
21
+ fsspec==2022.11.0
22
+ gdown==4.5.4
23
+ gradio==3.12.0
24
+ h11==0.12.0
25
+ httpcore==0.15.0
26
+ httpx==0.23.1
27
+ huggingface-hub==0.11.1
28
+ idna==3.4
29
+ Jinja2==3.1.2
30
+ kiwisolver==1.4.4
31
+ linkify-it-py==1.0.3
32
+ luga==0.2.6
33
+ markdown-it-py==2.1.0
34
+ MarkupSafe==2.1.1
35
+ matplotlib==3.6.2
36
+ mdit-py-plugins==0.3.1
37
+ mdurl==0.1.2
38
+ mpmath==1.3.0
39
+ multidict==6.0.2
40
+ networkx==3.1
41
+ nptyping==1.4.4
42
+ numpy==1.23.5
43
+ orjson==3.8.2
44
+ packaging==21.3
45
+ pandas==1.5.2
46
+ paramiko==2.12.0
47
+ Pillow==9.3.0
48
+ pybind11==2.10.1
49
+ pycparser==2.21
50
+ pycryptodome==3.16.0
51
+ pydantic==1.10.2
52
+ pydub==0.25.1
53
+ PyNaCl==1.5.0
54
+ pyparsing==3.0.9
55
+ PySocks==1.7.1
56
+ python-dateutil==2.8.2
57
+ python-multipart==0.0.5
58
+ pytz==2022.6
59
+ PyYAML==6.0
60
+ regex==2022.10.31
61
+ requests==2.28.1
62
+ rfc3986==1.5.0
63
+ six==1.16.0
64
+ sniffio==1.3.0
65
+ soupsieve==2.3.2.post1
66
+ starlette==0.22.0
67
+ sympy==1.11.1
68
+ tokenizers==0.13.2
69
+ torch==2.0.0
70
+ tqdm==4.64.1
71
+ transformers==4.28.1
72
+ typing_extensions==4.4.0
73
+ typish==1.9.3
74
+ uc-micro-py==1.0.1
75
+ urllib3==1.26.13
76
+ uvicorn==0.20.0
77
+ websockets==10.4
78
+ yarl==1.8.1