Spaces:
Runtime error
Runtime error
create app
Browse files
- .gitignore +1 -0
- app.py +67 -0
- poetry.lock +0 -0
- pyproject.toml +18 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
app.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import hf_hub_download
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
def get_dataset_dependencies(dataset: str) -> set[str]:
    """Return the dependencies ``findpydeps`` reports for a dataset's script.

    The script file name is derived from the last path component of the
    dataset id (``<name>.py``) and fetched from the dataset repository with
    ``hf_hub_download``.

    Args:
        dataset: Dataset repository id, e.g. ``"mnist"`` or ``"google/fleurs"``.

    Returns:
        The non-blank lines that ``findpydeps`` wrote to stdout, stripped of
        surrounding whitespace, as a set.

    Raises:
        subprocess.CalledProcessError: If ``findpydeps`` exits with a
            non-zero status.
    """
    script_name = dataset.split("/")[-1] + ".py"
    input_file = hf_hub_download(repo_id=dataset, filename=script_name, repo_type="dataset")
    # check=True: a failed analysis must surface as an error instead of being
    # mistaken for "this script has no dependencies".
    result = subprocess.run(
        ["findpydeps", "-i", input_file, "--no-header"],
        capture_output=True,
        text=True,
        check=True,
    )
    stripped_lines = (line.strip() for line in result.stdout.splitlines())
    return {line for line in stripped_lines if line}
|
10 |
+
|
11 |
+
def update(datasets: str) -> str:
    """Collect the dependencies of every dataset listed in ``datasets``.

    Args:
        datasets: Newline-separated dataset ids. Surrounding whitespace on
            each line is ignored and blank lines are skipped.

    Returns:
        The union of all dependencies found, sorted and joined with
        newlines; an empty string when nothing was found.
    """
    all_dependencies = set()
    for dataset in datasets.split("\n"):
        dataset = dataset.strip()
        if not dataset:
            # Skip blank lines before logging so they do not clutter the output.
            continue
        print(dataset)
        try:
            dependencies = get_dataset_dependencies(dataset)
        except Exception as e:
            # Best effort: one dataset that cannot be processed must not
            # prevent the remaining ones from being reported.
            print(f"Error processing {dataset}: {e}")
            continue
        print(f"Dependencies for {dataset} processed: {len(dependencies)}")
        all_dependencies.update(dependencies)
    return "\n".join(sorted(all_dependencies))
|
26 |
+
|
27 |
+
# Gradio UI: a textbox of dataset names, a "Run" button that calls `update`,
# and a textbox showing the resulting dependency list.
# NOTE(review): the nesting below (button and examples placed under the row,
# not inside it) is the conventional layout; confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("""# Script-based dataset dependencies

Paste a list of newline-separated dataset names, and then click **Run** to see the list of dependencies in their scripts.
""")
    with gr.Row():
        inp = gr.Textbox(placeholder="mnist\ncifar10", label="Datasets", lines=10, max_lines=10)
        out = gr.Textbox(label="Dependencies", lines=10, max_lines=10, show_copy_button=True)
    btn = gr.Button("Run")
    # Each example is one complete value for the "Datasets" textbox.
    examples = ["mnist\ncifar10", "mnist", """espnet/yodas
gaia-benchmark/GAIA
google/fleurs
mozilla-foundation/common_voice_1_0
mozilla-foundation/common_voice_10_0
mozilla-foundation/common_voice_11_0
mozilla-foundation/common_voice_12_0
mozilla-foundation/common_voice_13_0
mozilla-foundation/common_voice_14_0
mozilla-foundation/common_voice_15_0
mozilla-foundation/common_voice_16_0
mozilla-foundation/common_voice_16_1
mozilla-foundation/common_voice_2_0
mozilla-foundation/common_voice_3_0
mozilla-foundation/common_voice_4_0
mozilla-foundation/common_voice_5_0
mozilla-foundation/common_voice_5_1
mozilla-foundation/common_voice_6_0
mozilla-foundation/common_voice_6_1
mozilla-foundation/common_voice_7_0
mozilla-foundation/common_voice_8_0
mozilla-foundation/common_voice_9_0
poloclub/diffusiondb
pufanyi/MIMICIT
speechcolab/gigaspeech
togethercomputer/RedPajama-Data-1T
togethercomputer/RedPajama-Data-V2
"""]
    gr.Examples(examples, inp, label="Example Datasets")
    btn.click(fn=update, inputs=inp, outputs=out)

# Only start the server when executed as a script, so that importing this
# module (e.g. from tooling or tests) does not launch the app.
if __name__ == "__main__":
    demo.launch()
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "find-script-based-datasets-dependencies"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["Sylvain Lesage <[email protected]>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.9"
|
10 |
+
gradio = "4.23.0"
|
11 |
+
findpydeps = "^0.2.6"
|
12 |
+
pip = "^24.0"
|
13 |
+
huggingface-hub = "^0.22.1"
|
14 |
+
|
15 |
+
|
16 |
+
[build-system]
|
17 |
+
requires = ["poetry-core"]
|
18 |
+
build-backend = "poetry.core.masonry.api"
|