Spaces:

datasets-topics
/

topics-generator

Sleeping

App Files Community

asoria HF staff committed on Oct 3

Commit

2441f3f

•

1 Parent(s): c77685f

Remove ZeroGPU code temporarily

Browse files

Files changed (2) hide show

app.py +12 -17
requirements.txt +2 -2

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
-import spaces
 import requests
 import logging
 import duckdb
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from bertopic import BERTopic
-import pandas as pd
 import gradio as gr
 from bertopic.representation import (
     KeyBERTInspired,
@@ -13,10 +12,9 @@ from bertopic.representation import (
 )
 from umap import UMAP
 import numpy as np
-from torch import cuda
-from torch import bfloat16
 from transformers import (
-    # BitsAndBytesConfig,
     AutoTokenizer,
     AutoModelForCausalLM,
     pipeline,
@@ -44,12 +42,12 @@ model_id = "meta-llama/Llama-2-7b-chat-hf"
 device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
 logging.info(device)
-# bnb_config = BitsAndBytesConfig(
-#     load_in_4bit=True,  # 4-bit quantization
-#     bnb_4bit_quant_type="nf4",  # Normalized float 4
-#     bnb_4bit_use_double_quant=True,  # Second quantization after the first
-#     bnb_4bit_compute_dtype=bfloat16,  # Computation type
-# )
 tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -57,7 +55,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     trust_remote_code=True,
-    # quantization_config=bnb_config,
     device_map="auto",
 )
@@ -113,12 +111,12 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
     return df[column].tolist()
-@spaces.GPU
 def calculate_embeddings(docs):
     return sentence_model.encode(docs, show_progress_bar=True, batch_size=100)
-@spaces.GPU
 def fit_model(base_model, docs, embeddings):
     new_model = BERTopic(
         "english",
@@ -242,9 +240,6 @@ with gr.Blocks() as demo:
         outputs=[topics_df, topics_plot],
     )
-    # TODO: choose num_rows, random, or offset -> By default limit max to 1176 rows
-    # -> From the article, it could be in GPU 1176/sec
     def _resolve_dataset_selection(
         dataset: str, default_subset: str, default_split: str, text_feature
     ):

+# import spaces
 import requests
 import logging
 import duckdb
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from bertopic import BERTopic
 import gradio as gr
 from bertopic.representation import (
     KeyBERTInspired,
 )
 from umap import UMAP
 import numpy as np
+from torch import cuda, bfloat16
 from transformers import (
+    BitsAndBytesConfig,
     AutoTokenizer,
     AutoModelForCausalLM,
     pipeline,
 device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
 logging.info(device)
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # 4-bit quantization
+    bnb_4bit_quant_type="nf4",  # Normalized float 4
+    bnb_4bit_use_double_quant=True,  # Second quantization after the first
+    bnb_4bit_compute_dtype=bfloat16,  # Computation type
+)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     trust_remote_code=True,
+    quantization_config=bnb_config,
     device_map="auto",
 )
     return df[column].tolist()
+# @spaces.GPU
 def calculate_embeddings(docs):
     return sentence_model.encode(docs, show_progress_bar=True, batch_size=100)
+# @spaces.GPU
 def fit_model(base_model, docs, embeddings):
     new_model = BERTopic(
         "english",
         outputs=[topics_df, topics_plot],
     )
     def _resolve_dataset_selection(
         dataset: str, default_subset: str, default_split: str, text_feature
     ):

requirements.txt CHANGED Viewed

@@ -4,8 +4,8 @@ gradio_huggingfacehub_search==0.0.7
 duckdb
 umap-learn
 sentence-transformers
-bitsandbytes-cuda110
-# datamapplot
 bertopic
 pandas
 torch

 duckdb
 umap-learn
 sentence-transformers
+bitsandbytes
+datamapplot
 bertopic
 pandas
 torch