Spaces:
Running
Running
Create demo.py
Browse files
demo.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import duckdb
|
2 |
+
import polars as pl
|
3 |
+
from datasets import load_dataset
|
4 |
+
from model2vec import StaticModel
|
5 |
+
|
6 |
+
# Load a static embedding model from the Hugging Face Hub
# (in this case the potion-base-8M model).
model_name = "minishlab/potion-base-8M"
model = StaticModel.from_pretrained(model_name)

# Load the prompts dataset and convert its "train" split to a Polars DataFrame.
ds = load_dataset("fka/awesome-chatgpt-prompts")
df = ds["train"].to_polars()

# Embed every value of the "act" column and store the vectors next to the rows.
embeddings = model.encode(df["act"])
df = df.with_columns(pl.Series(embeddings).alias("embeddings"))

# Embed the search text with the same model so both sides are comparable.
vector = model.encode("An Ethereum Developer", show_progress_bar=True)

# The cast in the query must match the width of the stored embeddings, so the
# size is taken from the query vector instead of being hard-coded to the
# output width of one particular model.
embedding_dim = len(vector)

# `df` in the FROM clause refers to the Polars DataFrame defined above.
# Rows are ordered by cosine distance to the query vector (closest first).
duckdb.sql(
    query=f"""
    SELECT *
    FROM df
    ORDER BY array_cosine_distance(embeddings, {vector.tolist()}::FLOAT[{embedding_dim}])
    LIMIT 10
    """
).show()
|