Revert "Update embedding model for WebSearch (#437)"
Browse files
This reverts commit f88542b1121903c1df34b20bd7b8213148e85ca0.
src/lib/server/websearch/sentenceSimilarity.ts
CHANGED
@@ -6,14 +6,16 @@ function innerProduct(tensor1: Tensor, tensor2: Tensor) {
|
|
6 |
return 1.0 - dot(tensor1.data, tensor2.data);
|
7 |
}
|
8 |
|
9 |
-
const extractor = await pipeline("feature-extraction", "Xenova/
|
10 |
|
11 |
export async function findSimilarSentences(
|
12 |
query: string,
|
13 |
sentences: string[],
|
14 |
{ topK = 5 }: { topK: number }
|
15 |
) {
|
16 |
-
|
|
|
|
|
17 |
const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
|
18 |
|
19 |
const queryTensor: Tensor = output[0];
|
|
|
6 |
return 1.0 - dot(tensor1.data, tensor2.data);
|
7 |
}
|
8 |
|
9 |
+
const extractor = await pipeline("feature-extraction", "Xenova/e5-small-v2");
|
10 |
|
11 |
export async function findSimilarSentences(
|
12 |
query: string,
|
13 |
sentences: string[],
|
14 |
{ topK = 5 }: { topK: number }
|
15 |
) {
|
16 |
+
// this preprocessing step is suggested for e5-small-v2 model
|
17 |
+
// see more: https://huggingface.co/intfloat/e5-small-v2/blob/main/README.md?code=true#L2631
|
18 |
+
const input = [`query: ${query}`, ...sentences.map((s) => `passage: ${s}`)];
|
19 |
const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
|
20 |
|
21 |
const queryTensor: Tensor = output[0];
|