Backend refactor for streaming endpoints (#444)
Browse files* wip: complete refactor of streaming backend
* working refactoring
* fix missing first token & perf regression in output quality
* lint
* Fix websearch loading from db
* fix loading
* fix invalidate
* remove logs
* fix SSR error
* typo: paragraphs
* fixed save on abort
* lint
* lint
* remove debug log in console
* lint for real
- src/lib/buildPrompt.ts +18 -33
- src/lib/components/OpenWebSearchResults.svelte +5 -5
- src/lib/components/chat/ChatMessage.svelte +15 -10
- src/lib/components/chat/ChatMessages.svelte +5 -17
- src/lib/components/chat/ChatWindow.svelte +2 -4
- src/lib/server/getInit.ts +58 -0
- src/lib/server/websearch/parseWeb.ts +1 -1
- src/lib/server/websearch/runWebSearch.ts +112 -0
- src/lib/types/Message.ts +5 -1
- src/lib/types/MessageUpdate.ts +39 -0
- src/lib/types/WebSearch.ts +7 -33
- src/routes/conversation/[id]/+page.server.ts +0 -19
- src/routes/conversation/[id]/+page.svelte +78 -129
- src/routes/conversation/[id]/+server.ts +181 -173
- src/routes/conversation/[id]/share/+server.ts +1 -1
- src/routes/conversation/[id]/summarize/+server.ts +1 -1
- src/routes/conversation/[id]/web-search/+server.ts +0 -165
- src/routes/r/[id]/+page.server.ts +0 -20
- src/routes/r/[id]/+page.svelte +0 -1
- src/routes/search/[id]/+server.ts +1 -1
src/lib/buildPrompt.ts
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
import type { BackendModel } from "./server/models";
|
2 |
import type { Message } from "./types/Message";
|
3 |
-
import { collections } from "$lib/server/database";
|
4 |
-
import { ObjectId } from "mongodb";
|
5 |
-
import { authCondition } from "./server/auth";
|
6 |
import { format } from "date-fns";
|
|
|
7 |
/**
|
8 |
* Convert [{user: "assistant", content: "hi"}, {user: "user", content: "hello"}] to:
|
9 |
*
|
@@ -14,44 +12,31 @@ interface buildPromptOptions {
|
|
14 |
messages: Pick<Message, "from" | "content">[];
|
15 |
model: BackendModel;
|
16 |
locals?: App.Locals;
|
17 |
-
|
18 |
preprompt?: string;
|
19 |
}
|
20 |
|
21 |
export async function buildPrompt({
|
22 |
messages,
|
23 |
model,
|
24 |
-
|
25 |
-
webSearchId,
|
26 |
preprompt,
|
27 |
}: buildPromptOptions): Promise<string> {
|
28 |
-
if (
|
29 |
-
const
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
if (webSearch.context) {
|
44 |
-
const messagesWithoutLastUsrMsg = messages.slice(0, -1);
|
45 |
-
const lastUserMsg = messages.slice(-1)[0];
|
46 |
-
const currentDate = format(new Date(), "MMMM d, yyyy");
|
47 |
-
messages = [
|
48 |
-
...messagesWithoutLastUsrMsg,
|
49 |
-
{
|
50 |
-
from: "user",
|
51 |
-
content: `Please answer my question "${lastUserMsg.content}" using the supplied context below (paragrpahs from various websites). For the context, today is ${currentDate}: \n=====================\n${webSearch.context}\n=====================\nSo my question is "${lastUserMsg.content}"`,
|
52 |
-
},
|
53 |
-
];
|
54 |
-
}
|
55 |
}
|
56 |
|
57 |
return (
|
|
|
1 |
import type { BackendModel } from "./server/models";
|
2 |
import type { Message } from "./types/Message";
|
|
|
|
|
|
|
3 |
import { format } from "date-fns";
|
4 |
+
import type { WebSearch } from "./types/WebSearch";
|
5 |
/**
|
6 |
* Convert [{user: "assistant", content: "hi"}, {user: "user", content: "hello"}] to:
|
7 |
*
|
|
|
12 |
messages: Pick<Message, "from" | "content">[];
|
13 |
model: BackendModel;
|
14 |
locals?: App.Locals;
|
15 |
+
webSearch?: WebSearch;
|
16 |
preprompt?: string;
|
17 |
}
|
18 |
|
19 |
export async function buildPrompt({
|
20 |
messages,
|
21 |
model,
|
22 |
+
webSearch,
|
|
|
23 |
preprompt,
|
24 |
}: buildPromptOptions): Promise<string> {
|
25 |
+
if (webSearch && webSearch.context) {
|
26 |
+
const messagesWithoutLastUsrMsg = messages.slice(0, -1);
|
27 |
+
const lastUserMsg = messages.slice(-1)[0];
|
28 |
+
const currentDate = format(new Date(), "MMMM d, yyyy");
|
29 |
+
messages = [
|
30 |
+
...messagesWithoutLastUsrMsg,
|
31 |
+
{
|
32 |
+
from: "user",
|
33 |
+
content: `Please answer my question "${lastUserMsg.content}" using the supplied context below (paragraphs from various websites). For the context, today is ${currentDate}:
|
34 |
+
=====================
|
35 |
+
${webSearch.context}
|
36 |
+
=====================
|
37 |
+
So my question is "${lastUserMsg.content}"`,
|
38 |
+
},
|
39 |
+
];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
}
|
41 |
|
42 |
return (
|
src/lib/components/OpenWebSearchResults.svelte
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
<script lang="ts">
|
2 |
-
import type {
|
3 |
import CarbonCaretRight from "~icons/carbon/caret-right";
|
4 |
|
5 |
import CarbonCheckmark from "~icons/carbon/checkmark-filled";
|
@@ -9,11 +9,11 @@
|
|
9 |
|
10 |
export let loading = false;
|
11 |
export let classNames = "";
|
12 |
-
export let webSearchMessages:
|
13 |
|
14 |
let detailsOpen: boolean;
|
15 |
let error: boolean;
|
16 |
-
$: error = webSearchMessages[webSearchMessages.length -
|
17 |
</script>
|
18 |
|
19 |
<details
|
@@ -46,7 +46,7 @@
|
|
46 |
{:else}
|
47 |
<ol>
|
48 |
{#each webSearchMessages as message}
|
49 |
-
{#if message.
|
50 |
<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
|
51 |
<div class="flex items-start">
|
52 |
<div
|
@@ -64,7 +64,7 @@
|
|
64 |
</p>
|
65 |
{/if}
|
66 |
</li>
|
67 |
-
{:else if message.
|
68 |
<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
|
69 |
<div class="flex items-start">
|
70 |
<CarbonError
|
|
|
1 |
<script lang="ts">
|
2 |
+
import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
|
3 |
import CarbonCaretRight from "~icons/carbon/caret-right";
|
4 |
|
5 |
import CarbonCheckmark from "~icons/carbon/checkmark-filled";
|
|
|
9 |
|
10 |
export let loading = false;
|
11 |
export let classNames = "";
|
12 |
+
export let webSearchMessages: WebSearchUpdate[] = [];
|
13 |
|
14 |
let detailsOpen: boolean;
|
15 |
let error: boolean;
|
16 |
+
$: error = webSearchMessages[webSearchMessages.length - 1]?.messageType === "error";
|
17 |
</script>
|
18 |
|
19 |
<details
|
|
|
46 |
{:else}
|
47 |
<ol>
|
48 |
{#each webSearchMessages as message}
|
49 |
+
{#if message.messageType === "update"}
|
50 |
<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
|
51 |
<div class="flex items-start">
|
52 |
<div
|
|
|
64 |
</p>
|
65 |
{/if}
|
66 |
</li>
|
67 |
+
{:else if message.messageType === "error"}
|
68 |
<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
|
69 |
<div class="flex items-start">
|
70 |
<CarbonError
|
src/lib/components/chat/ChatMessage.svelte
CHANGED
@@ -14,9 +14,9 @@
|
|
14 |
import CarbonThumbsDown from "~icons/carbon/thumbs-down";
|
15 |
import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
|
16 |
import type { Model } from "$lib/types/Model";
|
17 |
-
import type { WebSearchMessage, WebSearchMessageSources } from "$lib/types/WebSearch";
|
18 |
|
19 |
import OpenWebSearchResults from "../OpenWebSearchResults.svelte";
|
|
|
20 |
|
21 |
function sanitizeMd(md: string) {
|
22 |
let ret = md
|
@@ -48,7 +48,7 @@
|
|
48 |
export let readOnly = false;
|
49 |
export let isTapped = false;
|
50 |
|
51 |
-
export let webSearchMessages:
|
52 |
|
53 |
const dispatch = createEventDispatcher<{
|
54 |
retry: { content: string; id: Message["id"] };
|
@@ -104,18 +104,23 @@
|
|
104 |
}
|
105 |
});
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
$: downloadLink =
|
108 |
message.from === "user" ? `${$page.url.pathname}/message/${message.id}/prompt` : undefined;
|
109 |
|
110 |
let webSearchIsDone = true;
|
111 |
|
112 |
$: webSearchIsDone =
|
113 |
-
|
114 |
-
webSearchMessages[webSearchMessages.length - 1].type === "result";
|
115 |
|
116 |
-
$: webSearchSources =
|
117 |
-
|
118 |
-
|
119 |
</script>
|
120 |
|
121 |
{#if message.from === "assistant"}
|
@@ -132,11 +137,11 @@
|
|
132 |
<div
|
133 |
class="relative min-h-[calc(2rem+theme(spacing[3.5])*2)] min-w-[60px] break-words rounded-2xl border border-gray-100 bg-gradient-to-br from-gray-50 px-5 py-3.5 text-gray-600 prose-pre:my-2 dark:border-gray-800 dark:from-gray-800/40 dark:text-gray-300"
|
134 |
>
|
135 |
-
{#if
|
136 |
<OpenWebSearchResults
|
137 |
classNames={tokens.length ? "mb-3.5" : ""}
|
138 |
-
{
|
139 |
-
loading={!
|
140 |
/>
|
141 |
{/if}
|
142 |
{#if !message.content && (webSearchIsDone || (webSearchMessages && webSearchMessages.length === 0))}
|
|
|
14 |
import CarbonThumbsDown from "~icons/carbon/thumbs-down";
|
15 |
import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
|
16 |
import type { Model } from "$lib/types/Model";
|
|
|
17 |
|
18 |
import OpenWebSearchResults from "../OpenWebSearchResults.svelte";
|
19 |
+
import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
|
20 |
|
21 |
function sanitizeMd(md: string) {
|
22 |
let ret = md
|
|
|
48 |
export let readOnly = false;
|
49 |
export let isTapped = false;
|
50 |
|
51 |
+
export let webSearchMessages: WebSearchUpdate[];
|
52 |
|
53 |
const dispatch = createEventDispatcher<{
|
54 |
retry: { content: string; id: Message["id"] };
|
|
|
104 |
}
|
105 |
});
|
106 |
|
107 |
+
let searchUpdates: WebSearchUpdate[] = [];
|
108 |
+
|
109 |
+
$: searchUpdates = ((webSearchMessages.length > 0
|
110 |
+
? webSearchMessages
|
111 |
+
: message.updates?.filter(({ type }) => type === "webSearch")) ?? []) as WebSearchUpdate[];
|
112 |
+
|
113 |
$: downloadLink =
|
114 |
message.from === "user" ? `${$page.url.pathname}/message/${message.id}/prompt` : undefined;
|
115 |
|
116 |
let webSearchIsDone = true;
|
117 |
|
118 |
$: webSearchIsDone =
|
119 |
+
searchUpdates.length > 0 && searchUpdates[searchUpdates.length - 1].messageType === "sources";
|
|
|
120 |
|
121 |
+
$: webSearchSources =
|
122 |
+
searchUpdates &&
|
123 |
+
searchUpdates?.filter(({ messageType }) => messageType === "sources")?.[0]?.sources;
|
124 |
</script>
|
125 |
|
126 |
{#if message.from === "assistant"}
|
|
|
137 |
<div
|
138 |
class="relative min-h-[calc(2rem+theme(spacing[3.5])*2)] min-w-[60px] break-words rounded-2xl border border-gray-100 bg-gradient-to-br from-gray-50 px-5 py-3.5 text-gray-600 prose-pre:my-2 dark:border-gray-800 dark:from-gray-800/40 dark:text-gray-300"
|
139 |
>
|
140 |
+
{#if searchUpdates && searchUpdates.length > 0}
|
141 |
<OpenWebSearchResults
|
142 |
classNames={tokens.length ? "mb-3.5" : ""}
|
143 |
+
webSearchMessages={searchUpdates}
|
144 |
+
loading={!(searchUpdates[searchUpdates.length - 1]?.messageType === "sources")}
|
145 |
/>
|
146 |
{/if}
|
147 |
{#if !message.content && (webSearchIsDone || (webSearchMessages && webSearchMessages.length === 0))}
|
src/lib/components/chat/ChatMessages.svelte
CHANGED
@@ -8,7 +8,8 @@
|
|
8 |
import type { LayoutData } from "../../../routes/$types";
|
9 |
import ChatIntroduction from "./ChatIntroduction.svelte";
|
10 |
import ChatMessage from "./ChatMessage.svelte";
|
11 |
-
import type {
|
|
|
12 |
|
13 |
export let messages: Message[];
|
14 |
export let loading: boolean;
|
@@ -18,12 +19,10 @@
|
|
18 |
export let settings: LayoutData["settings"];
|
19 |
export let models: Model[];
|
20 |
export let readOnly: boolean;
|
21 |
-
export let searches: Record<string, WebSearchMessage[]>;
|
22 |
|
23 |
-
let webSearchArray: Array<WebSearchMessage[] | undefined> = [];
|
24 |
let chatContainer: HTMLElement;
|
25 |
|
26 |
-
export let webSearchMessages:
|
27 |
|
28 |
async function scrollToBottom() {
|
29 |
await tick();
|
@@ -31,20 +30,9 @@
|
|
31 |
}
|
32 |
|
33 |
// If last message is from user, scroll to bottom
|
34 |
-
$: if (messages[messages.length - 1]?.from === "user") {
|
35 |
scrollToBottom();
|
36 |
}
|
37 |
-
|
38 |
-
$: messages,
|
39 |
-
(webSearchArray = messages.map((message, idx) => {
|
40 |
-
if (message.webSearchId) {
|
41 |
-
return searches[message.webSearchId] ?? [];
|
42 |
-
} else if (idx === messages.length - 1) {
|
43 |
-
return webSearchMessages;
|
44 |
-
} else {
|
45 |
-
return [];
|
46 |
-
}
|
47 |
-
}));
|
48 |
</script>
|
49 |
|
50 |
<div
|
@@ -60,7 +48,7 @@
|
|
60 |
{isAuthor}
|
61 |
{readOnly}
|
62 |
model={currentModel}
|
63 |
-
webSearchMessages={
|
64 |
on:retry
|
65 |
on:vote
|
66 |
/>
|
|
|
8 |
import type { LayoutData } from "../../../routes/$types";
|
9 |
import ChatIntroduction from "./ChatIntroduction.svelte";
|
10 |
import ChatMessage from "./ChatMessage.svelte";
|
11 |
+
import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
|
12 |
+
import { browser } from "$app/environment";
|
13 |
|
14 |
export let messages: Message[];
|
15 |
export let loading: boolean;
|
|
|
19 |
export let settings: LayoutData["settings"];
|
20 |
export let models: Model[];
|
21 |
export let readOnly: boolean;
|
|
|
22 |
|
|
|
23 |
let chatContainer: HTMLElement;
|
24 |
|
25 |
+
export let webSearchMessages: WebSearchUpdate[] = [];
|
26 |
|
27 |
async function scrollToBottom() {
|
28 |
await tick();
|
|
|
30 |
}
|
31 |
|
32 |
// If last message is from user, scroll to bottom
|
33 |
+
$: if (browser && messages[messages.length - 1]?.from === "user") {
|
34 |
scrollToBottom();
|
35 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
</script>
|
37 |
|
38 |
<div
|
|
|
48 |
{isAuthor}
|
49 |
{readOnly}
|
50 |
model={currentModel}
|
51 |
+
webSearchMessages={i === messages.length - 1 ? webSearchMessages : []}
|
52 |
on:retry
|
53 |
on:vote
|
54 |
/>
|
src/lib/components/chat/ChatWindow.svelte
CHANGED
@@ -13,8 +13,8 @@
|
|
13 |
import type { Model } from "$lib/types/Model";
|
14 |
import type { LayoutData } from "../../../routes/$types";
|
15 |
import WebSearchToggle from "../WebSearchToggle.svelte";
|
16 |
-
import type { WebSearchMessage } from "$lib/types/WebSearch";
|
17 |
import LoginModal from "../LoginModal.svelte";
|
|
|
18 |
|
19 |
export let messages: Message[] = [];
|
20 |
export let loading = false;
|
@@ -23,8 +23,7 @@
|
|
23 |
export let currentModel: Model;
|
24 |
export let models: Model[];
|
25 |
export let settings: LayoutData["settings"];
|
26 |
-
export let webSearchMessages:
|
27 |
-
export let searches: Record<string, WebSearchMessage[]> = {};
|
28 |
|
29 |
export let loginRequired = false;
|
30 |
$: isReadOnly = !models.some((model) => model.id === currentModel.id);
|
@@ -60,7 +59,6 @@
|
|
60 |
readOnly={isReadOnly}
|
61 |
isAuthor={!shared}
|
62 |
{webSearchMessages}
|
63 |
-
{searches}
|
64 |
on:message
|
65 |
on:vote
|
66 |
on:retry={(ev) => {
|
|
|
13 |
import type { Model } from "$lib/types/Model";
|
14 |
import type { LayoutData } from "../../../routes/$types";
|
15 |
import WebSearchToggle from "../WebSearchToggle.svelte";
|
|
|
16 |
import LoginModal from "../LoginModal.svelte";
|
17 |
+
import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
|
18 |
|
19 |
export let messages: Message[] = [];
|
20 |
export let loading = false;
|
|
|
23 |
export let currentModel: Model;
|
24 |
export let models: Model[];
|
25 |
export let settings: LayoutData["settings"];
|
26 |
+
export let webSearchMessages: WebSearchUpdate[] = [];
|
|
|
27 |
|
28 |
export let loginRequired = false;
|
29 |
$: isReadOnly = !models.some((model) => model.id === currentModel.id);
|
|
|
59 |
readOnly={isReadOnly}
|
60 |
isAuthor={!shared}
|
61 |
{webSearchMessages}
|
|
|
62 |
on:message
|
63 |
on:vote
|
64 |
on:retry={(ev) => {
|
src/lib/server/getInit.ts
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { Options, RequestArgs } from "@huggingface/inference";
|
2 |
+
|
3 |
+
const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
|
4 |
+
|
5 |
+
/**
|
6 |
+
* Helper that prepares request arguments
|
7 |
+
*/
|
8 |
+
export function makeRequestOptions(
|
9 |
+
args: RequestArgs & {
|
10 |
+
data?: Blob | ArrayBuffer;
|
11 |
+
stream?: boolean;
|
12 |
+
},
|
13 |
+
options?: Options & {
|
14 |
+
/** For internal HF use, which is why it's not exposed in {@link Options} */
|
15 |
+
includeCredentials?: boolean;
|
16 |
+
}
|
17 |
+
): { url: string; info: RequestInit } {
|
18 |
+
const { model, accessToken, ...otherArgs } = args;
|
19 |
+
|
20 |
+
const headers: Record<string, string> = {};
|
21 |
+
if (accessToken) {
|
22 |
+
headers["Authorization"] = `Bearer ${accessToken}`;
|
23 |
+
}
|
24 |
+
|
25 |
+
const binary = "data" in args && !!args.data;
|
26 |
+
|
27 |
+
if (!binary) {
|
28 |
+
headers["Content-Type"] = "application/json";
|
29 |
+
} else {
|
30 |
+
if (options?.wait_for_model) {
|
31 |
+
headers["X-Wait-For-Model"] = "true";
|
32 |
+
}
|
33 |
+
if (options?.use_cache === false) {
|
34 |
+
headers["X-Use-Cache"] = "false";
|
35 |
+
}
|
36 |
+
if (options?.dont_load_model) {
|
37 |
+
headers["X-Load-Model"] = "0";
|
38 |
+
}
|
39 |
+
}
|
40 |
+
|
41 |
+
const url =
|
42 |
+
/^http(s?):/.test(model) || model.startsWith("/")
|
43 |
+
? model
|
44 |
+
: `${HF_INFERENCE_API_BASE_URL}${model}`;
|
45 |
+
const info: RequestInit = {
|
46 |
+
headers,
|
47 |
+
method: "POST",
|
48 |
+
body: binary
|
49 |
+
? args.data
|
50 |
+
: JSON.stringify({
|
51 |
+
...otherArgs,
|
52 |
+
options,
|
53 |
+
}),
|
54 |
+
credentials: options?.includeCredentials ? "include" : "same-origin",
|
55 |
+
};
|
56 |
+
|
57 |
+
return { url, info };
|
58 |
+
}
|
src/lib/server/websearch/parseWeb.ts
CHANGED
@@ -5,7 +5,7 @@ export async function parseWeb(url: string) {
|
|
5 |
setTimeout(() => abortController.abort(), 10000);
|
6 |
const htmlString = await fetch(url, { signal: abortController.signal })
|
7 |
.then((response) => response.text())
|
8 |
-
.catch(
|
9 |
|
10 |
const virtualConsole = new VirtualConsole();
|
11 |
virtualConsole.on("error", () => {
|
|
|
5 |
setTimeout(() => abortController.abort(), 10000);
|
6 |
const htmlString = await fetch(url, { signal: abortController.signal })
|
7 |
.then((response) => response.text())
|
8 |
+
.catch();
|
9 |
|
10 |
const virtualConsole = new VirtualConsole();
|
11 |
virtualConsole.on("error", () => {
|
src/lib/server/websearch/runWebSearch.ts
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { searchWeb } from "$lib/server/websearch/searchWeb";
|
2 |
+
import type { Message } from "$lib/types/Message";
|
3 |
+
import type { WebSearch, WebSearchSource } from "$lib/types/WebSearch";
|
4 |
+
import { generateQuery } from "$lib/server/websearch/generateQuery";
|
5 |
+
import { parseWeb } from "$lib/server/websearch/parseWeb";
|
6 |
+
import { chunk } from "$lib/utils/chunk";
|
7 |
+
import { findSimilarSentences } from "$lib/server/websearch/sentenceSimilarity";
|
8 |
+
import type { Conversation } from "$lib/types/Conversation";
|
9 |
+
import type { MessageUpdate } from "$lib/types/MessageUpdate";
|
10 |
+
|
11 |
+
const MAX_N_PAGES_SCRAPE = 10 as const;
|
12 |
+
const MAX_N_PAGES_EMBED = 5 as const;
|
13 |
+
|
14 |
+
export async function runWebSearch(
|
15 |
+
conv: Conversation,
|
16 |
+
prompt: string,
|
17 |
+
updatePad: (upd: MessageUpdate) => void
|
18 |
+
) {
|
19 |
+
const messages = (() => {
|
20 |
+
return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
|
21 |
+
})() satisfies Message[];
|
22 |
+
|
23 |
+
const webSearch: WebSearch = {
|
24 |
+
prompt: prompt,
|
25 |
+
searchQuery: "",
|
26 |
+
results: [],
|
27 |
+
context: "",
|
28 |
+
contextSources: [],
|
29 |
+
createdAt: new Date(),
|
30 |
+
updatedAt: new Date(),
|
31 |
+
};
|
32 |
+
|
33 |
+
function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
|
34 |
+
updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
|
35 |
+
}
|
36 |
+
|
37 |
+
try {
|
38 |
+
webSearch.searchQuery = await generateQuery(messages);
|
39 |
+
appendUpdate("Searching Google", [webSearch.searchQuery]);
|
40 |
+
const results = await searchWeb(webSearch.searchQuery);
|
41 |
+
webSearch.results =
|
42 |
+
(results.organic_results &&
|
43 |
+
results.organic_results.map((el: { title: string; link: string }) => {
|
44 |
+
const { title, link } = el;
|
45 |
+
const { hostname } = new URL(link);
|
46 |
+
return { title, link, hostname };
|
47 |
+
})) ??
|
48 |
+
[];
|
49 |
+
webSearch.results = webSearch.results
|
50 |
+
.filter(({ link }) => !link.includes("youtube.com")) // filter out youtube links
|
51 |
+
.slice(0, MAX_N_PAGES_SCRAPE); // limit to first 10 links only
|
52 |
+
|
53 |
+
let paragraphChunks: { source: WebSearchSource; text: string }[] = [];
|
54 |
+
if (webSearch.results.length > 0) {
|
55 |
+
appendUpdate("Browsing results");
|
56 |
+
const promises = webSearch.results.map(async (result) => {
|
57 |
+
const { link } = result;
|
58 |
+
let text = "";
|
59 |
+
try {
|
60 |
+
text = await parseWeb(link);
|
61 |
+
appendUpdate("Browsing webpage", [link]);
|
62 |
+
} catch (e) {
|
63 |
+
console.error(`Error parsing webpage "${link}"`, e);
|
64 |
+
}
|
65 |
+
const CHUNK_CAR_LEN = 512;
|
66 |
+
const MAX_N_CHUNKS = 100;
|
67 |
+
const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
|
68 |
+
return texts.map((t) => ({ source: result, text: t }));
|
69 |
+
});
|
70 |
+
const nestedParagraphChunks = (await Promise.all(promises)).slice(0, MAX_N_PAGES_EMBED);
|
71 |
+
paragraphChunks = nestedParagraphChunks.flat();
|
72 |
+
if (!paragraphChunks.length) {
|
73 |
+
throw new Error("No text found on the first 5 results");
|
74 |
+
}
|
75 |
+
} else {
|
76 |
+
throw new Error("No results found for this search query");
|
77 |
+
}
|
78 |
+
|
79 |
+
appendUpdate("Extracting relevant information");
|
80 |
+
const topKClosestParagraphs = 8;
|
81 |
+
const texts = paragraphChunks.map(({ text }) => text);
|
82 |
+
const indices = await findSimilarSentences(prompt, texts, {
|
83 |
+
topK: topKClosestParagraphs,
|
84 |
+
});
|
85 |
+
webSearch.context = indices.map((idx) => texts[idx]).join("");
|
86 |
+
|
87 |
+
const usedSources = new Set<string>();
|
88 |
+
for (const idx of indices) {
|
89 |
+
const { source } = paragraphChunks[idx];
|
90 |
+
if (!usedSources.has(source.link)) {
|
91 |
+
usedSources.add(source.link);
|
92 |
+
webSearch.contextSources.push(source);
|
93 |
+
updatePad({
|
94 |
+
type: "webSearch",
|
95 |
+
messageType: "sources",
|
96 |
+
message: "sources",
|
97 |
+
sources: webSearch.contextSources,
|
98 |
+
});
|
99 |
+
}
|
100 |
+
}
|
101 |
+
} catch (searchError) {
|
102 |
+
if (searchError instanceof Error) {
|
103 |
+
appendUpdate(
|
104 |
+
"An error occurred with the web search",
|
105 |
+
[JSON.stringify(searchError.message)],
|
106 |
+
"error"
|
107 |
+
);
|
108 |
+
}
|
109 |
+
}
|
110 |
+
|
111 |
+
return webSearch;
|
112 |
+
}
|
src/lib/types/Message.ts
CHANGED
@@ -1,9 +1,13 @@
|
|
|
|
1 |
import type { Timestamps } from "./Timestamps";
|
|
|
2 |
|
3 |
export type Message = Partial<Timestamps> & {
|
4 |
from: "user" | "assistant";
|
5 |
id: ReturnType<typeof crypto.randomUUID>;
|
6 |
content: string;
|
7 |
-
|
|
|
|
|
8 |
score?: -1 | 0 | 1;
|
9 |
};
|
|
|
1 |
+
import type { MessageUpdate } from "./MessageUpdate";
|
2 |
import type { Timestamps } from "./Timestamps";
|
3 |
+
import type { WebSearch } from "./WebSearch";
|
4 |
|
5 |
export type Message = Partial<Timestamps> & {
|
6 |
from: "user" | "assistant";
|
7 |
id: ReturnType<typeof crypto.randomUUID>;
|
8 |
content: string;
|
9 |
+
updates?: MessageUpdate[];
|
10 |
+
webSearchId?: WebSearch["_id"]; // legacy version
|
11 |
+
webSearch?: WebSearch;
|
12 |
score?: -1 | 0 | 1;
|
13 |
};
|
src/lib/types/MessageUpdate.ts
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { WebSearchSource } from "./WebSearch";
|
2 |
+
|
3 |
+
export type FinalAnswer = {
|
4 |
+
type: "finalAnswer";
|
5 |
+
text: string;
|
6 |
+
};
|
7 |
+
|
8 |
+
export type TextStreamUpdate = {
|
9 |
+
type: "stream";
|
10 |
+
token: string;
|
11 |
+
};
|
12 |
+
|
13 |
+
export type AgentUpdate = {
|
14 |
+
type: "agent";
|
15 |
+
agent: string;
|
16 |
+
content: string;
|
17 |
+
binary?: Blob;
|
18 |
+
};
|
19 |
+
|
20 |
+
export type WebSearchUpdate = {
|
21 |
+
type: "webSearch";
|
22 |
+
messageType: "update" | "error" | "sources";
|
23 |
+
message: string;
|
24 |
+
args?: string[];
|
25 |
+
sources?: WebSearchSource[];
|
26 |
+
};
|
27 |
+
|
28 |
+
export type StatusUpdate = {
|
29 |
+
type: "status";
|
30 |
+
status: "started" | "pending" | "finished" | "error";
|
31 |
+
message?: string;
|
32 |
+
};
|
33 |
+
|
34 |
+
export type MessageUpdate =
|
35 |
+
| FinalAnswer
|
36 |
+
| TextStreamUpdate
|
37 |
+
| AgentUpdate
|
38 |
+
| WebSearchUpdate
|
39 |
+
| StatusUpdate;
|
src/lib/types/WebSearch.ts
CHANGED
@@ -2,16 +2,9 @@ import type { ObjectId } from "mongodb";
|
|
2 |
import type { Conversation } from "./Conversation";
|
3 |
import type { Timestamps } from "./Timestamps";
|
4 |
|
5 |
-
export interface WebSearchSource {
|
6 |
-
title: string;
|
7 |
-
link: string;
|
8 |
-
hostname: string;
|
9 |
-
}
|
10 |
-
|
11 |
export interface WebSearch extends Timestamps {
|
12 |
-
_id
|
13 |
-
|
14 |
-
convId: Conversation["_id"];
|
15 |
|
16 |
prompt: string;
|
17 |
|
@@ -19,34 +12,15 @@ export interface WebSearch extends Timestamps {
|
|
19 |
results: WebSearchSource[];
|
20 |
context: string;
|
21 |
contextSources: WebSearchSource[];
|
22 |
-
|
23 |
-
messages: WebSearchMessage[];
|
24 |
}
|
25 |
|
26 |
-
export
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
}
|
31 |
-
|
32 |
-
export type WebSearchMessageError = {
|
33 |
-
type: "error";
|
34 |
-
message: string;
|
35 |
-
args?: string[];
|
36 |
-
};
|
37 |
-
|
38 |
-
export type WebSearchMessageResult = {
|
39 |
-
type: "result";
|
40 |
-
id: string;
|
41 |
-
};
|
42 |
|
43 |
export type WebSearchMessageSources = {
|
44 |
type: "sources";
|
45 |
sources: WebSearchSource[];
|
46 |
};
|
47 |
-
|
48 |
-
export type WebSearchMessage =
|
49 |
-
| WebSearchMessageUpdate
|
50 |
-
| WebSearchMessageResult
|
51 |
-
| WebSearchMessageError
|
52 |
-
| WebSearchMessageSources;
|
|
|
2 |
import type { Conversation } from "./Conversation";
|
3 |
import type { Timestamps } from "./Timestamps";
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
export interface WebSearch extends Timestamps {
|
6 |
+
_id?: ObjectId;
|
7 |
+
convId?: Conversation["_id"];
|
|
|
8 |
|
9 |
prompt: string;
|
10 |
|
|
|
12 |
results: WebSearchSource[];
|
13 |
context: string;
|
14 |
contextSources: WebSearchSource[];
|
|
|
|
|
15 |
}
|
16 |
|
17 |
+
export interface WebSearchSource {
|
18 |
+
title: string;
|
19 |
+
link: string;
|
20 |
+
hostname: string;
|
21 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
export type WebSearchMessageSources = {
|
24 |
type: "sources";
|
25 |
sources: WebSearchSource[];
|
26 |
};
|
|
|
|
|
|
|
|
|
|
|
|
src/routes/conversation/[id]/+page.server.ts
CHANGED
@@ -2,7 +2,6 @@ import { collections } from "$lib/server/database";
|
|
2 |
import { ObjectId } from "mongodb";
|
3 |
import { error } from "@sveltejs/kit";
|
4 |
import { authCondition } from "$lib/server/auth";
|
5 |
-
import type { WebSearchMessageResult, WebSearchMessageSources } from "$lib/types/WebSearch";
|
6 |
import { UrlDependency } from "$lib/types/UrlDependency";
|
7 |
|
8 |
export const load = async ({ params, depends, locals }) => {
|
@@ -30,27 +29,9 @@ export const load = async ({ params, depends, locals }) => {
|
|
30 |
throw error(404, "Conversation not found.");
|
31 |
}
|
32 |
|
33 |
-
const webSearchesId = conversation.messages
|
34 |
-
.filter((message) => message.webSearchId)
|
35 |
-
.map((message) => new ObjectId(message.webSearchId));
|
36 |
-
|
37 |
-
const results = await collections.webSearches.find({ _id: { $in: webSearchesId } }).toArray();
|
38 |
-
|
39 |
-
const searches = Object.fromEntries(
|
40 |
-
results.map((x) => [
|
41 |
-
x._id.toString(),
|
42 |
-
[
|
43 |
-
...x.messages,
|
44 |
-
{ type: "sources", sources: x.contextSources ?? [] } satisfies WebSearchMessageSources,
|
45 |
-
{ type: "result", id: x._id.toString() } satisfies WebSearchMessageResult,
|
46 |
-
],
|
47 |
-
])
|
48 |
-
);
|
49 |
-
|
50 |
return {
|
51 |
messages: conversation.messages,
|
52 |
title: conversation.title,
|
53 |
model: conversation.model,
|
54 |
-
searches,
|
55 |
};
|
56 |
};
|
|
|
2 |
import { ObjectId } from "mongodb";
|
3 |
import { error } from "@sveltejs/kit";
|
4 |
import { authCondition } from "$lib/server/auth";
|
|
|
5 |
import { UrlDependency } from "$lib/types/UrlDependency";
|
6 |
|
7 |
export const load = async ({ params, depends, locals }) => {
|
|
|
29 |
throw error(404, "Conversation not found.");
|
30 |
}
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
return {
|
33 |
messages: conversation.messages,
|
34 |
title: conversation.title,
|
35 |
model: conversation.model,
|
|
|
36 |
};
|
37 |
};
|
src/routes/conversation/[id]/+page.svelte
CHANGED
@@ -4,7 +4,6 @@
|
|
4 |
import { pendingMessageIdToRetry } from "$lib/stores/pendingMessageIdToRetry";
|
5 |
import { onMount } from "svelte";
|
6 |
import { page } from "$app/stores";
|
7 |
-
import { textGenerationStream, type Options } from "@huggingface/inference";
|
8 |
import { invalidate } from "$app/navigation";
|
9 |
import { base } from "$app/paths";
|
10 |
import { shareConversation } from "$lib/shareConversation";
|
@@ -13,9 +12,9 @@
|
|
13 |
import { randomUUID } from "$lib/utils/randomUuid";
|
14 |
import { findCurrentModel } from "$lib/utils/models";
|
15 |
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
16 |
-
import type { WebSearchMessage } from "$lib/types/WebSearch";
|
17 |
import type { Message } from "$lib/types/Message";
|
18 |
import { PUBLIC_APP_DISCLAIMER } from "$env/static/public";
|
|
|
19 |
|
20 |
export let data;
|
21 |
|
@@ -23,7 +22,7 @@
|
|
23 |
let lastLoadedMessages = data.messages;
|
24 |
let isAborted = false;
|
25 |
|
26 |
-
let webSearchMessages:
|
27 |
|
28 |
// Since we modify the messages array locally, we don't want to reset it if an old version is passed
|
29 |
$: if (data.messages !== lastLoadedMessages) {
|
@@ -35,91 +34,13 @@
|
|
35 |
let pending = false;
|
36 |
let loginRequired = false;
|
37 |
|
38 |
-
async function getTextGenerationStream(
|
39 |
-
inputs: string,
|
40 |
-
messageId: string,
|
41 |
-
isRetry = false,
|
42 |
-
webSearchId?: string
|
43 |
-
) {
|
44 |
-
let conversationId = $page.params.id;
|
45 |
-
const responseId = randomUUID();
|
46 |
-
|
47 |
-
const response = textGenerationStream(
|
48 |
-
{
|
49 |
-
model: $page.url.href,
|
50 |
-
inputs,
|
51 |
-
parameters: {
|
52 |
-
...data.models.find((m) => m.id === data.model)?.parameters,
|
53 |
-
return_full_text: false,
|
54 |
-
},
|
55 |
-
},
|
56 |
-
{
|
57 |
-
id: messageId,
|
58 |
-
response_id: responseId,
|
59 |
-
is_retry: isRetry,
|
60 |
-
use_cache: false,
|
61 |
-
web_search_id: webSearchId,
|
62 |
-
} as Options
|
63 |
-
);
|
64 |
-
|
65 |
-
for await (const output of response) {
|
66 |
-
pending = false;
|
67 |
-
|
68 |
-
if (!output) {
|
69 |
-
break;
|
70 |
-
}
|
71 |
-
|
72 |
-
if (conversationId !== $page.params.id) {
|
73 |
-
fetch(`${base}/conversation/${conversationId}/stop-generating`, {
|
74 |
-
method: "POST",
|
75 |
-
}).catch(console.error);
|
76 |
-
break;
|
77 |
-
}
|
78 |
-
|
79 |
-
if (isAborted) {
|
80 |
-
isAborted = false;
|
81 |
-
fetch(`${base}/conversation/${conversationId}/stop-generating`, {
|
82 |
-
method: "POST",
|
83 |
-
}).catch(console.error);
|
84 |
-
break;
|
85 |
-
}
|
86 |
-
|
87 |
-
// final message
|
88 |
-
if (output.generated_text) {
|
89 |
-
const lastMessage = messages[messages.length - 1];
|
90 |
-
|
91 |
-
if (lastMessage) {
|
92 |
-
lastMessage.content = output.generated_text;
|
93 |
-
lastMessage.webSearchId = webSearchId;
|
94 |
-
messages = [...messages];
|
95 |
-
}
|
96 |
-
break;
|
97 |
-
}
|
98 |
-
|
99 |
-
if (!output.token.special) {
|
100 |
-
const lastMessage = messages[messages.length - 1];
|
101 |
-
|
102 |
-
if (lastMessage?.from !== "assistant") {
|
103 |
-
// First token has a space at the beginning, trim it
|
104 |
-
messages = [
|
105 |
-
...messages,
|
106 |
-
// id doesn't match the backend id but it's not important for assistant messages
|
107 |
-
{ from: "assistant", content: output.token.text.trimStart(), id: responseId },
|
108 |
-
];
|
109 |
-
} else {
|
110 |
-
lastMessage.content += output.token.text;
|
111 |
-
messages = [...messages];
|
112 |
-
}
|
113 |
-
}
|
114 |
-
}
|
115 |
-
}
|
116 |
-
|
117 |
async function summarizeTitle(id: string) {
|
118 |
await fetch(`${base}/conversation/${id}/summarize`, {
|
119 |
method: "POST",
|
120 |
});
|
121 |
}
|
122 |
|
|
|
123 |
async function writeMessage(message: string, messageId = randomUUID()) {
|
124 |
if (!message.trim()) return;
|
125 |
|
@@ -128,76 +49,105 @@
|
|
128 |
loading = true;
|
129 |
pending = true;
|
130 |
|
|
|
|
|
131 |
let retryMessageIndex = messages.findIndex((msg) => msg.id === messageId);
|
132 |
const isRetry = retryMessageIndex !== -1;
|
|
|
133 |
if (!isRetry) {
|
134 |
retryMessageIndex = messages.length;
|
135 |
}
|
136 |
|
|
|
137 |
messages = [
|
138 |
...messages.slice(0, retryMessageIndex),
|
139 |
{ from: "user", content: message, id: messageId },
|
140 |
];
|
141 |
|
142 |
-
|
143 |
-
if ($webSearchParameters.useSearch) {
|
144 |
-
webSearchMessages = [];
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
161 |
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
164 |
return;
|
165 |
}
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
if (done) {
|
171 |
-
reader.cancel();
|
172 |
-
return;
|
173 |
-
}
|
174 |
-
|
175 |
-
try {
|
176 |
-
webSearchMessages = (JSON.parse(value) as { messages: WebSearchMessage[] })
|
177 |
-
.messages;
|
178 |
-
} catch (parseError) {
|
179 |
-
// in case of parsing error we wait for the next message
|
180 |
-
return;
|
181 |
-
}
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
}
|
190 |
-
})
|
191 |
-
|
192 |
-
|
193 |
-
}
|
194 |
-
|
|
|
195 |
}
|
196 |
|
197 |
-
|
198 |
-
|
199 |
webSearchMessages = [];
|
200 |
|
|
|
|
|
201 |
if (messages.filter((m) => m.from === "user").length === 1) {
|
202 |
summarizeTitle($page.params.id)
|
203 |
.then(() => invalidate(UrlDependency.ConversationList))
|
@@ -283,7 +233,6 @@
|
|
283 |
{pending}
|
284 |
{messages}
|
285 |
bind:webSearchMessages
|
286 |
-
searches={{ ...data.searches }}
|
287 |
on:message={(event) => writeMessage(event.detail)}
|
288 |
on:retry={(event) => writeMessage(event.detail.content, event.detail.id)}
|
289 |
on:vote={(event) => voteMessage(event.detail.score, event.detail.id)}
|
|
|
4 |
import { pendingMessageIdToRetry } from "$lib/stores/pendingMessageIdToRetry";
|
5 |
import { onMount } from "svelte";
|
6 |
import { page } from "$app/stores";
|
|
|
7 |
import { invalidate } from "$app/navigation";
|
8 |
import { base } from "$app/paths";
|
9 |
import { shareConversation } from "$lib/shareConversation";
|
|
|
12 |
import { randomUUID } from "$lib/utils/randomUuid";
|
13 |
import { findCurrentModel } from "$lib/utils/models";
|
14 |
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
|
|
15 |
import type { Message } from "$lib/types/Message";
|
16 |
import { PUBLIC_APP_DISCLAIMER } from "$env/static/public";
|
17 |
+
import type { MessageUpdate, WebSearchUpdate } from "$lib/types/MessageUpdate";
|
18 |
|
19 |
export let data;
|
20 |
|
|
|
22 |
let lastLoadedMessages = data.messages;
|
23 |
let isAborted = false;
|
24 |
|
25 |
+
let webSearchMessages: WebSearchUpdate[] = [];
|
26 |
|
27 |
// Since we modify the messages array locally, we don't want to reset it if an old version is passed
|
28 |
$: if (data.messages !== lastLoadedMessages) {
|
|
|
34 |
let pending = false;
|
35 |
let loginRequired = false;
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
async function summarizeTitle(id: string) {
|
38 |
await fetch(`${base}/conversation/${id}/summarize`, {
|
39 |
method: "POST",
|
40 |
});
|
41 |
}
|
42 |
|
43 |
+
// this function is used to send new message to the backends
|
44 |
async function writeMessage(message: string, messageId = randomUUID()) {
|
45 |
if (!message.trim()) return;
|
46 |
|
|
|
49 |
loading = true;
|
50 |
pending = true;
|
51 |
|
52 |
+
// first we check if the messageId already exists, indicating a retry
|
53 |
+
|
54 |
let retryMessageIndex = messages.findIndex((msg) => msg.id === messageId);
|
55 |
const isRetry = retryMessageIndex !== -1;
|
56 |
+
// if it's not a retry we just use the whole array
|
57 |
if (!isRetry) {
|
58 |
retryMessageIndex = messages.length;
|
59 |
}
|
60 |
|
61 |
+
// slice up to the point of the retry
|
62 |
messages = [
|
63 |
...messages.slice(0, retryMessageIndex),
|
64 |
{ from: "user", content: message, id: messageId },
|
65 |
];
|
66 |
|
67 |
+
const responseId = randomUUID();
|
|
|
|
|
68 |
|
69 |
+
const response = await fetch(`${base}/conversation/${$page.params.id}`, {
|
70 |
+
method: "POST",
|
71 |
+
headers: { "Content-Type": "application/json" },
|
72 |
+
body: JSON.stringify({
|
73 |
+
inputs: message,
|
74 |
+
id: messageId,
|
75 |
+
response_id: responseId,
|
76 |
+
is_retry: isRetry,
|
77 |
+
web_search: $webSearchParameters.useSearch,
|
78 |
+
}),
|
79 |
+
});
|
80 |
+
|
81 |
+
if (!response.body) {
|
82 |
+
throw new Error("Body not defined");
|
83 |
+
}
|
84 |
+
|
85 |
+
// eslint-disable-next-line no-undef
|
86 |
+
const encoder = new TextDecoderStream();
|
87 |
+
const reader = response?.body?.pipeThrough(encoder).getReader();
|
88 |
+
let finalAnswer = "";
|
89 |
|
90 |
+
// this is a bit ugly
|
91 |
+
// we read the stream until we get the final answer
|
92 |
+
while (finalAnswer === "") {
|
93 |
+
// await new Promise((r) => setTimeout(r, 25));
|
94 |
|
95 |
+
// check for abort
|
96 |
+
if (isAborted) {
|
97 |
+
reader?.cancel();
|
98 |
+
break;
|
99 |
+
}
|
100 |
|
101 |
+
// if there is something to read
|
102 |
+
await reader?.read().then(async ({ done, value }) => {
|
103 |
+
// we read, if it's done we cancel
|
104 |
+
if (done) {
|
105 |
+
reader.cancel();
|
106 |
return;
|
107 |
}
|
108 |
|
109 |
+
if (!value) {
|
110 |
+
return;
|
111 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
+
// if it's not done we parse the value, which contains all messages
|
114 |
+
const inputs = value.split("\n");
|
115 |
+
inputs.forEach((el: string) => {
|
116 |
+
try {
|
117 |
+
let update = JSON.parse(el) as MessageUpdate;
|
118 |
+
if (update.type === "finalAnswer") {
|
119 |
+
finalAnswer = update.text;
|
120 |
+
invalidate(UrlDependency.Conversation);
|
121 |
+
} else if (update.type === "stream") {
|
122 |
+
pending = false;
|
123 |
+
|
124 |
+
let lastMessage = messages[messages.length - 1];
|
125 |
+
|
126 |
+
if (lastMessage.from !== "assistant") {
|
127 |
+
messages = [
|
128 |
+
...messages,
|
129 |
+
{ from: "assistant", id: randomUUID(), content: update.token },
|
130 |
+
];
|
131 |
+
} else {
|
132 |
+
lastMessage.content += update.token;
|
133 |
+
messages = [...messages];
|
134 |
+
}
|
135 |
+
} else if (update.type === "webSearch") {
|
136 |
+
webSearchMessages = [...webSearchMessages, update];
|
137 |
}
|
138 |
+
} catch (parseError) {
|
139 |
+
// in case of parsing error we wait for the next message
|
140 |
+
return;
|
141 |
+
}
|
142 |
+
});
|
143 |
+
});
|
144 |
}
|
145 |
|
146 |
+
// reset the websearchmessages
|
|
|
147 |
webSearchMessages = [];
|
148 |
|
149 |
+
// do title summarization
|
150 |
+
// TODO: we should change this to wait until there is an assistant response.
|
151 |
if (messages.filter((m) => m.from === "user").length === 1) {
|
152 |
summarizeTitle($page.params.id)
|
153 |
.then(() => invalidate(UrlDependency.ConversationList))
|
|
|
233 |
{pending}
|
234 |
{messages}
|
235 |
bind:webSearchMessages
|
|
|
236 |
on:message={(event) => writeMessage(event.detail)}
|
237 |
on:retry={(event) => writeMessage(event.detail.content, event.detail.id)}
|
238 |
on:vote={(event) => voteMessage(event.detail.score, event.detail.id)}
|
src/routes/conversation/[id]/+server.ts
CHANGED
@@ -1,34 +1,37 @@
|
|
1 |
-
import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
|
2 |
import { buildPrompt } from "$lib/buildPrompt";
|
3 |
import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
|
4 |
-
import { abortedGenerations } from "$lib/server/abortedGenerations";
|
5 |
import { authCondition, requiresUser } from "$lib/server/auth";
|
6 |
import { collections } from "$lib/server/database";
|
7 |
import { modelEndpoint } from "$lib/server/modelEndpoint";
|
8 |
import { models } from "$lib/server/models";
|
9 |
-
import { ERROR_MESSAGES } from "$lib/stores/errors
|
10 |
import type { Message } from "$lib/types/Message";
|
11 |
-
import { concatUint8Arrays } from "$lib/utils/concatUint8Arrays";
|
12 |
-
import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
|
13 |
import { trimPrefix } from "$lib/utils/trimPrefix";
|
14 |
import { trimSuffix } from "$lib/utils/trimSuffix";
|
15 |
-
import
|
16 |
import { error } from "@sveltejs/kit";
|
17 |
import { ObjectId } from "mongodb";
|
18 |
import { z } from "zod";
|
19 |
import { AwsClient } from "aws4fetch";
|
|
|
|
|
|
|
|
|
20 |
|
21 |
export async function POST({ request, fetch, locals, params, getClientAddress }) {
|
22 |
const id = z.string().parse(params.id);
|
23 |
const convId = new ObjectId(id);
|
24 |
-
const
|
25 |
|
26 |
const userId = locals.user?._id ?? locals.sessionId;
|
27 |
|
|
|
28 |
if (!userId) {
|
29 |
throw error(401, "Unauthorized");
|
30 |
}
|
31 |
|
|
|
32 |
const conv = await collections.conversations.findOne({
|
33 |
_id: convId,
|
34 |
...authCondition(locals),
|
@@ -38,12 +41,14 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
|
|
38 |
throw error(404, "Conversation not found");
|
39 |
}
|
40 |
|
|
|
41 |
await collections.messageEvents.insertOne({
|
42 |
userId: userId,
|
43 |
createdAt: new Date(),
|
44 |
ip: getClientAddress(),
|
45 |
});
|
46 |
|
|
|
47 |
if (
|
48 |
!locals.user?._id &&
|
49 |
requiresUser &&
|
@@ -52,6 +57,7 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
|
|
52 |
throw error(429, "Exceeded number of messages before login");
|
53 |
}
|
54 |
|
|
|
55 |
const nEvents = Math.max(
|
56 |
await collections.messageEvents.countDocuments({ userId }),
|
57 |
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
|
@@ -61,6 +67,7 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
|
|
61 |
throw error(429, ERROR_MESSAGES.rateLimited);
|
62 |
}
|
63 |
|
|
|
64 |
const model = models.find((m) => m.id === conv.model);
|
65 |
const settings = await collections.settings.findOne(authCondition(locals));
|
66 |
|
@@ -68,24 +75,30 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
|
|
68 |
throw error(410, "Model not available anymore");
|
69 |
}
|
70 |
|
|
|
71 |
const json = await request.json();
|
|
|
72 |
const {
|
73 |
inputs: newPrompt,
|
74 |
-
|
|
|
|
|
|
|
75 |
} = z
|
76 |
.object({
|
77 |
inputs: z.string().trim().min(1),
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
web_search_id: z.ostring(),
|
83 |
-
}),
|
84 |
})
|
85 |
.parse(json);
|
86 |
|
87 |
-
|
|
|
|
|
88 |
if (is_retry && messageId) {
|
|
|
89 |
let retryMessageIdx = conv.messages.findIndex((message) => message.id === messageId);
|
90 |
if (retryMessageIdx === -1) {
|
91 |
retryMessageIdx = conv.messages.length;
|
@@ -94,7 +107,8 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
|
|
94 |
...conv.messages.slice(0, retryMessageIdx),
|
95 |
{ content: newPrompt, from: "user", id: messageId as Message["id"], updatedAt: new Date() },
|
96 |
];
|
97 |
-
}
|
|
|
98 |
return [
|
99 |
...conv.messages,
|
100 |
{
|
@@ -107,109 +121,171 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
|
|
107 |
];
|
108 |
})() satisfies Message[];
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
preprompt: settings?.customPrompts?.[model.id] ?? model.preprompt,
|
115 |
-
locals: locals,
|
116 |
-
});
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
const requestParams = JSON.stringify({
|
125 |
-
...json,
|
126 |
-
inputs: prompt,
|
127 |
-
});
|
128 |
-
|
129 |
-
const aws = new AwsClient({
|
130 |
-
accessKeyId: randomEndpoint.accessKey,
|
131 |
-
secretAccessKey: randomEndpoint.secretKey,
|
132 |
-
sessionToken: randomEndpoint.sessionToken,
|
133 |
-
service: "sagemaker",
|
134 |
-
});
|
135 |
-
|
136 |
-
resp = await aws.fetch(randomEndpoint.url, {
|
137 |
-
method: "POST",
|
138 |
-
body: requestParams,
|
139 |
-
signal: abortController.signal,
|
140 |
-
headers: {
|
141 |
-
"Content-Type": "application/json",
|
142 |
-
},
|
143 |
-
});
|
144 |
-
} else {
|
145 |
-
resp = await fetch(randomEndpoint.url, {
|
146 |
-
headers: {
|
147 |
-
"Content-Type": request.headers.get("Content-Type") ?? "application/json",
|
148 |
-
Authorization: randomEndpoint.authorization,
|
149 |
-
},
|
150 |
-
method: "POST",
|
151 |
-
body: JSON.stringify({
|
152 |
-
...json,
|
153 |
-
inputs: prompt,
|
154 |
-
}),
|
155 |
-
signal: abortController.signal,
|
156 |
-
});
|
157 |
-
}
|
158 |
|
159 |
-
|
160 |
-
throw new Error("Response body is empty");
|
161 |
-
}
|
162 |
|
163 |
-
|
164 |
|
165 |
-
|
166 |
-
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
|
|
172 |
|
173 |
-
|
174 |
-
|
175 |
-
PUBLIC_SEP_TOKEN
|
176 |
-
).trimEnd();
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
}
|
182 |
-
}
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
|
|
201 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
}
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
-
saveMessage().catch(console.error);
|
207 |
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
|
208 |
-
return new Response(
|
209 |
-
headers: Object.fromEntries(resp.headers.entries()),
|
210 |
-
status: resp.status,
|
211 |
-
statusText: resp.statusText,
|
212 |
-
});
|
213 |
}
|
214 |
|
215 |
export async function DELETE({ locals, params }) {
|
@@ -229,74 +305,6 @@ export async function DELETE({ locals, params }) {
|
|
229 |
return new Response();
|
230 |
}
|
231 |
|
232 |
-
async function parseGeneratedText(
|
233 |
-
stream: ReadableStream,
|
234 |
-
conversationId: ObjectId,
|
235 |
-
promptedAt: Date,
|
236 |
-
abortController: AbortController
|
237 |
-
): Promise<string> {
|
238 |
-
const inputs: Uint8Array[] = [];
|
239 |
-
for await (const input of streamToAsyncIterable(stream)) {
|
240 |
-
inputs.push(input);
|
241 |
-
|
242 |
-
const date = abortedGenerations.get(conversationId.toString());
|
243 |
-
|
244 |
-
if (date && date > promptedAt) {
|
245 |
-
abortController.abort("Cancelled by user");
|
246 |
-
const completeInput = concatUint8Arrays(inputs);
|
247 |
-
|
248 |
-
const lines = new TextDecoder()
|
249 |
-
.decode(completeInput)
|
250 |
-
.split("\n")
|
251 |
-
.filter((line) => line.startsWith("data:"));
|
252 |
-
|
253 |
-
const tokens = lines.map((line) => {
|
254 |
-
try {
|
255 |
-
const json: TextGenerationStreamOutput = JSON.parse(line.slice("data:".length));
|
256 |
-
return json.token.text;
|
257 |
-
} catch {
|
258 |
-
return "";
|
259 |
-
}
|
260 |
-
});
|
261 |
-
return tokens.join("");
|
262 |
-
}
|
263 |
-
}
|
264 |
-
|
265 |
-
// Merge inputs into a single Uint8Array
|
266 |
-
const completeInput = concatUint8Arrays(inputs);
|
267 |
-
|
268 |
-
// Get last line starting with "data:" and parse it as JSON to get the generated text
|
269 |
-
const message = new TextDecoder().decode(completeInput);
|
270 |
-
|
271 |
-
let lastIndex = message.lastIndexOf("\ndata:");
|
272 |
-
if (lastIndex === -1) {
|
273 |
-
lastIndex = message.indexOf("data");
|
274 |
-
}
|
275 |
-
|
276 |
-
if (lastIndex === -1) {
|
277 |
-
console.error("Could not parse last message", message);
|
278 |
-
}
|
279 |
-
|
280 |
-
let lastMessage = message.slice(lastIndex).trim().slice("data:".length);
|
281 |
-
if (lastMessage.includes("\n")) {
|
282 |
-
lastMessage = lastMessage.slice(0, lastMessage.indexOf("\n"));
|
283 |
-
}
|
284 |
-
|
285 |
-
const lastMessageJSON = JSON.parse(lastMessage);
|
286 |
-
|
287 |
-
if (lastMessageJSON.error) {
|
288 |
-
throw new Error(lastMessageJSON.error);
|
289 |
-
}
|
290 |
-
|
291 |
-
const res = lastMessageJSON.generated_text;
|
292 |
-
|
293 |
-
if (typeof res !== "string") {
|
294 |
-
throw new Error("Could not parse generated text");
|
295 |
-
}
|
296 |
-
|
297 |
-
return res;
|
298 |
-
}
|
299 |
-
|
300 |
export async function PATCH({ request, locals, params }) {
|
301 |
const { title } = z
|
302 |
.object({ title: z.string().trim().min(1).max(100) })
|
|
|
1 |
+
import { HF_ACCESS_TOKEN, MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
|
2 |
import { buildPrompt } from "$lib/buildPrompt";
|
3 |
import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
|
|
|
4 |
import { authCondition, requiresUser } from "$lib/server/auth";
|
5 |
import { collections } from "$lib/server/database";
|
6 |
import { modelEndpoint } from "$lib/server/modelEndpoint";
|
7 |
import { models } from "$lib/server/models";
|
8 |
+
import { ERROR_MESSAGES } from "$lib/stores/errors";
|
9 |
import type { Message } from "$lib/types/Message";
|
|
|
|
|
10 |
import { trimPrefix } from "$lib/utils/trimPrefix";
|
11 |
import { trimSuffix } from "$lib/utils/trimSuffix";
|
12 |
+
import { textGenerationStream } from "@huggingface/inference";
|
13 |
import { error } from "@sveltejs/kit";
|
14 |
import { ObjectId } from "mongodb";
|
15 |
import { z } from "zod";
|
16 |
import { AwsClient } from "aws4fetch";
|
17 |
+
import type { MessageUpdate } from "$lib/types/MessageUpdate";
|
18 |
+
import { runWebSearch } from "$lib/server/websearch/runWebSearch";
|
19 |
+
import type { WebSearch } from "$lib/types/WebSearch";
|
20 |
+
import { abortedGenerations } from "$lib/server/abortedGenerations.js";
|
21 |
|
22 |
export async function POST({ request, fetch, locals, params, getClientAddress }) {
|
23 |
const id = z.string().parse(params.id);
|
24 |
const convId = new ObjectId(id);
|
25 |
+
const promptedAt = new Date();
|
26 |
|
27 |
const userId = locals.user?._id ?? locals.sessionId;
|
28 |
|
29 |
+
// check user
|
30 |
if (!userId) {
|
31 |
throw error(401, "Unauthorized");
|
32 |
}
|
33 |
|
34 |
+
// check if the user has access to the conversation
|
35 |
const conv = await collections.conversations.findOne({
|
36 |
_id: convId,
|
37 |
...authCondition(locals),
|
|
|
41 |
throw error(404, "Conversation not found");
|
42 |
}
|
43 |
|
44 |
+
// register the event for ratelimiting
|
45 |
await collections.messageEvents.insertOne({
|
46 |
userId: userId,
|
47 |
createdAt: new Date(),
|
48 |
ip: getClientAddress(),
|
49 |
});
|
50 |
|
51 |
+
// make sure an anonymous user can't post more than one message
|
52 |
if (
|
53 |
!locals.user?._id &&
|
54 |
requiresUser &&
|
|
|
57 |
throw error(429, "Exceeded number of messages before login");
|
58 |
}
|
59 |
|
60 |
+
// check if the user is rate limited
|
61 |
const nEvents = Math.max(
|
62 |
await collections.messageEvents.countDocuments({ userId }),
|
63 |
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
|
|
|
67 |
throw error(429, ERROR_MESSAGES.rateLimited);
|
68 |
}
|
69 |
|
70 |
+
// fetch the model
|
71 |
const model = models.find((m) => m.id === conv.model);
|
72 |
const settings = await collections.settings.findOne(authCondition(locals));
|
73 |
|
|
|
75 |
throw error(410, "Model not available anymore");
|
76 |
}
|
77 |
|
78 |
+
// finally parse the content of the request
|
79 |
const json = await request.json();
|
80 |
+
|
81 |
const {
|
82 |
inputs: newPrompt,
|
83 |
+
response_id: responseId,
|
84 |
+
id: messageId,
|
85 |
+
is_retry,
|
86 |
+
web_search: webSearch,
|
87 |
} = z
|
88 |
.object({
|
89 |
inputs: z.string().trim().min(1),
|
90 |
+
id: z.optional(z.string().uuid()),
|
91 |
+
response_id: z.optional(z.string().uuid()),
|
92 |
+
is_retry: z.optional(z.boolean()),
|
93 |
+
web_search: z.optional(z.boolean()),
|
|
|
|
|
94 |
})
|
95 |
.parse(json);
|
96 |
|
97 |
+
// get the list of messages
|
98 |
+
// while checking for retries
|
99 |
+
let messages = (() => {
|
100 |
if (is_retry && messageId) {
|
101 |
+
// if the message is a retry, replace the message and remove the messages after it
|
102 |
let retryMessageIdx = conv.messages.findIndex((message) => message.id === messageId);
|
103 |
if (retryMessageIdx === -1) {
|
104 |
retryMessageIdx = conv.messages.length;
|
|
|
107 |
...conv.messages.slice(0, retryMessageIdx),
|
108 |
{ content: newPrompt, from: "user", id: messageId as Message["id"], updatedAt: new Date() },
|
109 |
];
|
110 |
+
} // else append the message at the bottom
|
111 |
+
|
112 |
return [
|
113 |
...conv.messages,
|
114 |
{
|
|
|
121 |
];
|
122 |
})() satisfies Message[];
|
123 |
|
124 |
+
// we now build the stream
|
125 |
+
const stream = new ReadableStream({
|
126 |
+
async start(controller) {
|
127 |
+
const updates: MessageUpdate[] = [];
|
|
|
|
|
|
|
128 |
|
129 |
+
function update(newUpdate: MessageUpdate) {
|
130 |
+
if (newUpdate.type !== "stream") {
|
131 |
+
updates.push(newUpdate);
|
132 |
+
}
|
133 |
+
controller.enqueue(JSON.stringify(newUpdate) + "\n");
|
134 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
+
update({ type: "status", status: "started" });
|
|
|
|
|
137 |
|
138 |
+
let webSearchResults: WebSearch | undefined;
|
139 |
|
140 |
+
if (webSearch) {
|
141 |
+
webSearchResults = await runWebSearch(conv, newPrompt, update);
|
142 |
+
}
|
143 |
|
144 |
+
// we can now build the prompt using the messages
|
145 |
+
const prompt = await buildPrompt({
|
146 |
+
messages,
|
147 |
+
model,
|
148 |
+
webSearch: webSearchResults,
|
149 |
+
preprompt: settings?.customPrompts?.[model.id] ?? model.preprompt,
|
150 |
+
locals: locals,
|
151 |
+
});
|
152 |
|
153 |
+
// fetch the endpoint
|
154 |
+
const randomEndpoint = modelEndpoint(model);
|
|
|
|
|
155 |
|
156 |
+
let usedFetch = fetch;
|
157 |
+
|
158 |
+
if (randomEndpoint.host === "sagemaker") {
|
159 |
+
const aws = new AwsClient({
|
160 |
+
accessKeyId: randomEndpoint.accessKey,
|
161 |
+
secretAccessKey: randomEndpoint.secretKey,
|
162 |
+
sessionToken: randomEndpoint.sessionToken,
|
163 |
+
service: "sagemaker",
|
164 |
+
});
|
165 |
+
|
166 |
+
usedFetch = aws.fetch.bind(aws) as typeof fetch;
|
167 |
}
|
|
|
168 |
|
169 |
+
async function saveLast(generated_text: string) {
|
170 |
+
const lastMessage = messages[messages.length - 1];
|
171 |
+
|
172 |
+
if (lastMessage) {
|
173 |
+
// We could also check if PUBLIC_ASSISTANT_MESSAGE_TOKEN is present and use it to slice the text
|
174 |
+
if (generated_text.startsWith(prompt)) {
|
175 |
+
generated_text = generated_text.slice(prompt.length);
|
176 |
+
}
|
177 |
+
|
178 |
+
generated_text = trimSuffix(
|
179 |
+
trimPrefix(generated_text, "<|startoftext|>"),
|
180 |
+
PUBLIC_SEP_TOKEN
|
181 |
+
).trimEnd();
|
182 |
+
|
183 |
+
// remove the stop tokens
|
184 |
+
for (const stop of [...(model?.parameters?.stop ?? []), "<|endoftext|>"]) {
|
185 |
+
if (generated_text.endsWith(stop)) {
|
186 |
+
generated_text = generated_text.slice(0, -stop.length).trimEnd();
|
187 |
+
}
|
188 |
+
}
|
189 |
+
lastMessage.content = generated_text;
|
190 |
+
|
191 |
+
await collections.conversations.updateOne(
|
192 |
+
{
|
193 |
+
_id: convId,
|
194 |
+
},
|
195 |
+
{
|
196 |
+
$set: {
|
197 |
+
messages,
|
198 |
+
updatedAt: new Date(),
|
199 |
+
},
|
200 |
+
}
|
201 |
+
);
|
202 |
+
|
203 |
+
update({
|
204 |
+
type: "finalAnswer",
|
205 |
+
text: generated_text,
|
206 |
+
});
|
207 |
+
}
|
208 |
+
}
|
209 |
|
210 |
+
const tokenStream = textGenerationStream(
|
211 |
+
{
|
212 |
+
parameters: {
|
213 |
+
...models.find((m) => m.id === conv.model)?.parameters,
|
214 |
+
return_full_text: false,
|
215 |
+
},
|
216 |
+
model: randomEndpoint.url,
|
217 |
+
inputs: prompt,
|
218 |
+
accessToken: randomEndpoint.host === "sagemaker" ? undefined : HF_ACCESS_TOKEN,
|
219 |
},
|
220 |
+
{
|
221 |
+
use_cache: false,
|
222 |
+
fetch: usedFetch,
|
223 |
+
}
|
224 |
+
);
|
225 |
+
|
226 |
+
for await (const output of tokenStream) {
|
227 |
+
// if not generated_text is here it means the generation is not done
|
228 |
+
if (!output.generated_text) {
|
229 |
+
// else we get the next token
|
230 |
+
if (!output.token.special) {
|
231 |
+
const lastMessage = messages[messages.length - 1];
|
232 |
+
update({
|
233 |
+
type: "stream",
|
234 |
+
token: output.token.text,
|
235 |
+
});
|
236 |
+
|
237 |
+
// if the last message is not from assistant, it means this is the first token
|
238 |
+
if (lastMessage?.from !== "assistant") {
|
239 |
+
// so we create a new message
|
240 |
+
messages = [
|
241 |
+
...messages,
|
242 |
+
// id doesn't match the backend id but it's not important for assistant messages
|
243 |
+
// First token has a space at the beginning, trim it
|
244 |
+
{
|
245 |
+
from: "assistant",
|
246 |
+
content: output.token.text.trimStart(),
|
247 |
+
webSearch: webSearchResults,
|
248 |
+
updates: updates,
|
249 |
+
id: (responseId as Message["id"]) || crypto.randomUUID(),
|
250 |
+
createdAt: new Date(),
|
251 |
+
updatedAt: new Date(),
|
252 |
+
},
|
253 |
+
];
|
254 |
+
} else {
|
255 |
+
const date = abortedGenerations.get(convId.toString());
|
256 |
+
if (date && date > promptedAt) {
|
257 |
+
saveLast(lastMessage.content);
|
258 |
+
}
|
259 |
+
if (!output) {
|
260 |
+
break;
|
261 |
+
}
|
262 |
+
|
263 |
+
// otherwise we just concatenate tokens
|
264 |
+
lastMessage.content += output.token.text;
|
265 |
+
}
|
266 |
+
}
|
267 |
+
} else {
|
268 |
+
saveLast(output.generated_text);
|
269 |
+
}
|
270 |
}
|
271 |
+
},
|
272 |
+
async cancel() {
|
273 |
+
await collections.conversations.updateOne(
|
274 |
+
{
|
275 |
+
_id: convId,
|
276 |
+
},
|
277 |
+
{
|
278 |
+
$set: {
|
279 |
+
messages,
|
280 |
+
updatedAt: new Date(),
|
281 |
+
},
|
282 |
+
}
|
283 |
+
);
|
284 |
+
},
|
285 |
+
});
|
286 |
|
|
|
287 |
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
|
288 |
+
return new Response(stream);
|
|
|
|
|
|
|
|
|
289 |
}
|
290 |
|
291 |
export async function DELETE({ locals, params }) {
|
|
|
305 |
return new Response();
|
306 |
}
|
307 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
export async function PATCH({ request, locals, params }) {
|
309 |
const { title } = z
|
310 |
.object({ title: z.string().trim().min(1).max(100) })
|
src/routes/conversation/[id]/share/+server.ts
CHANGED
@@ -3,7 +3,7 @@ import { PUBLIC_ORIGIN, PUBLIC_SHARE_PREFIX } from "$env/static/public";
|
|
3 |
import { authCondition } from "$lib/server/auth";
|
4 |
import { collections } from "$lib/server/database";
|
5 |
import type { SharedConversation } from "$lib/types/SharedConversation";
|
6 |
-
import { hashConv } from "$lib/utils/hashConv
|
7 |
import { error } from "@sveltejs/kit";
|
8 |
import { ObjectId } from "mongodb";
|
9 |
import { nanoid } from "nanoid";
|
|
|
3 |
import { authCondition } from "$lib/server/auth";
|
4 |
import { collections } from "$lib/server/database";
|
5 |
import type { SharedConversation } from "$lib/types/SharedConversation";
|
6 |
+
import { hashConv } from "$lib/utils/hashConv";
|
7 |
import { error } from "@sveltejs/kit";
|
8 |
import { ObjectId } from "mongodb";
|
9 |
import { nanoid } from "nanoid";
|
src/routes/conversation/[id]/summarize/+server.ts
CHANGED
@@ -4,7 +4,7 @@ import { authCondition } from "$lib/server/auth";
|
|
4 |
import { collections } from "$lib/server/database";
|
5 |
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
|
6 |
import { defaultModel } from "$lib/server/models";
|
7 |
-
import { ERROR_MESSAGES } from "$lib/stores/errors
|
8 |
import { error } from "@sveltejs/kit";
|
9 |
import { ObjectId } from "mongodb";
|
10 |
|
|
|
4 |
import { collections } from "$lib/server/database";
|
5 |
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
|
6 |
import { defaultModel } from "$lib/server/models";
|
7 |
+
import { ERROR_MESSAGES } from "$lib/stores/errors";
|
8 |
import { error } from "@sveltejs/kit";
|
9 |
import { ObjectId } from "mongodb";
|
10 |
|
src/routes/conversation/[id]/web-search/+server.ts
DELETED
@@ -1,165 +0,0 @@
|
|
1 |
-
import { authCondition } from "$lib/server/auth";
|
2 |
-
import { collections } from "$lib/server/database";
|
3 |
-
import { searchWeb } from "$lib/server/websearch/searchWeb";
|
4 |
-
import type { Message } from "$lib/types/Message";
|
5 |
-
import { error } from "@sveltejs/kit";
|
6 |
-
import { ObjectId } from "mongodb";
|
7 |
-
import { z } from "zod";
|
8 |
-
import type { WebSearch, WebSearchSource } from "$lib/types/WebSearch";
|
9 |
-
import { generateQuery } from "$lib/server/websearch/generateQuery";
|
10 |
-
import { parseWeb } from "$lib/server/websearch/parseWeb";
|
11 |
-
import { chunk } from "$lib/utils/chunk";
|
12 |
-
import { findSimilarSentences } from "$lib/server/websearch/sentenceSimilarity";
|
13 |
-
import { RATE_LIMIT } from "$env/static/private";
|
14 |
-
import { ERROR_MESSAGES } from "$lib/stores/errors.js";
|
15 |
-
|
16 |
-
const MAX_N_PAGES_SCRAPE = 10 as const;
|
17 |
-
const MAX_N_PAGES_EMBED = 5 as const;
|
18 |
-
|
19 |
-
export async function GET({ params, locals, url, getClientAddress }) {
|
20 |
-
const convId = new ObjectId(params.id);
|
21 |
-
const searchId = new ObjectId();
|
22 |
-
|
23 |
-
const conv = await collections.conversations.findOne({
|
24 |
-
_id: convId,
|
25 |
-
...authCondition(locals),
|
26 |
-
});
|
27 |
-
|
28 |
-
if (!conv) {
|
29 |
-
throw error(404, "Conversation not found");
|
30 |
-
}
|
31 |
-
|
32 |
-
const userId = locals.user?._id ?? locals.sessionId;
|
33 |
-
|
34 |
-
await collections.messageEvents.insertOne({
|
35 |
-
userId: userId,
|
36 |
-
createdAt: new Date(),
|
37 |
-
ip: getClientAddress(),
|
38 |
-
});
|
39 |
-
|
40 |
-
const nEvents = Math.max(
|
41 |
-
await collections.messageEvents.countDocuments({ userId }),
|
42 |
-
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
|
43 |
-
);
|
44 |
-
|
45 |
-
if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
|
46 |
-
throw error(429, ERROR_MESSAGES.rateLimited);
|
47 |
-
}
|
48 |
-
|
49 |
-
const prompt = z.string().trim().min(1).parse(url.searchParams.get("prompt"));
|
50 |
-
|
51 |
-
const messages = (() => {
|
52 |
-
return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
|
53 |
-
})() satisfies Message[];
|
54 |
-
|
55 |
-
const stream = new ReadableStream({
|
56 |
-
async start(controller) {
|
57 |
-
const webSearch: WebSearch = {
|
58 |
-
_id: searchId,
|
59 |
-
convId: convId,
|
60 |
-
prompt: prompt,
|
61 |
-
searchQuery: "",
|
62 |
-
results: [],
|
63 |
-
context: "",
|
64 |
-
contextSources: [],
|
65 |
-
messages: [],
|
66 |
-
createdAt: new Date(),
|
67 |
-
updatedAt: new Date(),
|
68 |
-
};
|
69 |
-
|
70 |
-
function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
|
71 |
-
webSearch.messages.push({
|
72 |
-
type: type ?? "update",
|
73 |
-
message,
|
74 |
-
args,
|
75 |
-
});
|
76 |
-
controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
|
77 |
-
}
|
78 |
-
|
79 |
-
try {
|
80 |
-
appendUpdate("Generating search query");
|
81 |
-
webSearch.searchQuery = await generateQuery(messages);
|
82 |
-
|
83 |
-
appendUpdate("Searching Google", [webSearch.searchQuery]);
|
84 |
-
const results = await searchWeb(webSearch.searchQuery);
|
85 |
-
webSearch.results =
|
86 |
-
(results.organic_results &&
|
87 |
-
results.organic_results.map((el: { title: string; link: string }) => {
|
88 |
-
const { title, link } = el;
|
89 |
-
const { hostname } = new URL(link);
|
90 |
-
return { title, link, hostname };
|
91 |
-
})) ??
|
92 |
-
[];
|
93 |
-
webSearch.results = webSearch.results
|
94 |
-
.filter(({ link }) => !link.includes("youtube.com")) // filter out youtube links
|
95 |
-
.slice(0, MAX_N_PAGES_SCRAPE); // limit to first 10 links only
|
96 |
-
|
97 |
-
let paragraphChunks: { source: WebSearchSource; text: string }[] = [];
|
98 |
-
if (webSearch.results.length > 0) {
|
99 |
-
appendUpdate("Browsing results");
|
100 |
-
const promises = webSearch.results.map(async (result) => {
|
101 |
-
const { link } = result;
|
102 |
-
let text = "";
|
103 |
-
try {
|
104 |
-
text = await parseWeb(link);
|
105 |
-
appendUpdate("Browsing webpage", [link]);
|
106 |
-
} catch (e) {
|
107 |
-
console.error(`Error parsing webpage "${link}"`, e);
|
108 |
-
}
|
109 |
-
const CHUNK_CAR_LEN = 512;
|
110 |
-
const MAX_N_CHUNKS = 100;
|
111 |
-
const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
|
112 |
-
return texts.map((t) => ({ source: result, text: t }));
|
113 |
-
});
|
114 |
-
const nestedParagraphChunks = (await Promise.all(promises)).slice(0, MAX_N_PAGES_EMBED);
|
115 |
-
paragraphChunks = nestedParagraphChunks.flat();
|
116 |
-
if (!paragraphChunks.length) {
|
117 |
-
throw new Error("No text found on the first 5 results");
|
118 |
-
}
|
119 |
-
} else {
|
120 |
-
throw new Error("No results found for this search query");
|
121 |
-
}
|
122 |
-
|
123 |
-
appendUpdate("Extracting relevant information");
|
124 |
-
const topKClosestParagraphs = 8;
|
125 |
-
const texts = paragraphChunks.map(({ text }) => text);
|
126 |
-
const indices = await findSimilarSentences(prompt, texts, {
|
127 |
-
topK: topKClosestParagraphs,
|
128 |
-
});
|
129 |
-
webSearch.context = indices.map((idx) => texts[idx]).join("");
|
130 |
-
|
131 |
-
const usedSources = new Set<string>();
|
132 |
-
for (const idx of indices) {
|
133 |
-
const { source } = paragraphChunks[idx];
|
134 |
-
if (!usedSources.has(source.link)) {
|
135 |
-
usedSources.add(source.link);
|
136 |
-
webSearch.contextSources.push(source);
|
137 |
-
}
|
138 |
-
}
|
139 |
-
|
140 |
-
appendUpdate("Injecting relevant information");
|
141 |
-
} catch (searchError) {
|
142 |
-
if (searchError instanceof Error) {
|
143 |
-
webSearch.messages.push({
|
144 |
-
type: "error",
|
145 |
-
message: "An error occurred with the web search",
|
146 |
-
args: [JSON.stringify(searchError.message)],
|
147 |
-
});
|
148 |
-
}
|
149 |
-
}
|
150 |
-
|
151 |
-
const res = await collections.webSearches.insertOne(webSearch);
|
152 |
-
webSearch.messages.push({
|
153 |
-
type: "sources",
|
154 |
-
sources: webSearch.contextSources,
|
155 |
-
});
|
156 |
-
webSearch.messages.push({
|
157 |
-
type: "result",
|
158 |
-
id: res.insertedId.toString(),
|
159 |
-
});
|
160 |
-
controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
|
161 |
-
},
|
162 |
-
});
|
163 |
-
|
164 |
-
return new Response(stream, { headers: { "Content-Type": "application/json" } });
|
165 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/routes/r/[id]/+page.server.ts
CHANGED
@@ -1,8 +1,6 @@
|
|
1 |
import type { PageServerLoad } from "./$types";
|
2 |
import { collections } from "$lib/server/database";
|
3 |
import { error } from "@sveltejs/kit";
|
4 |
-
import { ObjectId } from "mongodb";
|
5 |
-
import type { WebSearchMessageResult, WebSearchMessageSources } from "$lib/types/WebSearch";
|
6 |
|
7 |
export const load: PageServerLoad = async ({ params }) => {
|
8 |
const conversation = await collections.sharedConversations.findOne({
|
@@ -13,27 +11,9 @@ export const load: PageServerLoad = async ({ params }) => {
|
|
13 |
throw error(404, "Conversation not found");
|
14 |
}
|
15 |
|
16 |
-
const webSearchesId = conversation.messages
|
17 |
-
.filter((message) => message.webSearchId)
|
18 |
-
.map((message) => new ObjectId(message.webSearchId));
|
19 |
-
|
20 |
-
const results = await collections.webSearches.find({ _id: { $in: webSearchesId } }).toArray();
|
21 |
-
|
22 |
-
const searches = Object.fromEntries(
|
23 |
-
results.map((x) => [
|
24 |
-
x._id.toString(),
|
25 |
-
[
|
26 |
-
...x.messages,
|
27 |
-
{ type: "sources", sources: x.contextSources ?? [] } satisfies WebSearchMessageSources,
|
28 |
-
{ type: "result", id: x._id.toString() } satisfies WebSearchMessageResult,
|
29 |
-
],
|
30 |
-
])
|
31 |
-
);
|
32 |
-
|
33 |
return {
|
34 |
messages: conversation.messages,
|
35 |
title: conversation.title,
|
36 |
model: conversation.model,
|
37 |
-
searches,
|
38 |
};
|
39 |
};
|
|
|
1 |
import type { PageServerLoad } from "./$types";
|
2 |
import { collections } from "$lib/server/database";
|
3 |
import { error } from "@sveltejs/kit";
|
|
|
|
|
4 |
|
5 |
export const load: PageServerLoad = async ({ params }) => {
|
6 |
const conversation = await collections.sharedConversations.findOne({
|
|
|
11 |
throw error(404, "Conversation not found");
|
12 |
}
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
return {
|
15 |
messages: conversation.messages,
|
16 |
title: conversation.title,
|
17 |
model: conversation.model,
|
|
|
18 |
};
|
19 |
};
|
src/routes/r/[id]/+page.svelte
CHANGED
@@ -60,7 +60,6 @@
|
|
60 |
{loading}
|
61 |
shared={true}
|
62 |
messages={data.messages}
|
63 |
-
searches={data.searches}
|
64 |
on:message={(ev) =>
|
65 |
createConversation()
|
66 |
.then((convId) => {
|
|
|
60 |
{loading}
|
61 |
shared={true}
|
62 |
messages={data.messages}
|
|
|
63 |
on:message={(ev) =>
|
64 |
createConversation()
|
65 |
.then((convId) => {
|
src/routes/search/[id]/+server.ts
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import { collections } from "$lib/server/database";
|
2 |
-
import { hashConv } from "$lib/utils/hashConv
|
3 |
import { error } from "@sveltejs/kit";
|
4 |
import { ObjectId } from "mongodb";
|
5 |
|
|
|
1 |
import { collections } from "$lib/server/database";
|
2 |
+
import { hashConv } from "$lib/utils/hashConv";
|
3 |
import { error } from "@sveltejs/kit";
|
4 |
import { ObjectId } from "mongodb";
|
5 |
|