from __future__ import annotations

import dataclasses
import os
from typing import Any, List

import numpy as np
import orjson

from autogpt.llm_utils import create_embedding_with_ada
from autogpt.memory.base import MemoryProviderSingleton

EMBED_DIM = 1536
SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS


def create_default_embeddings() -> np.ndarray:
    # An empty (0, EMBED_DIM) float32 matrix; add() appends one row per text.
    return np.zeros((0, EMBED_DIM), dtype=np.float32)


@dataclasses.dataclass
class CacheContent:
    texts: List[str] = dataclasses.field(default_factory=list)
    embeddings: np.ndarray = dataclasses.field(
        default_factory=create_default_embeddings
    )
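

# On disk the cache is a single JSON object mirroring CacheContent. With
# SAVE_OPTIONS (OPT_SERIALIZE_NUMPY | OPT_SERIALIZE_DATACLASS) the dataclass
# is written as a dict of its fields and the embeddings matrix as nested
# lists, e.g. (a sketch, values elided):
#
#     {"texts": ["first memory"], "embeddings": [[0.01, -0.02, ...]]}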


class LocalCache(MemoryProviderSingleton):
    """A class that stores the memory in a local file"""

    def __init__(self, cfg) -> None:
        """Initialize a class instance

        Args:
            cfg: Config object

        Returns:
            None
        """
        self.filename = f"{cfg.memory_index}.json"
        if os.path.exists(self.filename):
            try:
                # Open in "r+b", not "w+b": "w+b" truncates the file on open,
                # so the existing cache would be wiped before it could be read.
                with open(self.filename, "r+b") as f:
                    file_content = f.read()
                    if not file_content.strip():
                        file_content = b"{}"
                        f.write(file_content)

                loaded = orjson.loads(file_content)
                self.data = CacheContent(
                    texts=loaded.get("texts", []),
                    # orjson deserializes the matrix as nested lists; rebuild
                    # the (n, EMBED_DIM) float32 array so the numpy operations
                    # in add() and get_relevant() keep working after a reload.
                    embeddings=np.array(
                        loaded.get("embeddings", []), dtype=np.float32
                    ).reshape(-1, EMBED_DIM),
                )
            except orjson.JSONDecodeError:
                print(f"Error: The file '{self.filename}' is not in JSON format.")
                self.data = CacheContent()
        else:
            print(
                f"Warning: The file '{self.filename}' does not exist. "
                "Local memory will not be saved to a file."
            )
            self.data = CacheContent()

    def add(self, text: str) -> str:
        """
        Add text to our list of texts, add embedding as row to our
        embeddings-matrix

        Args:
            text: str

        Returns: The added text, or an empty string if the text was skipped.
        """
        # Skip command errors so they are not stored as memories.
        if "Command Error:" in text:
            return ""
        self.data.texts.append(text)

        embedding = create_embedding_with_ada(text)

        # Append the new embedding as one more row of the (n, EMBED_DIM) matrix.
        vector = np.array(embedding).astype(np.float32)
        vector = vector[np.newaxis, :]
        self.data.embeddings = np.concatenate(
            [
                self.data.embeddings,
                vector,
            ],
            axis=0,
        )

        # Persist the whole cache to disk after every addition.
        with open(self.filename, "wb") as f:
            out = orjson.dumps(self.data, option=SAVE_OPTIONS)
            f.write(out)
        return text

    def clear(self) -> str:
        """
        Clears the local memory cache.

        Returns: A message indicating that the memory has been cleared.
        """
        # Only the in-memory data is reset; the backing file is left as-is
        # until the next add() rewrites it.
        self.data = CacheContent()
        return "Obliviated"

    def get(self, data: str) -> list[Any] | None:
        """
        Gets the data from the memory that is most relevant to the given data.

        Args:
            data: The data to compare to.

        Returns: The most relevant data.
        """
        return self.get_relevant(data, 1)

    def get_relevant(self, text: str, k: int) -> list[Any]:
        """
        Matrix-vector multiplication scores every stored embedding against the
        query embedding; the indices of the top-k scores select the texts to
        return.

        Args:
            text: str
            k: int

        Returns: List[str]
        """
        embedding = create_embedding_with_ada(text)

        # One score per stored row; ada embeddings are unit-length, so the
        # dot product equals the cosine similarity.
        scores = np.dot(self.data.embeddings, embedding)

        # argsort is ascending, so take the last k indices and reverse them
        # to put the best match first.
        top_k_indices = np.argsort(scores)[-k:][::-1]

        return [self.data.texts[i] for i in top_k_indices]

    def get_stats(self) -> tuple[int, tuple[int, ...]]:
        """
        Returns: The stats of the local cache.
        """
        return len(self.data.texts), self.data.embeddings.shape
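

# A minimal usage sketch, not part of the module: `_DemoConfig` is a
# hypothetical stand-in, since any object exposing a `memory_index` attribute
# works as `cfg`. Note that add() and get_relevant() call the OpenAI embedding
# API through create_embedding_with_ada, so an API key must be configured.
#
#     @dataclasses.dataclass
#     class _DemoConfig:
#         memory_index: str = "auto-gpt-demo"
#
#     cache = LocalCache(_DemoConfig())
#     cache.add("Paris is the capital of France.")
#     print(cache.get_relevant("What is the capital of France?", k=1))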
|