# Recovered from the diff of commit 91ddb3672e514fa9824609ff047d7cab0c65631a
# ("Refactor", Gustaf Rydholm, Tue, 9 Apr 2024 00:14:00 +0200), which deleted
# rag/llm/encoder.py (47 lines, blob a59b1b4).
"""Embedding helpers that turn document chunks and queries into vectors."""

import os
from pathlib import Path
from typing import List, Dict
from uuid import uuid4

import ollama
from langchain_core.documents import Document
from loguru import logger as log
from qdrant_client.http.models import StrictFloat


# Try the package-qualified import first; if that raises
# ModuleNotFoundError, fall back to the unqualified ``db.vector`` path.
try:
    from rag.db.vector import Point
except ModuleNotFoundError:
    from db.vector import Point


class Encoder:
    """Encode text into embedding vectors through ``ollama.embeddings``.

    The model name is read from the ``ENCODER_MODEL`` environment variable
    when the instance is created; a missing variable raises ``KeyError``.
    """

    def __init__(self) -> None:
        """Read the model name from the environment and set the query prefix."""
        self.model = os.environ["ENCODER_MODEL"]
        # Prefix prepended to every query before it is embedded.
        self.query_prompt = "Represent this sentence for searching relevant passages: "

    def __encode(self, prompt: str) -> List[StrictFloat]:
        """Return the ``"embedding"`` entry of the Ollama response as a list."""
        response = ollama.embeddings(model=self.model, prompt=prompt)
        return list(response["embedding"])

    def __get_source(self, metadata: Dict[str, str]) -> str:
        """Return the final path component of ``metadata["source"]``.

        Raises:
            KeyError: if ``metadata`` has no ``"source"`` key.
        """
        return Path(metadata["source"]).name

    def encode_document(self, chunks: List[Document]) -> List[Point]:
        """Build one ``Point`` per chunk, in the order the chunks are given.

        Each point gets a fresh ``uuid4`` hex id, the embedding of the chunk's
        ``page_content`` as its vector, and a payload holding that text plus
        the file name taken from the chunk's ``metadata["source"]``.
        """
        log.debug("Encoding document...")
        points: List[Point] = []
        for chunk in chunks:
            text = chunk.page_content
            # Same evaluation order as a keyword-argument call would use:
            # id first, then the embedding, then the payload.
            point_id = uuid4().hex
            embedding = self.__encode(text)
            payload = {
                "text": text,
                "source": self.__get_source(chunk.metadata),
            }
            points.append(Point(id=point_id, vector=embedding, payload=payload))
        return points

    def encode_query(self, query: str) -> List[StrictFloat]:
        """Embed ``query`` after prepending ``self.query_prompt`` to it."""
        log.debug(f"Encoding query: {query}")
        prefixed_query = self.query_prompt + query
        return self.__encode(prefixed_query)