diff options
| author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-06 13:15:07 +0200 |
|---|---|---|
| committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-06 13:15:07 +0200 |
| commit | 052bf63a2c18b1b55013dcf6974228609cc4d76f (patch) | |
| tree | 1846b4c5555ca504bfb638f72bee14249f502577 /rag/rag.py | |
| parent | d116abc63e350b092c2a7f9e1bb9b54298e21b2d (diff) | |
Refactor pdf reader
Diffstat (limited to 'rag/rag.py')
| -rw-r--r-- | rag/rag.py | 14 |
1 file changed, 0 insertions, 14 deletions
```diff
@@ -1,15 +1,12 @@
-from pathlib import Path
 from typing import List
 
 from dotenv import load_dotenv
 from loguru import logger as log
 from qdrant_client.models import StrictFloat
 
-from rag.db.document import DocumentDB
 from rag.db.vector import VectorDB
 from rag.llm.encoder import Encoder
 from rag.llm.generator import Generator, Prompt
-from rag.parser import pdf
 
 
 class RAG:
@@ -17,19 +14,8 @@ class RAG:
         load_dotenv()
         self.generator = Generator()
         self.encoder = Encoder()
-        self.document_db = DocumentDB()
         self.vector_db = VectorDB()
 
-    def add_pdf(self, filepath: Path):
-        chunks = pdf.parser(filepath)
-        added = self.document_db.add(chunks)
-        if added:
-            log.debug(f"Adding pdf with filepath: {filepath} to vector db")
-            points = self.encoder.encode_document(chunks)
-            self.vector_db.add(points)
-        else:
-            log.debug("Document already exists!")
-
     def __context(self, query_emb: List[StrictFloat], limit: int) -> str:
         hits = self.vector_db.search(query_emb, limit)
         log.debug(f"Got {len(hits)} hits in the vector db with limit={limit}")
```