From 052bf63a2c18b1b55013dcf6974228609cc4d76f Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Sat, 6 Apr 2024 13:15:07 +0200
Subject: Refactor pdf reader

---
 rag/rag.py | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'rag/rag.py')

diff --git a/rag/rag.py b/rag/rag.py
index 87b44c5..6826a80 100644
--- a/rag/rag.py
+++ b/rag/rag.py
@@ -1,15 +1,12 @@
-from pathlib import Path
 from typing import List
 
 from dotenv import load_dotenv
 from loguru import logger as log
 from qdrant_client.models import StrictFloat
 
-from rag.db.document import DocumentDB
 from rag.db.vector import VectorDB
 from rag.llm.encoder import Encoder
 from rag.llm.generator import Generator, Prompt
-from rag.parser import pdf
 
 
 class RAG:
@@ -17,19 +14,8 @@ class RAG:
         load_dotenv()
         self.generator = Generator()
         self.encoder = Encoder()
-        self.document_db = DocumentDB()
         self.vector_db = VectorDB()
 
-    def add_pdf(self, filepath: Path):
-        chunks = pdf.parser(filepath)
-        added = self.document_db.add(chunks)
-        if added:
-            log.debug(f"Adding pdf with filepath: {filepath} to vector db")
-            points = self.encoder.encode_document(chunks)
-            self.vector_db.add(points)
-        else:
-            log.debug("Document already exists!")
-
     def __context(self, query_emb: List[StrictFloat], limit: int) -> str:
         hits = self.vector_db.search(query_emb, limit)
         log.debug(f"Got {len(hits)} hits in the vector db with limit={limit}")
-- 
cgit v1.2.3-70-g09d2