diff --git a/.env_example b/.env_example
index 4f544df..fd5a3ff 100644
--- a/.env_example
+++ b/.env_example
@@ -1,4 +1,21 @@
-OPENAI_API_KEY=
+## Django settings
+# In production, add your URLs here
+DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
+# Set to False in production
+DJANGO_DEBUG=True
+# Fill with a secret key
+DJANGO_SECRET_KEY=some-secret-key-CHANGE-THIS-IN-PRODUCTION-41auGrAJ7X
+# Path to the documents to be indexed
+FRAG_RAW_DB=/path/to/your/actual/data
+# Path to the database directory
+FRAG_DB_DIRECTORY=/path/to/your/actual/database
+# File types to be indexed; only pdf and txt are supported
+FRAG_FILE_TYPES=pdf,txt
-DJANGO_KEY=
\ No newline at end of file
+# Ollama host: use this if you are running Ollama on a remote server
+# To use the local Ollama server, set it to localhost or comment it out
+OLLAMA_HOST=http://ip.address.or.url:11434
+
+# API keys
+OPENAI_API_KEY=your_openai_api_key
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3522f5b..db663eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,13 @@
 chroma_data
 # Ignore Django migrations
 **/migrations/*.py
 !**/migrations/__init__.py
+*.sock
+
+# Space to store the database and logs
+data/
+logs/
+
+# Ignore actual config files but keep examples
+config/*.yaml
+!config/*.example.yaml
diff --git a/cl-tools/chat.py b/cl-tools/chat.py
index 21eb12d..f881bbe 100644
--- a/cl-tools/chat.py
+++ b/cl-tools/chat.py
@@ -1,21 +1,33 @@
 import os
 import sys
-from dotenv import load_dotenv
 from openai import OpenAI
 
 # Add the parent directory to sys.path
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(parent_dir)
 
+# Import configurations using the new config loader
+from config.config_loader import get_embedding_config, get_llm_config
 from retrieval.main import ChromaRetriever
-from config.embedding_config import model_name, db_directory, collection_name
-
 from llm.main import Responder, OpenAIResponder
-from config.llm_config import llm_model, prompt, openai_model, use_openai
 
+# Get configurations
+embedding_config = get_embedding_config()
+llm_config = get_llm_config()
+
+# Extract embedding configuration values
+model_name = embedding_config['model_name']
+collection_name = embedding_config['collection_name']
 
-load_dotenv(os.path.join(parent_dir, '.env'))
+# Extract LLM configuration values
+llm_model = llm_config['llm_model']
+prompt = llm_config['prompt']
+openai_model = llm_config['openai_model']
+use_openai = llm_config['use_openai']
+number_docs_response = llm_config['number_docs_response']
 
+# Extract DB location from environment (defined in .env file)
+db_directory = os.environ.get("FRAG_DB_DIRECTORY")
 
 openai_client = OpenAI(
     api_key=os.environ.get("OPENAI_API_KEY"),
@@ -26,7 +38,7 @@ def main():
     retriever = ChromaRetriever(embedding_model=model_name,
                                 db_path=db_directory,
                                 db_collection=collection_name,
-                                n_results=5)
+                                n_results=number_docs_response)
 
     user_query = str(input("Ask a question. Type quit to exit: "))
     if user_query.lower() == "quit":
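Aside: a minimal sketch (not part of the patch) of how the CLI tools resolve settings after this change, assuming config/embedding_config.yaml and config/llm_config.yaml have been copied from the example files and FRAG_DB_DIRECTORY is set in .env; the print call is only illustrative.

    # Sketch: settings now come from two places — YAML for model choices, .env for machine-specific paths.
    import os
    from config.config_loader import get_embedding_config, get_llm_config

    embedding_config = get_embedding_config()
    llm_config = get_llm_config()

    model_name = embedding_config['model_name']          # embedding model from YAML
    db_directory = os.environ.get("FRAG_DB_DIRECTORY")   # DB path from the environment
    n_results = llm_config['number_docs_response']       # how many chunks to retrieve
    print(model_name, db_directory, n_results)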
diff --git a/cl-tools/search.py b/cl-tools/search.py
index 706e42d..e0096a3 100644
--- a/cl-tools/search.py
+++ b/cl-tools/search.py
@@ -6,9 +6,19 @@
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(parent_dir)
 
+# Import configurations using the new config loader
+from config.config_loader import get_embedding_config
 from retrieval.main import ChromaRetriever
-from config.embedding_config import model_name, db_directory, collection_name
 
+# Get the configuration
+embedding_config = get_embedding_config()
+
+# Extract configuration values
+model_name = embedding_config['model_name']
+collection_name = embedding_config['collection_name']
+
+# Extract DB location from environment (defined in .env file)
+db_directory = os.environ.get("FRAG_DB_DIRECTORY")
 
 def create_argument_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(description='Script to perform vectorDB semantic search')
diff --git a/config/config_loader.py b/config/config_loader.py
new file mode 100644
index 0000000..c5c4a5f
--- /dev/null
+++ b/config/config_loader.py
@@ -0,0 +1,65 @@
+import os
+import yaml
+import logging
+from pathlib import Path
+from dotenv import load_dotenv
+
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+load_dotenv(os.path.join(root_dir, '.env'))  # Load environment variables from .env file
+
+
+class ConfigLoader:
+    """
+    Loads configuration from YAML files, expanding user paths and caching results.
+    """
+
+    def __init__(self):
+        self.config_dir = Path(__file__).parent
+        self._config_cache = {}
+
+    def load_config(self, name):
+        """
+        Load configuration from a YAML file.
+
+        Args:
+            name: Name of the configuration file without extension (e.g., 'embedding_config')
+
+        Returns:
+            dict: Configuration as a dictionary
+        """
+        # If this file was already loaded, serve the version from the cache
+        if name in self._config_cache:
+            return self._config_cache[name]
+
+        # Check if the config file exists and load it
+        config_path = self.config_dir / f"{name}.yaml"
+        if not config_path.exists():
+            raise FileNotFoundError(f"Configuration file {config_path} not found")
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
+
+        # Process any path expansions like ~ for home directory
+        self._process_paths(config)
+
+        self._config_cache[name] = config
+        return config
+
+    def _process_paths(self, config):
+        """Process any paths in the config to expand user paths."""
+        for key, value in config.items():
+            if isinstance(value, str) and '~' in value:
+                config[key] = os.path.expanduser(value)
+            elif isinstance(value, dict):
+                self._process_paths(value)
+
+
+# Create a singleton instance for use throughout the application
+config_loader = ConfigLoader()
+
+
+# Helper functions to easily access specific configs
+def get_embedding_config():
+    """Get the embedding configuration."""
+    return config_loader.load_config('embedding_config')
+
+
+def get_llm_config():
+    """Get the LLM configuration."""
+    return config_loader.load_config('llm_config')
\ No newline at end of file
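Aside: a short usage sketch for the loader above (not part of the patch), assuming a config/embedding_config.yaml copied from the example file; it exercises the per-file cache and user-path expansion.

    from config.config_loader import config_loader, get_embedding_config

    cfg = get_embedding_config()                          # parses config/embedding_config.yaml, expands '~' in string values
    same = config_loader.load_config('embedding_config')  # second call is served from _config_cache
    assert cfg is same
    print(cfg['chunk_size'], cfg['overlap_size'])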
diff --git a/config/embedding_config.example.yaml b/config/embedding_config.example.yaml
new file mode 100644
index 0000000..ab9bd07
--- /dev/null
+++ b/config/embedding_config.example.yaml
@@ -0,0 +1,21 @@
+# Embedding and vector database configuration
+
+# Embedding model to use
+model_name: "Lajavaness/bilingual-embedding-large"
+
+# Vector database type
+vector_db: "chromaDB"  # Allowed values: ['chromaDB', 'FAISS']. Only chromaDB works for now
+
+# Collection name in the vector database
+collection_name: "my_collection"
+
+# Language for the tokenizer
+# Supported languages: czech, danish, dutch, english, estonian, finnish, french,
+# german, greek, italian, norwegian, polish, portuguese, russian, slovene,
+# spanish, swedish, turkish
+data_language: "english"
+
+# Number of sentences each chunk will contain
+chunk_size: 20
+
+overlap_size: 5  # must be less than chunk_size; indicates how many sentences overlap when splitting chunks
diff --git a/config/embedding_config.py b/config/embedding_config.py
deleted file mode 100644
index 750ae86..0000000
--- a/config/embedding_config.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# this file containst default values related to embeddings and creating vectordb
-import os
-
-model_name = "Lajavaness/bilingual-embedding-large" #choose any embedding model you prefer
-
-vector_db = "chromaDB" # Allowed Values ['chromaDB', 'FAISS']. Only ChromaDB works now
-
-collection_name = "my_collection"
-
-raw_db = "/path/to/data" #root directory to where raw documents are stored
-
-data_language = "english" #variable for the tokenizer. Supported language = ['czech', 'danish', 'dutch', 'english', 'estonian', 'finnish', 'french', 'german' ,'greek' ,'italian' ,'norwegian', 'polish' ,'portuguese', 'russian' ,'slovene','spanish', 'swedish', 'turkish']
-
-db_directory = os.path.join(os.path.expanduser('~'), '.db') #default. Change it to where you want to store the vector DB
-
-chunk_size = 20 #number of sentences each chunk will contain in the vector db
-
-overlap_size = 5 # must be less than the chunk_size. It indicates how many sentences overlaps when splitting chunks
diff --git a/config/llm_config.example.yaml b/config/llm_config.example.yaml
new file mode 100644
index 0000000..0fe3410
--- /dev/null
+++ b/config/llm_config.example.yaml
@@ -0,0 +1,31 @@
+# LLM configuration settings
+
+# LLM model to use with Ollama
+llm_model: "llama3.1:8b"
+
+# Whether to use OpenAI (true) or Ollama (false)
+use_openai: false
+
+# OpenAI model to use if use_openai is true
+openai_model: "gpt-4o"
+
+# Number of documents to retrieve for generating a response
+number_docs_response: 12
+
+# Prompt template for the RAG system
+prompt: |
+  DOCUMENTS:
+
+  {data}
+
+
+  QUESTION:
+  {query}
+
+
+  INSTRUCTIONS:
+  Answer the user's QUESTION using the DOCUMENTS text above.
+  Keep your answer grounded in the facts of the DOCUMENTS.
+  If the DOCUMENTS do not contain the facts to answer the QUESTION, return NO Answer found
+
+record_data: true
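Aside: a sketch (not part of the patch) of how the prompt template is presumably filled before being sent to the model. The Responder classes are not shown in this diff, so the .format() call and the sample values are assumptions about how {data} and {query} get substituted.

    from config.config_loader import get_llm_config

    llm_config = get_llm_config()
    final_prompt = llm_config['prompt'].format(
        data="Doc 1: The warranty lasts two years.",  # formatted retrieval results (assumed shape)
        query="How long is the warranty?"             # user question
    )
    print(final_prompt)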
diff --git a/config/llm_config.py b/config/llm_config.py
deleted file mode 100644
index 9e189d7..0000000
--- a/config/llm_config.py
+++ /dev/null
@@ -1,25 +0,0 @@
-llm_model = 'deepseek-r1:1.5b' # select any model available on the ollama site https://ollama.com/search
-
-use_openai = False # set to True if using openai api and then select 'openai_model' variable. You need to add the openai api token in the .env file in the root dirextory
-
-openai_model = 'gpt-4o' # if using openai api then select which model to use
-
-
-
-prompt = """
-DOCUMENTS: \n
-{data}
-\n
-\n
-QUESTION:
-{query}
-\n
-\n
-INSTRUCTIONS:
-Answer the users QUESTION using the DOCUMENTS text above.
-Keep your answer ground in the facts of the DOCUMENT.
-If the DOCUMENT doesn't contain the facts to answer the QUESTION return NO Answer found
-"""
-
-
-record_data = True
\ No newline at end of file
diff --git a/django-server/rag_app/services.py b/django-server/rag_app/services.py
new file mode 100644
index 0000000..b4ad8b2
--- /dev/null
+++ b/django-server/rag_app/services.py
@@ -0,0 +1,169 @@
+import json
+import gc
+from typing import Generator, Dict, Any, List
+from retrieval.main import ChromaRetriever
+from llm.main import Responder, OpenAIResponder
+from config.config_loader import get_embedding_config, get_llm_config
+from openai import OpenAI
+import os
+
+
+class ChatService:
+    """
+    Service class to handle chat operations including document retrieval and LLM responses.
+    """
+
+    def __init__(self):
+        # Load configurations
+        self.embedding_config = get_embedding_config()
+        self.llm_config = get_llm_config()
+
+        # Extract configuration values
+        self.db_directory = os.environ.get("FRAG_DB_DIRECTORY")
+        self.number_docs_response = self.llm_config['number_docs_response']
+
+        # Initialize retriever
+        self.retriever = ChromaRetriever(
+            embedding_model=self.embedding_config['model_name'],
+            db_path=self.db_directory,
+            db_collection=self.embedding_config['collection_name'],
+            n_results=self.number_docs_response
+        )
+
+        # Initialize OpenAI client if needed
+        if self.llm_config['use_openai']:
+            self.openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+
+    def search_documents(self, query: str, n_results: int) -> Dict[str, Any]:
+        """
+        Search for relevant documents based on query.
+        """
+        # Update n_results for this search
+        self.retriever.n_results = n_results
+
+        # Retrieve documents
+        search_results = self.retriever.retrieve(query)
+
+        # Format results for prompt
+        formatted_result = self.retriever.format_results_for_prompt(search_results)
+
+        # Extract document metadata for response
+        documents = []
+        if search_results and 'metadatas' in search_results and search_results['metadatas']:
+            # Access the first (and only) list in the nested structure
+            metadatas = search_results['metadatas'][0]
+            docs = search_results['documents'][0] if search_results['documents'] else []
+            distances = search_results['distances'][0] if search_results['distances'] else []
+
+            for i, metadata in enumerate(metadatas):
+                documents.append({
+                    'file_name': metadata.get('file_name', 'Unknown'),
+                    'chunk_id': metadata.get('chunk_id', f'chunk_{i}'),
+                    'content': docs[i] if i < len(docs) else '',
+                    'distance': distances[i] if i < len(distances) else 0
+                })
+
+        return {
+            'formatted_data': formatted_result,
+            'documents': documents,
+            'raw_results': search_results
+        }
+
+    def generate_sync_response(self, query: str, n_results: int) -> Dict[str, Any]:
+        """
+        Generate a synchronous response for the given query.
+
+        Args:
+            query: The user's query
+            n_results: Number of documents to retrieve
+
+        Returns:
+            Dictionary containing response and source documents
+        """
+        # Search for relevant documents
+        search_data = self.search_documents(query, n_results)
+
+        # Generate response using LLM
+        if self.llm_config['use_openai']:
+            responder = OpenAIResponder(
+                data=search_data['formatted_data'],
+                model=self.llm_config['openai_model'],
+                prompt_template=self.llm_config['prompt'],
+                query=query,
+                client=self.openai_client
+            )
+        else:
+            responder = Responder(
+                data=search_data['formatted_data'],
+                model=self.llm_config['llm_model'],
+                prompt_template=self.llm_config['prompt'],
+                query=query
+            )
+
+        # Get the response
+        response_text = responder.generate_response()
+
+        return {
+            'response': response_text,
+            'documents': search_data['documents'],
+            'query': query,
+            'model_used': self.llm_config['openai_model'] if self.llm_config['use_openai'] else self.llm_config['llm_model']
+        }
+
+    def generate_stream_response(self, query: str, n_results: int) -> Generator[str, None, None]:
+        """
+        Generate a streaming response for the given query.
+
+        Args:
+            query: The user's query
+            n_results: Number of documents to retrieve
+
+        Yields:
+            String chunks of the response, followed by document metadata
+        """
+        try:
+            # Search for relevant documents
+            search_data = self.search_documents(query, n_results)
+
+            # Generate streaming response using LLM
+            if self.llm_config['use_openai']:
+                responder = OpenAIResponder(
+                    data=search_data['formatted_data'],
+                    model=self.llm_config['openai_model'],
+                    prompt_template=self.llm_config['prompt'],
+                    query=query,
+                    client=self.openai_client
+                )
+
+                # Stream response chunks
+                for chunk in responder.stream_response_chunks():
+                    yield chunk
+                    # Force garbage collection periodically
+                    gc.collect()
+            else:
+                responder = Responder(
+                    data=search_data['formatted_data'],
+                    model=self.llm_config['llm_model'],
+                    prompt_template=self.llm_config['prompt'],
+                    query=query
+                )
+
+                # Stream response chunks
+                chunk_count = 0
+                for chunk in responder.stream_response_chunks():
+                    chunk_count += 1
+                    yield chunk
+                    # Force garbage collection every 50 chunks
+                    if chunk_count % 50 == 0:
+                        gc.collect()
+
+            # After streaming is complete, send document metadata
+            docs_json = json.dumps(search_data['documents'])
+            yield f"<|DOCS_JSON|>{docs_json}"
+
+        except Exception as e:
+            yield f"\n\nError during response generation: {str(e)[:100]}..."
+            # Return empty documents on error
+            yield "<|DOCS_JSON|>[]"
+        finally:
+            # Final cleanup
+            gc.collect()
\ No newline at end of file
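Aside: a consumer-side sketch (not part of the patch) showing how a Django view or test might read the stream from generate_stream_response and split the answer text from the trailing <|DOCS_JSON|> metadata chunk; the import path and sample query are assumptions based on the app layout above.

    import json

    from rag_app.services import ChatService  # import path assumes the Django app layout above

    service = ChatService()
    answer_parts, documents = [], []
    for chunk in service.generate_stream_response("How long is the warranty?", n_results=5):
        if chunk.startswith("<|DOCS_JSON|>"):
            documents = json.loads(chunk[len("<|DOCS_JSON|>"):])  # trailing metadata payload
        else:
            answer_parts.append(chunk)  # incremental answer text

    print("".join(answer_parts))
    print(f"{len(documents)} source documents")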
diff --git a/django-server/rag_app/templates/rag_app/chat.html b/django-server/rag_app/templates/rag_app/chat.html
index 344a123..5e3878f 100644
--- a/django-server/rag_app/templates/rag_app/chat.html
+++ b/django-server/rag_app/templates/rag_app/chat.html
@@ -13,7 +13,7 @@
-This session if being recorded!
+This session is being recorded!
 {% endif %}
diff --git a/django-server/rag_app/templates/rag_app/search.html b/django-server/rag_app/templates/rag_app/search.html
index abcea32..bc7fa87 100644
--- a/django-server/rag_app/templates/rag_app/search.html
+++ b/django-server/rag_app/templates/rag_app/search.html
@@ -6,7 +6,7 @@
 <p>Empower your queries with state-of-the-art retrieval capabilities</p>
-