diff --git a/.env_example b/.env_example index 4f544df..fd5a3ff 100644 --- a/.env_example +++ b/.env_example @@ -1,4 +1,21 @@ -OPENAI_API_KEY= +## Django settings +# In production add your URLs here +DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1 +# Set to False in production +DJANGO_DEBUG=True +# Fill with a secret key +DJANGO_SECRET_KEY=some-secret-key-CHANGE-THIS-IN-PRODUCTION-41auGrAJ7X +# Path to the documents to be indexed +FRAG_RAW_DB=/path/to/your/actual/data +# Path to the database directory +FRAG_DB_DIRECTORY=/path/to/your/actual/database +# files to be indexed. Only pdf and txt are supported +FRAG_FILE_TYPES=pdf,txt -DJANGO_KEY= \ No newline at end of file +# Ollama host: use this if you are running Ollama on a distant server +# To use the local Ollama server, set it to localhost or comment it out +OLLAMA_HOST=http://ip.address.or.url:11434 + +# API keys +OPENAI_API_KEY=your_openai_api_key \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3522f5b..db663eb 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,13 @@ chroma_data # Ignore Django migrations **/migrations/*.py !**/migrations/__init__.py +*.sock + +# Space to store the database and logs +data/ +logs/ + +# Ignore actual config files but keep examples +config/*.yaml +!config/*.example.yaml diff --git a/cl-tools/chat.py b/cl-tools/chat.py index 21eb12d..f881bbe 100644 --- a/cl-tools/chat.py +++ b/cl-tools/chat.py @@ -1,21 +1,33 @@ import os import sys -from dotenv import load_dotenv from openai import OpenAI # Add the parent directory to sys.path parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(parent_dir) +# Import configurations using the new config loader +from config.config_loader import get_embedding_config, get_llm_config from retrieval.main import ChromaRetriever -from config.embedding_config import model_name, db_directory, collection_name - from llm.main import Responder, OpenAIResponder -from config.llm_config import llm_model, prompt, openai_model, use_openai +# Get configurations +embedding_config = get_embedding_config() +llm_config = get_llm_config() + +# Extract embedding configuration values +model_name = embedding_config['model_name'] +collection_name = embedding_config['collection_name'] -load_dotenv(os.path.join(parent_dir, '.env')) +# Extract LLM configuration values +llm_model = llm_config['llm_model'] +prompt = llm_config['prompt'] +openai_model = llm_config['openai_model'] +use_openai = llm_config['use_openai'] +number_docs_response = llm_config['number_docs_response'] +# Extract DB location from environment (defined in .env file) +db_directory = os.environ.get("FRAG_DB_DIRECTORY") openai_client = OpenAI( api_key=os.environ.get("OPENAI_API_KEY"), @@ -26,7 +38,7 @@ def main(): retriever = ChromaRetriever(embedding_model=model_name, db_path=db_directory, db_collection=collection_name, - n_results=5) + n_results=number_docs_response) user_query = str(input("Ask a question. 
Type quit to exit: ")) if user_query.lower() == "quit": diff --git a/cl-tools/search.py b/cl-tools/search.py index 706e42d..e0096a3 100644 --- a/cl-tools/search.py +++ b/cl-tools/search.py @@ -6,9 +6,19 @@ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(parent_dir) +# Import configurations using the new config loader +from config.config_loader import get_embedding_config from retrieval.main import ChromaRetriever -from config.embedding_config import model_name, db_directory, collection_name +# Get the configuration +embedding_config = get_embedding_config() + +# Extract configuration values +model_name = embedding_config['model_name'] +collection_name = embedding_config['collection_name'] + +# Extract DB location from environment (defined in .env file) +db_directory = os.environ.get("FRAG_DB_DIRECTORY") def create_argument_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description='Script to perform vectorDB semantic search') diff --git a/config/config_loader.py b/config/config_loader.py new file mode 100644 index 0000000..c5c4a5f --- /dev/null +++ b/config/config_loader.py @@ -0,0 +1,65 @@ +import os +import yaml +import logging +from pathlib import Path +from dotenv import load_dotenv + +root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +load_dotenv(os.path.join(root_dir, '.env')) # Load environment variables from .env file + + +class ConfigLoader: + """ + Loads configuration from YAML files with environment variable override support. + """ + + def __init__(self): + self.config_dir = Path(__file__).parent + self._config_cache = {} + + def load_config(self, name): + """ + Load configuration from a YAML file. + + Args: + name: Name of the configuration file without extension (e.g., 'embedding_config') + + Returns: + dict: Configuration as a dictionary + """ + # If this file was already loaded, serve the version from the cache + if name in self._config_cache: + return self._config_cache[name] + + # Check if the config file exists and load it + config_path = self.config_dir / f"{name}.yaml" + if not config_path.exists(): + raise FileNotFoundError(f"Configuration file {config_path} not found") + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + # Process any path expansions like ~ for home directory + self._process_paths(config) + + self._config_cache[name] = config + return config + + def _process_paths(self, config): + """Process any paths in the config to expand user paths.""" + for key, value in config.items(): + if isinstance(value, str) and '~' in value: + config[key] = os.path.expanduser(value) + elif isinstance(value, dict): + self._process_paths(value) + +# Create a singleton instance for use throughout the application +config_loader = ConfigLoader() + +# Helper functions to easily access specific configs +def get_embedding_config(): + """Get the embedding configuration.""" + return config_loader.load_config('embedding_config') + +def get_llm_config(): + """Get the LLM configuration.""" + return config_loader.load_config('llm_config') \ No newline at end of file diff --git a/config/embedding_config.example.yaml b/config/embedding_config.example.yaml new file mode 100644 index 0000000..ab9bd07 --- /dev/null +++ b/config/embedding_config.example.yaml @@ -0,0 +1,21 @@ +# Embedding and vector database configuration + +# Embedding model to use +model_name: "Lajavaness/bilingual-embedding-large" + +# Vector database type +vector_db: "chromaDB" # Allowed Values ['chromaDB', 'FAISS']. 
Only ChromaDB works now + +# Collection name in the vector database +collection_name: "my_collection" + +# Language for the tokenizer +# Supported languages: czech, danish, dutch, english, estonian, finnish, french, +# german, greek, italian, norwegian, polish, portuguese, russian, slovene, +# spanish, swedish, turkish +data_language: "english" + +# Number of sentences each chunk will contain +chunk_size: 20 + +overlap_size: 5 # must be less than chunk_size. Indicates how many sentences overlap when splitting chunks diff --git a/config/embedding_config.py b/config/embedding_config.py deleted file mode 100644 index 750ae86..0000000 --- a/config/embedding_config.py +++ /dev/null @@ -1,18 +0,0 @@ -# this file containst default values related to embeddings and creating vectordb -import os - -model_name = "Lajavaness/bilingual-embedding-large" #choose any embedding model you prefer - -vector_db = "chromaDB" # Allowed Values ['chromaDB', 'FAISS']. Only ChromaDB works now - -collection_name = "my_collection" - -raw_db = "/path/to/data" #root directory to where raw documents are stored - -data_language = "english" #variable for the tokenizer. Supported language = ['czech', 'danish', 'dutch', 'english', 'estonian', 'finnish', 'french', 'german' ,'greek' ,'italian' ,'norwegian', 'polish' ,'portuguese', 'russian' ,'slovene','spanish', 'swedish', 'turkish'] - -db_directory = os.path.join(os.path.expanduser('~'), '.db') #default. Change it to where you want to store the vector DB - -chunk_size = 20 #number of sentences each chunk will contain in the vector db - -overlap_size = 5 # must be less than the chunk_size. It indicates how many sentences overlaps when splitting chunks diff --git a/config/llm_config.example.yaml b/config/llm_config.example.yaml new file mode 100644 index 0000000..0fe3410 --- /dev/null +++ b/config/llm_config.example.yaml @@ -0,0 +1,31 @@ +# LLM configuration settings + +# LLM model to use with Ollama +llm_model: "llama3.1:8b" + +# Whether to use OpenAI (true) or Ollama (false) +use_openai: false + +# OpenAI model to use if use_openai is true +openai_model: "gpt-4o" + +# Number of documents to retrieve for generating a response +number_docs_response: 12 + +# Prompt template for the RAG system +prompt: | + DOCUMENTS: + + {data} + + + QUESTION: + {query} + + + INSTRUCTIONS: + Answer the users QUESTION using the DOCUMENTS text above. + Keep your answer ground in the facts of the DOCUMENT. + If the DOCUMENT doesn't contain the facts to answer the QUESTION return NO Answer found + +record_data: true diff --git a/config/llm_config.py b/config/llm_config.py deleted file mode 100644 index 9e189d7..0000000 --- a/config/llm_config.py +++ /dev/null @@ -1,25 +0,0 @@ -llm_model = 'deepseek-r1:1.5b' # select any model available on the ollama site https://ollama.com/search - -use_openai = False # set to True if using openai api and then select 'openai_model' variable. You need to add the openai api token in the .env file in the root dirextory - -openai_model = 'gpt-4o' # if using openai api then select which model to use - - - -prompt = """ -DOCUMENTS: \n -{data} -\n -\n -QUESTION: -{query} -\n -\n -INSTRUCTIONS: -Answer the users QUESTION using the DOCUMENTS text above. -Keep your answer ground in the facts of the DOCUMENT. 
-If the DOCUMENT doesn’t contain the facts to answer the QUESTION return NO Answer found -""" - - -record_data = True \ No newline at end of file diff --git a/django-server/rag_app/services.py b/django-server/rag_app/services.py new file mode 100644 index 0000000..b4ad8b2 --- /dev/null +++ b/django-server/rag_app/services.py @@ -0,0 +1,169 @@ +import json +import gc +from typing import Generator, Dict, Any, List +from retrieval.main import ChromaRetriever +from llm.main import Responder, OpenAIResponder +from config.config_loader import get_embedding_config, get_llm_config +from openai import OpenAI +import os + +class ChatService: + """ + Service class to handle chat operations including document retrieval and LLM responses. + """ + + def __init__(self): + # Load configurations + self.embedding_config = get_embedding_config() + self.llm_config = get_llm_config() + # Extract configuration values + self.db_directory = os.environ.get("FRAG_DB_DIRECTORY") + self.number_docs_response = self.llm_config['number_docs_response'] + + + # Initialize retriever + self.retriever = ChromaRetriever( + embedding_model=self.embedding_config['model_name'], + db_path=self.db_directory, + db_collection=self.embedding_config['collection_name'], + n_results=self.number_docs_response + ) + + # Initialize OpenAI client if needed + if self.llm_config['use_openai']: + self.openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + + def search_documents(self, query: str, n_results: int ) -> Dict[str, Any]: + """ + Search for relevant documents based on query. + """ + # Update n_results for this search + self.retriever.n_results = n_results + + # Retrieve documents + search_results = self.retriever.retrieve(query) + + # Format results for prompt + formatted_result = self.retriever.format_results_for_prompt(search_results) + + # Extract document metadata for response + documents = [] + if search_results and 'metadatas' in search_results and search_results['metadatas']: + # Access the first (and only) list in the nested structure + metadatas = search_results['metadatas'][0] + docs = search_results['documents'][0] if search_results['documents'] else [] + distances = search_results['distances'][0] if search_results['distances'] else [] + + for i, metadata in enumerate(metadatas): + documents.append({ + 'file_name': metadata.get('file_name', 'Unknown'), + 'chunk_id': metadata.get('chunk_id', f'chunk_{i}'), + 'content': docs[i] if i < len(docs) else '', + 'distance': distances[i] if i < len(distances) else 0 + }) + + return { + 'formatted_data': formatted_result, + 'documents': documents, + 'raw_results': search_results + } + + def generate_sync_response(self, query: str, n_results: int ) -> Dict[str, Any]: + """ + Generate a synchronous response for the given query. 
+ + Args: + query: The user's query + n_results: Number of documents to retrieve + + Returns: + Dictionary containing response and source documents + """ + # Search for relevant documents + search_data = self.search_documents(query, n_results) + + # Generate response using LLM + if self.llm_config['use_openai']: + responder = OpenAIResponder( + data=search_data['formatted_data'], + model=self.llm_config['openai_model'], + prompt_template=self.llm_config['prompt'], + query=query, + client=self.openai_client + ) + else: + responder = Responder( + data=search_data['formatted_data'], + model=self.llm_config['llm_model'], + prompt_template=self.llm_config['prompt'], + query=query + ) + + # Get the response + response_text = responder.generate_response() + + return { + 'response': response_text, + 'documents': search_data['documents'], + 'query': query, + 'model_used': self.llm_config['openai_model'] if self.llm_config['use_openai'] else self.llm_config['llm_model'] + } + + def generate_stream_response(self, query: str, n_results: int ) -> Generator[str, None, None]: + """ + Generate a streaming response for the given query. + + Args: + query: The user's query + n_results: Number of documents to retrieve + + Yields: + String chunks of the response, followed by document metadata + """ + try: + # Search for relevant documents + search_data = self.search_documents(query, n_results) + + # Generate streaming response using LLM + if self.llm_config['use_openai']: + responder = OpenAIResponder( + data=search_data['formatted_data'], + model=self.llm_config['openai_model'], + prompt_template=self.llm_config['prompt'], + query=query, + client=self.openai_client + ) + + # Stream response chunks + for chunk in responder.stream_response_chunks(): + yield chunk + # Force garbage collection periodically + gc.collect() + else: + responder = Responder( + data=search_data['formatted_data'], + model=self.llm_config['llm_model'], + prompt_template=self.llm_config['prompt'], + query=query + ) + + # Stream response chunks + chunk_count = 0 + for chunk in responder.stream_response_chunks(): + chunk_count += 1 + yield chunk + # Force garbage collection every 50 chunks + if chunk_count % 50 == 0: + gc.collect() + + # After streaming is complete, send document metadata + docs_json = json.dumps(search_data['documents']) + yield f"<|DOCS_JSON|>{docs_json}" + + except Exception as e: + yield f"\n\nError during response generation: {str(e)[:100]}..." + # Return empty documents on error + yield "<|DOCS_JSON|>[]" + finally: + # Final cleanup + gc.collect() \ No newline at end of file diff --git a/django-server/rag_app/templates/rag_app/chat.html b/django-server/rag_app/templates/rag_app/chat.html index 344a123..5e3878f 100644 --- a/django-server/rag_app/templates/rag_app/chat.html +++ b/django-server/rag_app/templates/rag_app/chat.html @@ -13,7 +13,7 @@
             Chat with the LLM
         {% if record_data %}
-            This session if being recorded!
+            This session is being recorded!
         {% endif %}
diff --git a/django-server/rag_app/templates/rag_app/search.html b/django-server/rag_app/templates/rag_app/search.html
index abcea32..bc7fa87 100644
--- a/django-server/rag_app/templates/rag_app/search.html
+++ b/django-server/rag_app/templates/rag_app/search.html
@@ -6,7 +6,7 @@
             Vector Database Search
             Empower your queries with state-of-the-art retrieval capabilities
-
+
                 {% csrf_token %}
@@ -16,7 +16,6 @@
             Vector Database Search
                 id="query" name="query" placeholder="Search your DB"
-                value="{{ query }}"
                 required >
@@ -43,32 +42,79 @@
             Vector Database Search
-    {% if submitted %}
-        Search Results
-        {% if data %}
-            {% for result in data %}
-                Content: {{ result.content }}
-                File Name: {{ result.file_name }}
-                Chunk ID: {{ result.chunk_id }}
-                Distance: {{ result.distance }}
-            {% endfor %}
-        {% else %}
-            No results found or an error occurred during retrieval.
-        {% endif %}
- {% endif %} + + + + + {% endblock %} diff --git a/django-server/rag_app/urls.py b/django-server/rag_app/urls.py index f943d6a..9d3ae73 100644 --- a/django-server/rag_app/urls.py +++ b/django-server/rag_app/urls.py @@ -3,7 +3,9 @@ urlpatterns = [ path('', views.home, name='home'), - path('search/', views.search, name='search'), - path('chat/', views.chat_page, name='chat'), - path('chat/stream/', views.chat_stream, name='chat_stream'), + path('search/', views.search_view, name='search'), + path('chat/', views.chat_view, name='chat'), + path('search_documents/', views.search_documents, name='search_documents'), + path('chat_stream/', views.chat_stream, name='chat_stream'), + path('api/chat/', views.chat_api, name='chat_api'), # New API endpoint ] diff --git a/django-server/rag_app/views.py b/django-server/rag_app/views.py index 2c0fc0c..ae77a71 100644 --- a/django-server/rag_app/views.py +++ b/django-server/rag_app/views.py @@ -1,160 +1,116 @@ +from openai import OpenAI +import os +import json +from datetime import datetime + from django.shortcuts import render from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_POST from django.http import StreamingHttpResponse, JsonResponse from django.conf import settings +from .services import ChatService +from config.config_loader import get_embedding_config, get_llm_config -from retrieval.main import ChromaRetriever -from config.embedding_config import model_name, db_directory, collection_name - -from llm.main import Responder, OpenAIResponder -from config.llm_config import llm_model, prompt, use_openai, openai_model, record_data -from .models import ChatLog -from datetime import datetime - -from dotenv import load_dotenv -from openai import OpenAI -import os -import json - - +# Initialize the chat service +chat_service = ChatService() def home(request): - footer_class = 'footer-absolute' - return render(request, 'rag_app/home.html', {'footer_class': footer_class,}) - - -def search(request): - submitted = False - formatted_results = [] + return render(request, 'rag_app/home.html') - if request.method == "POST": - query = request.POST["query"] - n_results = int(request.POST["n_results"]) - submitted = True - - try: - retriever = ChromaRetriever( - embedding_model=model_name, - db_path=db_directory, - db_collection=collection_name, - n_results=n_results - ) - raw_results = retriever.retrieve(query) - - # Process raw results into a template-friendly format - documents = raw_results.get("documents", [[]])[0] - metadatas = raw_results.get("metadatas", [[]])[0] - distances = raw_results.get("distances", [[]])[0] - - for doc, metadata, distance in zip(documents, metadatas, distances): - formatted_results.append({ - "content": doc, - "file_name": metadata.get("file_name", "N/A"), - "chunk_id": metadata.get("chunk_id", "N/A"), - "distance": distance, - }) - - except Exception as e: - print(f"Error during retrieval: {e}") - footer_class = 'footer-flex' if submitted else 'footer-absolute' - - return render( - request, - "rag_app/search.html", - {"data": formatted_results, "submitted": submitted, "query": query, "n_results": n_results, 'footer_class': footer_class,}, - ) - footer_class = 'footer-flex' if submitted else 'footer-absolute' - - return render(request, "rag_app/search.html", {"data": formatted_results, "submitted": submitted, 'footer_class': footer_class,}) - - - -def chat_page(request): - # Renders the chat page with the form and no answers yet - footer_class = 'footer-absolute' - return 
render(request, 'rag_app/chat.html', {'footer_class': footer_class, 'record_data': record_data}) +def search_view(request): + return render(request, 'rag_app/search.html') +def chat_view(request): + return render(request, 'rag_app/chat.html') @csrf_exempt @require_POST +def search_documents(request): + """ + Handle search requests and return relevant documents. + """ + try: + query = request.POST.get('query', '').strip() + number_results = int(request.POST.get('number_results')) + + if not query: + return JsonResponse({'error': 'Query is required'}, status=400) + if not number_results or number_results <= 0: + return JsonResponse({'error': 'number_results is required'}, status=400) + + # Use the service to search documents + search_data = chat_service.search_documents(query, number_results) + + # Format response for the frontend + response_data = { + 'query': query, + 'documents': search_data['documents'], + 'total_results': len(search_data['documents']), + 'formatted_data': search_data['formatted_data'] + } + + return JsonResponse(response_data) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + +@csrf_exempt +@require_POST def chat_stream(request): - user_query = request.POST.get('query', '').strip() - if not user_query: - return JsonResponse({"error": "No query provided"}, status=400) - - # -- 1) Retrieve - retriever = ChromaRetriever( - embedding_model=model_name, - db_path=db_directory, - db_collection=collection_name, - n_results=5 - ) - search_results = retriever.retrieve(user_query) - - formatted_result = retriever.format_results_for_prompt(search_results) - - - doc_list_for_frontend = [] - if search_results and 'documents' in search_results and 'metadatas' in search_results: - for doc, metadata in zip(search_results['documents'][0], search_results['metadatas'][0]): - doc_list_for_frontend.append({ - "file_name": metadata.get('file_name', 'N/A'), - "chunk_id": metadata.get('chunk_id', 'N/A'), - "content": doc - }) - - # -- 2) Initialize an LLM Responder - if use_openai: - load_dotenv(os.path.join(settings.BASE_DIR.parent, '.env')) - openai_client = OpenAI( - api_key=os.environ.get("OPENAI_API_KEY"), + """ + Handle chat requests with streaming responses. 
+ """ + try: + query = request.POST.get('query', '').strip() + + if not query: + return JsonResponse({'error': 'Query is required'}, status=400) + + # Use the service to generate streaming response + llm_config = get_llm_config() + number_docs_response = llm_config['number_docs_response'] + def response_generator(): + for chunk in chat_service.generate_stream_response(query, n_results=number_docs_response): + yield chunk + + response = StreamingHttpResponse( + response_generator(), + content_type='text/plain' ) - responder = OpenAIResponder( - data=formatted_result, - model=openai_model, - prompt_template=prompt, - query=user_query, - cleint=openai_client - ) - else: - responder = Responder( - data=formatted_result, - model=llm_model, - prompt_template=prompt, - query=user_query - ) - - # -- 3) Prepare a streaming generator - def stream_generator(): - full_response = "" - # First yield the LLM's output - for chunk in responder.stream_response_chunks(): - full_response += chunk - yield chunk - - # Yield retrieved documents info to the client - docs_json_str = json.dumps(doc_list_for_frontend) - final_chunk = f"<|DOCS_JSON|>{docs_json_str}" - yield final_chunk - - # Save conversation if flag is True - if record_data: - ChatLog.objects.create( - user_query=user_query, - response=full_response, - ) - - - # After the model output finishes, yield one last "special" chunk - # so the client can parse out the retrieved docs. - # E.g. we prefix with a known marker: "<|DOCS_JSON|>" - docs_json_str = json.dumps(doc_list_for_frontend) - yield f"<|DOCS_JSON|>{docs_json_str}" - - # -- 4) Return the streaming response - return StreamingHttpResponse( - stream_generator(), - content_type='text/plain' - ) + response['Cache-Control'] = 'no-cache' + response['Connection'] = 'keep-alive' + + return response + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + +# New API endpoint for synchronous chat +@csrf_exempt +@require_POST +def chat_api(request): + """ + Handle API chat requests with synchronous responses. + """ + try: + # Parse JSON body for API requests + if request.content_type == 'application/json': + data = json.loads(request.body) + query = data.get('query', '').strip() + n_results = data.get('n_results') + else: + # Fallback to form data + query = request.POST.get('query', '').strip() + n_results = int(request.POST.get('n_results')) + + if not query: + return JsonResponse({'error': 'Query is required'}, status=400) + + # Use the service to generate synchronous response + result = chat_service.generate_sync_response(query, n_results) + + return JsonResponse(result) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) diff --git a/django-server/rag_server/settings.py b/django-server/rag_server/settings.py index 59e54c9..7292f5a 100644 --- a/django-server/rag_server/settings.py +++ b/django-server/rag_server/settings.py @@ -14,6 +14,10 @@ import os import environ +# make sure to load environment variables through load_dotenv in config_loader +import config.config_loader + + # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent @@ -28,13 +32,12 @@ # See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! 
-SECRET_KEY = env('DJANGO_KEY') +SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY', 'default-key-for-development-only-CHANGE-VALUE-IN-ENVIRONMENT-6cgwv5n6vjcpwulv0s') # SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True - -ALLOWED_HOSTS = [] +DEBUG = os.environ.get('DJANGO_DEBUG', 'False') == 'True' +ALLOWED_HOSTS = [host for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',') if host] # Application definition diff --git a/embedding/vector_db_setup.py b/embedding/vector_db_setup.py index d410371..ff4c64e 100644 --- a/embedding/vector_db_setup.py +++ b/embedding/vector_db_setup.py @@ -8,17 +8,25 @@ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(parent_dir) -# Import configurations and utility functions -from config.embedding_config import ( - model_name, - vector_db, - raw_db, - data_language, - db_directory, - chunk_size, - overlap_size, - collection_name -) +# Import configurations using the new config loader +from config.config_loader import get_embedding_config + +# Get the configuration +embedding_config = get_embedding_config() + +# Extract configuration values +model_name = embedding_config['model_name'] +vector_db = embedding_config['vector_db'] +data_language = embedding_config['data_language'] +chunk_size = embedding_config['chunk_size'] +collection_name = embedding_config['collection_name'] +overlap_size = embedding_config['overlap_size'] + +# Extract DB location from environment (defined in .env file) +raw_db = os.environ.get("FRAG_RAW_DB") +db_directory = os.environ.get("FRAG_DB_DIRECTORY") +file_types = os.environ.get("FRAG_FILE_TYPES").split(',') + from embedding.utils import ( get_file_paths, read_text_file, @@ -39,7 +47,7 @@ def main(): print(f"Vector Database is: {vector_db}\n") # Step 1: Load documents (txt and pdf) - file_paths = get_file_paths(raw_db, ["txt", "pdf"]) + file_paths = get_file_paths(raw_db, file_types) print(f"Found {len(file_paths)} files to process.\n") # Initialize embedding model diff --git a/llm/main.py b/llm/main.py index e0eceda..3f67ac5 100644 --- a/llm/main.py +++ b/llm/main.py @@ -102,7 +102,7 @@ class OpenAIResponder: A class to generate responses using the OpenAI API within a RAG framework. """ - def __init__(self, data: str, model: str, prompt_template: str, query: str, cleint: openai.OpenAI) -> None: + def __init__(self, data: str, model: str, prompt_template: str, query: str, client: openai.OpenAI) -> None: """ Initialize the OpenAIResponder instance. @@ -116,7 +116,7 @@ def __init__(self, data: str, model: str, prompt_template: str, query: str, clei self.model = model self.prompt_template = prompt_template self.query = query - self.cleint = cleint + self.client = client # Construct the final prompt self.prompt = prompt_template.format(data=self.data, query=self.query) @@ -130,7 +130,7 @@ def generate_response(self) -> str: """ try: # For ChatCompletion API - response = self.cleint.chat.completions.create( + response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are a RAG system."}, @@ -150,7 +150,7 @@ def stream_response(self): Prints chunks to stdout as they arrive. """ try: - response_stream = self.cleint.chat.completions.create( + response_stream = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are a RAG system."}, @@ -174,7 +174,7 @@ def stream_response_chunks(self): Returns a generator that yields chunks of the response text. 
""" try: - response_stream = self.cleint.chat.completions.create( + response_stream = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are a RAG system."}, diff --git a/readme.md b/readme.md index 0c9a1cc..4084091 100644 --- a/readme.md +++ b/readme.md @@ -12,6 +12,68 @@ Contributions, feedback, and suggestions are welcome as we work toward a stable This repository contains a Retrieval-Augmented Generation (RAG) framework for efficient information retrieval and natural language generation. The framework supports both Ollama (running local, open-source LLMs) and OpenAI (for cloud-based LLMs like gpt-3.5-turbo, gpt-4, etc.) +## Repository Structure + +```plaintext +. +├── LICENSE +├── cl-tools +│ ├── __init__.py +│ ├── chat.py +│ └── search.py +├── config +│ ├── __init__.py +│ ├── config_loader.py +│ ├── embedding_config.yaml +│ ├── embedding_config.example.yaml +│ ├── llm_config.yaml +│ └── llm_config.example.yaml +├── django-server +│ ├── manage.py +│ ├── rag_app +│ │ ├── admin.py +│ │ ├── apps.py +│ │ ├── migrations +│ │ │ └── __init__.py +│ │ ├── models.py +│ │ ├── static +│ │ │ └── rag_app +│ │ │ ├── css +│ │ │ │ └── styles.css +│ │ │ └── images +│ │ │ └── frag.jpg +│ │ ├── templates +│ │ │ └── rag_app +│ │ │ ├── base.html +│ │ │ ├── chat.html +│ │ │ ├── home.html +│ │ │ └── search.html +│ │ ├── tests.py +│ │ ├── urls.py +│ │ └── views.py +│ └── rag_server +│ ├── asgi.py +│ ├── settings.py +│ ├── urls.py +│ └── wsgi.py +├── .env +├── .env_example +├── docs +│ └── diagram +│ └── RAG_Framework.svg +├── embedding +│ ├── __init__.py +│ ├── utils.py +│ └── vector_db_setup.py +├── llm +│ └── main.py +├── readme.md +├── requirement.txt +├── retrieval +│ └── main.py +└── tests +``` + ## How to Get Started @@ -42,95 +104,113 @@ This repository contains a Retrieval-Augmented Generation (RAG) framework for ef pip install -r requirement.txt ``` -4. **Add a vector database:** - - * Edit the file `config/embedding_config.py` where you can add the path for the data that needs to be used for the embedding vector database. - - Here's a breakdown of the editable parameters in the file: - - * `model_name`: This specifies the pre-trained model used for creating the embedding vectors. The example shows `"Lajavaness/bilingual-embedding-large"`, but you can choose a different model name depending on your needs. - * `vector_db`: This defines the type of vector database to use. Currently, only `'chromaDB'` is supported. - * `collection_name`: This specifies the name of the collection within the vector database where the embeddings will be stored. You can choose a name that suits your project. - * `raw_db`: This is the root directory where your raw documents are stored. Edit this path to point to your actual data location. For example: `raw_db = "/path/to/my/data"` - * `data_language`: This specifies the language of your data. The file provides a list of supported languages. Choose the one that matches your data. - * `db_directory`: This defines the location where the vector database will be stored. By default, it's set to the user's home directory under a `.db` folder. You can change this path to a different location if needed. - * `chunk_size`: This determines the number of sentences processed together when creating the vector database. You can adjust this value based on your data size and hardware capabilities. - * `overlap_size`: This determines the number of sentences overlaped between the chunk and the next chunk. 
This is useful to not lose semantic of chunks when splitting the text. The value must be lower than the chunk_size. - - Example `embedding_config.py` (Remember to adapt these values to your specific setup): - - ```python - import os +4. **Configure your environment:** - model_name = "Lajavaness/bilingual-embedding-large" - - vector_db = "chromaDB" - - collection_name = "my_rag_collection" - - raw_db = "/path/to/my/data" # Replace with the actual path + * Copy .env_example to .env and set your environment-specific paths: + + ```bash + cp .env_example .env + ``` + + * Edit the .env file to set your data paths: + + ```bash + # Database and file paths + FRAG_RAW_DB=/path/to/your/actual/data + FRAG_DB_DIRECTORY=/path/to/your/actual/database + # files to be indexed. Only pdf and txt are supported + FRAG_FILE_TYPES=pdf,txt + + # API keys + OPENAI_API_KEY=your_openai_api_key + ``` - data_language = "english" +5. **Configure your embedding settings:** - db_directory = os.path.join(os.path.expanduser('~'), '.my_rag_db') + * Copy the example configuration: + + ```bash + cp config/embedding_config.example.yaml config/embedding_config.yaml + ``` + + * Edit `config/embedding_config.yaml` where you can configure the embedding model and vector database settings. - chunk_size = 20 + Here's a breakdown of the editable parameters: - overlap_size = 5 - ``` + * `model_name`: This specifies the pre-trained model used for creating the embedding vectors + * `vector_db`: This defines the type of vector database to use. Currently, only `'chromaDB'` is supported + * `collection_name`: This specifies the name of the collection within the vector database + * `data_language`: This specifies the language of your data (e.g., "english", "french", etc.) Choose the one that matches your data. + * `chunk_size`: This determines the number of sentences processed together when creating the vector database. You can adjust this value based on your data size and hardware capabilities. + * `overlap_size`: This determines the number of sentences overlaped between the chunk and the next chunk. This is useful to not lose semantic of chunks when splitting the text. The value must be lower than the chunk_size. -5. **Create vector database:** +6. **Create vector database:** - * After setting the data paths in `embedding_config.py`, run the following command to create the vector database: + * After setting up your configuration, run the following command to create the vector database: ```bash python embedding/vector_db_setup.py ``` - * This will create a Chroma vector database using the configurations you provided. - -6. **LLM configurations (ollama or openai):** - - * The file `config/llm_config.py` allows you to configure the large language model (LLM) used for text generation. You can specify the LLM and potentially edit the prompts used for generating text. This config file also allows you to choose between running ollama or openai api models. You can also choose to record chat log of users with the record_data variable. Here is an example file: - ```python - llm_model = 'deepseek-r1:1.5b' # select any model available on the ollama site https://ollama.com/search + * This will create a Chroma vector database using the configurations you provided. - use_openai = False # set to True if using openai api and then select 'openai_model' variable +7. 
**Configure the LLM:** - openai_model = 'gpt-4o' # if using openai api then select which model to use - - prompt = """ - DOCUMENTS: \n - {data} - \n - \n - QUESTION: - {query} - \n - \n - INSTRUCTIONS: - Answer the users QUESTION using the DOCUMENTS text above. - Keep your answer ground in the facts of the DOCUMENT. - If the DOCUMENT doesn’t contain the facts to answer the QUESTION return NO Answer found - """ - - record_data = False # set to true to record chat log + * Copy the example configuration: + + ```bash + cp config/llm_config.example.yaml config/llm_config.yaml + ``` + + * Edit `config/llm_config.yaml` to configure the large language model settings: + + ```yaml + # LLM model to use with Ollama + llm_model: "deepseek-r1:1.5b" + + # Whether to use OpenAI (true) or Ollama (false) + use_openai: false + + # OpenAI model to use if use_openai is true + openai_model: "gpt-4o" + + # Prompt template for the RAG system + prompt: | + DOCUMENTS: + + {data} + + + QUESTION: + {query} + + + INSTRUCTIONS: + Answer the users QUESTION using the DOCUMENTS text above. + Keep your answer ground in the facts of the DOCUMENT. + If the DOCUMENT doesn't contain the facts to answer the QUESTION return NO Answer found ``` -7. **Run the system:** +8. **Run the system:** - * Once the vector database is created, you can run the chat and search functionalities using either the Django web app or the command-line tools. + * Once the vector database is created, you can run the chat and search functionalities using either the Django web app or the command-line tools. - * To run the Django app: + * To run the Django app: ```bash python django-server/manage.py runserver ``` - * This will start the Django development server, allowing you to access the web interface for chat and search (usually at `http://127.0.0.1:8000/` in your web browser). + * This will start the Django development server, allowing you to access the web interface for chat and search (usually at `http://127.0.0.1:8000/` in your web browser). - * To use the command-line tools: + * To use the command-line tools: - * The functionalities are likely defined in the `cl-tools` directory (chat.py and search.py). You can refer to those files to understand how to use the command-line interface for chat and search. 
+ ```bash + # For chat functionality + python cl-tools/chat.py + + # For search functionality with 5 results + python cl-tools/search.py --number-results 5 + ``` ![RAG Framework Diagram](docs/diagram/RAG_Framework.svg) \ No newline at end of file diff --git a/retrieval/simple_query.py b/retrieval/simple_query.py deleted file mode 100644 index 7b3a3ee..0000000 --- a/retrieval/simple_query.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -import sys -import chromadb -from sentence_transformers import SentenceTransformer - - -# Add the parent directory to sys.path -parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(parent_dir) - - -from config.embedding_config import model_name, db_directory, collection_name - - -# Initialize the ChromaDB persistent client -client = chromadb.PersistentClient(path=db_directory) - -# Get the collection -collection = client.get_collection(name=collection_name) - -# Define your query text -query_text = "I am looking for books about war" - -# Initialize the embedding model (same as used during indexing) -embedding_model = SentenceTransformer(model_name) - -# Embed the query text -query_embedding = embedding_model.encode(query_text) - -# Perform the query -results = collection.query( - query_embeddings=[query_embedding], - n_results=5 # Number of results to retrieve -) - -# Print out the results -print("\n--- Query Results ---\n") -for idx, (doc, metadata, distance) in enumerate(zip(results['documents'][0], results['metadatas'][0], results['distances'][0])): - print(f"Result {idx + 1}:") - print(f"Document ID: {metadata.get('chunk_id', 'N/A')}") - print(f"File Name: {metadata.get('file_name', 'N/A')}") - print(f"Distance: {distance}") - print(f"Content:\n{doc}\n") - print("-" * 80)