From 0766103b9a3e373cee00aec3607f48547a46bc45 Mon Sep 17 00:00:00 2001
From: Marco Minerva <marco.minerva@gmail.com>
Date: Wed, 4 Jun 2025 11:42:24 +0200
Subject: [PATCH] Refactor ChatService and VectorSearchService parameters

Updated parameter types in ChatService and VectorSearchService from IEnumerable<string> to IEnumerable<Entities.DocumentChunk> for better structure. Enhanced citation formatting rules in ChatService. Increased MaxRelevantChunks and MaxInputTokens in appsettings.json to improve processing capabilities.
---
 .../Services/ChatService.cs                   | 53 ++++++++++++++++---
 .../Services/VectorSearchService.cs           |  5 +-
 SqlDatabaseVectorSearch/appsettings.json      |  4 +-
 3 files changed, 50 insertions(+), 12 deletions(-)
diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs
index 049c08f..4e8d0fa 100644
--- a/SqlDatabaseVectorSearch/Services/ChatService.cs
+++ b/SqlDatabaseVectorSearch/Services/ChatService.cs
@@ -7,6 +7,7 @@ using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
 using OpenAI.Chat;
 using SqlDatabaseVectorSearch.Models;
 using SqlDatabaseVectorSearch.Settings;
+using Entities = SqlDatabaseVectorSearch.Data.Entities;
 
 namespace SqlDatabaseVectorSearch.Services;
 
@@ -41,7 +42,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
         return new(reformulatedQuestion.Content!, tokenUsage);
     }
 
-    public async Task<ChatResponse> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question, CancellationToken cancellationToken = default)
+    public async Task<ChatResponse> AskQuestionAsync(Guid conversationId, IEnumerable<Entities.DocumentChunk> chunks, string question, CancellationToken cancellationToken = default)
     {
         var chat = CreateChatAsync(chunks, question);
 
@@ -59,7 +60,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
         return new(answer.Content!, tokenUsage);
     }
 
-    public async IAsyncEnumerable<ChatResponse> AskStreamingAsync(Guid conversationId, IEnumerable<string> chunks, string question, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<ChatResponse> AskStreamingAsync(Guid conversationId, IEnumerable<Entities.DocumentChunk> chunks, string question, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
         var chat = CreateChatAsync(chunks, question);
 
@@ -110,19 +111,57 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
         return null;
     }
 
-    private ChatHistory CreateChatAsync(IEnumerable<string> chunks, string question)
+    private ChatHistory CreateChatAsync(IEnumerable<Entities.DocumentChunk> chunks, string question)
     {
         var chat = new ChatHistory("""
             You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question.
+
             For example, if the user asks "What is the capital of France?" and in this chat there isn't information about France, you should reply something like:
             - This information isn't available in the given context
             - I'm sorry, I don't know the answer to that question
             - I don't have that information
             - I don't know
             - Given the context, I can't answer that question
-            - I'my sorry, I don't have enough information to answer that question
-            Never answer to questions that are not related to this chat.
-            You must answer in the same language of the user's question. For example, it the user asks a question in English, the answer must be in English.
+            - I'm sorry, I don't have enough information to answer that question
+
+            Never answer questions that are not related to this chat.
+            You must answer in the same language as the user's question.
+
+            IMPORTANT - CITATION PLACEMENT AND LENGTH:
+            The quote in each <citation> MUST be MAXIMUM 5 words, taken word-for-word from the search result. If the quote is longer than 5 words, your answer is INVALID.
+            When you find an answer, you MUST place ALL citations ONLY at the very end of your response, never inside or between sentences.
+            First provide your complete answer, then add a blank line, then list all citations.
+            
+            Use this XML format for citations:
+            <citation filename='string' page_number='1'>exact quote here</citation>
+
+            STRICT RULES for citations:
+            - Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line.
+            - If you include citations anywhere except at the end, your answer is WRONG and INVALID.
+            - Always include the citation(s) if there are results. If you don't know the answer, do NOT include citations.
+            - The quote must be max 5 words, taken word-for-word from the search result, and is the basis for why the citation is relevant. If the quote is longer than 5 words, your answer is INVALID.
+            - Do NOT refer to the presence of citations; just emit these tags right at the end, with no surrounding text.
+            - The citations must always be in a list at the end of the response, one after the other. Never add the citations between the actual response text or inside sentences.
+            - Do NOT add any text after the citations.
+            - ALWAYS leave a blank line between your answer and the first citation.
+
+            Examples (CORRECT):
+            Here is my complete answer to your question. I'm providing all the information based on the context.
+
+            <citation filename='doc1.pdf' page_number='1'>Paris is the capital</citation>
+            <citation filename='doc2.pdf' page_number='2'>largest city in France</citation>
+
+            Examples (WRONG):
+            Here is my answer <citation filename='doc1.pdf' page_number='1'>Paris is the capital of France and is known for the Eiffel Tower</citation> with more text.
+            <citation filename='doc1.pdf' page_number='1'>Paris is the capital of France and is known for the Eiffel Tower</citation> Here is my answer.
+            Here is my answer. (without any citations when information is available)
+            Here is my answer.
+            <citation filename='doc1.pdf' page_number='1'>Paris is the capital of France and is known for the Eiffel Tower</citation> More answer text.
+
+            YOU MUST SEPARATE YOUR ANSWER FROM CITATIONS WITH A BLANK LINE.
+            NEVER INSERT CITATIONS WITHIN YOUR ANSWER TEXT.
+            CITATIONS MUST ONLY APPEAR AT THE END, AFTER A BLANK LINE.
+            IF YOU DO NOT FOLLOW THESE RULES, YOUR RESPONSE IS INVALID.
             """);
 
         var prompt = new StringBuilder($"""
@@ -141,7 +180,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
 
         foreach (var chunk in chunks)
         {
-            var text = $"---{Environment.NewLine}{chunk}";
+            var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
 
             var tokenCount = tokenizerService.CountChatCompletionTokens(text);
             if (tokenCount > availableTokens)
diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
index d2819ac..beda3cb 100644
--- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
+++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
@@ -96,7 +96,7 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
         }
     }
 
-    private async Task<(ChatResponse ReformulatedQuestion, int EmbeddingTokenCount, IEnumerable<string> Chunks)> CreateContextAsync(Question question, bool reformulate, CancellationToken cancellationToken)
+    private async Task<(ChatResponse ReformulatedQuestion, int EmbeddingTokenCount, IEnumerable<Entities.DocumentChunk> Chunks)> CreateContextAsync(Question question, bool reformulate, CancellationToken cancellationToken)
     {
         // Reformulate the question taking into account the context of the chat to perform keyword search and embeddings.
         var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text, cancellationToken) : new(question.Text);
@@ -107,9 +107,8 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
         // Perform Vector Search on SQL Database.
         var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
 
-        var chunks = await dbContext.DocumentChunks
+        var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
                     .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
-                    .Select(c => c.Content)
                     .Take(appSettings.MaxRelevantChunks)
                     .ToListAsync(cancellationToken);
 
diff --git a/SqlDatabaseVectorSearch/appsettings.json b/SqlDatabaseVectorSearch/appsettings.json
index 52ff3cf..10a1967 100644
--- a/SqlDatabaseVectorSearch/appsettings.json
+++ b/SqlDatabaseVectorSearch/appsettings.json
@@ -23,8 +23,8 @@
         "MaxTokensPerLine": 300,
         "MaxTokensPerParagraph": 1000,
         "OverlapTokens": 100,
-        "MaxRelevantChunks": 10,
-        "MaxInputTokens": 16384,
+        "MaxRelevantChunks": 50,
+        "MaxInputTokens": 32768,
         "MaxOutputTokens": 800,
         "MessageExpiration": "00:05:00",
         "MessageLimit": 20