From 0766103b9a3e373cee00aec3607f48547a46bc45 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Wed, 4 Jun 2025 11:42:24 +0200 Subject: [PATCH] Refactor ChatService and VectorSearchService parameters Updated parameter types in ChatService and VectorSearchService from IEnumerable<string> to IEnumerable<Entities.DocumentChunk> for better structure. Enhanced citation formatting rules in ChatService. Increased MaxRelevantChunks and MaxInputTokens in appsettings.json to improve processing capabilities. --- .../Services/ChatService.cs | 53 ++++++++++++++++--- .../Services/VectorSearchService.cs | 5 +- SqlDatabaseVectorSearch/appsettings.json | 4 +- 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index 049c08f..4e8d0fa 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -7,6 +7,7 @@ using Microsoft.SemanticKernel.Connectors.AzureOpenAI; using OpenAI.Chat; using SqlDatabaseVectorSearch.Models; using SqlDatabaseVectorSearch.Settings; +using Entities = SqlDatabaseVectorSearch.Data.Entities; namespace SqlDatabaseVectorSearch.Services; @@ -41,7 +42,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer return new(reformulatedQuestion.Content!, tokenUsage); } - public async Task AskQuestionAsync(Guid conversationId, IEnumerable chunks, string question, CancellationToken cancellationToken = default) + public async Task AskQuestionAsync(Guid conversationId, IEnumerable chunks, string question, CancellationToken cancellationToken = default) { var chat = CreateChatAsync(chunks, question); @@ -59,7 +60,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer return new(answer.Content!, tokenUsage); } - public async IAsyncEnumerable AskStreamingAsync(Guid conversationId, IEnumerable chunks, string question, [EnumeratorCancellation] CancellationToken cancellationToken = default) 
+ public async IAsyncEnumerable AskStreamingAsync(Guid conversationId, IEnumerable chunks, string question, [EnumeratorCancellation] CancellationToken cancellationToken = default) { var chat = CreateChatAsync(chunks, question); @@ -110,19 +111,57 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer return null; } - private ChatHistory CreateChatAsync(IEnumerable chunks, string question) + private ChatHistory CreateChatAsync(IEnumerable chunks, string question) { var chat = new ChatHistory(""" You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question. + For example, if the user asks "What is the capital of France?" and in this chat there isn't information about France, you should reply something like: - This information isn't available in the given context - I'm sorry, I don't know the answer to that question - I don't have that information - I don't know - Given the context, I can't answer that question - - I'my sorry, I don't have enough information to answer that question - Never answer to questions that are not related to this chat. - You must answer in the same language of the user's question. For example, it the user asks a question in English, the answer must be in English. + - I'm sorry, I don't have enough information to answer that question + + Never answer questions that are not related to this chat. + You must answer in the same language as the user's question. + + IMPORTANT - CITATION PLACEMENT AND LENGTH: + The quote in each MUST be MAXIMUM 5 words, taken word-for-word from the search result. If the quote is longer than 5 words, your answer is INVALID. + When you find an answer, you MUST place ALL citations ONLY at the very end of your response, never inside or between sentences. + First provide your complete answer, then add a blank line, then list all citations. 
+ + Use this XML format for citations: + exact quote here + + STRICT RULES for citations: + - Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line. + - If you include citations anywhere except at the end, your answer is WRONG and INVALID. + - Always include the citation(s) if there are results. If you don't know the answer, do NOT include citations. + - The quote must be max 5 words, taken word-for-word from the search result, and is the basis for why the citation is relevant. If the quote is longer than 5 words, your answer is INVALID. + - Do NOT refer to the presence of citations; just emit these tags right at the end, with no surrounding text. + - The citations must always be in a list at the end of the response, one after the other. Never add the citations between the actual response text or inside sentences. + - Do NOT add any text after the citations. + - ALWAYS leave a blank line between your answer and the first citation. + + Examples (CORRECT): + Here is my complete answer to your question. I'm providing all the information based on the context. + + Paris is the capital + largest city in France + + Examples (WRONG): + Here is my answer Paris is the capital of France and is known for the Eiffel Tower with more text. + Paris is the capital of France and is known for the Eiffel Tower Here is my answer. + Here is my answer. (without any citations when information is available) + Here is my answer. + Paris is the capital of France and is known for the Eiffel Tower More answer text. + + YOU MUST SEPARATE YOUR ANSWER FROM CITATIONS WITH A BLANK LINE. + NEVER INSERT CITATIONS WITHIN YOUR ANSWER TEXT. + CITATIONS MUST ONLY APPEAR AT THE END, AFTER A BLANK LINE. + IF YOU DO NOT FOLLOW THESE RULES, YOUR RESPONSE IS INVALID. 
"""); var prompt = new StringBuilder($""" @@ -141,7 +180,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer foreach (var chunk in chunks) { - var text = $"---{Environment.NewLine}{chunk}"; + var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}"; var tokenCount = tokenizerService.CountChatCompletionTokens(text); if (tokenCount > availableTokens) diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs index d2819ac..beda3cb 100644 --- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs +++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs @@ -96,7 +96,7 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb } } - private async Task<(ChatResponse ReformulatedQuestion, int EmbeddingTokenCount, IEnumerable Chunks)> CreateContextAsync(Question question, bool reformulate, CancellationToken cancellationToken) + private async Task<(ChatResponse ReformulatedQuestion, int EmbeddingTokenCount, IEnumerable Chunks)> CreateContextAsync(Question question, bool reformulate, CancellationToken cancellationToken) { // Reformulate the question taking into account the context of the chat to perform keyword search and embeddings. var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text, cancellationToken) : new(question.Text); @@ -107,9 +107,8 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb // Perform Vector Search on SQL Database. 
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken); - var chunks = await dbContext.DocumentChunks + var chunks = await dbContext.DocumentChunks.Include(c => c.Document) .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray())) - .Select(c => c.Content) .Take(appSettings.MaxRelevantChunks) .ToListAsync(cancellationToken); diff --git a/SqlDatabaseVectorSearch/appsettings.json b/SqlDatabaseVectorSearch/appsettings.json index 52ff3cf..10a1967 100644 --- a/SqlDatabaseVectorSearch/appsettings.json +++ b/SqlDatabaseVectorSearch/appsettings.json @@ -23,8 +23,8 @@ "MaxTokensPerLine": 300, "MaxTokensPerParagraph": 1000, "OverlapTokens": 100, - "MaxRelevantChunks": 10, - "MaxInputTokens": 16384, + "MaxRelevantChunks": 50, + "MaxInputTokens": 32768, "MaxOutputTokens": 800, "MessageExpiration": "00:05:00", "MessageLimit": 20