From fa58e02709f419292cede590e686d38321af626f Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 17 Jun 2024 11:58:30 +0200 Subject: [PATCH] Refactor and enhance config management Refactored code to centralize configuration access through a single `AppSettings` instance in `ChatService` and `VectorSearchService`, improving maintainability and reducing verbosity. Introduced new configuration settings (`MaxTokensPerLine`, `MaxTokensPerParagraph`, `OverlapTokens`, `MaxChunksCount`) in `AppSettings.cs` and `appsettings.json` for enhanced flexibility in content processing. Adjusted existing settings usage (`MessageLimit`, `MessageExpiration`) to align with the new access method, and removed obsolete settings (`StoragePath`, `VectorDbPath`, `QueuePath`). These changes simplify the codebase, make the application more configurable and adaptable to different content characteristics, and allow for more controlled vector search operations. --- SqlDatabaseVectorSearch/Services/ChatService.cs | 8 +++++--- .../Services/VectorSearchService.cs | 12 ++++++++---- SqlDatabaseVectorSearch/Settings/AppSettings.cs | 14 ++++++++------ SqlDatabaseVectorSearch/appsettings.json | 4 ++++ 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index e39407e..7e47070 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -9,6 +9,8 @@ namespace SqlDatabaseVectorSearch.Services; public class ChatService(IMemoryCache cache, IChatCompletionService chatCompletionService, IOptions<AppSettings> appSettingsOptions) { + private readonly AppSettings appSettings = appSettingsOptions.Value; + public async Task CreateQuestionAsync(Guid conversationId, string question) { var chat = new ChatHistory(cache.Get(conversationId) ?? 
[]); @@ -77,12 +79,12 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti private Task UpdateCacheAsync(Guid conversationId, ChatHistory chat) { - if (chat.Count > appSettingsOptions.Value.MessageLimit) + if (chat.Count > appSettings.MessageLimit) { - chat = new ChatHistory(chat.TakeLast(appSettingsOptions.Value.MessageLimit)); + chat = new ChatHistory(chat.TakeLast(appSettings.MessageLimit)); } - cache.Set(conversationId, chat, appSettingsOptions.Value.MessageExpiration); + cache.Set(conversationId, chat, appSettings.MessageExpiration); return Task.CompletedTask; } } diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs index eb81657..8870ae4 100644 --- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs +++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs @@ -1,17 +1,21 @@ using System.Text; using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Options; using Microsoft.SemanticKernel.Embeddings; using Microsoft.SemanticKernel.Text; using SqlDatabaseVectorSearch.DataAccessLayer; using SqlDatabaseVectorSearch.Models; +using SqlDatabaseVectorSearch.Settings; using UglyToad.PdfPig; using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor; using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities; namespace SqlDatabaseVectorSearch.Services; -public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, ChatService chatService) +public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, ChatService chatService, IOptions<AppSettings> appSettingsOptions) { + private readonly AppSettings appSettings = appSettingsOptions.Value; + public async Task ImportAsync(Stream stream, string name, Guid? documentId) { // Extract the contents of the file (current, only PDF are supported). 
@@ -31,8 +35,8 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG var document = new Entities.Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow }; dbContext.Documents.Add(document); - // Split the content into chunks of at most 1024 tokens and generate the embeddings for each one. - var paragraphs = TextChunker.SplitPlainTextParagraphs(TextChunker.SplitPlainTextLines(content, 300), 1024, 100); + // Split the content into chunks and generate the embeddings for each one. + var paragraphs = TextChunker.SplitPlainTextParagraphs(TextChunker.SplitPlainTextLines(content, appSettings.MaxTokensPerLine), appSettings.MaxTokensPerParagraph, appSettings.OverlapTokens); var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(paragraphs); foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray()))) @@ -70,7 +74,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG var chunks = await dbContext.DocumentChunks .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray())) - .Take(5) + .Take(appSettings.MaxChunksCount) .ToListAsync(); var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion); diff --git a/SqlDatabaseVectorSearch/Settings/AppSettings.cs b/SqlDatabaseVectorSearch/Settings/AppSettings.cs index e30b9c9..fcd9103 100644 --- a/SqlDatabaseVectorSearch/Settings/AppSettings.cs +++ b/SqlDatabaseVectorSearch/Settings/AppSettings.cs @@ -2,13 +2,15 @@ public class AppSettings { + public int MaxTokensPerLine { get; init; } = 300; + + public int MaxTokensPerParagraph { get; init; } = 1024; + + public int OverlapTokens { get; init; } = 100; + + public int MaxChunksCount { get; init; } = 5; + public int MessageLimit { get; init; } public TimeSpan MessageExpiration { get; init; } - - public required string StoragePath { get; init; } - - public required 
string VectorDbPath { get; init; } - - public required string QueuePath { get; init; } } diff --git a/SqlDatabaseVectorSearch/appsettings.json b/SqlDatabaseVectorSearch/appsettings.json index 84b9ab8..93786d3 100644 --- a/SqlDatabaseVectorSearch/appsettings.json +++ b/SqlDatabaseVectorSearch/appsettings.json @@ -15,6 +15,10 @@ } }, "AppSettings": { + "MaxTokensPerLine": 300, + "MaxTokensPerParagraph": 1024, + "OverlapTokens": 100, + "MaxChunksCount": 5, "MessageLimit": 20, "MessageExpiration": "00:05:00" },