mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Refactor and enhance config management
Refactored code to centralize configuration access through a single `AppSettings` instance in `ChatService` and `VectorSearchService`, improving maintainability and reducing verbosity. Introduced new configuration settings (`MaxTokensPerLine`, `MaxTokensPerParagraph`, `OverlapTokens`, `MaxChunksCount`) in `AppSettings.cs` and `appsettings.json` for enhanced flexibility in content processing. Adjusted existing settings usage (`MessageLimit`, `MessageExpiration`) to align with the new access method, and removed obsolete settings (`StoragePath`, `VectorDbPath`, `QueuePath`). These changes simplify the codebase, make the application more configurable and adaptable to different content characteristics, and allow for more controlled vector search operations.
This commit is contained in:
@@ -9,6 +9,8 @@ namespace SqlDatabaseVectorSearch.Services;
|
|||||||
|
|
||||||
public class ChatService(IMemoryCache cache, IChatCompletionService chatCompletionService, IOptions<AppSettings> appSettingsOptions)
|
public class ChatService(IMemoryCache cache, IChatCompletionService chatCompletionService, IOptions<AppSettings> appSettingsOptions)
|
||||||
{
|
{
|
||||||
|
private readonly AppSettings appSettings = appSettingsOptions.Value;
|
||||||
|
|
||||||
public async Task<string> CreateQuestionAsync(Guid conversationId, string question)
|
public async Task<string> CreateQuestionAsync(Guid conversationId, string question)
|
||||||
{
|
{
|
||||||
var chat = new ChatHistory(cache.Get<ChatHistory?>(conversationId) ?? []);
|
var chat = new ChatHistory(cache.Get<ChatHistory?>(conversationId) ?? []);
|
||||||
@@ -77,12 +79,12 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti
|
|||||||
|
|
||||||
private Task UpdateCacheAsync(Guid conversationId, ChatHistory chat)
|
private Task UpdateCacheAsync(Guid conversationId, ChatHistory chat)
|
||||||
{
|
{
|
||||||
if (chat.Count > appSettingsOptions.Value.MessageLimit)
|
if (chat.Count > appSettings.MessageLimit)
|
||||||
{
|
{
|
||||||
chat = new ChatHistory(chat.TakeLast(appSettingsOptions.Value.MessageLimit));
|
chat = new ChatHistory(chat.TakeLast(appSettings.MessageLimit));
|
||||||
}
|
}
|
||||||
|
|
||||||
cache.Set(conversationId, chat, appSettingsOptions.Value.MessageExpiration);
|
cache.Set(conversationId, chat, appSettings.MessageExpiration);
|
||||||
return Task.CompletedTask;
|
return Task.CompletedTask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,17 +1,21 @@
|
|||||||
using System.Text;
|
using System.Text;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
using Microsoft.SemanticKernel.Embeddings;
|
using Microsoft.SemanticKernel.Embeddings;
|
||||||
using Microsoft.SemanticKernel.Text;
|
using Microsoft.SemanticKernel.Text;
|
||||||
using SqlDatabaseVectorSearch.DataAccessLayer;
|
using SqlDatabaseVectorSearch.DataAccessLayer;
|
||||||
using SqlDatabaseVectorSearch.Models;
|
using SqlDatabaseVectorSearch.Models;
|
||||||
|
using SqlDatabaseVectorSearch.Settings;
|
||||||
using UglyToad.PdfPig;
|
using UglyToad.PdfPig;
|
||||||
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
|
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
|
||||||
using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
|
using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
|
||||||
|
|
||||||
namespace SqlDatabaseVectorSearch.Services;
|
namespace SqlDatabaseVectorSearch.Services;
|
||||||
|
|
||||||
public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, ChatService chatService)
|
public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, ChatService chatService, IOptions<AppSettings> appSettingsOptions)
|
||||||
{
|
{
|
||||||
|
private readonly AppSettings appSettings = appSettingsOptions.Value;
|
||||||
|
|
||||||
public async Task<Guid> ImportAsync(Stream stream, string name, Guid? documentId)
|
public async Task<Guid> ImportAsync(Stream stream, string name, Guid? documentId)
|
||||||
{
|
{
|
||||||
// Extract the contents of the file (currently, only PDF files are supported).
|
// Extract the contents of the file (currently, only PDF files are supported).
|
||||||
@@ -31,8 +35,8 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
|||||||
var document = new Entities.Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow };
|
var document = new Entities.Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow };
|
||||||
dbContext.Documents.Add(document);
|
dbContext.Documents.Add(document);
|
||||||
|
|
||||||
// Split the content into chunks of at most 1024 tokens and generate the embeddings for each one.
|
// Split the content into chunks and generate the embeddings for each one.
|
||||||
var paragraphs = TextChunker.SplitPlainTextParagraphs(TextChunker.SplitPlainTextLines(content, 300), 1024, 100);
|
var paragraphs = TextChunker.SplitPlainTextParagraphs(TextChunker.SplitPlainTextLines(content, appSettings.MaxTokensPerLine), appSettings.MaxTokensPerParagraph, appSettings.OverlapTokens);
|
||||||
var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(paragraphs);
|
var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(paragraphs);
|
||||||
|
|
||||||
foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray())))
|
foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray())))
|
||||||
@@ -70,7 +74,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
|||||||
|
|
||||||
var chunks = await dbContext.DocumentChunks
|
var chunks = await dbContext.DocumentChunks
|
||||||
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
|
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
|
||||||
.Take(5)
|
.Take(appSettings.MaxChunksCount)
|
||||||
.ToListAsync();
|
.ToListAsync();
|
||||||
|
|
||||||
var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion);
|
var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion);
|
||||||
|
|||||||
@@ -2,13 +2,15 @@
|
|||||||
|
|
||||||
public class AppSettings
|
public class AppSettings
|
||||||
{
|
{
|
||||||
|
public int MaxTokensPerLine { get; init; } = 300;
|
||||||
|
|
||||||
|
public int MaxTokensPerParagraph { get; init; } = 1024;
|
||||||
|
|
||||||
|
public int OverlapTokens { get; init; } = 100;
|
||||||
|
|
||||||
|
public int MaxChunksCount { get; init; } = 5;
|
||||||
|
|
||||||
public int MessageLimit { get; init; }
|
public int MessageLimit { get; init; }
|
||||||
|
|
||||||
public TimeSpan MessageExpiration { get; init; }
|
public TimeSpan MessageExpiration { get; init; }
|
||||||
|
|
||||||
public required string StoragePath { get; init; }
|
|
||||||
|
|
||||||
public required string VectorDbPath { get; init; }
|
|
||||||
|
|
||||||
public required string QueuePath { get; init; }
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"AppSettings": {
|
"AppSettings": {
|
||||||
|
"MaxTokensPerLine": 300,
|
||||||
|
"MaxTokensPerParagraph": 1024,
|
||||||
|
"OverlapTokens": 100,
|
||||||
|
"MaxChunksCount": 5,
|
||||||
"MessageLimit": 20,
|
"MessageLimit": 20,
|
||||||
"MessageExpiration": "00:05:00"
|
"MessageExpiration": "00:05:00"
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user