mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Refactor document operations into DocumentService
Refactored Program.cs to use AddAzureSql with new options. Added VectorSearchService and DocumentService as scoped services. Updated documentsApiGroup to use DocumentService for document operations and added a delete document endpoint. Moved document-related methods from VectorSearchService to new DocumentService for better separation of concerns.
This commit is contained in:
@@ -26,7 +26,7 @@ builder.Services.ConfigureHttpJsonOptions(options =>
|
|||||||
|
|
||||||
builder.Services.AddSingleton(TimeProvider.System);
|
builder.Services.AddSingleton(TimeProvider.System);
|
||||||
|
|
||||||
builder.Services.AddSqlServer<ApplicationDbContext>(builder.Configuration.GetConnectionString("SqlConnection"), options =>
|
builder.Services.AddAzureSql<ApplicationDbContext>(builder.Configuration.GetConnectionString("SqlConnection"), options =>
|
||||||
{
|
{
|
||||||
options.UseVectorSearch();
|
options.UseVectorSearch();
|
||||||
}, options =>
|
}, options =>
|
||||||
@@ -56,7 +56,9 @@ builder.Services.AddKernel()
|
|||||||
builder.Services.AddSingleton<TextChunkerService>();
|
builder.Services.AddSingleton<TextChunkerService>();
|
||||||
builder.Services.AddSingleton<TokenizerService>();
|
builder.Services.AddSingleton<TokenizerService>();
|
||||||
builder.Services.AddSingleton<ChatService>();
|
builder.Services.AddSingleton<ChatService>();
|
||||||
|
|
||||||
builder.Services.AddScoped<VectorSearchService>();
|
builder.Services.AddScoped<VectorSearchService>();
|
||||||
|
builder.Services.AddScoped<DocumentService>();
|
||||||
|
|
||||||
builder.Services.AddKeyedSingleton<IContentDecoder, PdfContentDecoder>(MediaTypeNames.Application.Pdf);
|
builder.Services.AddKeyedSingleton<IContentDecoder, PdfContentDecoder>(MediaTypeNames.Application.Pdf);
|
||||||
builder.Services.AddKeyedSingleton<IContentDecoder, DocxContentDecoder>("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
|
builder.Services.AddKeyedSingleton<IContentDecoder, DocxContentDecoder>("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
|
||||||
@@ -96,24 +98,24 @@ app.UseSwaggerUI(options =>
|
|||||||
|
|
||||||
var documentsApiGroup = app.MapGroup("/api/documents").WithTags("Documents");
|
var documentsApiGroup = app.MapGroup("/api/documents").WithTags("Documents");
|
||||||
|
|
||||||
documentsApiGroup.MapGet(string.Empty, async (VectorSearchService vectorSearchService) =>
|
documentsApiGroup.MapGet(string.Empty, async (DocumentService documentService) =>
|
||||||
{
|
{
|
||||||
var documents = await vectorSearchService.GetDocumentsAsync();
|
var documents = await documentService.GetDocumentsAsync();
|
||||||
return TypedResults.Ok(documents);
|
return TypedResults.Ok(documents);
|
||||||
})
|
})
|
||||||
.WithSummary("Gets the list of documents");
|
.WithSummary("Gets the list of documents");
|
||||||
|
|
||||||
documentsApiGroup.MapGet("{documentId:guid}/chunks", async (Guid documentId, VectorSearchService vectorSearchService) =>
|
documentsApiGroup.MapGet("{documentId:guid}/chunks", async (Guid documentId, DocumentService documentService) =>
|
||||||
{
|
{
|
||||||
var documents = await vectorSearchService.GetDocumentChunksAsync(documentId);
|
var documents = await documentService.GetDocumentChunksAsync(documentId);
|
||||||
return TypedResults.Ok(documents);
|
return TypedResults.Ok(documents);
|
||||||
})
|
})
|
||||||
.WithSummary("Gets the list of chunks of a given document")
|
.WithSummary("Gets the list of chunks of a given document")
|
||||||
.WithDescription("The list does not contain embedding. Use '/api/documents/{documentId}/chunks/{documentChunkId}' to get the embedding for a given chunk.");
|
.WithDescription("The list does not contain embedding. Use '/api/documents/{documentId}/chunks/{documentChunkId}' to get the embedding for a given chunk.");
|
||||||
|
|
||||||
documentsApiGroup.MapGet("{documentId:guid}/chunks/{documentChunkId:guid}", async Task<Results<Ok<DocumentChunk>, NotFound>> (Guid documentId, Guid documentChunkId, VectorSearchService vectorSearchService) =>
|
documentsApiGroup.MapGet("{documentId:guid}/chunks/{documentChunkId:guid}", async Task<Results<Ok<DocumentChunk>, NotFound>> (Guid documentId, Guid documentChunkId, DocumentService documentService) =>
|
||||||
{
|
{
|
||||||
var chunk = await vectorSearchService.GetDocumentChunkEmbeddingAsync(documentId, documentChunkId);
|
var chunk = await documentService.GetDocumentChunkEmbeddingAsync(documentId, documentChunkId);
|
||||||
if (chunk is null)
|
if (chunk is null)
|
||||||
{
|
{
|
||||||
return TypedResults.NotFound();
|
return TypedResults.NotFound();
|
||||||
@@ -124,6 +126,14 @@ documentsApiGroup.MapGet("{documentId:guid}/chunks/{documentChunkId:guid}", asyn
|
|||||||
.ProducesProblem(StatusCodes.Status404NotFound)
|
.ProducesProblem(StatusCodes.Status404NotFound)
|
||||||
.WithSummary("Gets the details of a given chunk, includings its embedding");
|
.WithSummary("Gets the details of a given chunk, includings its embedding");
|
||||||
|
|
||||||
|
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, DocumentService documentService) =>
|
||||||
|
{
|
||||||
|
await documentService.DeleteDocumentAsync(documentId);
|
||||||
|
return TypedResults.NoContent();
|
||||||
|
})
|
||||||
|
.WithSummary("Deletes a document")
|
||||||
|
.WithDescription("This endpoint deletes the document and all its chunks.");
|
||||||
|
|
||||||
documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchService vectorSearchService,
|
documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchService vectorSearchService,
|
||||||
[Description("The unique identifier of the document. If not provided, a new one will be generated. If you specify an existing documentId, the corresponding document will be overwritten.")] Guid? documentId = null) =>
|
[Description("The unique identifier of the document. If not provided, a new one will be generated. If you specify an existing documentId, the corresponding document will be overwritten.")] Guid? documentId = null) =>
|
||||||
{
|
{
|
||||||
@@ -137,14 +147,6 @@ documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchServi
|
|||||||
.WithSummary("Uploads a document")
|
.WithSummary("Uploads a document")
|
||||||
.WithDescription("Uploads a document to SQL Database and saves its embedding using the native VECTOR type. The document will be indexed and used to answer questions. Currently, PDF, DOCX and TXT files are supported.");
|
.WithDescription("Uploads a document to SQL Database and saves its embedding using the native VECTOR type. The document will be indexed and used to answer questions. Currently, PDF, DOCX and TXT files are supported.");
|
||||||
|
|
||||||
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
|
|
||||||
{
|
|
||||||
await vectorSearchService.DeleteDocumentAsync(documentId);
|
|
||||||
return TypedResults.NoContent();
|
|
||||||
})
|
|
||||||
.WithSummary("Deletes a document")
|
|
||||||
.WithDescription("This endpoint deletes the document and all its chunks.");
|
|
||||||
|
|
||||||
app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSearchService,
|
app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSearchService,
|
||||||
[Description("If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.")] bool reformulate = true) =>
|
[Description("If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.")] bool reformulate = true) =>
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
using System.Data;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using SqlDatabaseVectorSearch.DataAccessLayer;
|
||||||
|
using SqlDatabaseVectorSearch.Models;
|
||||||
|
|
||||||
|
namespace SqlDatabaseVectorSearch.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Provides read and delete operations on stored documents and their chunks.
/// </summary>
/// <param name="dbContext">The Entity Framework Core context used to query documents and document chunks.</param>
public class DocumentService(ApplicationDbContext dbContext)
{
    /// <summary>
    /// Gets all the documents, ordered by name, each one with the number of its chunks.
    /// </summary>
    /// <param name="cancellationToken">A token that cancels the database query (for example, when the HTTP request is aborted).</param>
    /// <returns>The list of documents.</returns>
    public async Task<IEnumerable<Document>> GetDocumentsAsync(CancellationToken cancellationToken = default)
    {
        var documents = await dbContext.Documents.OrderBy(d => d.Name)
            .Select(d => new Document(d.Id, d.Name, d.CreationDate, d.Chunks.Count))
            .ToListAsync(cancellationToken);

        return documents;
    }

    /// <summary>
    /// Gets the chunks of the given document, ordered by index.
    /// </summary>
    /// <param name="documentId">The identifier of the document whose chunks are requested.</param>
    /// <param name="cancellationToken">A token that cancels the database query.</param>
    /// <returns>The chunks of the document, without their embedding; the list is empty if no chunk matches.</returns>
    public async Task<IEnumerable<DocumentChunk>> GetDocumentChunksAsync(Guid documentId, CancellationToken cancellationToken = default)
    {
        // The embedding is deliberately projected as null: use GetDocumentChunkEmbeddingAsync to read it for a single chunk.
        var documentChunks = await dbContext.DocumentChunks.Where(c => c.DocumentId == documentId).OrderBy(c => c.Index)
            .Select(c => new DocumentChunk(c.Id, c.Index, c.Content, null))
            .ToListAsync(cancellationToken);

        return documentChunks;
    }

    /// <summary>
    /// Gets a single chunk, including its embedding.
    /// </summary>
    /// <param name="documentId">The identifier of the document that owns the chunk.</param>
    /// <param name="documentChunkId">The identifier of the chunk.</param>
    /// <param name="cancellationToken">A token that cancels the database query.</param>
    /// <returns>The chunk with its embedding, or <see langword="null"/> if no chunk with the given identifier belongs to the given document.</returns>
    public async Task<DocumentChunk?> GetDocumentChunkEmbeddingAsync(Guid documentId, Guid documentChunkId, CancellationToken cancellationToken = default)
    {
        // Both identifiers must match, so a chunk cannot be read through a document it does not belong to.
        var documentChunk = await dbContext.DocumentChunks.Where(c => c.Id == documentChunkId && c.DocumentId == documentId)
            .Select(c => new DocumentChunk(c.Id, c.Index, c.Content, c.Embedding))
            .FirstOrDefaultAsync(cancellationToken);

        return documentChunk;
    }

    /// <summary>
    /// Deletes the document with the given identifier. Nothing happens if the document does not exist.
    /// </summary>
    /// <remarks>
    /// Only rows of the Documents set are deleted here.
    /// NOTE(review): the removal of the related chunks is presumably handled by a cascade delete on the relationship - confirm against the model configuration.
    /// </remarks>
    /// <param name="documentId">The identifier of the document to delete.</param>
    /// <param name="cancellationToken">A token that cancels the delete command.</param>
    /// <returns>A task that completes when the delete command has been executed.</returns>
    public Task DeleteDocumentAsync(Guid documentId, CancellationToken cancellationToken = default)
        => dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync(cancellationToken);
}
|
||||||
@@ -10,7 +10,7 @@ using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
|
|||||||
|
|
||||||
namespace SqlDatabaseVectorSearch.Services;
|
namespace SqlDatabaseVectorSearch.Services;
|
||||||
|
|
||||||
public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, TokenizerService tokenizerService, TextChunkerService textChunkerService, ChatService chatService, TimeProvider timeProvider, IOptions<AppSettings> appSettingsOptions, ILogger<VectorSearchService> logger)
|
public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDbContext dbContext, DocumentService documentService, ITextEmbeddingGenerationService textEmbeddingGenerationService, TokenizerService tokenizerService, TextChunkerService textChunkerService, ChatService chatService, TimeProvider timeProvider, IOptions<AppSettings> appSettingsOptions, ILogger<VectorSearchService> logger)
|
||||||
{
|
{
|
||||||
private readonly AppSettings appSettings = appSettingsOptions.Value;
|
private readonly AppSettings appSettings = appSettingsOptions.Value;
|
||||||
|
|
||||||
@@ -23,12 +23,15 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
|
|||||||
// We get the token count of the whole document because it is the total number of tokens used by the embedding (it may be necessary, for example, for cost analysis).
|
// We get the token count of the whole document because it is the total number of tokens used by the embedding (it may be necessary, for example, for cost analysis).
|
||||||
var tokenCount = tokenizerService.CountEmbeddingTokens(content);
|
var tokenCount = tokenizerService.CountEmbeddingTokens(content);
|
||||||
|
|
||||||
|
var strategy = dbContext.Database.CreateExecutionStrategy();
|
||||||
|
var document = await strategy.ExecuteAsync(async () =>
|
||||||
|
{
|
||||||
await dbContext.Database.BeginTransactionAsync();
|
await dbContext.Database.BeginTransactionAsync();
|
||||||
|
|
||||||
if (documentId.HasValue)
|
if (documentId.HasValue)
|
||||||
{
|
{
|
||||||
// If the user is importing a document that already exists, delete the previous one.
|
// If the user is importing a document that already exists, delete the previous one.
|
||||||
await DeleteDocumentAsync(documentId.Value);
|
await documentService.DeleteDocumentAsync(documentId.Value);
|
||||||
}
|
}
|
||||||
|
|
||||||
var document = new Entities.Document { Id = documentId.GetValueOrDefault(), Name = name, CreationDate = timeProvider.GetUtcNow() };
|
var document = new Entities.Document { Id = documentId.GetValueOrDefault(), Name = name, CreationDate = timeProvider.GetUtcNow() };
|
||||||
@@ -50,39 +53,12 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
|
|||||||
await dbContext.SaveChangesAsync();
|
await dbContext.SaveChangesAsync();
|
||||||
await dbContext.Database.CommitTransactionAsync();
|
await dbContext.Database.CommitTransactionAsync();
|
||||||
|
|
||||||
|
return document;
|
||||||
|
});
|
||||||
|
|
||||||
return new(document.Id, tokenCount);
|
return new(document.Id, tokenCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<IEnumerable<Document>> GetDocumentsAsync()
|
|
||||||
{
|
|
||||||
var documents = await dbContext.Documents.OrderBy(d => d.Name)
|
|
||||||
.Select(d => new Document(d.Id, d.Name, d.CreationDate, d.Chunks.Count))
|
|
||||||
.ToListAsync();
|
|
||||||
|
|
||||||
return documents;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<IEnumerable<DocumentChunk>> GetDocumentChunksAsync(Guid documentId)
|
|
||||||
{
|
|
||||||
var documentChunks = await dbContext.DocumentChunks.Where(c => c.DocumentId == documentId).OrderBy(c => c.Index)
|
|
||||||
.Select(c => new DocumentChunk(c.Id, c.Index, c.Content, null))
|
|
||||||
.ToListAsync();
|
|
||||||
|
|
||||||
return documentChunks;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<DocumentChunk?> GetDocumentChunkEmbeddingAsync(Guid documentId, Guid documentChunkId)
|
|
||||||
{
|
|
||||||
var documentChunk = await dbContext.DocumentChunks.Where(c => c.Id == documentChunkId && c.DocumentId == documentId)
|
|
||||||
.Select(c => new DocumentChunk(c.Id, c.Index, c.Content, c.Embedding))
|
|
||||||
.FirstOrDefaultAsync();
|
|
||||||
|
|
||||||
return documentChunk;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Task DeleteDocumentAsync(Guid documentId)
|
|
||||||
=> dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync();
|
|
||||||
|
|
||||||
public async Task<QuestionResponse> AskQuestionAsync(Question question, bool reformulate = true)
|
public async Task<QuestionResponse> AskQuestionAsync(Question question, bool reformulate = true)
|
||||||
{
|
{
|
||||||
// If the user doesn't want to reformulate the question, CreateContextAsync returns the original one.
|
// If the user doesn't want to reformulate the question, CreateContextAsync returns the original one.
|
||||||
|
|||||||
Reference in New Issue
Block a user