mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Refactor code and enhance API documentation
- Converted `Question.cs` and `Search.cs` records to `record class` syntax for clarity.
- Organized API endpoints with tags and added new GET and DELETE endpoints in `Program.cs`, including OpenAPI documentation improvements.
- Removed commented-out code in `Program.cs` for a cleaner codebase.
- Introduced `WithTags` for better API operation categorization in Swagger UI.
- Added a TODO comment in `ChatService.cs` for future improvement on chunk length check.
- Clarified `using` directives in `VectorSearchService.cs` with namespace aliasing to improve readability.
- Refactored document deletion in `VectorSearchService.cs` to use a private helper method and expanded service capabilities with a new `GetDocumentsAsync` method.
- Introduced a new `Document` model in the `Models` namespace to support document fetching functionality.
- Simplified `appsettings.json` by removing `MaxTokens` configuration for `ChatCompletion` and `Embedding` services.
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
namespace SqlDatabaseVectorSearch.Models;
|
||||
|
||||
public record class Document(Guid Id, string Name, DateTimeOffset CreationDate, int ChunkCount);
|
||||
@@ -1,3 +1,3 @@
|
||||
namespace SqlDatabaseVectorSearch.Models;
|
||||
|
||||
public record Question(Guid ConversationId, string Text) : Search(Text);
|
||||
public record class Question(Guid ConversationId, string Text) : Search(Text);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace SqlDatabaseVectorSearch.Models;
|
||||
|
||||
public record Search(string Text);
|
||||
public record class Search(string Text);
|
||||
|
||||
|
||||
@@ -63,7 +63,19 @@ if (app.Environment.IsDevelopment())
|
||||
});
|
||||
}
|
||||
|
||||
var documentsApiGroup = app.MapGroup("/api/documents");
|
||||
var documentsApiGroup = app.MapGroup("/api/documents").WithTags("Documents");
|
||||
|
||||
documentsApiGroup.MapGet(string.Empty, async (VectorSearchService vectorSearchService) =>
|
||||
{
|
||||
var documents = await vectorSearchService.GetDocumentsAsync();
|
||||
return TypedResults.Ok(documents);
|
||||
})
|
||||
.WithOpenApi(operation =>
|
||||
{
|
||||
operation.Summary = "Gets the list of documents";
|
||||
|
||||
return operation;
|
||||
});
|
||||
|
||||
documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchService vectorSearchService, LinkGenerator linkGenerator, Guid? documentId = null) =>
|
||||
{
|
||||
@@ -79,8 +91,7 @@ documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchServi
|
||||
operation.Parameter("documentId").Description = "The unique identifier of the document. If not provided, a new one will be generated. If you specify an existing documentId, the document will be overridden.";
|
||||
|
||||
return operation;
|
||||
})
|
||||
;
|
||||
});
|
||||
|
||||
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
|
||||
{
|
||||
@@ -90,27 +101,11 @@ documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorS
|
||||
.WithOpenApi(operation =>
|
||||
{
|
||||
operation.Summary = "Deletes a document";
|
||||
operation.Description = "This endpoint deletes the documents and all its chunks from SQL Server";
|
||||
operation.Description = "This endpoint deletes the document and all its chunks from SQL Server";
|
||||
|
||||
return operation;
|
||||
});
|
||||
|
||||
//app.MapPost("/api/search", async (Search search, ApplicationMemoryService memory, double minimumRelevance = 0, string? index = null) =>
|
||||
//{
|
||||
// var response = await memory.SearchAsync(search, minimumRelevance, index);
|
||||
// return TypedResults.Ok(response);
|
||||
//})
|
||||
//.WithOpenApi(operation =>
|
||||
//{
|
||||
// operation.Summary = "Search into Kernel Memory";
|
||||
// operation.Description = "Search into Kernel Memory using the provided question and optional tags. If tags are provided, they will be used as filters with OR logic.";
|
||||
|
||||
// operation.Parameter("minimumRelevance").Description = "The minimum Cosine Similarity required.";
|
||||
// operation.Parameter("index").Description = "The index in which to search for documents. If not provided, the default index will be used ('default').";
|
||||
|
||||
// return operation;
|
||||
//});
|
||||
|
||||
app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSearchService, bool reformulate = true) =>
|
||||
{
|
||||
var response = await vectorSearchService.AskQuestionAsync(question, reformulate);
|
||||
@@ -124,6 +119,7 @@ app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSear
|
||||
operation.Parameter("reformulate").Description = "If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.";
|
||||
|
||||
return operation;
|
||||
});
|
||||
})
|
||||
.WithTags("Ask");
|
||||
|
||||
app.Run();
|
||||
@@ -45,6 +45,7 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti
|
||||
|
||||
""");
|
||||
|
||||
// TODO: Ensure that the chunks are not too long, according to the model max token.
|
||||
foreach (var result in chunks.Select(c => c.Content))
|
||||
{
|
||||
prompt.AppendLine(result);
|
||||
@@ -75,7 +76,6 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti
|
||||
}
|
||||
|
||||
cache.Set(conversationId, chat, appSettingsOptions.Value.MessageExpiration);
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,10 +3,10 @@ using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.SemanticKernel.Embeddings;
|
||||
using Microsoft.SemanticKernel.Text;
|
||||
using SqlDatabaseVectorSearch.DataAccessLayer;
|
||||
using SqlDatabaseVectorSearch.DataAccessLayer.Entities;
|
||||
using SqlDatabaseVectorSearch.Models;
|
||||
using UglyToad.PdfPig;
|
||||
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
|
||||
using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
|
||||
|
||||
namespace SqlDatabaseVectorSearch.Services;
|
||||
|
||||
@@ -24,11 +24,11 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
}
|
||||
else
|
||||
{
|
||||
// Creates a new document.
|
||||
// Create a new document.
|
||||
documentId = Guid.NewGuid();
|
||||
}
|
||||
|
||||
var document = new Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow };
|
||||
var document = new Entities.Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow };
|
||||
dbContext.Documents.Add(document);
|
||||
|
||||
// Split the content into chunks of at most 1024 tokens and generate the embeddings for each one.
|
||||
@@ -37,7 +37,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
|
||||
foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray())))
|
||||
{
|
||||
var documentChunk = new DocumentChunk { DocumentId = documentId.Value, Content = paragraph, Embedding = embedding };
|
||||
var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Content = paragraph, Embedding = embedding };
|
||||
dbContext.DocumentChunks.Add(documentChunk);
|
||||
}
|
||||
|
||||
@@ -45,17 +45,18 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
return documentId.Value;
|
||||
}
|
||||
|
||||
public async Task DeleteDocumentAsync(Guid documentId)
|
||||
public async Task<IEnumerable<Document>> GetDocumentsAsync()
|
||||
{
|
||||
var document = await dbContext.Documents.Include(d => d.DocumentChunks).FirstOrDefaultAsync(d => d.Id == documentId);
|
||||
if (document is null)
|
||||
{
|
||||
return;
|
||||
var documents = await dbContext.Documents.OrderBy(d => d.Name).AsNoTracking()
|
||||
.Select(d => new Document(d.Id, d.Name, d.CreationDate, d.DocumentChunks.Count))
|
||||
.ToListAsync();
|
||||
|
||||
return documents;
|
||||
}
|
||||
|
||||
dbContext.DocumentChunks.RemoveRange(document.DocumentChunks);
|
||||
dbContext.Documents.Remove(document);
|
||||
|
||||
public async Task DeleteDocumentAsync(Guid documentId)
|
||||
{
|
||||
await DeleteDocumentInternalAsync(documentId);
|
||||
await dbContext.SaveChangesAsync();
|
||||
}
|
||||
|
||||
@@ -69,13 +70,6 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
|
||||
var chunks = await dbContext.DocumentChunks
|
||||
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
|
||||
//.Select(c => new
|
||||
//{
|
||||
// c.Id,
|
||||
// c.DocumentId,
|
||||
// c.Content,
|
||||
// Distance = EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray())
|
||||
//})
|
||||
.Take(5)
|
||||
.ToListAsync();
|
||||
|
||||
@@ -83,18 +77,6 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
return new Response(reformulatedQuestion, answer);
|
||||
}
|
||||
|
||||
//public async Task<SearchResult?> SearchAsync(Search search, double minimumRelevance = 0, string? index = null)
|
||||
//{
|
||||
// // Search using the embedding search via Kernel Memory .
|
||||
// // If tags are provided, use them as filters with OR logic.
|
||||
// var searchResult = await memory.SearchAsync(search.Text.TrimEnd([' ', '?']), index, filters: search.Tags.ToMemoryFilters(), minRelevance: minimumRelevance, limit: 50);
|
||||
|
||||
// // If you want to use an AND logic, set the "filter" parameter (instead of "filters").
|
||||
// //var searchResult = await memory.SearchAsync(search.Text.TrimEnd([' ', '?']), index, filter: search.Tags.ToMemoryFilter(), minRelevance: minimumRelevance);
|
||||
|
||||
// return searchResult;
|
||||
//}
|
||||
|
||||
private static Task<string> GetContentAsync(Stream stream)
|
||||
{
|
||||
var content = new StringBuilder();
|
||||
@@ -102,7 +84,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
// Reads the content of the PDF document using PdfPig.
|
||||
using var pdfDocument = PdfDocument.Open(stream);
|
||||
|
||||
foreach (var page in pdfDocument.GetPages().Where(x => x != null))
|
||||
foreach (var page in pdfDocument.GetPages().Where(x => x is not null))
|
||||
{
|
||||
var pageContent = ContentOrderTextExtractor.GetText(page) ?? string.Empty;
|
||||
content.AppendLine(pageContent);
|
||||
@@ -110,4 +92,16 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
|
||||
return Task.FromResult(content.ToString());
|
||||
}
|
||||
|
||||
private async Task DeleteDocumentInternalAsync(Guid documentId)
|
||||
{
|
||||
var document = await dbContext.Documents.Include(d => d.DocumentChunks).FirstOrDefaultAsync(d => d.Id == documentId);
|
||||
if (document is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
dbContext.DocumentChunks.RemoveRange(document.DocumentChunks);
|
||||
dbContext.Documents.Remove(document);
|
||||
}
|
||||
}
|
||||
@@ -6,14 +6,12 @@
|
||||
"ChatCompletion": {
|
||||
"Endpoint": "",
|
||||
"Deployment": "",
|
||||
"ApiKey": "",
|
||||
"MaxTokens": 32768
|
||||
"ApiKey": ""
|
||||
},
|
||||
"Embedding": {
|
||||
"Endpoint": "",
|
||||
"Deployment": "",
|
||||
"ApiKey": "",
|
||||
"MaxTokens": 8191
|
||||
"ApiKey": ""
|
||||
}
|
||||
},
|
||||
"AppSettings": {
|
||||
|
||||
Reference in New Issue
Block a user