Refactor code and enhance API documentation

- Converted `Question.cs` and `Search.cs` records to `record class` syntax for clarity.
- Organized API endpoints with tags and added a new GET endpoint for listing documents in `Program.cs`, including OpenAPI documentation improvements (such as a corrected description for the existing DELETE endpoint).
- Removed commented-out code in `Program.cs` and `VectorSearchService.cs` for a cleaner codebase.
- Introduced `WithTags` for better API operation categorization in Swagger UI.
- Added a TODO comment in `ChatService.cs` as a reminder to ensure that chunks do not exceed the model's maximum token limit.
- Clarified `using` directives in `VectorSearchService.cs` with namespace aliasing to improve readability.
- Refactored document deletion in `VectorSearchService.cs` to use a private helper method and expanded service capabilities with a new `GetDocumentsAsync` method.
- Introduced a new `Document` model in the `Models` namespace to support document fetching functionality.
- Simplified `appsettings.json` by removing `MaxTokens` configuration for `ChatCompletion` and `Embedding` services.
This commit is contained in:
Marco Minerva
2024-06-14 17:20:21 +02:00
parent db4646330f
commit b6c898a3f5
7 changed files with 52 additions and 61 deletions
@@ -0,0 +1,3 @@
namespace SqlDatabaseVectorSearch.Models;
/// <summary>
/// Describes a stored document: its identifier, name, creation date and number of chunks.
/// </summary>
/// <param name="Id">The unique identifier of the document.</param>
/// <param name="Name">The name of the document.</param>
/// <param name="CreationDate">The date and time at which the document was created.</param>
/// <param name="ChunkCount">The number of chunks that belong to the document.</param>
public record class Document(Guid Id, string Name, DateTimeOffset CreationDate, int ChunkCount);
+1 -1
View File
@@ -1,3 +1,3 @@
namespace SqlDatabaseVectorSearch.Models;
public record Question(Guid ConversationId, string Text) : Search(Text);
public record class Question(Guid ConversationId, string Text) : Search(Text);
+1 -1
View File
@@ -1,4 +1,4 @@
namespace SqlDatabaseVectorSearch.Models;
public record Search(string Text);
public record class Search(string Text);
+17 -21
View File
@@ -63,7 +63,19 @@ if (app.Environment.IsDevelopment())
});
}
var documentsApiGroup = app.MapGroup("/api/documents");
var documentsApiGroup = app.MapGroup("/api/documents").WithTags("Documents");
documentsApiGroup.MapGet(string.Empty, async (VectorSearchService vectorSearchService) =>
{
var documents = await vectorSearchService.GetDocumentsAsync();
return TypedResults.Ok(documents);
})
.WithOpenApi(operation =>
{
operation.Summary = "Gets the list of documents";
return operation;
});
documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchService vectorSearchService, LinkGenerator linkGenerator, Guid? documentId = null) =>
{
@@ -79,8 +91,7 @@ documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchServi
operation.Parameter("documentId").Description = "The unique identifier of the document. If not provided, a new one will be generated. If you specify an existing documentId, the document will be overridden.";
return operation;
})
;
});
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
{
@@ -90,27 +101,11 @@ documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorS
.WithOpenApi(operation =>
{
operation.Summary = "Deletes a document";
operation.Description = "This endpoint deletes the documents and all its chunks from SQL Server";
operation.Description = "This endpoint deletes the document and all its chunks from SQL Server";
return operation;
});
//app.MapPost("/api/search", async (Search search, ApplicationMemoryService memory, double minimumRelevance = 0, string? index = null) =>
//{
// var response = await memory.SearchAsync(search, minimumRelevance, index);
// return TypedResults.Ok(response);
//})
//.WithOpenApi(operation =>
//{
// operation.Summary = "Search into Kernel Memory";
// operation.Description = "Search into Kernel Memory using the provided question and optional tags. If tags are provided, they will be used as filters with OR logic.";
// operation.Parameter("minimumRelevance").Description = "The minimum Cosine Similarity required.";
// operation.Parameter("index").Description = "The index in which to search for documents. If not provided, the default index will be used ('default').";
// return operation;
//});
app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSearchService, bool reformulate = true) =>
{
var response = await vectorSearchService.AskQuestionAsync(question, reformulate);
@@ -124,6 +119,7 @@ app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSear
operation.Parameter("reformulate").Description = "If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.";
return operation;
});
})
.WithTags("Ask");
app.Run();
@@ -45,6 +45,7 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti
""");
// TODO: Ensure that the chunks do not exceed the model's maximum token limit.
foreach (var result in chunks.Select(c => c.Content))
{
prompt.AppendLine(result);
@@ -75,7 +76,6 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti
}
cache.Set(conversationId, chat, appSettingsOptions.Value.MessageExpiration);
return Task.CompletedTask;
}
}
@@ -3,10 +3,10 @@ using Microsoft.EntityFrameworkCore;
using Microsoft.SemanticKernel.Embeddings;
using Microsoft.SemanticKernel.Text;
using SqlDatabaseVectorSearch.DataAccessLayer;
using SqlDatabaseVectorSearch.DataAccessLayer.Entities;
using SqlDatabaseVectorSearch.Models;
using UglyToad.PdfPig;
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
namespace SqlDatabaseVectorSearch.Services;
@@ -24,11 +24,11 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
}
else
{
// Creates a new document.
// Create a new document.
documentId = Guid.NewGuid();
}
var document = new Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow };
var document = new Entities.Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow };
dbContext.Documents.Add(document);
// Split the content into chunks of at most 1024 tokens and generate the embeddings for each one.
@@ -37,7 +37,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray())))
{
var documentChunk = new DocumentChunk { DocumentId = documentId.Value, Content = paragraph, Embedding = embedding };
var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Content = paragraph, Embedding = embedding };
dbContext.DocumentChunks.Add(documentChunk);
}
@@ -45,17 +45,18 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
return documentId.Value;
}
public async Task DeleteDocumentAsync(Guid documentId)
public async Task<IEnumerable<Document>> GetDocumentsAsync()
{
var document = await dbContext.Documents.Include(d => d.DocumentChunks).FirstOrDefaultAsync(d => d.Id == documentId);
if (document is null)
{
return;
var documents = await dbContext.Documents.OrderBy(d => d.Name).AsNoTracking()
.Select(d => new Document(d.Id, d.Name, d.CreationDate, d.DocumentChunks.Count))
.ToListAsync();
return documents;
}
dbContext.DocumentChunks.RemoveRange(document.DocumentChunks);
dbContext.Documents.Remove(document);
public async Task DeleteDocumentAsync(Guid documentId)
{
await DeleteDocumentInternalAsync(documentId);
await dbContext.SaveChangesAsync();
}
@@ -69,13 +70,6 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
var chunks = await dbContext.DocumentChunks
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
//.Select(c => new
//{
// c.Id,
// c.DocumentId,
// c.Content,
// Distance = EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray())
//})
.Take(5)
.ToListAsync();
@@ -83,18 +77,6 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
return new Response(reformulatedQuestion, answer);
}
//public async Task<SearchResult?> SearchAsync(Search search, double minimumRelevance = 0, string? index = null)
//{
// // Search using the embedding search via Kernel Memory .
// // If tags are provided, use them as filters with OR logic.
// var searchResult = await memory.SearchAsync(search.Text.TrimEnd([' ', '?']), index, filters: search.Tags.ToMemoryFilters(), minRelevance: minimumRelevance, limit: 50);
// // If you want to use an AND logic, set the "filter" parameter (instead of "filters").
// //var searchResult = await memory.SearchAsync(search.Text.TrimEnd([' ', '?']), index, filter: search.Tags.ToMemoryFilter(), minRelevance: minimumRelevance);
// return searchResult;
//}
private static Task<string> GetContentAsync(Stream stream)
{
var content = new StringBuilder();
@@ -102,7 +84,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
// Reads the content of the PDF document using PdfPig.
using var pdfDocument = PdfDocument.Open(stream);
foreach (var page in pdfDocument.GetPages().Where(x => x != null))
foreach (var page in pdfDocument.GetPages().Where(x => x is not null))
{
var pageContent = ContentOrderTextExtractor.GetText(page) ?? string.Empty;
content.AppendLine(pageContent);
@@ -110,4 +92,16 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
return Task.FromResult(content.ToString());
}
/// <summary>
/// Removes the document with the given identifier, together with all of its chunks, from the database context.
/// </summary>
/// <param name="documentId">The unique identifier of the document to delete.</param>
/// <remarks>
/// If no document with the given identifier exists, the method returns without doing anything.
/// This method does not call <c>SaveChangesAsync</c> itself: callers are expected to do so
/// in order to persist the removal.
/// </remarks>
private async Task DeleteDocumentInternalAsync(Guid documentId)
{
// Load the document along with its chunks, so that both can be removed below.
var document = await dbContext.Documents.Include(d => d.DocumentChunks).FirstOrDefaultAsync(d => d.Id == documentId);
if (document is null)
{
// Nothing to delete.
return;
}
dbContext.DocumentChunks.RemoveRange(document.DocumentChunks);
dbContext.Documents.Remove(document);
}
}
+2 -4
View File
@@ -6,14 +6,12 @@
"ChatCompletion": {
"Endpoint": "",
"Deployment": "",
"ApiKey": "",
"MaxTokens": 32768
"ApiKey": ""
},
"Embedding": {
"Endpoint": "",
"Deployment": "",
"ApiKey": "",
"MaxTokens": 8191
"ApiKey": ""
}
},
"AppSettings": {