Enhanced document chunk handling and API

- Updated `Scripts.sql` to add a new `[Index]` column to `[dbo].[DocumentChunks]` for order tracking.
- Modified `DocumentChunk.cs` to include a new `Index` property, and introduced a new immutable record class for document chunks.
- Introduced new API endpoints in `Program.cs` for document and chunk retrieval, including embedding details, with OpenAPI documentation enhancements.
- Updated an API endpoint description in `Program.cs` to clarify document embedding handling.
- Updated `VectorSearchService.cs` to reflect schema changes in service logic, adding methods for fetching document chunks and specific embeddings.
This commit is contained in:
Marco Minerva
2024-07-10 11:25:50 +02:00
parent ad6faee370
commit 45de38d87a
5 changed files with 56 additions and 2 deletions
@@ -39,9 +39,10 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
var paragraphs = TextChunker.SplitPlainTextParagraphs(TextChunker.SplitPlainTextLines(content, appSettings.MaxTokensPerLine), appSettings.MaxTokensPerParagraph, appSettings.OverlapTokens);
var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(paragraphs);
var index = 0;
foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray())))
{
var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Content = paragraph, Embedding = embedding };
var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Index = index++, Content = paragraph, Embedding = embedding };
dbContext.DocumentChunks.Add(documentChunk);
}
@@ -58,6 +59,24 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
return documents;
}
/// <summary>
/// Retrieves the chunks that belong to the given document, sorted by their <c>Index</c>.
/// </summary>
/// <param name="documentId">Identifier of the document whose chunks are requested.</param>
/// <returns>
/// The matching chunks projected to <see cref="DocumentChunk"/>; the fourth constructor
/// argument (the embedding slot) is always <c>null</c> in this listing.
/// </returns>
public async Task<IEnumerable<DocumentChunk>> GetDocumentChunksAsync(Guid documentId)
    => await dbContext.DocumentChunks
        .AsNoTracking()
        .Where(chunk => chunk.DocumentId == documentId)
        .OrderBy(chunk => chunk.Index)
        // Embedding is deliberately not selected here; only id, index and content are read.
        .Select(chunk => new DocumentChunk(chunk.Id, chunk.Index, chunk.Content, null))
        .ToListAsync();
/// <summary>
/// Looks up a single chunk, including its embedding, by chunk id within a given document.
/// </summary>
/// <param name="documentId">Identifier of the document the chunk must belong to.</param>
/// <param name="documentChunkId">Identifier of the chunk to retrieve.</param>
/// <returns>
/// The chunk projected to <see cref="DocumentChunk"/> with its embedding populated,
/// or <c>null</c> when no chunk matches both identifiers.
/// </returns>
public async Task<DocumentChunk?> GetDocumentChunkEmbeddingAsync(Guid documentId, Guid documentChunkId)
    => await dbContext.DocumentChunks
        .AsNoTracking()
        // Both ids must match, so a chunk id paired with the wrong document yields null.
        .Where(chunk => chunk.Id == documentChunkId && chunk.DocumentId == documentId)
        .Select(chunk => new DocumentChunk(chunk.Id, chunk.Index, chunk.Content, chunk.Embedding))
        .FirstOrDefaultAsync();
public async Task DeleteDocumentAsync(Guid documentId, bool saveChanges = true)
{
var document = await dbContext.Documents.Include(d => d.Chunks).FirstOrDefaultAsync(d => d.Id == documentId);