mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Enhanced document chunk handling and API
- Updated `Scripts.sql` to add a new `[Index]` column to `[dbo].[DocumentChunks]` for order tracking.
- Modified `DocumentChunk.cs` to include a new `Index` property, and introduced a new immutable record class for document chunks.
- Introduced new API endpoints in `Program.cs` for document and chunk retrieval, including embedding details, with OpenAPI documentation enhancements.
- Updated an API endpoint description in `Program.cs` to clarify document embedding handling.
- Updated `VectorSearchService.cs` to reflect schema changes in service logic, adding methods for fetching document chunks and specific embeddings.
This commit is contained in:
@@ -39,9 +39,10 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
var paragraphs = TextChunker.SplitPlainTextParagraphs(TextChunker.SplitPlainTextLines(content, appSettings.MaxTokensPerLine), appSettings.MaxTokensPerParagraph, appSettings.OverlapTokens);
|
||||
var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(paragraphs);
|
||||
|
||||
var index = 0;
|
||||
foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray())))
|
||||
{
|
||||
var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Content = paragraph, Embedding = embedding };
|
||||
var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Index = index++, Content = paragraph, Embedding = embedding };
|
||||
dbContext.DocumentChunks.Add(documentChunk);
|
||||
}
|
||||
|
||||
@@ -58,6 +59,24 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
||||
return documents;
|
||||
}
|
||||
|
||||
/// <summary>
/// Retrieves the chunks that belong to the given document, sorted by their <c>Index</c>.
/// </summary>
/// <param name="documentId">Identifier of the document whose chunks are requested.</param>
/// <returns>
/// The matching chunks projected to <see cref="DocumentChunk"/>; the last constructor
/// argument is always passed as <c>null</c>, so no embedding data is returned by this method.
/// </returns>
public async Task<IEnumerable<DocumentChunk>> GetDocumentChunksAsync(Guid documentId)
{
    // Build the query first; nothing is sent to the database until ToListAsync runs.
    var chunksQuery = dbContext.DocumentChunks
        .Where(chunk => chunk.DocumentId == documentId)
        .OrderBy(chunk => chunk.Index)
        .AsNoTracking()
        .Select(chunk => new DocumentChunk(chunk.Id, chunk.Index, chunk.Content, null));

    return await chunksQuery.ToListAsync();
}
|
||||
|
||||
/// <summary>
/// Retrieves a single chunk, including its embedding, identified by both its own id
/// and the id of the document it belongs to.
/// </summary>
/// <param name="documentId">Identifier of the document that must own the chunk.</param>
/// <param name="documentChunkId">Identifier of the chunk to retrieve.</param>
/// <returns>
/// The chunk projected to <see cref="DocumentChunk"/> with <c>Embedding</c> populated,
/// or <c>null</c> when no chunk matches both identifiers.
/// </returns>
public async Task<DocumentChunk?> GetDocumentChunkEmbeddingAsync(Guid documentId, Guid documentChunkId)
{
    // Both ids must match, so a chunk id paired with the wrong document yields null.
    var chunkQuery = dbContext.DocumentChunks
        .Where(chunk => chunk.Id == documentChunkId && chunk.DocumentId == documentId)
        .AsNoTracking()
        .Select(chunk => new DocumentChunk(chunk.Id, chunk.Index, chunk.Content, chunk.Embedding));

    return await chunkQuery.FirstOrDefaultAsync();
}
|
||||
|
||||
public async Task DeleteDocumentAsync(Guid documentId, bool saveChanges = true)
|
||||
{
|
||||
var document = await dbContext.Documents.Include(d => d.Chunks).FirstOrDefaultAsync(d => d.Id == documentId);
|
||||
|
||||
Reference in New Issue
Block a user