Improve docx parsing, chunk ordering, and DB config

- Use null-conditional access on MainDocumentPart.Document in DocxContentDecoder so a missing Document no longer throws a NullReferenceException.
- Configure DocumentChunk.Id as a value generated on add in ApplicationDbContext.
- Order vector search results by ascending cosine distance so the most similar (most relevant) chunks come first.
This commit is contained in:
Marco Minerva
2026-01-13 14:43:14 +01:00
parent 1ae1db2628
commit 04d777c9d5
3 changed files with 3 additions and 3 deletions
@@ -14,7 +14,7 @@ public class DocxContentDecoder(IServiceProvider serviceProvider) : IContentDeco
// Open a Word document for read-only access.
using var document = WordprocessingDocument.Open(stream, false);
var body = document.MainDocumentPart?.Document.Body;
var body = document.MainDocumentPart?.Document?.Body;
var content = new StringBuilder();
foreach (var p in body?.Descendants<Paragraph>() ?? [])
@@ -34,7 +34,7 @@ public class ApplicationDbContext(DbContextOptions<ApplicationDbContext> options
modelBuilder.Entity<DocumentChunk>(entity =>
{
entity.ToTable("DocumentChunks");
entity.HasKey(e => e.Id);
entity.HasKey(e => e.Id);
entity.Property(e => e.Id).ValueGeneratedOnAdd();
entity.Property(e => e.Content).IsRequired();
@@ -152,7 +152,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
var embeddingVector = new SqlVector<float>(questionEmbedding);
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
.Take(appSettings.MaxRelevantChunks)
.ToListAsync(cancellationToken);