From 04d777c9d5adfd6f321b5a7891a691bd44a69b04 Mon Sep 17 00:00:00 2001
From: Marco Minerva
Date: Tue, 13 Jan 2026 14:43:14 +0100
Subject: [PATCH] Improve docx parsing, chunk ordering, and DB config

- Add null check for Document in DocxContentDecoder to prevent exceptions.
- Set DocumentChunk.Id to auto-generate in ApplicationDbContext.
- Order vector search results by cosine similarity for relevance.
---
 SqlDatabaseVectorSearch/ContentDecoders/DocxContentDecoder.cs | 2 +-
 SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs | 2 +-
 SqlDatabaseVectorSearch/Services/VectorSearchService.cs | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/SqlDatabaseVectorSearch/ContentDecoders/DocxContentDecoder.cs b/SqlDatabaseVectorSearch/ContentDecoders/DocxContentDecoder.cs
index 00579c4..7b0bac1 100644
--- a/SqlDatabaseVectorSearch/ContentDecoders/DocxContentDecoder.cs
+++ b/SqlDatabaseVectorSearch/ContentDecoders/DocxContentDecoder.cs
@@ -14,7 +14,7 @@ public class DocxContentDecoder(IServiceProvider serviceProvider) : IContentDeco
         // Open a Word document for read-only access.
         using var document = WordprocessingDocument.Open(stream, false);
 
-        var body = document.MainDocumentPart?.Document.Body;
+        var body = document.MainDocumentPart?.Document?.Body;
 
         var content = new StringBuilder();
         foreach (var p in body?.Descendants() ?? [])
diff --git a/SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs b/SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs
index e9e41a3..a595973 100644
--- a/SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs
+++ b/SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs
@@ -34,7 +34,7 @@ public class ApplicationDbContext(DbContextOptions options
         modelBuilder.Entity(entity =>
         {
             entity.ToTable("DocumentChunks");
-            entity.HasKey(e => e.Id);
+            entity.HasKey(e => e.Id);
             entity.Property(e => e.Id).ValueGeneratedOnAdd();
 
             entity.Property(e => e.Content).IsRequired();
diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
index 113af4b..f18d39e 100644
--- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
+++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
@@ -152,7 +152,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
         var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
         var embeddingVector = new SqlVector(questionEmbedding);
 
-        var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
+        var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
             .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
             .Take(appSettings.MaxRelevantChunks)
             .ToListAsync(cancellationToken);