mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Improve docx parsing, chunk ordering, and DB config
- Add null check for Document in DocxContentDecoder to prevent exceptions.
- Set DocumentChunk.Id to auto-generate in ApplicationDbContext.
- Order vector search results by cosine similarity for relevance.
This commit is contained in:
@@ -14,7 +14,7 @@ public class DocxContentDecoder(IServiceProvider serviceProvider) : IContentDeco
|
||||
// Open a Word document for read-only access.
|
||||
using var document = WordprocessingDocument.Open(stream, false);
|
||||
|
||||
var body = document.MainDocumentPart?.Document.Body;
|
||||
var body = document.MainDocumentPart?.Document?.Body;
|
||||
var content = new StringBuilder();
|
||||
|
||||
foreach (var p in body?.Descendants<Paragraph>() ?? [])
|
||||
|
||||
@@ -34,7 +34,7 @@ public class ApplicationDbContext(DbContextOptions<ApplicationDbContext> options
|
||||
modelBuilder.Entity<DocumentChunk>(entity =>
|
||||
{
|
||||
entity.ToTable("DocumentChunks");
|
||||
entity.HasKey(e => e.Id);
|
||||
entity.HasKey(e => e.Id);
|
||||
|
||||
entity.Property(e => e.Id).ValueGeneratedOnAdd();
|
||||
entity.Property(e => e.Content).IsRequired();
|
||||
|
||||
@@ -152,7 +152,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
|
||||
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
|
||||
var embeddingVector = new SqlVector<float>(questionEmbedding);
|
||||
|
||||
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
|
||||
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
|
||||
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
|
||||
.Take(appSettings.MaxRelevantChunks)
|
||||
.ToListAsync(cancellationToken);
|
||||
|
||||
Reference in New Issue
Block a user