mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Improve docx parsing, chunk ordering, and DB config
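- Add a null-conditional access on Document in DocxContentDecoder to prevent exceptions when the main document part has no document.
- Configure DocumentChunk.Id as value-generated on add in ApplicationDbContext.
- Order vector search results by ascending cosine distance so the most relevant chunks come first.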
This commit is contained in:
@@ -14,7 +14,7 @@ public class DocxContentDecoder(IServiceProvider serviceProvider) : IContentDeco
|
|||||||
// Open a Word document for read-only access.
|
// Open a Word document for read-only access.
|
||||||
using var document = WordprocessingDocument.Open(stream, false);
|
using var document = WordprocessingDocument.Open(stream, false);
|
||||||
|
|
||||||
var body = document.MainDocumentPart?.Document.Body;
|
var body = document.MainDocumentPart?.Document?.Body;
|
||||||
var content = new StringBuilder();
|
var content = new StringBuilder();
|
||||||
|
|
||||||
foreach (var p in body?.Descendants<Paragraph>() ?? [])
|
foreach (var p in body?.Descendants<Paragraph>() ?? [])
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ public class ApplicationDbContext(DbContextOptions<ApplicationDbContext> options
|
|||||||
modelBuilder.Entity<DocumentChunk>(entity =>
|
modelBuilder.Entity<DocumentChunk>(entity =>
|
||||||
{
|
{
|
||||||
entity.ToTable("DocumentChunks");
|
entity.ToTable("DocumentChunks");
|
||||||
entity.HasKey(e => e.Id);
|
entity.HasKey(e => e.Id);
|
||||||
|
|
||||||
entity.Property(e => e.Id).ValueGeneratedOnAdd();
|
entity.Property(e => e.Id).ValueGeneratedOnAdd();
|
||||||
entity.Property(e => e.Content).IsRequired();
|
entity.Property(e => e.Content).IsRequired();
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
|
|||||||
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
|
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
|
||||||
var embeddingVector = new SqlVector<float>(questionEmbedding);
|
var embeddingVector = new SqlVector<float>(questionEmbedding);
|
||||||
|
|
||||||
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
|
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
|
||||||
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
|
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
|
||||||
.Take(appSettings.MaxRelevantChunks)
|
.Take(appSettings.MaxRelevantChunks)
|
||||||
.ToListAsync(cancellationToken);
|
.ToListAsync(cancellationToken);
|
||||||
|
|||||||
Reference in New Issue
Block a user