mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Improve docx parsing, chunk ordering, and DB config
- Add null check for Document in DocxContentDecoder to prevent exceptions. - Set DocumentChunk.Id to auto-generate in ApplicationDbContext. - Order vector search results by cosine similarity for relevance.
This commit is contained in:
@@ -14,7 +14,7 @@ public class DocxContentDecoder(IServiceProvider serviceProvider) : IContentDeco
|
|||||||
// Open a Word document for read-only access.
|
// Open a Word document for read-only access.
|
||||||
using var document = WordprocessingDocument.Open(stream, false);
|
using var document = WordprocessingDocument.Open(stream, false);
|
||||||
|
|
||||||
var body = document.MainDocumentPart?.Document.Body;
|
var body = document.MainDocumentPart?.Document?.Body;
|
||||||
var content = new StringBuilder();
|
var content = new StringBuilder();
|
||||||
|
|
||||||
foreach (var p in body?.Descendants<Paragraph>() ?? [])
|
foreach (var p in body?.Descendants<Paragraph>() ?? [])
|
||||||
|
|||||||
Reference in New Issue
Block a user