mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Filter out empty paragraphs in PdfContentDecoder
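Paragraph processing now filters out empty or whitespace-only paragraphs before creating Chunk objects, so only paragraphs that contain actual text become chunks. Because the filter runs before the indexed Select, chunk indexes are assigned after filtering and remain contiguous within each page.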
This commit is contained in:
@@ -28,6 +28,6 @@ public class PdfContentDecoder(IServiceProvider serviceProvider) : IContentDecod
|
||||
|
||||
var paragraphs = textChunker.Split(pageText);
|
||||
|
||||
return paragraphs.Select((text, index) => new Chunk(pdfPage.Number, index, text));
|
||||
return paragraphs.Where(p => !string.IsNullOrWhiteSpace(p)).Select((text, index) => new Chunk(pdfPage.Number, index, text));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user