Filter out empty paragraphs in PdfContentDecoder

Updated the paragraph processing to exclude empty or whitespace-only entries before creating Chunk objects, ensuring only meaningful text is included.
This commit is contained in:
Marco Minerva
2025-05-27 17:19:25 +02:00
parent fa81f01c27
commit 1e531e5ad6
@@ -28,6 +28,6 @@ public class PdfContentDecoder(IServiceProvider serviceProvider) : IContentDecod
var paragraphs = textChunker.Split(pageText);
-return paragraphs.Select((text, index) => new Chunk(pdfPage.Number, index, text));
+return paragraphs.Where(p => !string.IsNullOrWhiteSpace(p)).Select((text, index) => new Chunk(pdfPage.Number, index, text));
}
}