Filter out empty paragraphs in PdfContentDecoder

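Paragraphs returned by the text chunker that are empty or contain only whitespace are now filtered out before Chunk objects are created, so only meaningful text is included. Because the filter runs before the indexed projection, chunk indices are assigned contiguously to the paragraphs that remain.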
2026-06-20 12:23:10 +00:00 · 2025-05-27 17:19:25 +02:00
parent fa81f01c27
commit 1e531e5ad6
1 changed file with 1 addition and 1 deletion
@@ -28,6 +28,6 @@ public class PdfContentDecoder(IServiceProvider serviceProvider) : IContentDecod

        var paragraphs = textChunker.Split(pageText);

-        return paragraphs.Select((text, index) => new Chunk(pdfPage.Number, index, text));
+        return paragraphs.Where(p => !string.IsNullOrWhiteSpace(p)).Select((text, index) => new Chunk(pdfPage.Number, index, text));
    }
 }