Add support for DOCX and TXT files, update error handling

Updated README.md to reflect support for PDF, DOCX, and TXT files.
Removed commented-out code in DocxContentDecoder.cs.
Added TextContentDecoder service in Program.cs and updated exception handling middleware.
Updated document upload endpoint description in Program.cs.
Modified VectorSearchService to throw NotSupportedException for unsupported content types.
Added TextContentDecoder class in TextContentDecoder.cs.
This commit is contained in:
Marco Minerva
2025-01-29 09:58:22 +01:00
parent 110e21e1e0
commit af9158873f
5 changed files with 26 additions and 41 deletions
@@ -21,41 +21,5 @@ public class DocxContentDecoder : IContentDecoder
}
return Task.FromResult(content.ToString());
//foreach (var paragraph in body!.Elements<Paragraph>())
//{
// foreach (var element in paragraph.Elements())
// {
// if (element is Run run)
// {
// DecodeTextFromRun(run);
// }
// else if (element is Hyperlink hyperlink)
// {
// foreach (var hyperlinkRun in hyperlink.Elements<Run>())
// {
// DecodeTextFromRun(hyperlinkRun);
// }
// //var hyperlinkUri = doc.MainDocumentPart.HyperlinkRelationships.FirstOrDefault(r => r.Id == hyperlink.Id)?.Uri;
// //if (hyperlinkUri is not null)
// //{
// // content.Append($" ({hyperlinkUri})");
// //}
// }
// }
// content.AppendLine(); // Preserve whitespace and blank lines.
//}
//return Task.FromResult(content.ToString());
//void DecodeTextFromRun(Run run)
//{
// foreach (var text in run.Elements<Text>())
// {
// content.Append(text.Text);
// }
//}
}
}
@@ -0,0 +1,12 @@
namespace SqlDatabaseVectorSearch.ContentDecoders;
/// <summary>
/// Content decoder for plain-text input: the text read from the stream is returned as-is.
/// </summary>
public class TextContentDecoder : IContentDecoder
{
    /// <summary>
    /// Reads the text from <paramref name="stream"/>, starting at its current position, through to the end.
    /// </summary>
    /// <param name="stream">
    /// Source of the text. It is wrapped in a <see cref="StreamReader"/> that is disposed before
    /// this method returns; with the constructor used here, disposing the reader also closes the stream.
    /// </param>
    /// <param name="contentType">Not used by this decoder.</param>
    /// <returns>The text read from the stream.</returns>
    public async Task<string> DecodeAsync(Stream stream, string contentType)
    {
        // The reader is scoped to this block so it is released as soon as the text has been read.
        using (var reader = new StreamReader(stream))
        {
            return await reader.ReadToEndAsync();
        }
    }
}