mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Add support for DOCX and TXT files, update error handling
Updated README.md to reflect support for PDF, DOCX, and TXT files. Removed commented-out code in DocxContentDecoder.cs. Added TextContentDecoder service in Program.cs and updated exception handling middleware. Updated document upload endpoint description in Program.cs. Modified VectorSearchService to throw NotSupportedException for unsupported content types. Added TextContentDecoder class in TextContentDecoder.cs.
This commit is contained in:
@@ -21,41 +21,5 @@ public class DocxContentDecoder : IContentDecoder
|
||||
}
|
||||
|
||||
return Task.FromResult(content.ToString());
|
||||
|
||||
//foreach (var paragraph in body!.Elements<Paragraph>())
|
||||
//{
|
||||
// foreach (var element in paragraph.Elements())
|
||||
// {
|
||||
// if (element is Run run)
|
||||
// {
|
||||
// DecodeTextFromRun(run);
|
||||
// }
|
||||
// else if (element is Hyperlink hyperlink)
|
||||
// {
|
||||
// foreach (var hyperlinkRun in hyperlink.Elements<Run>())
|
||||
// {
|
||||
// DecodeTextFromRun(hyperlinkRun);
|
||||
// }
|
||||
|
||||
// //var hyperlinkUri = doc.MainDocumentPart.HyperlinkRelationships.FirstOrDefault(r => r.Id == hyperlink.Id)?.Uri;
|
||||
// //if (hyperlinkUri is not null)
|
||||
// //{
|
||||
// // content.Append($" ({hyperlinkUri})");
|
||||
// //}
|
||||
// }
|
||||
// }
|
||||
|
||||
// content.AppendLine(); // Preserve whitespace and blank lines.
|
||||
//}
|
||||
|
||||
//return Task.FromResult(content.ToString());
|
||||
|
||||
//void DecodeTextFromRun(Run run)
|
||||
//{
|
||||
// foreach (var text in run.Elements<Text>())
|
||||
// {
|
||||
// content.Append(text.Text);
|
||||
// }
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace SqlDatabaseVectorSearch.ContentDecoders;
|
||||
|
||||
/// <summary>
/// Content decoder for plain-text documents: returns the content of the stream as a string.
/// </summary>
public class TextContentDecoder : IContentDecoder
{
    /// <summary>
    /// Reads <paramref name="stream"/> from its current position to the end and returns the text.
    /// </summary>
    /// <param name="stream">
    /// The stream to read. It is left open: the caller that created it remains responsible for disposing it.
    /// </param>
    /// <param name="contentType">The content type of the document. Not used by this decoder.</param>
    /// <returns>The text read from the stream.</returns>
    public async Task<string> DecodeAsync(Stream stream, string contentType)
    {
        // The decoder does not own the stream, so disposing the reader must not close it
        // (StreamReader closes the underlying stream by default).
        using var reader = new StreamReader(stream, leaveOpen: true);

        return await reader.ReadToEndAsync();
    }
}
|
||||
@@ -54,6 +54,7 @@ builder.Services.AddScoped<VectorSearchService>();
|
||||
|
||||
builder.Services.AddKeyedSingleton<IContentDecoder, PdfContentDecoder>(MediaTypeNames.Application.Pdf);
|
||||
builder.Services.AddKeyedSingleton<IContentDecoder, DocxContentDecoder>("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
|
||||
builder.Services.AddKeyedSingleton<IContentDecoder, TextContentDecoder>(MediaTypeNames.Text.Plain);
|
||||
|
||||
builder.Services.ConfigureHttpJsonOptions(options =>
|
||||
{
|
||||
@@ -74,7 +75,15 @@ var app = builder.Build();
|
||||
// Configure the HTTP request pipeline.
|
||||
app.UseHttpsRedirection();
|
||||
|
||||
app.UseExceptionHandler();
|
||||
app.UseExceptionHandler(new ExceptionHandlerOptions
|
||||
{
|
||||
StatusCodeSelector = exception => exception switch
|
||||
{
|
||||
NotSupportedException => StatusCodes.Status501NotImplemented,
|
||||
_ => StatusCodes.Status500InternalServerError
|
||||
}
|
||||
});
|
||||
|
||||
app.UseStatusCodePages();
|
||||
|
||||
app.MapOpenApi();
|
||||
@@ -125,7 +134,7 @@ documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchServi
|
||||
.DisableAntiforgery()
|
||||
.ProducesProblem(StatusCodes.Status400BadRequest)
|
||||
.WithSummary("Uploads a document")
|
||||
.WithDescription("Uploads a document to SQL Database and saves its embedding using the native VECTOR type. The document will be indexed and used to answer questions. Currently, only PDF files are supported.");
|
||||
.WithDescription("Uploads a document to SQL Database and saves its embedding using the native VECTOR type. The document will be indexed and used to answer questions. Currently, PDF, DOCX and TXT files are supported.");
|
||||
|
||||
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
|
||||
{
|
||||
|
||||
@@ -18,7 +18,7 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
|
||||
public async Task<Guid> ImportAsync(Stream stream, string name, string contentType, Guid? documentId)
|
||||
{
|
||||
// Extract the contents of the file.
|
||||
var decoder = serviceProvider.GetRequiredKeyedService<IContentDecoder>(contentType);
|
||||
var decoder = serviceProvider.GetKeyedService<IContentDecoder>(contentType) ?? throw new NotSupportedException($"Content type '{contentType}' is not supported.");
|
||||
var content = await decoder.DecodeAsync(stream, contentType);
|
||||
|
||||
await dbContext.Database.BeginTransactionAsync();
|
||||
|
||||
Reference in New Issue
Block a user