mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Merge pull request #4 from marcominerva/streaming
Add support for response streaming
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 70 KiB |
@@ -1,3 +1,4 @@
|
|||||||
namespace SqlDatabaseVectorSearch.Models;
|
namespace SqlDatabaseVectorSearch.Models;
|
||||||
|
|
||||||
public record class Response(string Question, string Answer);
|
// Question and Asnwer can be null when using response streaming.
|
||||||
|
public record class Response(string? Question, string? Answer, StreamState? StreamState = null);
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
namespace SqlDatabaseVectorSearch.Models;
|
||||||
|
|
||||||
|
public enum StreamState
|
||||||
|
{
|
||||||
|
Start,
|
||||||
|
Append,
|
||||||
|
End
|
||||||
|
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
using System.ComponentModel;
|
using System.ComponentModel;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
using Microsoft.AspNetCore.Http.HttpResults;
|
using Microsoft.AspNetCore.Http.HttpResults;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Microsoft.SemanticKernel;
|
using Microsoft.SemanticKernel;
|
||||||
@@ -49,6 +50,11 @@ builder.Services.AddSingleton<TokenizerService>();
|
|||||||
builder.Services.AddSingleton<ChatService>();
|
builder.Services.AddSingleton<ChatService>();
|
||||||
builder.Services.AddScoped<VectorSearchService>();
|
builder.Services.AddScoped<VectorSearchService>();
|
||||||
|
|
||||||
|
builder.Services.ConfigureHttpJsonOptions(options =>
|
||||||
|
{
|
||||||
|
options.SerializerOptions.Converters.Add(new JsonStringEnumConverter());
|
||||||
|
});
|
||||||
|
|
||||||
builder.Services.AddOpenApi(options =>
|
builder.Services.AddOpenApi(options =>
|
||||||
{
|
{
|
||||||
options.RemoveServerList();
|
options.RemoveServerList();
|
||||||
@@ -114,7 +120,7 @@ documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchServi
|
|||||||
.DisableAntiforgery()
|
.DisableAntiforgery()
|
||||||
.ProducesProblem(StatusCodes.Status400BadRequest)
|
.ProducesProblem(StatusCodes.Status400BadRequest)
|
||||||
.WithSummary("Uploads a document")
|
.WithSummary("Uploads a document")
|
||||||
.WithDescription("Uploads a document to SQL Database and saves its embedding using the new native Vector type. The document will be indexed and used to answer questions. Currently, only PDF files are supported.");
|
.WithDescription("Uploads a document to SQL Database and saves its embedding using the native VECTOR type. The document will be indexed and used to answer questions. Currently, only PDF files are supported.");
|
||||||
|
|
||||||
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
|
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
|
||||||
{
|
{
|
||||||
@@ -134,4 +140,24 @@ app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSear
|
|||||||
.WithDescription("The question will be reformulated taking into account the context of the chat identified by the given ConversationId.")
|
.WithDescription("The question will be reformulated taking into account the context of the chat identified by the given ConversationId.")
|
||||||
.WithTags("Ask");
|
.WithTags("Ask");
|
||||||
|
|
||||||
|
app.MapPost("/api/ask-streaming", (Question question, VectorSearchService vectorSearchService,
|
||||||
|
[Description("If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.")] bool reformulate = true) =>
|
||||||
|
{
|
||||||
|
async IAsyncEnumerable<Response> Stream()
|
||||||
|
{
|
||||||
|
// Requests a streaming response.
|
||||||
|
var responseStream = vectorSearchService.AskStreamingAsync(question, reformulate);
|
||||||
|
|
||||||
|
await foreach (var delta in responseStream)
|
||||||
|
{
|
||||||
|
yield return delta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Stream();
|
||||||
|
})
|
||||||
|
.WithSummary("Asks a question and gets the response as streaming")
|
||||||
|
.WithDescription("The question will be reformulated taking into account the context of the chat identified by the given ConversationId.")
|
||||||
|
.WithTags("Ask");
|
||||||
|
|
||||||
app.Run();
|
app.Run();
|
||||||
@@ -35,6 +35,42 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
|
|||||||
}
|
}
|
||||||
|
|
||||||
public async Task<string> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question)
|
public async Task<string> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question)
|
||||||
|
{
|
||||||
|
var chat = CreateChatAsync(chunks, question);
|
||||||
|
|
||||||
|
var answer = await chatCompletionService.GetChatMessageContentAsync(chat, new AzureOpenAIPromptExecutionSettings
|
||||||
|
{
|
||||||
|
MaxTokens = appSettings.MaxOutputTokens
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add question and answer to the chat history.
|
||||||
|
await SetChatHistoryAsync(conversationId, question, answer.Content!);
|
||||||
|
|
||||||
|
return answer.Content!;
|
||||||
|
}
|
||||||
|
|
||||||
|
public async IAsyncEnumerable<string> AskStreamingAsync(Guid conversationId, IEnumerable<string> chunks, string question)
|
||||||
|
{
|
||||||
|
var chat = CreateChatAsync(chunks, question);
|
||||||
|
|
||||||
|
var answer = new StringBuilder();
|
||||||
|
await foreach (var token in chatCompletionService.GetStreamingChatMessageContentsAsync(chat, new AzureOpenAIPromptExecutionSettings
|
||||||
|
{
|
||||||
|
MaxTokens = appSettings.MaxOutputTokens
|
||||||
|
}))
|
||||||
|
{
|
||||||
|
if (!string.IsNullOrEmpty(token.Content))
|
||||||
|
{
|
||||||
|
yield return token.Content;
|
||||||
|
answer.Append(token.Content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add question and answer to the chat history.
|
||||||
|
await SetChatHistoryAsync(conversationId, question, answer.ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
private ChatHistory CreateChatAsync(IEnumerable<string> chunks, string question)
|
||||||
{
|
{
|
||||||
var chat = new ChatHistory("""
|
var chat = new ChatHistory("""
|
||||||
You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question.
|
You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question.
|
||||||
@@ -79,16 +115,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
|
|||||||
}
|
}
|
||||||
|
|
||||||
chat.AddUserMessage(prompt.ToString());
|
chat.AddUserMessage(prompt.ToString());
|
||||||
|
return chat;
|
||||||
var answer = await chatCompletionService.GetChatMessageContentAsync(chat, new AzureOpenAIPromptExecutionSettings
|
|
||||||
{
|
|
||||||
MaxTokens = appSettings.MaxOutputTokens
|
|
||||||
});
|
|
||||||
|
|
||||||
// Add question and answer to the chat history.
|
|
||||||
await SetChatHistoryAsync(conversationId, question, answer.Content!);
|
|
||||||
|
|
||||||
return answer.Content!;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task UpdateCacheAsync(Guid conversationId, ChatHistory chat)
|
private async Task UpdateCacheAsync(Guid conversationId, ChatHistory chat)
|
||||||
|
|||||||
@@ -81,6 +81,33 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
|||||||
=> dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync();
|
=> dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync();
|
||||||
|
|
||||||
public async Task<Response> AskQuestionAsync(Question question, bool reformulate = true)
|
public async Task<Response> AskQuestionAsync(Question question, bool reformulate = true)
|
||||||
|
{
|
||||||
|
var (reformulatedQuestion, chunks) = await CreateContextAsync(question, reformulate);
|
||||||
|
|
||||||
|
var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion);
|
||||||
|
return new Response(reformulatedQuestion, answer);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async IAsyncEnumerable<Response> AskStreamingAsync(Question question, bool reformulate = true)
|
||||||
|
{
|
||||||
|
var (reformulatedQuestion, chunks) = await CreateContextAsync(question, reformulate);
|
||||||
|
|
||||||
|
var answerStream = chatService.AskStreamingAsync(question.ConversationId, chunks, reformulatedQuestion);
|
||||||
|
|
||||||
|
// The first message contains the original question.
|
||||||
|
yield return new Response(reformulatedQuestion, null, StreamState.Start);
|
||||||
|
|
||||||
|
// Return each token as a partial response.
|
||||||
|
await foreach (var token in answerStream)
|
||||||
|
{
|
||||||
|
yield return new Response(null, token, StreamState.Append);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The last message tells the client that the stream has ended.
|
||||||
|
yield return new Response(null, null, StreamState.End);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async Task<(string Question, IEnumerable<string> Chunks)> CreateContextAsync(Question question, bool reformulate = true)
|
||||||
{
|
{
|
||||||
// Reformulate the following question taking into account the context of the chat to perform keyword search and embeddings:
|
// Reformulate the following question taking into account the context of the chat to perform keyword search and embeddings:
|
||||||
var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text) : question.Text;
|
var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text) : question.Text;
|
||||||
@@ -94,8 +121,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
|
|||||||
.Take(appSettings.MaxRelevantChunks)
|
.Take(appSettings.MaxRelevantChunks)
|
||||||
.ToListAsync();
|
.ToListAsync();
|
||||||
|
|
||||||
var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion);
|
return (reformulatedQuestion, chunks);
|
||||||
return new Response(reformulatedQuestion, answer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Task<string> GetContentAsync(Stream stream)
|
private static Task<string> GetContentAsync(Stream stream)
|
||||||
|
|||||||
Reference in New Issue
Block a user