Merge pull request #4 from marcominerva/streaming

Add support for response streaming
This commit is contained in:
Marco Minerva
2025-01-28 11:36:05 +01:00
committed by GitHub
6 changed files with 102 additions and 14 deletions
Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

After

Width:  |  Height:  |  Size: 70 KiB

+2 -1
View File
@@ -1,3 +1,4 @@
namespace SqlDatabaseVectorSearch.Models; namespace SqlDatabaseVectorSearch.Models;
public record class Response(string Question, string Answer); // Question and Asnwer can be null when using response streaming.
public record class Response(string? Question, string? Answer, StreamState? StreamState = null);
@@ -0,0 +1,8 @@
namespace SqlDatabaseVectorSearch.Models;
public enum StreamState
{
Start,
Append,
End
}
+27 -1
View File
@@ -1,4 +1,5 @@
using System.ComponentModel; using System.ComponentModel;
using System.Text.Json.Serialization;
using Microsoft.AspNetCore.Http.HttpResults; using Microsoft.AspNetCore.Http.HttpResults;
using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore;
using Microsoft.SemanticKernel; using Microsoft.SemanticKernel;
@@ -49,6 +50,11 @@ builder.Services.AddSingleton<TokenizerService>();
builder.Services.AddSingleton<ChatService>(); builder.Services.AddSingleton<ChatService>();
builder.Services.AddScoped<VectorSearchService>(); builder.Services.AddScoped<VectorSearchService>();
builder.Services.ConfigureHttpJsonOptions(options =>
{
options.SerializerOptions.Converters.Add(new JsonStringEnumConverter());
});
builder.Services.AddOpenApi(options => builder.Services.AddOpenApi(options =>
{ {
options.RemoveServerList(); options.RemoveServerList();
@@ -114,7 +120,7 @@ documentsApiGroup.MapPost(string.Empty, async (IFormFile file, VectorSearchServi
.DisableAntiforgery() .DisableAntiforgery()
.ProducesProblem(StatusCodes.Status400BadRequest) .ProducesProblem(StatusCodes.Status400BadRequest)
.WithSummary("Uploads a document") .WithSummary("Uploads a document")
.WithDescription("Uploads a document to SQL Database and saves its embedding using the new native Vector type. The document will be indexed and used to answer questions. Currently, only PDF files are supported."); .WithDescription("Uploads a document to SQL Database and saves its embedding using the native VECTOR type. The document will be indexed and used to answer questions. Currently, only PDF files are supported.");
documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) => documentsApiGroup.MapDelete("{documentId:guid}", async (Guid documentId, VectorSearchService vectorSearchService) =>
{ {
@@ -134,4 +140,24 @@ app.MapPost("/api/ask", async (Question question, VectorSearchService vectorSear
.WithDescription("The question will be reformulated taking into account the context of the chat identified by the given ConversationId.") .WithDescription("The question will be reformulated taking into account the context of the chat identified by the given ConversationId.")
.WithTags("Ask"); .WithTags("Ask");
app.MapPost("/api/ask-streaming", (Question question, VectorSearchService vectorSearchService,
[Description("If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.")] bool reformulate = true) =>
{
async IAsyncEnumerable<Response> Stream()
{
// Requests a streaming response.
var responseStream = vectorSearchService.AskStreamingAsync(question, reformulate);
await foreach (var delta in responseStream)
{
yield return delta;
}
}
return Stream();
})
.WithSummary("Asks a question and gets the response as streaming")
.WithDescription("The question will be reformulated taking into account the context of the chat identified by the given ConversationId.")
.WithTags("Ask");
app.Run(); app.Run();
+37 -10
View File
@@ -35,6 +35,42 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
} }
public async Task<string> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question) public async Task<string> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question)
{
var chat = CreateChatAsync(chunks, question);
var answer = await chatCompletionService.GetChatMessageContentAsync(chat, new AzureOpenAIPromptExecutionSettings
{
MaxTokens = appSettings.MaxOutputTokens
});
// Add question and answer to the chat history.
await SetChatHistoryAsync(conversationId, question, answer.Content!);
return answer.Content!;
}
public async IAsyncEnumerable<string> AskStreamingAsync(Guid conversationId, IEnumerable<string> chunks, string question)
{
var chat = CreateChatAsync(chunks, question);
var answer = new StringBuilder();
await foreach (var token in chatCompletionService.GetStreamingChatMessageContentsAsync(chat, new AzureOpenAIPromptExecutionSettings
{
MaxTokens = appSettings.MaxOutputTokens
}))
{
if (!string.IsNullOrEmpty(token.Content))
{
yield return token.Content;
answer.Append(token.Content);
}
}
// Add question and answer to the chat history.
await SetChatHistoryAsync(conversationId, question, answer.ToString());
}
private ChatHistory CreateChatAsync(IEnumerable<string> chunks, string question)
{ {
var chat = new ChatHistory(""" var chat = new ChatHistory("""
You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question. You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question.
@@ -79,16 +115,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
} }
chat.AddUserMessage(prompt.ToString()); chat.AddUserMessage(prompt.ToString());
return chat;
var answer = await chatCompletionService.GetChatMessageContentAsync(chat, new AzureOpenAIPromptExecutionSettings
{
MaxTokens = appSettings.MaxOutputTokens
});
// Add question and answer to the chat history.
await SetChatHistoryAsync(conversationId, question, answer.Content!);
return answer.Content!;
} }
private async Task UpdateCacheAsync(Guid conversationId, ChatHistory chat) private async Task UpdateCacheAsync(Guid conversationId, ChatHistory chat)
@@ -81,6 +81,33 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
=> dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync(); => dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync();
public async Task<Response> AskQuestionAsync(Question question, bool reformulate = true) public async Task<Response> AskQuestionAsync(Question question, bool reformulate = true)
{
var (reformulatedQuestion, chunks) = await CreateContextAsync(question, reformulate);
var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion);
return new Response(reformulatedQuestion, answer);
}
public async IAsyncEnumerable<Response> AskStreamingAsync(Question question, bool reformulate = true)
{
var (reformulatedQuestion, chunks) = await CreateContextAsync(question, reformulate);
var answerStream = chatService.AskStreamingAsync(question.ConversationId, chunks, reformulatedQuestion);
// The first message contains the original question.
yield return new Response(reformulatedQuestion, null, StreamState.Start);
// Return each token as a partial response.
await foreach (var token in answerStream)
{
yield return new Response(null, token, StreamState.Append);
}
// The last message tells the client that the stream has ended.
yield return new Response(null, null, StreamState.End);
}
private async Task<(string Question, IEnumerable<string> Chunks)> CreateContextAsync(Question question, bool reformulate = true)
{ {
// Reformulate the following question taking into account the context of the chat to perform keyword search and embeddings: // Reformulate the following question taking into account the context of the chat to perform keyword search and embeddings:
var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text) : question.Text; var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text) : question.Text;
@@ -94,8 +121,7 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG
.Take(appSettings.MaxRelevantChunks) .Take(appSettings.MaxRelevantChunks)
.ToListAsync(); .ToListAsync();
var answer = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion); return (reformulatedQuestion, chunks);
return new Response(reformulatedQuestion, answer);
} }
private static Task<string> GetContentAsync(Stream stream) private static Task<string> GetContentAsync(Stream stream)