Add TokenizerService and update settings configuration

Updated Program.cs to use ConfigureAndGet method for settings,
changed ChatService to singleton, and added TokenizerService
singleton. Modified ChatService to use TokenizerService for
token counting. Updated AppSettings and AzureOpenAISettings
with new properties. Added new package references in
SqlDatabaseVectorSearch.csproj. Updated appsettings.json with
new properties. Added TokenizerService class for token counting.
This commit is contained in:
Marco Minerva
2024-11-06 17:20:05 +01:00
parent c18a6b4e03
commit 5b43031251
7 changed files with 69 additions and 24 deletions
+3 -2
View File
@@ -14,7 +14,7 @@ var builder = WebApplication.CreateBuilder(args);
builder.Configuration.AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true); builder.Configuration.AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true);
// Add services to the container. // Add services to the container.
var aiSettings = builder.Configuration.GetSection<AzureOpenAISettings>("AzureOpenAI")!; var aiSettings = builder.Services.ConfigureAndGet<AzureOpenAISettings>(builder.Configuration, "AzureOpenAI")!;
var appSettings = builder.Services.ConfigureAndGet<AppSettings>(builder.Configuration, nameof(AppSettings))!; var appSettings = builder.Services.ConfigureAndGet<AppSettings>(builder.Configuration, nameof(AppSettings))!;
builder.Services.AddSingleton(TimeProvider.System); builder.Services.AddSingleton(TimeProvider.System);
@@ -35,7 +35,8 @@ builder.Services.AddKernel()
.AddAzureOpenAITextEmbeddingGeneration(aiSettings.Embedding.Deployment, aiSettings.Embedding.Endpoint, aiSettings.Embedding.ApiKey, dimensions: aiSettings.Embedding.Dimensions) .AddAzureOpenAITextEmbeddingGeneration(aiSettings.Embedding.Deployment, aiSettings.Embedding.Endpoint, aiSettings.Embedding.ApiKey, dimensions: aiSettings.Embedding.Dimensions)
.AddAzureOpenAIChatCompletion(aiSettings.ChatCompletion.Deployment, aiSettings.ChatCompletion.Endpoint, aiSettings.ChatCompletion.ApiKey); .AddAzureOpenAIChatCompletion(aiSettings.ChatCompletion.Deployment, aiSettings.ChatCompletion.Endpoint, aiSettings.ChatCompletion.ApiKey);
builder.Services.AddScoped<ChatService>(); builder.Services.AddSingleton<TokenizerService>();
builder.Services.AddSingleton<ChatService>();
builder.Services.AddScoped<VectorSearchService>(); builder.Services.AddScoped<VectorSearchService>();
builder.Services.AddEndpointsApiExplorer(); builder.Services.AddEndpointsApiExplorer();
+34 -15
View File
@@ -2,11 +2,12 @@
using Microsoft.Extensions.Caching.Memory; using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using SqlDatabaseVectorSearch.Settings; using SqlDatabaseVectorSearch.Settings;
namespace SqlDatabaseVectorSearch.Services; namespace SqlDatabaseVectorSearch.Services;
public class ChatService(IMemoryCache cache, IChatCompletionService chatCompletionService, IOptions<AppSettings> appSettingsOptions) public class ChatService(IMemoryCache cache, IChatCompletionService chatCompletionService, TokenizerService tokenizerService, IOptions<AppSettings> appSettingsOptions)
{ {
private readonly AppSettings appSettings = appSettingsOptions.Value; private readonly AppSettings appSettings = appSettingsOptions.Value;
@@ -35,36 +36,54 @@ public class ChatService(IMemoryCache cache, IChatCompletionService chatCompleti
public async Task<string> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question) public async Task<string> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question)
{ {
var chat = new ChatHistory("""" var chat = new ChatHistory("""
"""
You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question. You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question.
For example, if the user asks "What is the capital of France?" and in this chat there isn't information about France, you should reply something like "This information isn't available in the given context". For example, if the user asks "What is the capital of France?" and in this chat there isn't information about France, you should reply something like "This information isn't available in the given context".
Never answer to questions that are not related to this chat. Never answer to questions that are not related to this chat.
You must answer in the same language of the user's question. You must answer in the same language of the user's question.
""""); """);
var prompt = new StringBuilder(""" var prompt = new StringBuilder($"""
Answer the following question:
---
{question}
---
Using the following information: Using the following information:
--- ---
"""); """);
// TODO: Ensure that chunks are not too long, according to the model max token. var tokensAvailable = appSettings.MaxInputTokens
foreach (var result in chunks) - tokenizerService.CountTokens(chat[0].ToString()) - tokenizerService.CountTokens(prompt.ToString())
- appSettings.MaxOutputTokens; // To ensure there is enough space for the answer.
foreach (var chunk in chunks)
{ {
prompt.AppendLine(result); var text = $"{chunk}---";
prompt.AppendLine("---");
var tokenCount = tokenizerService.CountTokens(text);
if (tokenCount > tokensAvailable)
{
// There isn't enough space to add the text.
break;
} }
prompt.AppendLine($""" prompt.AppendLine(text);
Answer the following question:
--- tokensAvailable -= tokenCount;
{question} if (tokensAvailable <= 0)
"""); {
// There isn't enough space to add more chunks.
break;
}
}
chat.AddUserMessage(prompt.ToString()); chat.AddUserMessage(prompt.ToString());
var answer = await chatCompletionService.GetChatMessageContentAsync(chat)!; var answer = await chatCompletionService.GetChatMessageContentAsync(chat, new AzureOpenAIPromptExecutionSettings
{
MaxTokens = appSettings.MaxOutputTokens
});
// Add question and answer to the chat history. // Add question and answer to the chat history.
var history = new ChatHistory(cache.Get<ChatHistory?>(conversationId) ?? []); var history = new ChatHistory(cache.Get<ChatHistory?>(conversationId) ?? []);
@@ -0,0 +1,13 @@
using Microsoft.Extensions.Options;
using Microsoft.ML.Tokenizers;
using SqlDatabaseVectorSearch.Settings;
namespace SqlDatabaseVectorSearch.Services;
/// <summary>
/// Counts tokens with a <see cref="TiktokenTokenizer"/> that is created once, when the
/// service is constructed, for the chat completion model named in the Azure OpenAI settings.
/// </summary>
/// <param name="settingsOptions">
/// Options wrapper whose <c>ChatCompletion.ModelId</c> value selects the tokenizer.
/// </param>
public class TokenizerService(IOptions<AzureOpenAISettings> settingsOptions)
{
    // Built a single time per service instance and reused by every CountTokens call.
    private readonly TiktokenTokenizer modelTokenizer = BuildTokenizer(settingsOptions.Value);

    /// <summary>
    /// Returns the number of tokens the configured tokenizer reports for <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The text to measure.</param>
    /// <returns>The token count computed by the underlying tokenizer.</returns>
    public int CountTokens(string input)
    {
        var tokenCount = modelTokenizer.CountTokens(input);
        return tokenCount;
    }

    // Creates the tokenizer that matches the configured chat completion model identifier.
    private static TiktokenTokenizer BuildTokenizer(AzureOpenAISettings settings)
    {
        var modelId = settings.ChatCompletion.ModelId;
        return TiktokenTokenizer.CreateForModel(modelId);
    }
}
@@ -10,6 +10,10 @@ public class AppSettings
public int MaxRelevantChunks { get; init; } = 5; public int MaxRelevantChunks { get; init; } = 5;
public int MaxInputTokens { get; init; } = 16385;
public int MaxOutputTokens { get; init; } = 800;
public int MessageLimit { get; init; } public int MessageLimit { get; init; }
public TimeSpan MessageExpiration { get; init; } public TimeSpan MessageExpiration { get; init; }
@@ -4,7 +4,7 @@ public class AzureOpenAISettings
{ {
public required ServiceSettings ChatCompletion { get; init; } public required ServiceSettings ChatCompletion { get; init; }
public required EmbeddingServiceSettings Embedding { get; init; } public required EmbeddingSettings Embedding { get; init; }
} }
public class ServiceSettings public class ServiceSettings
@@ -13,10 +13,12 @@ public class ServiceSettings
public required string Deployment { get; init; } public required string Deployment { get; init; }
public required string ModelId { get; init; }
public required string ApiKey { get; init; } public required string ApiKey { get; init; }
} }
public class EmbeddingServiceSettings : ServiceSettings public class EmbeddingSettings : ServiceSettings
{ {
public int? Dimensions { get; set; } public int? Dimensions { get; set; }
} }
@@ -12,7 +12,9 @@
<PackageReference Include="EntityFrameworkCore.Exceptions.SqlServer" Version="8.1.3" /> <PackageReference Include="EntityFrameworkCore.Exceptions.SqlServer" Version="8.1.3" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.10" /> <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.10" />
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="8.0.10" /> <PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="8.0.10" />
<PackageReference Include="Microsoft.SemanticKernel" Version="1.26.0" /> <PackageReference Include="Microsoft.ML.Tokenizers" Version="0.22.0-preview.24526.1" />
<PackageReference Include="Microsoft.ML.Tokenizers.Data.O200kBase" Version="0.22.0-preview.24526.1" />
<PackageReference Include="Microsoft.SemanticKernel" Version="1.27.0" />
<PackageReference Include="MinimalHelpers.OpenApi" Version="2.0.17" /> <PackageReference Include="MinimalHelpers.OpenApi" Version="2.0.17" />
<PackageReference Include="PdfPig" Version="0.1.9" /> <PackageReference Include="PdfPig" Version="0.1.9" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.9.0" /> <PackageReference Include="Swashbuckle.AspNetCore" Version="6.9.0" />
+5 -1
View File
@@ -6,12 +6,14 @@
"ChatCompletion": { "ChatCompletion": {
"Endpoint": "", "Endpoint": "",
"Deployment": "", "Deployment": "",
"ApiKey": "" "ApiKey": "",
"ModelId": ""
}, },
"Embedding": { "Embedding": {
"Endpoint": "", "Endpoint": "",
"Deployment": "", "Deployment": "",
"ApiKey": "", "ApiKey": "",
"ModelId": "",
// Set this value only if you're using a model that allows to specify the dimensions of the embeddings // Set this value only if you're using a model that allows to specify the dimensions of the embeddings
// (e.g. text-embedding-3-small or text-embedding-3-large). Currently, a maximum value of 1998 is supported. // (e.g. text-embedding-3-small or text-embedding-3-large). Currently, a maximum value of 1998 is supported.
"Dimensions": null "Dimensions": null
@@ -22,6 +24,8 @@
"MaxTokensPerParagraph": 1024, "MaxTokensPerParagraph": 1024,
"OverlapTokens": 100, "OverlapTokens": 100,
"MaxRelevantChunks": 10, "MaxRelevantChunks": 10,
"MaxInputTokens": 16385,
"MaxOutputTokens": 800,
"MessageLimit": 20, "MessageLimit": 20,
"MessageExpiration": "00:05:00" "MessageExpiration": "00:05:00"
}, },