diff --git a/SqlDatabaseVectorSearch/Program.cs b/SqlDatabaseVectorSearch/Program.cs index 48ccca6..bdfd360 100644 --- a/SqlDatabaseVectorSearch/Program.cs +++ b/SqlDatabaseVectorSearch/Program.cs @@ -13,7 +13,7 @@ var builder = WebApplication.CreateBuilder(args); builder.Configuration.AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true); // Add services to the container. -var aiSettings = builder.Configuration.GetSection("AzureOpenAI")!; +var aiSettings = builder.Services.ConfigureAndGet(builder.Configuration, "AzureOpenAI")!; var appSettings = builder.Services.ConfigureAndGet(builder.Configuration, nameof(AppSettings))!; builder.Services.AddSingleton(TimeProvider.System); @@ -40,6 +40,7 @@ builder.Services.AddKernel() .AddAzureOpenAITextEmbeddingGeneration(aiSettings.Embedding.Deployment, aiSettings.Embedding.Endpoint, aiSettings.Embedding.ApiKey, dimensions: aiSettings.Embedding.Dimensions) .AddAzureOpenAIChatCompletion(aiSettings.ChatCompletion.Deployment, aiSettings.ChatCompletion.Endpoint, aiSettings.ChatCompletion.ApiKey); +builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddScoped(); diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index bcce685..c320d1c 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -1,10 +1,13 @@ using System.Text; using Microsoft.Extensions.Caching.Hybrid; +using Microsoft.Extensions.Options; using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.AzureOpenAI; +using SqlDatabaseVectorSearch.Settings; namespace SqlDatabaseVectorSearch.Services; -public class ChatService(IChatCompletionService chatCompletionService, HybridCache cache) +public class ChatService(IChatCompletionService chatCompletionService, TokenizerService tokenizerService, HybridCache cache, IOptions appSettingsOptions) { public async Task CreateQuestionAsync(Guid conversationId, string question) { @@ -38,29 +41,47 @@ public class ChatService(IChatCompletionService chatCompletionService, HybridCac You must answer in the same language of the user's question. """); - var prompt = new StringBuilder(""" + var prompt = new StringBuilder($""" + Answer the following question: + --- + {question} + ===== Using the following information: """); - // TODO: Ensure that chunks are not too long, according to the model max token. - foreach (var text in chunks) + var tokensAvailable = appSettings.MaxInputTokens + - tokenizerService.CountTokens(chat[0].ToString()) // System prompt. + - tokenizerService.CountTokens(prompt.ToString()) // Initial user prompt. + - appSettings.MaxOutputTokens; // To ensure there is enough space for the answer. + + foreach (var chunk in chunks) { - prompt.AppendLine("---"); + var text = $"---{Environment.NewLine}{chunk}"; + + var tokenCount = tokenizerService.CountTokens(text); + if (tokenCount > tokensAvailable) + { + // There isn't enough space to add the text. + break; + } + prompt.Append(text); + + tokensAvailable -= tokenCount; + if (tokensAvailable <= 0) + { + // There isn't enough space to add more chunks. + break; + } } - prompt.AppendLine($""" - - ===== - Answer the following question: - --- - {question} - """); - chat.AddUserMessage(prompt.ToString()); - var answer = await chatCompletionService.GetChatMessageContentAsync(chat)!; + var answer = await chatCompletionService.GetChatMessageContentAsync(chat, new AzureOpenAIPromptExecutionSettings + { + MaxTokens = appSettings.MaxOutputTokens + }); // Add question and answer to the chat history. await SetChatHistoryAsync(conversationId, question, answer.Content!); diff --git a/SqlDatabaseVectorSearch/Services/TokenizerService.cs b/SqlDatabaseVectorSearch/Services/TokenizerService.cs new file mode 100644 index 0000000..9115b3d --- /dev/null +++ b/SqlDatabaseVectorSearch/Services/TokenizerService.cs @@ -0,0 +1,13 @@ +using Microsoft.Extensions.Options; +using Microsoft.ML.Tokenizers; +using SqlDatabaseVectorSearch.Settings; + +namespace SqlDatabaseVectorSearch.Services; + +public class TokenizerService(IOptions settingsOptions) +{ + private readonly TiktokenTokenizer tokenizer = TiktokenTokenizer.CreateForModel(settingsOptions.Value.ChatCompletion.ModelId); + + public int CountTokens(string input) + => tokenizer.CountTokens(input); +} diff --git a/SqlDatabaseVectorSearch/Settings/AppSettings.cs b/SqlDatabaseVectorSearch/Settings/AppSettings.cs index dd04b00..74564bc 100644 --- a/SqlDatabaseVectorSearch/Settings/AppSettings.cs +++ b/SqlDatabaseVectorSearch/Settings/AppSettings.cs @@ -10,5 +10,9 @@ public class AppSettings public int MaxRelevantChunks { get; init; } = 5; + public int MaxInputTokens { get; init; } = 16385; + + public int MaxOutputTokens { get; init; } = 800; + public TimeSpan MessageExpiration { get; init; } } diff --git a/SqlDatabaseVectorSearch/Settings/AzureOpenAISettings.cs b/SqlDatabaseVectorSearch/Settings/AzureOpenAISettings.cs index e85d51d..2faac44 100644 --- a/SqlDatabaseVectorSearch/Settings/AzureOpenAISettings.cs +++ b/SqlDatabaseVectorSearch/Settings/AzureOpenAISettings.cs @@ -4,7 +4,7 @@ public class AzureOpenAISettings { public required ServiceSettings ChatCompletion { get; init; } - public required EmbeddingServiceSettings Embedding { get; init; } + public required EmbeddingSettings Embedding { get; init; } } public class ServiceSettings @@ -13,10 +13,12 @@ public class ServiceSettings public required string Deployment { get; init; } + public required string ModelId { get; init; } + public required string ApiKey { get; init; } } -public class EmbeddingServiceSettings : ServiceSettings +public class EmbeddingSettings : ServiceSettings { public int? Dimensions { get; set; } -} +} \ No newline at end of file diff --git a/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj b/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj index 040b251..f4aa353 100644 --- a/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj +++ b/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj @@ -13,11 +13,14 @@ - - - - - + + + + + + + + diff --git a/SqlDatabaseVectorSearch/appsettings.json b/SqlDatabaseVectorSearch/appsettings.json index 53b2465..7c0435b 100644 --- a/SqlDatabaseVectorSearch/appsettings.json +++ b/SqlDatabaseVectorSearch/appsettings.json @@ -6,12 +6,14 @@ "ChatCompletion": { "Endpoint": "", "Deployment": "", - "ApiKey": "" + "ApiKey": "", + "ModelId": "" // o1, gpt-4o, gpt-4, gpt-3.5 }, "Embedding": { "Endpoint": "", "Deployment": "", "ApiKey": "", + "ModelId": "", // Set this value only if you're using a model that allows to specify the dimensions of the embeddings // (e.g. text-embedding-3-small or text-embedding-3-large). Currently, a maximum value of 1998 is supported. "Dimensions": null @@ -22,6 +24,8 @@ "MaxTokensPerParagraph": 1024, "OverlapTokens": 100, "MaxRelevantChunks": 10, + "MaxInputTokens": 16385, + "MaxOutputTokens": 800, "MessageExpiration": "00:05:00" }, "Logging": {