mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Update Azure OpenAI services and Semantic Kernel version
- Updated `VectorSearchService.cs` to replace the text embedding generation service with an embedding generator and adjusted embedding generation and vector search logic. - Changed `Microsoft.SemanticKernel` package version from 1.50.0 to 1.51.0 in `SqlDatabaseVectorSearch.csproj` and modified `NoWarn` property settings.
This commit is contained in:
@@ -60,8 +60,8 @@ builder.Services.ConfigureHttpClientDefaults(configure =>
|
||||
// Semantic Kernel is used to generate embeddings and to reformulate questions taking into account all the previous interactions,
|
||||
// so that embeddings themselves can be generated more accurately.
|
||||
builder.Services.AddKernel()
|
||||
.AddAzureOpenAITextEmbeddingGeneration(aiSettings.Embedding.Deployment, aiSettings.Embedding.Endpoint, aiSettings.Embedding.ApiKey, dimensions: aiSettings.Embedding.Dimensions)
|
||||
.AddAzureOpenAIChatCompletion(aiSettings.ChatCompletion.Deployment, aiSettings.ChatCompletion.Endpoint, aiSettings.ChatCompletion.ApiKey);
|
||||
.AddAzureOpenAIEmbeddingGenerator(aiSettings.Embedding.Deployment, aiSettings.Embedding.Endpoint, aiSettings.Embedding.ApiKey, modelId: aiSettings.Embedding.ModelId, dimensions: aiSettings.Embedding.Dimensions)
|
||||
.AddAzureOpenAIChatCompletion(aiSettings.ChatCompletion.Deployment, aiSettings.ChatCompletion.Endpoint, aiSettings.ChatCompletion.ApiKey, modelId: aiSettings.ChatCompletion.ModelId);
|
||||
|
||||
builder.Services.AddSingleton<TokenizerService>();
|
||||
builder.Services.AddSingleton<ChatService>();
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
using System.Data;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Microsoft.SemanticKernel.Embeddings;
|
||||
using SqlDatabaseVectorSearch.ContentDecoders;
|
||||
using SqlDatabaseVectorSearch.DataAccessLayer;
|
||||
using SqlDatabaseVectorSearch.Models;
|
||||
using SqlDatabaseVectorSearch.Settings;
|
||||
using SqlDatabaseVectorSearch.TextChunkers;
|
||||
using ChatResponse = SqlDatabaseVectorSearch.Models.ChatResponse;
|
||||
using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
|
||||
|
||||
namespace SqlDatabaseVectorSearch.Services;
|
||||
|
||||
public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDbContext dbContext, DocumentService documentService, ITextEmbeddingGenerationService textEmbeddingGenerationService, TokenizerService tokenizerService, ChatService chatService, TimeProvider timeProvider, IOptions<AppSettings> appSettingsOptions, ILogger<VectorSearchService> logger)
|
||||
public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDbContext dbContext, DocumentService documentService, IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator, TokenizerService tokenizerService, ChatService chatService, TimeProvider timeProvider, IOptions<AppSettings> appSettingsOptions, ILogger<VectorSearchService> logger)
|
||||
{
|
||||
private readonly AppSettings appSettings = appSettingsOptions.Value;
|
||||
|
||||
@@ -42,14 +43,15 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
|
||||
// Split the content into chunks and generate the embeddings for each one.
|
||||
var textChunker = serviceProvider.GetRequiredKeyedService<ITextChunker>(contentType);
|
||||
var paragraphs = textChunker.Split(content);
|
||||
var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(paragraphs, cancellationToken: cancellationToken);
|
||||
|
||||
var embeddings = await embeddingGenerator.GenerateAndZipAsync(paragraphs, cancellationToken: cancellationToken);
|
||||
|
||||
// Save the document chunks and the corresponding embedding in the database.
|
||||
foreach (var (index, paragraph) in paragraphs.Index())
|
||||
foreach (var (index, embedding) in embeddings.Index())
|
||||
{
|
||||
logger.LogDebug("Storing a paragraph of {TokenCount} tokens.", tokenizerService.CountChatCompletionTokens(paragraph));
|
||||
logger.LogDebug("Storing a paragraph of {TokenCount} tokens.", tokenizerService.CountChatCompletionTokens(embedding.Value));
|
||||
|
||||
var documentChunk = new Entities.DocumentChunk { Document = document, Index = index, Content = paragraph!, Embedding = embeddings[index].ToArray() };
|
||||
var documentChunk = new Entities.DocumentChunk { Document = document, Index = index, Content = embedding.Value, Embedding = embedding.Embedding.Vector.ToArray() };
|
||||
dbContext.DocumentChunks.Add(documentChunk);
|
||||
}
|
||||
|
||||
@@ -108,7 +110,7 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
|
||||
logger.LogDebug("Embedding Token Count: {EmbeddingTokenCount}", embeddingTokenCount);
|
||||
|
||||
// Perform Vector Search on SQL Database.
|
||||
var questionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
|
||||
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
|
||||
|
||||
var chunks = await dbContext.DocumentChunks
|
||||
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<NoWarn>$(NoWarn);SKEXP0001;SKEXP0010;SKEXP0050;EXTEXP0018</NoWarn>
|
||||
<NoWarn>$(NoWarn);SKEXP0010;SKEXP0050</NoWarn>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
@@ -24,7 +24,7 @@
|
||||
<PackageReference Include="Microsoft.ML.Tokenizers" Version="1.0.2" />
|
||||
<PackageReference Include="Microsoft.ML.Tokenizers.Data.Cl100kBase" Version="1.0.2" />
|
||||
<PackageReference Include="Microsoft.ML.Tokenizers.Data.O200kBase" Version="1.0.2" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="1.50.0" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="1.51.0" />
|
||||
<PackageReference Include="MimeMapping" Version="3.1.0" />
|
||||
<PackageReference Include="MinimalHelpers.FluentValidation" Version="1.1.3" />
|
||||
<PackageReference Include="MinimalHelpers.Routing.Analyzers" Version="1.1.3" />
|
||||
|
||||
Reference in New Issue
Block a user