From 404cd7565a7a1d05ba6d042030e9b8fa2a2fb788 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Wed, 10 Sep 2025 16:45:16 +0200 Subject: [PATCH] Switch to SqlVector for embeddings Updated the application to use SQL Server's native vector data type (`SqlVector`) for embeddings, replacing the previous `float[]` or `string` representations. - Updated `.editorconfig` with new code style preferences and diagnostic rule severities. - Modified `DocumentChunk.cs` to use `SqlVector` for the `Embedding` property. - Updated migrations and `ApplicationDbContextModelSnapshot` to reflect the new `SqlVector` type. - Replaced `AddAzureSql` with `AddSqlServer` in `Program.cs` and removed `UseVectorSearch`. - Adjusted `DocumentService` and `VectorSearchService` to handle `SqlVector` and updated vector search logic. - Removed the `EFCore.SqlServer.VectorSearch` package and upgraded EF Core to `10.0.0-rc.1`. - Made minor adjustments to OpenAPI configuration and dependency management. --- .editorconfig | 6 +++++- SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs | 6 ++++-- .../Data/Migrations/00000000000000_Initial.Designer.cs | 8 ++++---- .../Data/Migrations/00000000000000_Initial.cs | 3 ++- .../Migrations/ApplicationDbContextModelSnapshot.cs | 6 +++--- SqlDatabaseVectorSearch/Program.cs | 10 +++------- SqlDatabaseVectorSearch/Services/DocumentService.cs | 2 +- .../Services/VectorSearchService.cs | 6 ++++-- SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj | 1 - 9 files changed, 26 insertions(+), 22 deletions(-) diff --git a/.editorconfig b/.editorconfig index f6b7d3a..8f8512a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -82,6 +82,7 @@ csharp_style_prefer_local_over_anonymous_function = true:silent csharp_style_prefer_extended_property_pattern = true:suggestion csharp_style_implicit_object_creation_when_type_is_apparent = true:silent csharp_style_prefer_tuple_swap = true:silent +csharp_style_prefer_simple_property_accessors = true:suggestion # Field preferences 
dotnet_style_readonly_field = true:suggestion @@ -299,4 +300,7 @@ dotnet_diagnostic.IDE0010.severity = none dotnet_diagnostic.IDE0072.severity = none # IDE0305: Simplify collection initialization -dotnet_diagnostic.IDE0305.severity = none \ No newline at end of file +dotnet_diagnostic.IDE0305.severity = none + +# CA1873: Avoid potentially expensive logging +dotnet_diagnostic.CA1873.severity = none \ No newline at end of file diff --git a/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs b/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs index bc67963..0f91b28 100644 --- a/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs +++ b/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs @@ -1,4 +1,6 @@ -namespace SqlDatabaseVectorSearch.Data.Entities; +using Microsoft.Data.SqlTypes; + +namespace SqlDatabaseVectorSearch.Data.Entities; public class DocumentChunk { @@ -14,7 +16,7 @@ public class DocumentChunk public required string Content { get; set; } - public required float[] Embedding { get; set; } + public required SqlVector Embedding { get; set; } public virtual Document Document { get; set; } = null!; } \ No newline at end of file diff --git a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs index c071cf1..e4f78bc 100644 --- a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs +++ b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs @@ -1,5 +1,6 @@ // using System; +using Microsoft.Data.SqlTypes; using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore.Infrastructure; using Microsoft.EntityFrameworkCore.Metadata; @@ -12,7 +13,7 @@ using SqlDatabaseVectorSearch.Data; namespace SqlDatabaseVectorSearch.Migrations { [DbContext(typeof(ApplicationDbContext))] - [Migration("20250606091336_Initial")] + [Migration("00000000000000_Initial")] partial class Initial { /// @@ -20,7 +21,7 @@ namespace 
SqlDatabaseVectorSearch.Migrations { #pragma warning disable 612, 618 modelBuilder - .HasAnnotation("ProductVersion", "9.0.5") + .HasAnnotation("ProductVersion", "10.0.0-rc.1.25451.107") .HasAnnotation("Relational:MaxIdentifierLength", 128); SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder); @@ -57,8 +58,7 @@ namespace SqlDatabaseVectorSearch.Migrations b.Property("DocumentId") .HasColumnType("uniqueidentifier"); - b.PrimitiveCollection("Embedding") - .IsRequired() + b.Property<SqlVector<float>>("Embedding") .HasColumnType("vector(1536)"); b.Property("Index") diff --git a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs index 590cad4..4db809e 100644 --- a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs +++ b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs @@ -1,4 +1,5 @@ using System; +using Microsoft.Data.SqlTypes; using Microsoft.EntityFrameworkCore.Migrations; #nullable disable @@ -34,7 +35,7 @@ namespace SqlDatabaseVectorSearch.Migrations PageNumber = table.Column(type: "int", nullable: true), IndexOnPage = table.Column(type: "int", nullable: false), Content = table.Column(type: "nvarchar(max)", nullable: false), - Embedding = table.Column(type: "vector(1536)", nullable: false) + Embedding = table.Column<SqlVector<float>>(type: "vector(1536)", nullable: false) }, constraints: table => { diff --git a/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs b/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs index aeb0666..0673aca 100644 --- a/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs +++ b/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs @@ -1,5 +1,6 @@ // using System; +using Microsoft.Data.SqlTypes; using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore.Infrastructure; using Microsoft.EntityFrameworkCore.Metadata; @@ -17,7 +18,7 @@ 
namespace SqlDatabaseVectorSearch.Migrations { #pragma warning disable 612, 618 modelBuilder - .HasAnnotation("ProductVersion", "9.0.5") + .HasAnnotation("ProductVersion", "10.0.0-rc.1.25451.107") .HasAnnotation("Relational:MaxIdentifierLength", 128); SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder); @@ -54,8 +55,7 @@ namespace SqlDatabaseVectorSearch.Migrations b.Property("DocumentId") .HasColumnType("uniqueidentifier"); - b.PrimitiveCollection("Embedding") - .IsRequired() + b.Property<SqlVector<float>>("Embedding") .HasColumnType("vector(1536)"); b.Property("Index") diff --git a/SqlDatabaseVectorSearch/Program.cs b/SqlDatabaseVectorSearch/Program.cs index dfda707..a36ec70 100644 --- a/SqlDatabaseVectorSearch/Program.cs +++ b/SqlDatabaseVectorSearch/Program.cs @@ -11,7 +11,6 @@ using SqlDatabaseVectorSearch.Services; using SqlDatabaseVectorSearch.Settings; using SqlDatabaseVectorSearch.TextChunkers; using TinyHelpers.AspNetCore.Extensions; -using TinyHelpers.AspNetCore.OpenApi; var builder = WebApplication.CreateBuilder(args); builder.Configuration.AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true); @@ -32,10 +31,7 @@ builder.Services.ConfigureHttpJsonOptions(options => builder.Services.AddSingleton(TimeProvider.System); -builder.Services.AddAzureSql(builder.Configuration.GetConnectionString("SqlConnection"), options => -{ - options.UseVectorSearch(); -}, options => +builder.Services.AddSqlServer(builder.Configuration.GetConnectionString("SqlConnection"), optionsAction: options => { options.UseQueryTrackingBehavior(QueryTrackingBehavior.NoTracking); }); @@ -79,8 +75,8 @@ builder.Services.AddScoped(); builder.Services.AddOpenApi(options => { - options.RemoveServerList(); - options.AddDefaultProblemDetailsResponse(); + //options.RemoveServerList(); + //options.AddDefaultProblemDetailsResponse(); }); ValidatorOptions.Global.LanguageManager.Enabled = false; diff --git a/SqlDatabaseVectorSearch/Services/DocumentService.cs 
b/SqlDatabaseVectorSearch/Services/DocumentService.cs index a8aca12..c376ceb 100644 --- a/SqlDatabaseVectorSearch/Services/DocumentService.cs +++ b/SqlDatabaseVectorSearch/Services/DocumentService.cs @@ -28,7 +28,7 @@ public class DocumentService(ApplicationDbContext dbContext) public async Task GetChunkEmbeddingAsync(Guid documentId, Guid documentChunkId, CancellationToken cancellationToken = default) { var documentChunk = await dbContext.DocumentChunks.Where(c => c.Id == documentChunkId && c.DocumentId == documentId) - .Select(c => new DocumentChunk(c.Id, c.Index, c.Content, c.PageNumber, c.IndexOnPage, c.Embedding)) + .Select(c => new DocumentChunk(c.Id, c.Index, c.Content, c.PageNumber, c.IndexOnPage, c.Embedding.Memory.ToArray())) .FirstOrDefaultAsync(cancellationToken); return documentChunk; diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs index 18bfa3e..edaeea3 100644 --- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs +++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs @@ -2,6 +2,7 @@ using System.Runtime.CompilerServices; using System.Text; using System.Text.RegularExpressions; +using Microsoft.Data.SqlTypes; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.AI; using Microsoft.Extensions.Options; @@ -66,7 +67,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli PageNumber = chunk.PageNumber, IndexOnPage = chunk.IndexOnPage, Content = chunk.Content, - Embedding = embedding.Vector.ToArray() + Embedding = new SqlVector(embedding.Vector) }; dbContext.DocumentChunks.Add(documentChunk); @@ -149,9 +150,10 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli // Perform Vector Search on SQL Database. 
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken); + var embeddingVector = new SqlVector(questionEmbedding); var chunks = await dbContext.DocumentChunks.Include(c => c.Document) - .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray())) + .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector)) .Take(appSettings.MaxRelevantChunks) .ToListAsync(cancellationToken); diff --git a/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj b/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj index ecd9ad7..984f65a 100644 --- a/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj +++ b/SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj @@ -10,7 +10,6 @@ -