Switch to SqlVector<float> for embeddings

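Updated the application to store embeddings in SQL Server's native `vector` data type, mapped through the `SqlVector<float>` CLR type from `Microsoft.Data.SqlTypes`. This replaces the previous `float[]` entity property and the `string`-typed column in the migration and model snapshot.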

- Updated `.editorconfig` with new code style preferences and diagnostic rule severities.
- Modified `DocumentChunk.cs` to use `SqlVector<float>` for the `Embedding` property.
- Updated migrations and `ApplicationDbContextModelSnapshot` to reflect the new `SqlVector<float>` type.
- Replaced `AddAzureSql` with `AddSqlServer` in `Program.cs` and removed `UseVectorSearch`.
- Adjusted `DocumentService` and `VectorSearchService` to handle `SqlVector<float>` and updated vector search logic.
- Removed the `EFCore.SqlServer.VectorSearch` package and upgraded EF Core to `10.0.0-rc.1`.
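- Commented out the `RemoveServerList` and `AddDefaultProblemDetailsResponse` calls in the OpenAPI configuration in `Program.cs`, removed the now-unused `TinyHelpers.AspNetCore.OpenApi` using directive, and made minor dependency-management adjustments.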
This commit is contained in:
Marco Minerva
2025-09-10 16:45:16 +02:00
parent f5011bc44b
commit 404cd7565a
9 changed files with 26 additions and 22 deletions
+5 -1
View File
@@ -82,6 +82,7 @@ csharp_style_prefer_local_over_anonymous_function = true:silent
csharp_style_prefer_extended_property_pattern = true:suggestion
csharp_style_implicit_object_creation_when_type_is_apparent = true:silent
csharp_style_prefer_tuple_swap = true:silent
csharp_style_prefer_simple_property_accessors = true:suggestion
# Field preferences
dotnet_style_readonly_field = true:suggestion
@@ -299,4 +300,7 @@ dotnet_diagnostic.IDE0010.severity = none
dotnet_diagnostic.IDE0072.severity = none
# IDE0305: Simplify collection initialization
dotnet_diagnostic.IDE0305.severity = none
dotnet_diagnostic.IDE0305.severity = none
# CA1873: Avoid potentially expensive logging
dotnet_diagnostic.CA1873.severity = none
@@ -1,4 +1,6 @@
namespace SqlDatabaseVectorSearch.Data.Entities;
using Microsoft.Data.SqlTypes;
namespace SqlDatabaseVectorSearch.Data.Entities;
public class DocumentChunk
{
@@ -14,7 +16,7 @@ public class DocumentChunk
public required string Content { get; set; }
public required float[] Embedding { get; set; }
public required SqlVector<float> Embedding { get; set; }
public virtual Document Document { get; set; } = null!;
}
@@ -1,5 +1,6 @@
// <auto-generated />
using System;
using Microsoft.Data.SqlTypes;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
@@ -12,7 +13,7 @@ using SqlDatabaseVectorSearch.Data;
namespace SqlDatabaseVectorSearch.Migrations
{
[DbContext(typeof(ApplicationDbContext))]
[Migration("20250606091336_Initial")]
[Migration("00000000000000_Initial")]
partial class Initial
{
/// <inheritdoc />
@@ -20,7 +21,7 @@ namespace SqlDatabaseVectorSearch.Migrations
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "9.0.5")
.HasAnnotation("ProductVersion", "10.0.0-rc.1.25451.107")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
@@ -57,8 +58,7 @@ namespace SqlDatabaseVectorSearch.Migrations
b.Property<Guid>("DocumentId")
.HasColumnType("uniqueidentifier");
b.PrimitiveCollection<string>("Embedding")
.IsRequired()
b.Property<SqlVector<float>>("Embedding")
.HasColumnType("vector(1536)");
b.Property<int>("Index")
@@ -1,4 +1,5 @@
using System;
using Microsoft.Data.SqlTypes;
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
@@ -34,7 +35,7 @@ namespace SqlDatabaseVectorSearch.Migrations
PageNumber = table.Column<int>(type: "int", nullable: true),
IndexOnPage = table.Column<int>(type: "int", nullable: false),
Content = table.Column<string>(type: "nvarchar(max)", nullable: false),
Embedding = table.Column<string>(type: "vector(1536)", nullable: false)
Embedding = table.Column<SqlVector<float>>(type: "vector(1536)", nullable: false)
},
constraints: table =>
{
@@ -1,5 +1,6 @@
// <auto-generated />
using System;
using Microsoft.Data.SqlTypes;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
@@ -17,7 +18,7 @@ namespace SqlDatabaseVectorSearch.Migrations
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "9.0.5")
.HasAnnotation("ProductVersion", "10.0.0-rc.1.25451.107")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
@@ -54,8 +55,7 @@ namespace SqlDatabaseVectorSearch.Migrations
b.Property<Guid>("DocumentId")
.HasColumnType("uniqueidentifier");
b.PrimitiveCollection<string>("Embedding")
.IsRequired()
b.Property<SqlVector<float>>("Embedding")
.HasColumnType("vector(1536)");
b.Property<int>("Index")
+3 -7
View File
@@ -11,7 +11,6 @@ using SqlDatabaseVectorSearch.Services;
using SqlDatabaseVectorSearch.Settings;
using SqlDatabaseVectorSearch.TextChunkers;
using TinyHelpers.AspNetCore.Extensions;
using TinyHelpers.AspNetCore.OpenApi;
var builder = WebApplication.CreateBuilder(args);
builder.Configuration.AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true);
@@ -32,10 +31,7 @@ builder.Services.ConfigureHttpJsonOptions(options =>
builder.Services.AddSingleton(TimeProvider.System);
builder.Services.AddAzureSql<ApplicationDbContext>(builder.Configuration.GetConnectionString("SqlConnection"), options =>
{
options.UseVectorSearch();
}, options =>
builder.Services.AddSqlServer<ApplicationDbContext>(builder.Configuration.GetConnectionString("SqlConnection"), optionsAction: options =>
{
options.UseQueryTrackingBehavior(QueryTrackingBehavior.NoTracking);
});
@@ -79,8 +75,8 @@ builder.Services.AddScoped<VectorSearchService>();
builder.Services.AddOpenApi(options =>
{
options.RemoveServerList();
options.AddDefaultProblemDetailsResponse();
//options.RemoveServerList();
//options.AddDefaultProblemDetailsResponse();
});
ValidatorOptions.Global.LanguageManager.Enabled = false;
@@ -28,7 +28,7 @@ public class DocumentService(ApplicationDbContext dbContext)
public async Task<DocumentChunk?> GetChunkEmbeddingAsync(Guid documentId, Guid documentChunkId, CancellationToken cancellationToken = default)
{
var documentChunk = await dbContext.DocumentChunks.Where(c => c.Id == documentChunkId && c.DocumentId == documentId)
.Select(c => new DocumentChunk(c.Id, c.Index, c.Content, c.PageNumber, c.IndexOnPage, c.Embedding))
.Select(c => new DocumentChunk(c.Id, c.Index, c.Content, c.PageNumber, c.IndexOnPage, c.Embedding.Memory.ToArray()))
.FirstOrDefaultAsync(cancellationToken);
return documentChunk;
@@ -2,6 +2,7 @@
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Data.SqlTypes;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Options;
@@ -66,7 +67,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
PageNumber = chunk.PageNumber,
IndexOnPage = chunk.IndexOnPage,
Content = chunk.Content,
Embedding = embedding.Vector.ToArray()
Embedding = new SqlVector<float>(embedding.Vector)
};
dbContext.DocumentChunks.Add(documentChunk);
@@ -149,9 +150,10 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
// Perform Vector Search on SQL Database.
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
var embeddingVector = new SqlVector<float>(questionEmbedding);
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
.Take(appSettings.MaxRelevantChunks)
.ToListAsync(cancellationToken);
@@ -10,7 +10,6 @@
<ItemGroup>
<PackageReference Include="Blazor.Bootstrap" Version="3.4.0" />
<PackageReference Include="DocumentFormat.OpenXml" Version="3.3.0" />
<PackageReference Include="EFCore.SqlServer.VectorSearch" Version="9.0.0" />
<PackageReference Include="EntityFrameworkCore.Exceptions.SqlServer" Version="8.1.3" />
<PackageReference Include="FluentValidation.DependencyInjectionExtensions" Version="12.0.0" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.0-rc.1.25451.107" />