From b6a09d092694d7e770174c3fa8d3c7797c0db1cf Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 2 Sep 2024 10:42:30 +0200 Subject: [PATCH] Data access optimizations --- Scripts.sql | 13 +++------ .../DataAccessLayer/ApplicationDbContext.cs | 13 ++++++--- SqlDatabaseVectorSearch/Program.cs | 2 ++ .../Services/VectorSearchService.cs | 27 +++++-------------- 4 files changed, 23 insertions(+), 32 deletions(-) diff --git a/Scripts.sql b/Scripts.sql index 8765446..bb7843f 100644 --- a/Scripts.sql +++ b/Scripts.sql @@ -1,7 +1,7 @@ CREATE TABLE [dbo].[DocumentChunks]( [Id] [uniqueidentifier] NOT NULL, [DocumentId] [uniqueidentifier] NOT NULL, - [Index] INT NOT NULL, + [Index] [int] NOT NULL, [Content] [nvarchar](max) NOT NULL, [Embedding] [varbinary](8000) NOT NULL, CONSTRAINT [PK_DocumentChunks] PRIMARY KEY CLUSTERED @@ -17,15 +17,10 @@ CREATE TABLE [dbo].[Documents]( CONSTRAINT [PK_Documents] PRIMARY KEY CLUSTERED ( [Id] ASC -)) -GO - -ALTER TABLE [dbo].[DocumentChunks] ADD CONSTRAINT [DF_DocumentChunks_Id] DEFAULT (newid()) FOR [Id] -GO - -ALTER TABLE [dbo].[Documents] ADD CONSTRAINT [DF_Documents_Id] DEFAULT (newid()) FOR [Id] +)) GO ALTER TABLE [dbo].[DocumentChunks] WITH CHECK ADD CONSTRAINT [FK_DocumentChunks_Documents] FOREIGN KEY([DocumentId]) REFERENCES [dbo].[Documents] ([Id]) -GO +ON DELETE CASCADE +GO \ No newline at end of file diff --git a/SqlDatabaseVectorSearch/DataAccessLayer/ApplicationDbContext.cs b/SqlDatabaseVectorSearch/DataAccessLayer/ApplicationDbContext.cs index ec12f24..f4d88ed 100644 --- a/SqlDatabaseVectorSearch/DataAccessLayer/ApplicationDbContext.cs +++ b/SqlDatabaseVectorSearch/DataAccessLayer/ApplicationDbContext.cs @@ -15,13 +15,17 @@ public class ApplicationDbContext(DbContextOptions options base.OnConfiguring(optionsBuilder); optionsBuilder.UseExceptionProcessor(); + //optionsBuilder.EnableSensitiveDataLogging(); } protected override void OnModelCreating(ModelBuilder modelBuilder) { modelBuilder.Entity(entity => { - entity.Property(e => e.Id).HasDefaultValueSql("(newid())"); + entity.ToTable("Documents"); + entity.HasKey(e => e.Id); + + entity.Property(e => e.Id).ValueGeneratedOnAdd(); entity.Property(e => e.Name) .IsRequired() .HasMaxLength(255); @@ -29,7 +33,10 @@ public class ApplicationDbContext(DbContextOptions options modelBuilder.Entity(entity => { - entity.Property(e => e.Id).HasDefaultValueSql("(newid())"); + entity.ToTable("DocumentChunks"); + entity.HasKey(e => e.Id); + + entity.Property(e => e.Id).ValueGeneratedOnAdd(); entity.Property(e => e.Content).IsRequired(); entity.Property(e => e.Embedding) .IsRequired() @@ -38,7 +45,7 @@ public class ApplicationDbContext(DbContextOptions options entity.HasOne(d => d.Document).WithMany(p => p.Chunks) .HasForeignKey(d => d.DocumentId) - .OnDelete(DeleteBehavior.NoAction) + .OnDelete(DeleteBehavior.Cascade) .HasConstraintName("FK_DocumentChunks_Documents"); }); } diff --git a/SqlDatabaseVectorSearch/Program.cs b/SqlDatabaseVectorSearch/Program.cs index 202d018..3e7b5fd 100644 --- a/SqlDatabaseVectorSearch/Program.cs +++ b/SqlDatabaseVectorSearch/Program.cs @@ -17,6 +17,8 @@ builder.Configuration.AddJsonFile("appsettings.local.json", optional: true, relo var aiSettings = builder.Configuration.GetSection("AzureOpenAI")!; var appSettings = builder.Services.ConfigureAndGet(builder.Configuration, nameof(AppSettings))!; +builder.Services.AddSingleton(TimeProvider.System); + builder.Services.AddSqlServer(builder.Configuration.GetConnectionString("SqlConnection"), options => { options.UseVectorSearch(); diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs index 8a90832..77a99a5 100644 --- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs +++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs @@ -12,7 +12,7 @@ using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities; namespace SqlDatabaseVectorSearch.Services; -public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, ChatService chatService, IOptions appSettingsOptions) +public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingGenerationService textEmbeddingGenerationService, ChatService chatService, TimeProvider timeProvider, IOptions appSettingsOptions) { private readonly AppSettings appSettings = appSettingsOptions.Value; @@ -26,16 +26,10 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG if (documentId.HasValue) { // If the user is importing a document that already exists, delete the previous one. - await dbContext.DocumentChunks.Where(c => c.DocumentId == documentId).ExecuteDeleteAsync(); - await dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync(); - } - else - { - // Create a new document. - documentId = Guid.NewGuid(); + await DeleteDocumentAsync(documentId.Value); } - var document = new Entities.Document { Id = documentId.Value, Name = name, CreationDate = DateTimeOffset.UtcNow }; + var document = new Entities.Document { Id = documentId.GetValueOrDefault(), Name = name, CreationDate = timeProvider.GetUtcNow() }; dbContext.Documents.Add(document); // Split the content into chunks and generate the embeddings for each one. @@ -45,14 +39,14 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG var index = 0; foreach (var (paragraph, embedding) in paragraphs.Zip(embeddings, (p, e) => (p, e.ToArray()))) { - var documentChunk = new Entities.DocumentChunk { DocumentId = documentId.Value, Index = index++, Content = paragraph, Embedding = embedding }; + var documentChunk = new Entities.DocumentChunk { Document = document, Index = index++, Content = paragraph, Embedding = embedding }; dbContext.DocumentChunks.Add(documentChunk); } await dbContext.SaveChangesAsync(); await dbContext.Database.CommitTransactionAsync(); - return documentId.Value; + return document.Id; } public async Task> GetDocumentsAsync() @@ -82,15 +76,8 @@ public class VectorSearchService(ApplicationDbContext dbContext, ITextEmbeddingG return documentChunk; } - public async Task DeleteDocumentAsync(Guid documentId) - { - await dbContext.Database.BeginTransactionAsync(); - - await dbContext.DocumentChunks.Where(c => c.DocumentId == documentId).ExecuteDeleteAsync(); - await dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync(); - - await dbContext.Database.CommitTransactionAsync(); - } + public Task DeleteDocumentAsync(Guid documentId) + => dbContext.Documents.Where(d => d.Id == documentId).ExecuteDeleteAsync(); public async Task AskQuestionAsync(Question question, bool reformulate = true) {