-
@citation.FileName @if (!string.IsNullOrEmpty(citation.PageNumber))
+
@citation.FileName @if (citation.PageNumber.GetValueOrDefault() > 0)
{
pag. @citation.PageNumber
}
@@ -314,7 +314,7 @@
return (text ?? string.Empty, citations);
}
- var pattern = @"
[^']*)'\s+chunk-id='(?[^']*)'\s+filename='(?[^']*)'\s+page-number='(?[^']*)'>\s*(?.*?)\s*
";
+ var pattern = @"
[^']*)'\s+chunk-id='(?[^']*)'\s+filename='(?[^']*)'\s+page-number='(?[^']*)'\s+index-on-page='(?[^']*)'>\s*(?.*?)\s*
";
var matches = Regex.Matches(text, pattern, RegexOptions.Singleline);
foreach (Match match in matches)
@@ -326,7 +326,8 @@
DocumentId = Guid.Parse(match.Groups["documentId"].Value),
ChunkId = Guid.Parse(match.Groups["chunkId"].Value),
FileName = match.Groups["filename"].Value,
- PageNumber = match.Groups["pageNumber"].Value,
+ PageNumber = int.TryParse(match.Groups["pageNumber"].Value, out var pageNumber) && pageNumber > 0 ? pageNumber : null,
+ IndexOnPage = int.TryParse(match.Groups["indexOnPage"].Value, out var indexOnPage) ? indexOnPage : 0,
Quote = match.Groups["quote"].Value
});
}
@@ -368,6 +369,8 @@
public string Quote { get; set; } = null!;
- public string? PageNumber { get; set; }
+ public int? PageNumber { get; set; }
+
+ public int IndexOnPage { get; set; }
}
}
\ No newline at end of file
diff --git a/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs b/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs
index 580de9e..bc67963 100644
--- a/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs
+++ b/SqlDatabaseVectorSearch/Data/Entities/DocumentChunk.cs
@@ -8,6 +8,10 @@ public class DocumentChunk
public int Index { get; set; }
+ public int? PageNumber { get; set; }
+
+ public int IndexOnPage { get; set; }
+
public required string Content { get; set; }
public required float[] Embedding { get; set; }
diff --git a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs
index 5789fb8..c071cf1 100644
--- a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs
+++ b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.Designer.cs
@@ -9,10 +9,10 @@ using SqlDatabaseVectorSearch.Data;
#nullable disable
-namespace SqlDatabaseVectorSearch.Data.Migrations
+namespace SqlDatabaseVectorSearch.Migrations
{
[DbContext(typeof(ApplicationDbContext))]
- [Migration("20250224102351_Initial")]
+ [Migration("20250606091336_Initial")]
partial class Initial
{
///
@@ -20,12 +20,12 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
{
#pragma warning disable 612, 618
modelBuilder
- .HasAnnotation("ProductVersion", "9.0.2")
+ .HasAnnotation("ProductVersion", "9.0.5")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
{
b.Property
("Id")
.ValueGeneratedOnAdd()
@@ -44,7 +44,7 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.ToTable("Documents", (string)null);
});
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
@@ -64,6 +64,12 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.Property("Index")
.HasColumnType("int");
+ b.Property("IndexOnPage")
+ .HasColumnType("int");
+
+ b.Property("PageNumber")
+ .HasColumnType("int");
+
b.HasKey("Id");
b.HasIndex("DocumentId");
@@ -71,9 +77,9 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.ToTable("DocumentChunks", (string)null);
});
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
{
- b.HasOne("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", "Document")
+ b.HasOne("SqlDatabaseVectorSearch.Data.Entities.Document", "Document")
.WithMany("Chunks")
.HasForeignKey("DocumentId")
.OnDelete(DeleteBehavior.Cascade)
@@ -83,7 +89,7 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.Navigation("Document");
});
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
{
b.Navigation("Chunks");
});
diff --git a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs
index 2a530b0..590cad4 100644
--- a/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs
+++ b/SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs
@@ -3,7 +3,7 @@ using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
-namespace SqlDatabaseVectorSearch.Data.Migrations
+namespace SqlDatabaseVectorSearch.Migrations
{
///
public partial class Initial : Migration
@@ -31,6 +31,8 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
Id = table.Column(type: "uniqueidentifier", nullable: false),
DocumentId = table.Column(type: "uniqueidentifier", nullable: false),
Index = table.Column(type: "int", nullable: false),
+ PageNumber = table.Column(type: "int", nullable: true),
+ IndexOnPage = table.Column(type: "int", nullable: false),
Content = table.Column(type: "nvarchar(max)", nullable: false),
Embedding = table.Column(type: "vector(1536)", nullable: false)
},
diff --git a/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs b/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs
index 6bb2ad9..aeb0666 100644
--- a/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs
+++ b/SqlDatabaseVectorSearch/Data/Migrations/ApplicationDbContextModelSnapshot.cs
@@ -8,7 +8,7 @@ using SqlDatabaseVectorSearch.Data;
#nullable disable
-namespace SqlDatabaseVectorSearch.Data.Migrations
+namespace SqlDatabaseVectorSearch.Migrations
{
[DbContext(typeof(ApplicationDbContext))]
partial class ApplicationDbContextModelSnapshot : ModelSnapshot
@@ -17,12 +17,12 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
{
#pragma warning disable 612, 618
modelBuilder
- .HasAnnotation("ProductVersion", "9.0.2")
+ .HasAnnotation("ProductVersion", "9.0.5")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
@@ -41,7 +41,7 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.ToTable("Documents", (string)null);
});
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
@@ -61,6 +61,12 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.Property("Index")
.HasColumnType("int");
+ b.Property("IndexOnPage")
+ .HasColumnType("int");
+
+ b.Property("PageNumber")
+ .HasColumnType("int");
+
b.HasKey("Id");
b.HasIndex("DocumentId");
@@ -68,9 +74,9 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.ToTable("DocumentChunks", (string)null);
});
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
{
- b.HasOne("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", "Document")
+ b.HasOne("SqlDatabaseVectorSearch.Data.Entities.Document", "Document")
.WithMany("Chunks")
.HasForeignKey("DocumentId")
.OnDelete(DeleteBehavior.Cascade)
@@ -80,7 +86,7 @@ namespace SqlDatabaseVectorSearch.Data.Migrations
b.Navigation("Document");
});
- modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+ modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
{
b.Navigation("Chunks");
});
diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs
index 4269efa..2bc3e51 100644
--- a/SqlDatabaseVectorSearch/Services/ChatService.cs
+++ b/SqlDatabaseVectorSearch/Services/ChatService.cs
@@ -132,7 +132,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
First provide your complete answer, then list all citations.
Use this XML format for citations:
- exact quote here
+ exact quote here
""");
var prompt = new StringBuilder($"""
@@ -151,7 +151,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
foreach (var chunk in chunks)
{
- var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id} | Page Number: 1) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
+ var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id} | Page Number: {chunk.PageNumber} | Index on Page: {chunk.IndexOnPage}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
var tokenCount = tokenizerService.CountChatCompletionTokens(text);
if (tokenCount > availableTokens)
diff --git a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
index beda3cb..2663c39 100644
--- a/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
+++ b/SqlDatabaseVectorSearch/Services/VectorSearchService.cs
@@ -39,14 +39,24 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
var document = new Entities.Document { Id = documentId.GetValueOrDefault(), Name = name, CreationDate = timeProvider.GetUtcNow() };
dbContext.Documents.Add(document);
- var embeddings = await embeddingGenerator.GenerateAndZipAsync(paragraphs.Select(p => p.Content), cancellationToken: cancellationToken);
+ var embeddings = await embeddingGenerator.GenerateAsync(paragraphs.Select(p => p.Content), cancellationToken: cancellationToken);
// Save the document chunks and the corresponding embedding in the database.
foreach (var (index, embedding) in embeddings.Index())
{
- logger.LogDebug("Storing a paragraph of {TokenCount} tokens.", tokenizerService.CountChatCompletionTokens(embedding.Value));
+ var paragraph = paragraphs.ElementAt(index);
+ logger.LogDebug("Storing a paragraph of {TokenCount} tokens.", tokenizerService.CountChatCompletionTokens(paragraph.Content));
+
+ var documentChunk = new Entities.DocumentChunk
+ {
+ Document = document,
+ Index = index,
+ PageNumber = paragraph.PageNumber,
+ IndexOnPage = paragraph.IndexOnPage,
+ Content = paragraph.Content,
+ Embedding = embedding.Vector.ToArray()
+ };
- var documentChunk = new Entities.DocumentChunk { Document = document, Index = index, Content = embedding.Value, Embedding = embedding.Embedding.Vector.ToArray() };
dbContext.DocumentChunks.Add(documentChunk);
}