From 5530a84d82e96ef070fb8c21e4eda1e1f717cbc3 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Thu, 5 Jun 2025 16:25:16 +0200 Subject: [PATCH] Update citation handling and formatting in Ask.razor Refactor regex pattern in `Ask.razor` to capture `document-id` and `chunk-id`. Update `Citation` class to include new properties and make `PageNumber` nullable. Adjust citation addition logic and citation format rules. Modify chunk text formatting in `ChatService.cs` to include page number. --- .../Components/Pages/Ask.razor | 18 ++++++++++++------ .../Services/ChatService.cs | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/SqlDatabaseVectorSearch/Components/Pages/Ask.razor b/SqlDatabaseVectorSearch/Components/Pages/Ask.razor index dbc459e..297ed39 100644 --- a/SqlDatabaseVectorSearch/Components/Pages/Ask.razor +++ b/SqlDatabaseVectorSearch/Components/Pages/Ask.razor @@ -314,18 +314,20 @@ return (text ?? string.Empty, citations); } - var pattern = "(.*?)<\\/citation>"; + var pattern = @"<citation document-id='(?<documentId>[^']*)'\s+chunk-id='(?<chunkId>[^']*)'\s+filename='(?<filename>[^']*)'\s+page-number='(?<pageNumber>[^']*)'>\s*(?<quote>.*?)\s*</citation>"; var matches = Regex.Matches(text, pattern, RegexOptions.Singleline); foreach (Match match in matches) { - if (match.Success && match.Groups.Count == 4) + if (match.Success) { citations.Add(new Citation { - FileName = match.Groups[1].Value, - PageNumber = match.Groups[2].Value, - Quote = match.Groups[3].Value + DocumentId = Guid.Parse(match.Groups["documentId"].Value), + ChunkId = Guid.Parse(match.Groups["chunkId"].Value), + FileName = match.Groups["filename"].Value, + PageNumber = match.Groups["pageNumber"].Value, + Quote = match.Groups["quote"].Value }); } } @@ -358,10 +360,14 @@ public class Citation { + public Guid DocumentId { get; set; } + + public Guid ChunkId { get; set; } + public string FileName { get; set; } = null!; public string Quote { get; set; } = null!; - public string PageNumber { get; set; } = null!; + public string? 
PageNumber { get; set; } } } \ No newline at end of file diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index 77686cb..cc19d8a 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -133,7 +133,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer First provide your complete answer, then add a blank line, then list all citations. Use this XML format for citations: - exact quote here + exact quote here STRICT RULES for citations: - Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line. @@ -162,7 +162,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer foreach (var chunk in chunks) { - var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}"; + var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id} | Page Number: 1) {Environment.NewLine}{chunk.Content}{Environment.NewLine}"; var tokenCount = tokenizerService.CountChatCompletionTokens(text); if (tokenCount > availableTokens)