Update citation handling and formatting in Ask.razor

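Refactor the regex pattern in `Ask.razor` to use named groups and to
capture the new `document-id` and `chunk-id` attributes. Update the
`Citation` class to include the `DocumentId` and `ChunkId` properties
and make `PageNumber` nullable. Adjust the citation-parsing logic and
the citation format rules (`page_number` is now `page-number`).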
Modify the chunk text formatting in `ChatService.cs` to
include a page number (currently hard-coded to 1).
This commit is contained in:
Marco Minerva
2025-06-05 16:25:16 +02:00
parent 9f5bd02f78
commit 5530a84d82
2 changed files with 14 additions and 8 deletions
@@ -314,18 +314,20 @@
return (text ?? string.Empty, citations);
}
var pattern = "<citation\\s+filename='([^']*)'\\s+page_number='([^']*)'>(.*?)<\\/citation>";
var pattern = @"<citation\s+document-id='(?<documentId>[^']*)'\s+chunk-id='(?<chunkId>[^']*)'\s+filename='(?<filename>[^']*)'\s+page-number='(?<pageNumber>[^']*)'>\s*(?<quote>.*?)\s*</citation>";
var matches = Regex.Matches(text, pattern, RegexOptions.Singleline);
foreach (Match match in matches)
{
if (match.Success && match.Groups.Count == 4)
if (match.Success)
{
citations.Add(new Citation
{
FileName = match.Groups[1].Value,
PageNumber = match.Groups[2].Value,
Quote = match.Groups[3].Value
DocumentId = Guid.Parse(match.Groups["documentId"].Value),
ChunkId = Guid.Parse(match.Groups["chunkId"].Value),
FileName = match.Groups["filename"].Value,
PageNumber = match.Groups["pageNumber"].Value,
Quote = match.Groups["quote"].Value
});
}
}
@@ -358,10 +360,14 @@
/// <summary>
/// Holds the values captured from a single <c>&lt;citation ...&gt;</c> tag: the identifiers of the
/// cited document and chunk, the file name, the page number and the quoted text.
/// </summary>
public class Citation
{
/// <summary>Identifier parsed from the tag's <c>document-id</c> attribute.</summary>
public Guid DocumentId { get; set; }
/// <summary>Identifier parsed from the tag's <c>chunk-id</c> attribute.</summary>
public Guid ChunkId { get; set; }
/// <summary>Value of the tag's <c>filename</c> attribute.</summary>
// "= null!" only silences the nullable-reference warning; the value is assigned in the object initializer.
public string FileName { get; set; } = null!;
/// <summary>Text found between the opening and closing citation tags.</summary>
public string Quote { get; set; } = null!;
// NOTE(review): this listing is a diff view that shows both the previous (non-nullable) and the
// new (nullable) PageNumber declaration; per the commit description only the nullable one remains.
public string PageNumber { get; set; } = null!;
/// <summary>Value of the tag's <c>page-number</c> attribute, kept as text; declared nullable.</summary>
public string? PageNumber { get; set; }
}
}
@@ -133,7 +133,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
First provide your complete answer, then add a blank line, then list all citations.
Use this XML format for citations:
<citation filename='string' page_number='1'>exact quote here</citation>
<citation document-id='document_id' chunk-id='chunk_id' filename='string' page-number='1'>exact quote here</citation>
STRICT RULES for citations:
- Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line.
@@ -162,7 +162,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
foreach (var chunk in chunks)
{
var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id} | Page Number: 1) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
var tokenCount = tokenizerService.CountChatCompletionTokens(text);
if (tokenCount > availableTokens)