mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Update citation handling and formatting in Ask.razor
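Refactor the regex pattern in `Ask.razor` to use named groups and to capture the new `document-id` and `chunk-id` attributes. Update the `Citation` class to include the corresponding `DocumentId` and `ChunkId` properties and make `PageNumber` nullable. Adjust the citation-adding logic and the citation format rules in the prompt (the attribute is renamed from `page_number` to `page-number`). Modify the chunk text formatting in `ChatService.cs` to include a page number, which is currently hard-coded to 1.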
This commit is contained in:
@@ -314,18 +314,20 @@
|
|||||||
return (text ?? string.Empty, citations);
|
return (text ?? string.Empty, citations);
|
||||||
}
|
}
|
||||||
|
|
||||||
var pattern = "<citation\\s+filename='([^']*)'\\s+page_number='([^']*)'>(.*?)<\\/citation>";
|
var pattern = @"<citation\s+document-id='(?<documentId>[^']*)'\s+chunk-id='(?<chunkId>[^']*)'\s+filename='(?<filename>[^']*)'\s+page-number='(?<pageNumber>[^']*)'>\s*(?<quote>.*?)\s*</citation>";
|
||||||
|
|
||||||
var matches = Regex.Matches(text, pattern, RegexOptions.Singleline);
|
var matches = Regex.Matches(text, pattern, RegexOptions.Singleline);
|
||||||
foreach (Match match in matches)
|
foreach (Match match in matches)
|
||||||
{
|
{
|
||||||
if (match.Success && match.Groups.Count == 4)
|
if (match.Success)
|
||||||
{
|
{
|
||||||
citations.Add(new Citation
|
citations.Add(new Citation
|
||||||
{
|
{
|
||||||
FileName = match.Groups[1].Value,
|
DocumentId = Guid.Parse(match.Groups["documentId"].Value),
|
||||||
PageNumber = match.Groups[2].Value,
|
ChunkId = Guid.Parse(match.Groups["chunkId"].Value),
|
||||||
Quote = match.Groups[3].Value
|
FileName = match.Groups["filename"].Value,
|
||||||
|
PageNumber = match.Groups["pageNumber"].Value,
|
||||||
|
Quote = match.Groups["quote"].Value
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -358,10 +360,14 @@
|
|||||||
|
|
||||||
public class Citation
|
public class Citation
|
||||||
{
|
{
|
||||||
|
public Guid DocumentId { get; set; }
|
||||||
|
|
||||||
|
public Guid ChunkId { get; set; }
|
||||||
|
|
||||||
public string FileName { get; set; } = null!;
|
public string FileName { get; set; } = null!;
|
||||||
|
|
||||||
public string Quote { get; set; } = null!;
|
public string Quote { get; set; } = null!;
|
||||||
|
|
||||||
public string PageNumber { get; set; } = null!;
|
public string? PageNumber { get; set; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -133,7 +133,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
|
|||||||
First provide your complete answer, then add a blank line, then list all citations.
|
First provide your complete answer, then add a blank line, then list all citations.
|
||||||
|
|
||||||
Use this XML format for citations:
|
Use this XML format for citations:
|
||||||
<citation filename='string' page_number='1'>exact quote here</citation>
|
<citation document-id='document_id' chunk-id='chunk_id' filename='string' page-number='1'>exact quote here</citation>
|
||||||
|
|
||||||
STRICT RULES for citations:
|
STRICT RULES for citations:
|
||||||
- Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line.
|
- Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line.
|
||||||
@@ -162,7 +162,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
|
|||||||
|
|
||||||
foreach (var chunk in chunks)
|
foreach (var chunk in chunks)
|
||||||
{
|
{
|
||||||
var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
|
var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id} | Page Number: 1) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";
|
||||||
|
|
||||||
var tokenCount = tokenizerService.CountChatCompletionTokens(text);
|
var tokenCount = tokenizerService.CountChatCompletionTokens(text);
|
||||||
if (tokenCount > availableTokens)
|
if (tokenCount > availableTokens)
|
||||||
|
|||||||
Reference in New Issue
Block a user