Update content decoding and validation logic

- Changed `Chunk.PageNumber` (declared in the `IContentDecoder` file) from `int` to nullable `int?`; `TextContentDecoder` now passes `null` instead of the hard-coded page number `1`.
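- Simplified the citation rules in the `ChatService` prompt: the section shrinks from 23 lines to 12, and the "answer first, then citations" instruction no longer asks for a blank line in between.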
- Moved the existing `QuestionValidator` class (FluentValidation rules for the `Question` model) from the `SqlDatabaseVectorSearch.Validators` namespace to `SqlDatabaseVectorSearch.Validations`.
This commit is contained in:
Marco Minerva
2025-06-06 10:50:03 +02:00
parent 5530a84d82
commit dc6bbfde91
4 changed files with 4 additions and 15 deletions
@@ -5,4 +5,4 @@ public interface IContentDecoder
Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default);
}
public record class Chunk(int PageNumber, int IndexOnPage, string Content);
/// <summary>
/// A single piece of decoded content, as produced by <c>IContentDecoder.DecodeAsync</c>.
/// </summary>
/// <param name="PageNumber">
/// Page the chunk belongs to, or <c>null</c> when no page number is supplied
/// (the plain-text decoder passes <c>null</c>).
/// </param>
/// <param name="IndexOnPage">
/// Position of the chunk. The plain-text decoder passes the zero-based paragraph index from
/// <c>Select</c>; NOTE(review): presumably relative to the page for paged formats — confirm against other decoders.
/// </param>
/// <param name="Content">Text of the chunk.</param>
public record class Chunk(int? PageNumber, int IndexOnPage, string Content);
@@ -12,6 +12,6 @@ public class TextContentDecoder(IServiceProvider serviceProvider) : IContentDeco
var content = await readStream.ReadToEndAsync(cancellationToken);
var paragraphs = textChunker.Split(content);
return paragraphs.Select((text, index) => new Chunk(1, index, text)).ToList();
return paragraphs.Select((text, index) => new Chunk(null, index, text)).ToList();
}
}
@@ -127,23 +127,12 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
Never answer questions that are not related to this chat.
You must answer in the same language as the user's question.
IMPORTANT - CITATION PLACEMENT AND LENGTH:
The quote in each <citation> MUST be MAXIMUM 5 words, taken word-for-word from the search result. If the quote is longer than 5 words, your answer is INVALID.
When you find an answer, you MUST place ALL citations ONLY at the very end of your response, never inside or between sentences.
First provide your complete answer, then add a blank line, then list all citations.
First provide your complete answer, then list all citations.
Use this XML format for citations:
<citation document-id='document_id' chunk-id='chunk_id' filename='string' page-number='1'>exact quote here</citation>
STRICT RULES for citations:
- Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line.
- If you include citations anywhere except at the end, your answer is WRONG and INVALID.
- Always include the citation(s) if there are results. If you don't know the answer, do NOT include citations.
- The quote must be max 5 words, taken word-for-word from the search result, and is the basis for why the citation is relevant. If the quote is longer than 5 words, your answer is INVALID.
- Do NOT refer to the presence of citations; just emit these tags right at the end, with no surrounding text.
- The citations must always be in a list at the end of the response, one after the other. Never add the citations between the actual response text or inside sentences.
- Do NOT add any text after the citations.
- ALWAYS leave a blank line between your answer and the first citation.
""");
var prompt = new StringBuilder($"""
@@ -1,7 +1,7 @@
using FluentValidation;
using SqlDatabaseVectorSearch.Models;
namespace SqlDatabaseVectorSearch.Validators;
namespace SqlDatabaseVectorSearch.Validations;
public class QuestionValidator : AbstractValidator<Question>
{