mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Update content decoding and validation logic
- Changed `PageNumber` in `Chunk` to nullable `int?` in `IContentDecoder` and updated related logic in `TextContentDecoder`.
- Revised citation rules in `ChatService` for stricter placement and formatting.
- Introduced `QuestionValidator` class with validation rules for `Question` model's `Text` property.
This commit is contained in:
@@ -5,4 +5,4 @@ public interface IContentDecoder
|
|||||||
Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default);
|
Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default);
|
||||||
}
|
}
|
||||||
|
|
||||||
public record class Chunk(int PageNumber, int IndexOnPage, string Content);
|
public record class Chunk(int? PageNumber, int IndexOnPage, string Content);
|
||||||
@@ -12,6 +12,6 @@ public class TextContentDecoder(IServiceProvider serviceProvider) : IContentDeco
|
|||||||
var content = await readStream.ReadToEndAsync(cancellationToken);
|
var content = await readStream.ReadToEndAsync(cancellationToken);
|
||||||
|
|
||||||
var paragraphs = textChunker.Split(content);
|
var paragraphs = textChunker.Split(content);
|
||||||
return paragraphs.Select((text, index) => new Chunk(1, index, text)).ToList();
|
return paragraphs.Select((text, index) => new Chunk(null, index, text)).ToList();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -127,23 +127,12 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
|
|||||||
Never answer questions that are not related to this chat.
|
Never answer questions that are not related to this chat.
|
||||||
You must answer in the same language as the user's question.
|
You must answer in the same language as the user's question.
|
||||||
|
|
||||||
IMPORTANT - CITATION PLACEMENT AND LENGTH:
|
|
||||||
The quote in each <citation> MUST be MAXIMUM 5 words, taken word-for-word from the search result. If the quote is longer than 5 words, your answer is INVALID.
|
The quote in each <citation> MUST be MAXIMUM 5 words, taken word-for-word from the search result. If the quote is longer than 5 words, your answer is INVALID.
|
||||||
When you find an answer, you MUST place ALL citations ONLY at the very end of your response, never inside or between sentences.
|
When you find an answer, you MUST place ALL citations ONLY at the very end of your response, never inside or between sentences.
|
||||||
First provide your complete answer, then add a blank line, then list all citations.
|
First provide your complete answer, then list all citations.
|
||||||
|
|
||||||
Use this XML format for citations:
|
Use this XML format for citations:
|
||||||
<citation document-id='document_id' chunk-id='chunk_id' filename='string' page-number='1'>exact quote here</citation>
|
<citation document-id='document_id' chunk-id='chunk_id' filename='string' page-number='1'>exact quote here</citation>
|
||||||
|
|
||||||
STRICT RULES for citations:
|
|
||||||
- Citations MUST NEVER appear inside, before, or between sentences of your answer. They MUST be grouped together ONLY at the end, after a blank line.
|
|
||||||
- If you include citations anywhere except at the end, your answer is WRONG and INVALID.
|
|
||||||
- Always include the citation(s) if there are results. If you don't know the answer, do NOT include citations.
|
|
||||||
- The quote must be max 5 words, taken word-for-word from the search result, and is the basis for why the citation is relevant. If the quote is longer than 5 words, your answer is INVALID.
|
|
||||||
- Do NOT refer to the presence of citations; just emit these tags right at the end, with no surrounding text.
|
|
||||||
- The citations must always be in a list at the end of the response, one after the other. Never add the citations between the actual response text or inside sentences.
|
|
||||||
- Do NOT add any text after the citations.
|
|
||||||
- ALWAYS leave a blank line between your answer and the first citation.
|
|
||||||
""");
|
""");
|
||||||
|
|
||||||
var prompt = new StringBuilder($"""
|
var prompt = new StringBuilder($"""
|
||||||
|
|||||||
+1
-1
@@ -1,7 +1,7 @@
|
|||||||
using FluentValidation;
|
using FluentValidation;
|
||||||
using SqlDatabaseVectorSearch.Models;
|
using SqlDatabaseVectorSearch.Models;
|
||||||
|
|
||||||
namespace SqlDatabaseVectorSearch.Validators;
|
namespace SqlDatabaseVectorSearch.Validations;
|
||||||
|
|
||||||
public class QuestionValidator : AbstractValidator<Question>
|
public class QuestionValidator : AbstractValidator<Question>
|
||||||
{
|
{
|
||||||
Reference in New Issue
Block a user