Add citations feature and update streaming responses

- Updated README.md to include a new **Citations** feature, detailing how users can access source information.
- Modified JSON response examples to include a `citations` field and updated token usage details.
- Enhanced streaming response section to clarify the end of the stream includes citations.
- Adjusted `VectorSearchService.cs` to return `StreamState.End` and improved citation handling in streaming.
- Updated `appsettings.json` with new model IDs for Azure OpenAI configuration.
This commit is contained in:
Marco Minerva
2025-06-24 12:16:48 +02:00
parent c6ad2ca3ea
commit 30fba5cfe0
3 changed files with 157 additions and 49 deletions
+134 -31
View File
@@ -27,26 +27,47 @@ This repository contains a Blazor Web App as well as a Minimal API that allows t
- **Conversation History with Question Reformulation**: This feature allows users to view the history of their conversations, including the ability to reformulate questions for better clarity and understanding. This ensures that users can track their interactions and refine their queries as needed.
- **Information about Token Usage**: Users can access detailed information about token usage, which helps in understanding the consumption of tokens during interactions. This feature provides transparency and helps users manage their token usage effectively.
- **Response streaming**: This feature enables real-time streaming of responses, allowing users to receive information as it is being processed. This ensures a seamless and efficient flow of information, enhancing the overall user experience.
- **Citations**: The application provides citations for the sources used to justify each answer. This allows users to verify the information and understand the origin of the content provided by the system.
### Example of JSON response
```json
{
"originalQuestion": "why is mars called the red planet?",
"reformulatedQuestion": "Why is Mars referred to as the Red Planet?",
"answer": "Mars is referred to as the Red Planet due to its characteristic reddish color, which is caused by the abundance of iron oxide (rust) on its surface. This distinctive coloration has also been a significant factor in the cultural and mythological associations of Mars across different civilizations.",
"streamState": null,
"reformulatedQuestion": "Why is the planet Mars called the red planet?",
"answer": "Mars is called the Red Planet because its surface has an orange-red color due to being covered in iron(III) oxide dust, also known as rust. This iron oxide gives Mars its distinctive reddish appearance when observed from Earth and is the origin of its well-known nickname",
"streamState": "End",
"tokenUsage": {
"reformulation": {
"promptTokens": 107,
"completionTokens": 10,
"totalTokens": 117
"promptTokens": 812,
"completionTokens": 11,
"totalTokens": 823
},
"embeddingTokenCount": 10,
"question": {
"promptTokens": 9142,
"completionTokens": 53,
"totalTokens": 9195
"promptTokens": 31708,
"completionTokens": 227,
"totalTokens": 31935
}
},
"citations": [
{
"documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
"chunkId": "749aba1e-0db5-4033-cfa6-08ddb0115da3",
"fileName": "Mars.pdf",
"quote": "surface of Mars is orange-red because it is covered in iron(III) oxide",
"pageNumber": 1,
"indexOnPage": 0
},
{
"documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
"chunkId": "215e7197-513f-4fbe-cfa8-08ddb0115da3",
"fileName": "Mars.pdf",
"quote": "Martian surface is caused by ferric oxide, or rust",
"pageNumber": 3,
"indexOnPage": 0
}
]
}
```
@@ -58,69 +79,133 @@ When using the `/api/ask-streaming` endpoint, answers will be streamed as happen
[
{
"originalQuestion": "why is mars called the red planet?",
"reformulatedQuestion": "Why is Mars referred to as the Red Planet?",
"reformulatedQuestion": "Why is the planet Mars known as the red planet?",
"answer": null,
"streamState": "Start",
"tokenUsage": {
"reformulation": {
"promptTokens": 107,
"completionTokens": 10,
"totalTokens": 117
"promptTokens": 541,
"completionTokens": 12,
"totalTokens": 553
},
"embeddingTokenCount": 10,
"embeddingTokenCount": 11,
"question": null
}
},
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": "Mars",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " is",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " called",
"answer": " known",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " as",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " the",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " Red",
"answer": " red",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " Planet",
"answer": " planet",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
//...
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": ".",
"answer": " because",
"streamState": "Append",
"tokenUsage": null
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " its",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " surface",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " is",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " covered",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " in",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
{
"originalQuestion": null,
"reformulatedQuestion": null,
"answer": " iron",
"streamState": "Append",
"tokenUsage": null,
"citations": null
},
// ...
{
"originalQuestion": null,
"reformulatedQuestion": null,
@@ -130,11 +215,29 @@ When using the `/api/ask-streaming` endpoint, answers will be streamed as happen
"reformulation": null,
"embeddingTokenCount": null,
"question": {
"promptTokens": 8986,
"completionTokens": 31,
"totalTokens": 9017
"promptTokens": 30949,
"completionTokens": 221,
"totalTokens": 31170
}
},
"citations": [
{
"documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
"chunkId": "749aba1e-0db5-4033-cfa6-08ddb0115da3",
"fileName": "Mars.pdf",
"quote": "surface of Mars is orange-red",
"pageNumber": 1,
"indexOnPage": 0
},
{
"documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
"chunkId": "215e7197-513f-4fbe-cfa8-08ddb0115da3",
"fileName": "Mars.pdf",
"quote": "red-orange appearance of the Martian surface is caused by ferric oxide, or rust",
"pageNumber": 3,
"indexOnPage": 0
}
]
}
]
```
@@ -147,7 +250,7 @@ When using the `/api/ask-streaming` endpoint, answers will be streamed as happen
- each one contains a token
- The *streamState* property is set to `Append`
- *originalQuestion*, *reformulatedQuestion*, *tokenUsage* and *citations* are always `null`
- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer.
- The stream ends when an element with *streamState* equal to `End` is received. This element contains the token usage information for the question and the whole answer, as well as the list of citations.
> [!NOTE]
> If you prefer to use straight SQL, check out the [sql branch](https://github.com/marcominerva/SqlDatabaseVectorSearch/tree/sql).
@@ -91,7 +91,7 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
// Extract citations from the answer
var (answer, citations) = ExtractCitations(fullAnswer);
return new(question.Text, reformulatedQuestion.Text!, answer, null, new(reformulatedQuestion.TokenUsage, embeddingTokenCount, tokenUsage), citations);
return new(question.Text, reformulatedQuestion.Text!, answer, StreamState.End, new(reformulatedQuestion.TokenUsage, embeddingTokenCount, tokenUsage), citations);
}
public async IAsyncEnumerable<Response> AskStreamingAsync(Question question, bool reformulate = true, [EnumeratorCancellation] CancellationToken cancellationToken = default)
@@ -106,28 +106,33 @@ public partial class VectorSearchService(IServiceProvider serviceProvider, Appli
TokenUsageResponse? tokenUsageResponse = null;
var fullAnswer = new StringBuilder();
var areCitationsStarted = false;
var citationsStarted = false;
// Return each token as a partial response.
// Returns each token as a partial response.
await foreach (var (token, tokenUsage) in answerStream)
{
if (token is not null) // token can be null when the stream ends.
{
fullAnswer.Append(token);
if (token?.Contains('【') == true)
if (token.Contains('【'))
{
// Citations are started when we encounter a token containing a 【 character.
// Citations start when we encounter a token containing a 【 character.
// We need to track it because we don't want to return the citations in the actual response.
areCitationsStarted = true;
citationsStarted = true;
}
if (!areCitationsStarted)
if (!citationsStarted)
{
yield return new(token, StreamState.Append);
}
// Token usage is expected in the last message.
}
else
{
// Token usage is expected in the last message, when token is null.
tokenUsageResponse ??= tokenUsage is not null ? new(tokenUsage) : null;
}
}
// Extract citations at the end of streaming.
var (_, citations) = ExtractCitations(fullAnswer.ToString());
+1 -1
View File
@@ -6,7 +6,7 @@
"ChatCompletion": {
"Endpoint": "",
"Deployment": "",
"ModelId": "", // o1, gpt-4o, gpt-4o-mini, gpt-4, gpt-3.5
"ModelId": "", // gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4o, gpt-4o-mini, gpt-4, gpt-3.5
"ApiKey": ""
},
"Embedding": {