Merge pull request #12 from marcominerva/decoder_updates

Add citation supports
2026-06-20 12:23:10 +00:00 · 2025-06-24 14:54:53 +02:00
parent 12e8a042db 06c1741f14
commit 13c0beeee6
28 changed files with 649 additions and 204 deletions
@@ -22,6 +22,7 @@ dotnet_style_operator_placement_when_wrapping = beginning_of_line
 dotnet_style_object_initializer = true:suggestion
 dotnet_style_coalesce_expression = true:suggestion
 dotnet_style_collection_initializer = true:suggestion
+dotnet_style_prefer_collection_expression = when_types_loosely_match:suggestion
 dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
 dotnet_style_prefer_conditional_expression_over_assignment = false:silent
 dotnet_style_prefer_conditional_expression_over_return = false:silent
@@ -244,23 +245,23 @@ dotnet_naming_rule.async_method_should_be_ends_with_async.style = ends_with_asyn

 dotnet_naming_symbols.interface.applicable_kinds = interface
 dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
-dotnet_naming_symbols.interface.required_modifiers = 
+dotnet_naming_symbols.interface.required_modifiers =

 dotnet_naming_symbols.method.applicable_kinds = method
 dotnet_naming_symbols.method.applicable_accessibilities = public
-dotnet_naming_symbols.method.required_modifiers = 
+dotnet_naming_symbols.method.required_modifiers =

 dotnet_naming_symbols.private_or_internal_field.applicable_kinds = field
 dotnet_naming_symbols.private_or_internal_field.applicable_accessibilities = internal, private, private_protected
-dotnet_naming_symbols.private_or_internal_field.required_modifiers = 
+dotnet_naming_symbols.private_or_internal_field.required_modifiers =

 dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
 dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
-dotnet_naming_symbols.types.required_modifiers = 
+dotnet_naming_symbols.types.required_modifiers =

 dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
 dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
-dotnet_naming_symbols.non_field_members.required_modifiers = 
+dotnet_naming_symbols.non_field_members.required_modifiers =

 dotnet_naming_symbols.async_method.applicable_kinds = method
 dotnet_naming_symbols.async_method.applicable_accessibilities = *
@@ -268,24 +269,24 @@ dotnet_naming_symbols.async_method.required_modifiers = async

 # Naming styles

-dotnet_naming_style.pascal_case.required_prefix = 
-dotnet_naming_style.pascal_case.required_suffix = 
-dotnet_naming_style.pascal_case.word_separator = 
+dotnet_naming_style.pascal_case.required_prefix =
+dotnet_naming_style.pascal_case.required_suffix =
+dotnet_naming_style.pascal_case.word_separator =
 dotnet_naming_style.pascal_case.capitalization = pascal_case

 dotnet_naming_style.begins_with_i.required_prefix = I
-dotnet_naming_style.begins_with_i.required_suffix = 
-dotnet_naming_style.begins_with_i.word_separator = 
+dotnet_naming_style.begins_with_i.required_suffix =
+dotnet_naming_style.begins_with_i.word_separator =
 dotnet_naming_style.begins_with_i.capitalization = pascal_case

-dotnet_naming_style.camel_case.required_prefix = 
-dotnet_naming_style.camel_case.required_suffix = 
-dotnet_naming_style.camel_case.word_separator = 
+dotnet_naming_style.camel_case.required_prefix =
+dotnet_naming_style.camel_case.required_suffix =
+dotnet_naming_style.camel_case.word_separator =
 dotnet_naming_style.camel_case.capitalization = camel_case

-dotnet_naming_style.ends_with_async.required_prefix = 
+dotnet_naming_style.ends_with_async.required_prefix =
 dotnet_naming_style.ends_with_async.required_suffix = Async
-dotnet_naming_style.ends_with_async.word_separator = 
+dotnet_naming_style.ends_with_async.word_separator =
 dotnet_naming_style.ends_with_async.capitalization = pascal_case

 # IDE0058: Expression value is never used
@@ -295,4 +296,7 @@ dotnet_diagnostic.IDE0058.severity = none
 dotnet_diagnostic.IDE0010.severity = none

 # IDE0072: Add missing cases
-dotnet_diagnostic.IDE0072.severity = none
+dotnet_diagnostic.IDE0072.severity = none
+
+# IDE0305: Simplify collection initialization
+dotnet_diagnostic.IDE0305.severity = none
@@ -1,9 +1,36 @@
 # SQL Database Vector Search Sample
-A repository that showcases the native VECTOR type in Azure SQL Database to perform embeddings and RAG with Azure OpenAI.

-The application allows to load documents, generate embeddings and save them into the database as Vectors, and perform searches using Vector Search and RAG. Currently, PDF, DOCX, TXT and MD files are supported. Vectors are saved and retrieved with Entity Framework Core using the [EFCore.SqlServer.VectorSearch](https://github.com/efcore/EfCore.SqlServer.VectorSearch) library. Embedding and Chat Completion are integrated with [Semantic Kernel](https://github.com/microsoft/semantic-kernel).
+[![.NET 9](https://img.shields.io/badge/.NET-9-blue)](https://dotnet.microsoft.com/en-us/download/dotnet/9.0)
+[![Minimal API](https://img.shields.io/badge/Minimal%20API-Available-green)](https://dotnet.microsoft.com/apps/aspnet/apis)
+[![Blazor](https://img.shields.io/badge/Blazor-WebApp-purple)](https://dotnet.microsoft.com/apps/aspnet/web-apps/blazor)

-This repository contains a Blazor Web App as well as a Minimal API that allows to programmatically interact with embeddings and RAG.
+A Blazor Web App and Minimal API for performing RAG (Retrieval Augmented Generation) and vector search using the native VECTOR type in Azure SQL Database and Azure OpenAI.
+
+
+## Table of Contents
+- [Overview](#overview)
+- [Screenshots](#screenshots)
+- [Prerequisites](#prerequisites)
+- [Project Structure](#project-structure)
+- [Setup](#setup)
+- [Supported Features](#supported-features)
+- [How to Use](#how-to-use)
+- [Limitations & FAQ](#limitations--faq)
+- [Contributing](#contributing)
+- [License](#license)
+
+---
+
+## Overview
+This application allows you to:
+- Load documents (PDF, DOCX, TXT, MD)
+- Generate embeddings and save them as vectors in Azure SQL Database
+- Perform semantic search and RAG using Azure OpenAI
+- Interact via a Blazor Web App or programmatically via Minimal API
+
+Embeddings and chat completion are powered by [Semantic Kernel](https://github.com/microsoft/semantic-kernel). Vectors are managed with [EFCore.SqlServer.VectorSearch](https://github.com/efcore/EfCore.SqlServer.VectorSearch).
+
+## Screenshots

 ### Web App
 ![SQL Database Vector Search Web App](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/assets/SqlDatabaseVectorSearch_WebApp.png)
@@ -11,116 +38,242 @@ This repository contains a Blazor Web App as well as a Minimal API that allows t
 ### Web API
 ![SQL Database Vector Search API](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/assets/SqlDatabaseVectorSearch_API.png)

+## Prerequisites
+- [.NET 9 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/9.0)
+- [Azure SQL Database](https://learn.microsoft.com/en-us/azure/azure-sql/database/single-database-create-quickstart)
+- Azure OpenAI resource and API keys
+
+## Project Structure
+- `SqlDatabaseVectorSearch/` - Main Blazor Web App and API
+  - `Components/` - Blazor UI components
+  - `Data/` - EF Core context, migrations, and entities
+  - `Endpoints/` - Minimal API endpoints
+  - `Services/` - Business logic and integration services
+  - `TextChunkers/` - Text splitting utilities
+  - `Settings/` - Configuration classes
+
 ## Setup

- [Create an Azure SQL Database](https://learn.microsoft.com/en-us/azure/azure-sql/database/single-database-create-quickstart)
- Open the [appsettings.json](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json) file and set the connection string to the database and the other settings required by Azure OpenAI
-  - If your embedding model supports shortening, like **text-embedding-3-small** and **text-embedding-3-large**, and you want to use this feature, you need to set the [`Dimensions`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json#L17) property to the corresponding value. If your model doesn't provide this feature, or do you want to use the default size, just leave the [`Dimensions`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json#L17) property to NULL. Keep in mind that **text-embedding-3-small** has a dimension of 1536, while **text-embedding-3-large** uses vectors with 3072 elements, so with this latter model it is mandatory to specify a value (that must be less or equal to 1998, the maximum currently supported by the VECTOR type).
- You may need to update the size of the [`VECTOR`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/DataAccessLayer/ApplicationDbContext.cs?plain=1#L42C1-L42C47) column to match the size of the embedding model. The default value is 1536. Currently, the maximum allowed value is 1998. If you change it, remember to update also the [Database Migration](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/DataAccessLayer/Migrations/00000000000000_Initial.cs?plain=1#L35C1-L35C92).
- Run the application and start importing your documents
- If you want to directly use the APIs:
-  - import your documents with the `/api/documents` endpoint.
-  - Ask questions using `/api/ask` or `/api/ask-streaming` endpoints.
+1. Clone the repository
+
+    ```bash
+    git clone https://github.com/marcominerva/SqlDatabaseVectorSearch.git
+    ```
+
+2. Configure the database and OpenAI settings
+   - Edit `SqlDatabaseVectorSearch/appsettings.json` and set your Azure SQL connection string and OpenAI settings.
+   - If using embedding models with shortening (e.g., `text-embedding-3-small` or `text-embedding-3-large`), set the `Dimensions` property accordingly. For `text-embedding-3-large`, you must specify a value <= 1998.
+   - If you change the VECTOR size, update both the [ApplicationDbContext](SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs) and the [Initial Migration](SqlDatabaseVectorSearch/Data/Migrations/00000000000000_Initial.cs).
+
+3. Run the application
+
+    ```bash
+    dotnet run --project SqlDatabaseVectorSearch/SqlDatabaseVectorSearch.csproj
+    ```
+
+4. Access the Web App
+   - Navigate to `https://localhost:5001` (or the port shown in the console)

 ## Supported features

 - **Conversation History with Question Reformulation**: This feature allows users to view the history of their conversations, including the ability to reformulate questions for better clarity and understanding. This ensures that users can track their interactions and refine their queries as needed.
 - **Information about Token Usage**: Users can access detailed information about token usage, which helps in understanding the consumption of tokens during interactions. This feature provides transparency and helps users manage their token usage effectively.
- **Response streaming**: This feature enables real-time streaming of responses, allowing users to receive information as it is being processed. This ensures a seamless and efficient flow of information, enhancing the overall user experience.
+- **Response Streaming**: This feature enables real-time streaming of responses, allowing users to receive information as it is being processed. This ensures a seamless and efficient flow of information, enhancing the overall user experience.
+- **Citations**: The application provides citations for the sources used to justify each answer. This allows users to verify the information and understand the origin of the content provided by the system.
+
+## How to Use
+
+- **Web App**: Use the Blazor interface to upload documents, search, and chat with RAG.
+- **API**: Import documents via `POST /api/documents` and ask questions via `POST /api/ask` or `POST /api/ask-streaming`.
+
+#### Example API Request
+```
+POST /api/ask
+Content-Type: application/json
+
+{
+    "conversationId": "3d0bd178-499d-433a-b2bc-c35e488d9e2c",
+    "text": "Why is Mars called the red planet?"
+}
+```
+
+#### Example API Response

 ```json
 {
  "originalQuestion": "why is mars called the red planet?",
-  "reformulatedQuestion": "Why is Mars referred to as the Red Planet?",
-  "answer": "Mars is referred to as the Red Planet due to its characteristic reddish color, which is caused by the abundance of iron oxide (rust) on its surface. This distinctive coloration has also been a significant factor in the cultural and mythological associations of Mars across different civilizations.",
-  "streamState": null,
+  "reformulatedQuestion": "Why is the planet Mars called the red planet?",
+  "answer": "Mars is called the Red Planet because its surface has an orange-red color due to being covered in iron(III) oxide dust, also known as rust. This iron oxide gives Mars its distinctive reddish appearance when observed from Earth and is the origin of its well-known nickname",
+  "streamState": "End",
  "tokenUsage": {
    "reformulation": {
-      "promptTokens": 107,
-      "completionTokens": 10,
-      "totalTokens": 117
+      "promptTokens": 812,
+      "completionTokens": 11,
+      "totalTokens": 823
    },
    "embeddingTokenCount": 10,
    "question": {
-      "promptTokens": 9142,
-      "completionTokens": 53,
-      "totalTokens": 9195
+      "promptTokens": 31708,
+      "completionTokens": 227,
+      "totalTokens": 31935
    }
-  }
+  },
+  "citations": [
+    {
+      "documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
+      "chunkId": "749aba1e-0db5-4033-cfa6-08ddb0115da3",
+      "fileName": "Mars.pdf",
+      "quote": "surface of Mars is orange-red because it is covered in iron(III) oxide",
+      "pageNumber": 1,
+      "indexOnPage": 0
+    },
+    {
+      "documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
+      "chunkId": "215e7197-513f-4fbe-cfa8-08ddb0115da3",
+      "fileName": "Mars.pdf",
+      "quote": "Martian surface is caused by ferric oxide, or rust",
+      "pageNumber": 3,
+      "indexOnPage": 0
+    }
+  ]
 }
 ```

 ### How response streaming works

-When using the `/api/ask-streaming` endpoint, answers will be streamed as happens with the typical response from OpenAI. The format of the response is the following:
+When using the `/api/ask-streaming` endpoint, answers will be streamed as with the typical response from OpenAI. The format of the response is as follows:

 ```json
 [
  {
    "originalQuestion": "why is mars called the red planet?",
-    "reformulatedQuestion": "Why is Mars referred to as the Red Planet?",
+    "reformulatedQuestion": "Why is the planet Mars known as the red planet?",
    "answer": null,
    "streamState": "Start",
    "tokenUsage": {
      "reformulation": {
-        "promptTokens": 107,
-        "completionTokens": 10,
-        "totalTokens": 117
+        "promptTokens": 541,
+        "completionTokens": 12,
+        "totalTokens": 553
      },
-      "embeddingTokenCount": 10,
+      "embeddingTokenCount": 11,
      "question": null
-    }
+    },
+    "citations": null
  },
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
    "answer": "Mars",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
  },
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
    "answer": " is",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
  },
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
-    "answer": " called",
+    "answer": " known",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
+  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " as",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
  },
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
    "answer": " the",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
  },
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
-    "answer": " Red",
+    "answer": " red",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
  },
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
-    "answer": " Planet",
+    "answer": " planet",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
  },
-  //...
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
-    "answer": ".",
+    "answer": " because",
    "streamState": "Append",
-    "tokenUsage": null
+    "tokenUsage": null,
+    "citations": null
  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " its",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
+  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " surface",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
+  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " is",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
+  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " covered",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
+  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " in",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
+  },
+  {
+    "originalQuestion": null,
+    "reformulatedQuestion": null,
+    "answer": " iron",
+    "streamState": "Append",
+    "tokenUsage": null,
+    "citations": null
+  },
+  // ...
  {
    "originalQuestion": null,
    "reformulatedQuestion": null,
@@ -130,24 +283,59 @@ When using the `/api/ask-streaming` endpoint, answers will be streamed as happen
      "reformulation": null,
      "embeddingTokenCount": null,
      "question": {
-        "promptTokens": 8986,
-        "completionTokens": 31,
-        "totalTokens": 9017
+        "promptTokens": 30949,
+        "completionTokens": 221,
+        "totalTokens": 31170
      }
-    }
+    },
+    "citations": [
+      {
+        "documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
+        "chunkId": "749aba1e-0db5-4033-cfa6-08ddb0115da3",
+        "fileName": "Mars.pdf",
+        "quote": "surface of Mars is orange-red",
+        "pageNumber": 1,
+        "indexOnPage": 0
+      },
+      {
+        "documentId": "b1870ad7-4685-42a3-576a-08ddb01159d5",
+        "chunkId": "215e7197-513f-4fbe-cfa8-08ddb0115da3",
+        "fileName": "Mars.pdf",
+        "quote": "red-orange appearance of the Martian surface is caused by ferric oxide, or rust",
+        "pageNumber": 3,
+        "indexOnPage": 0
+      }
+    ]
  }
 ]
 ```

 - The first piece of the response has the following characteristics:
-  - the *streamState* property is set to `Start`
-  - it contains the question and its reformulation (if not requested, *reformulatedQuestion* will be equals to *originalQuestion*)
-  - the *tokenUsage* section holds information about token used for reformulation (if done) and for the embedding of the question
+  - The *streamState* property is set to `Start`.
+  - It contains the question and its reformulation (if not requested, *reformulatedQuestion* will be equal to *originalQuestion*).
+  - The *tokenUsage* section holds information about tokens used for reformulation (if done) and for the embedding of the question.
 - Then, there are as many elements for the actual answer as necessary:
-  - each one contains a token
-  - The *streamState* property is set to `Append`
-  - *origianlQuestion*, *reformulatedQuestion* and *tokenUsage* are always `null`
- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer.
+  - Each one contains a token.
+  - The *streamState* property is set to `Append`.
+  - *originalQuestion*, *reformulatedQuestion*, *tokenUsage* and *citations* are always `null`.
+- The stream ends when an element whose *streamState* equals `End` is received. This element contains token usage information for the question and the whole answer, as well as the list of citations.
+
+## Limitations & FAQ
+
+- **VECTOR column size**: Maximum allowed is 1998. For `text-embedding-3-large`, set `Dimensions` <= 1998.
+- **Supported file types**: PDF, DOCX, TXT, MD.
+- **Known Issues**: See [Issues](https://github.com/marcominerva/SqlDatabaseVectorSearch/issues)
+
+## Contributing
+
+Contributions are welcome! Please open issues or pull requests. For major changes, discuss them first via an issue.
+
+## License
+
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+
+---

 > [!NOTE]
 > If you prefer to use straight SQL, check out the [sql branch](https://github.com/marcominerva/SqlDatabaseVectorSearch/tree/sql).
+
@@ -1,4 +1,5 @@
@page "/ask"
+@using System.Text.RegularExpressions

@inject IServiceProvider ServiceProvider
@inject IJSRuntime JSRuntime
@@ -72,6 +73,23 @@
                                                </Tooltip>
                                            </div>
                                        </div>
+                                        @if (message.Citations is not null && message.Citations.Count() > 0)
+                                        {
+                                            <div class="mt-3 d-flex flex-wrap">
+                                                @foreach (var citation in message.Citations)
+                                                {
+                                                    <div class="border rounded p-2 me-2 mb-2 citation-box small">
+                                                        <div>
+                                                            <strong>@citation.FileName</strong> @if (citation.PageNumber.GetValueOrDefault() > 0)
+                                                            {
+                                                                <span class="ms-2">pag. @citation.PageNumber</span>
+                                                            }
+                                                        </div>
+                                                        <div class="text-secondary small mt-1">@citation.Quote</div>
+                                                    </div>
+                                                }
+                                            </div>
+                                        }
                                    }
                                </div>
                            }
@@ -178,10 +196,22 @@
                }
                else if (delta.StreamState == StreamState.Append)
                {
+                    // Adds tokens to the assistant message as they are received.
                    assistantMessage.Text += delta.Answer;
                }
                else if (delta.StreamState == StreamState.End)
                {
+                    // Get citations from the response.
+                    assistantMessage.Citations = delta.Citations?.Select(c => new Citation
+                    {
+                        DocumentId = c.DocumentId,
+                        ChunkId = c.ChunkId,
+                        FileName = c.FileName,
+                        Quote = c.Quote,
+                        PageNumber = c.PageNumber,
+                        IndexOnPage = c.IndexOnPage
+                    });
+
                    assistantMessage.IsCompleted = true;
                    assistantMessage.TokenUsage += FormatTokenUsage(delta.TokenUsage);
                }
@@ -278,5 +308,23 @@
        public bool IsCompleted { get; set; }

        public string? TokenUsage { get; set; }
+
+        // List of citations extracted from the answer
+        public IEnumerable<Citation>? Citations { get; set; }
+    }
+
+    public class Citation
+    {
+        public Guid DocumentId { get; set; }
+
+        public Guid ChunkId { get; set; }
+
+        public string FileName { get; set; } = null!;
+
+        public string Quote { get; set; } = null!;
+
+        public int? PageNumber { get; set; }
+
+        public int IndexOnPage { get; set; }
    }
 }
@@ -76,7 +76,9 @@ else
            {
                <tr>
                    <td>
-                        <CheckboxInput @bind-Value="document.IsSelected" />
+                        <div class="d-flex justify-content-center align-items-center">
+                            <CheckboxInput @bind-Value="document.IsSelected" />
+                        </div>
                    </td>
                    <td>@document.Id</td>
                    <td>@document.Name</td>
@@ -107,7 +109,7 @@ else
    private bool isLoading = true;
    private IList<SelectableDocument> documents = [];

-    private UploadDocument Model { get; set; } = new();    
+    private UploadDocument Model { get; set; } = new();

    [Inject]
    protected ToastService ToastService { get; set; } = default!;
@@ -138,9 +140,9 @@ else
            foreach (var dbDocument in dbDocuments)
            {
                documents.Add(new SelectableDocument(dbDocument.Id, dbDocument.Name, dbDocument.CreationDate, dbDocument.ChunkCount)
-                    {
-                        LocalCreationDateString = await GetLocalDateTimeStringAsync(dbDocument.CreationDate)
-                    });
+                {
+                    LocalCreationDateString = await GetLocalDateTimeStringAsync(dbDocument.CreationDate)
+                });
            }
        }
        finally
@@ -193,12 +195,12 @@ else
        var selectedDocumentIds = documents?.Where(d => d.IsSelected).Select(d => d.Id) ?? [];

        var options = new ConfirmDialogOptions
-            {
-                YesButtonText = "Yes",
-                YesButtonColor = ButtonColor.Danger,
-                NoButtonText = "No",
-                NoButtonColor = ButtonColor.Secondary
-            };
+        {
+            YesButtonText = "Yes",
+            YesButtonColor = ButtonColor.Danger,
+            NoButtonText = "No",
+            NoButtonColor = ButtonColor.Secondary
+        };

        var confirmation = await dialog.ShowAsync(
            title: "Delete the selected documents?",
@@ -236,12 +238,12 @@ else
    private async Task<ToastMessage> CreateToastMessageAsync(ToastType toastType, string title, string message)
    {
        var toastMessage = new ToastMessage
-            {
-                Type = toastType,
-                Title = title,
-                HelpText = await GetLocalDateTimeStringAsync(DateTimeOffset.UtcNow),
-                Message = message
-            };
+        {
+            Type = toastType,
+            Title = title,
+            HelpText = await GetLocalDateTimeStringAsync(DateTimeOffset.UtcNow),
+            Message = message
+        };

        return toastMessage;
    }
@@ -4,16 +4,34 @@
 <PageTitle>SQL Database Vector Search</PageTitle>

 <h1>SQL Database Vector Search</h1>
-<p class="mt-3">
-    How to use the native VECTOR type in <img src="/images/sqldatabase.svg" /> Azure SQL Database to perform embeddings and RAG with <img src="/images/openai.svg" /> Azure OpenAI.
-</p>
-<p>
-    This application allows to load documents, generate embeddings and save them into the database as Vectors, and perform searches using Vector Search and RAG. Currently, PDF, DOCX, TXT and MD files are supported. Vectors are saved and retrieved with Entity Framework Core using the <a href="https://github.com/efcore/EfCore.SqlServer.VectorSearch" target="_blank">EFCore.SqlServer.VectorSearch</a> library. Embedding and Chat Completion are integrated with <a href="https://github.com/microsoft/semantic-kernel" target="_blank">Semantic Kernel</a>.
+
+<p class="mt-3 p-3 rounded bg-light text-dark shadow-sm">
+    A Blazor Web App and Minimal API for Retrieval Augmented Generation (RAG) and vector search using the native VECTOR type in <img src="/images/sqldatabase.svg" style="height:1.5em;vertical-align:middle;" /> Azure SQL Database with <img src="/images/openai.svg" style="height:1.5em;vertical-align:middle;" /> Azure OpenAI.
 </p>

-<h3>Supported features</h3>
+<p>
+    This application allows you to:
+    <ul>
+        <li>Load documents (PDF, DOCX, TXT, MD)</li>
+        <li>Generate embeddings and save them as vectors in Azure SQL Database</li>
+        <li>Perform semantic search and RAG using Azure OpenAI</li>
+        <li>Interact via a Blazor Web App or programmatically via Minimal API</li>
+    </ul>
+    Embeddings and chat completion are powered by <a href="https://github.com/microsoft/semantic-kernel" target="_blank">Semantic Kernel</a>. Vectors are managed with <a href="https://github.com/efcore/EfCore.SqlServer.VectorSearch" target="_blank">EFCore.SqlServer.VectorSearch</a>.
+</p>
+
+<h3>Supported Features</h3>
 <ul>
-    <li><strong>Conversation History with Question Reformulation</strong>: This feature allows users to view the history of their conversations, including the ability to reformulate questions for better clarity and understanding. This ensures that users can track their interactions and refine their queries as needed.</li>
-    <li><strong>Information about Token Usage</strong>: Users can access detailed information about token usage, which helps in understanding the consumption of tokens during interactions. This feature provides transparency and helps users manage their token usage effectively.</li>
-    <li><strong>Response Streaming</strong>: This feature enables real-time streaming of responses, allowing users to receive information as it is being processed. This ensures a seamless and efficient flow of information, enhancing the overall user experience.</li>
+    <li><strong>Conversation History with Question Reformulation</strong>: View and reformulate your conversation history for better clarity and understanding.</li>
+    <li><strong>Information about Token Usage</strong>: Access detailed information about token usage for transparency and management.</li>
+    <li><strong>Response Streaming</strong>: Receive real-time streaming of responses for a seamless and efficient user experience.</li>
+    <li><strong>Citations</strong>: Get citations for the sources used to justify each answer, allowing you to verify and understand the origin of the content.</li>
 </ul>
+
+<p class="mt-3 p-3 rounded bg-light text-dark shadow-sm">
+    Try <a href="/documents">uploading a document</a> or <a href="/ask">asking a question</a> to get started!
+</p>
+
+<p class="mt-4">
+    <em>For API usage and more details, see the <a href="https://github.com/marcominerva/SqlDatabaseVectorSearch#how-to-use" target="_blank">README</a>.</em>
+</p>
@@ -1,25 +1,45 @@
 using System.Text;
 using DocumentFormat.OpenXml.Packaging;
 using DocumentFormat.OpenXml.Wordprocessing;
+using SqlDatabaseVectorSearch.TextChunkers;

 namespace SqlDatabaseVectorSearch.ContentDecoders;

-public class DocxContentDecoder : IContentDecoder
+/// <summary>
+/// Decodes Word (OpenXML) documents into text chunks.
+/// </summary>
+public class DocxContentDecoder(IServiceProvider serviceProvider) : IContentDecoder
 {
-    public Task<string> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default)
+    /// <summary>
+    /// Reads every paragraph of the document body, splits the resulting text with the
+    /// <see cref="ITextChunker"/> registered for <paramref name="contentType"/> and returns
+    /// one <see cref="Chunk"/> per non-blank piece.
+    /// </summary>
+    /// <param name="stream">The stream containing the Word document.</param>
+    /// <param name="contentType">The content type used as key to resolve the text chunker.</param>
+    /// <param name="cancellationToken">Not observed by this decoder.</param>
+    /// <returns>The chunks of the document, numbered from zero and without page number.</returns>
+    public Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default)
    {
+        var textChunker = serviceProvider.GetRequiredKeyedService<ITextChunker>(contentType);
+
        // Open a Word document for read-only access.
        using var document = WordprocessingDocument.Open(stream, false);

        var body = document.MainDocumentPart?.Document.Body;
        var content = new StringBuilder();

-        var paragraphs = body?.Descendants<Paragraph>() ?? [];
-        foreach (var p in paragraphs)
+        foreach (var p in body?.Descendants<Paragraph>() ?? [])
        {
            content.AppendLine(p.InnerText);
        }

-        return Task.FromResult(content.ToString());
+        var paragraphs = textChunker.Split(content.ToString().Trim());
+
+        // Pages do not exist in the OpenXML format until they are rendered by a word processor.
+        // See https://stackoverflow.com/questions/43700252/how-to-get-page-numbers-based-on-openxmlelement for more details.
+        // Therefore, we will not assign a page number.
+        // Blank paragraphs are dropped before numbering, as PdfContentDecoder does, so no empty chunk is stored.
+        return Task.FromResult(paragraphs.Where(paragraph => !string.IsNullOrWhiteSpace(paragraph)).Select((text, index) => new Chunk(null, index, text)).ToList().AsEnumerable());
    }
 }
@@ -2,5 +2,19 @@

+/// <summary>
+/// Extracts the content of a document as a sequence of text chunks.
+/// </summary>
 public interface IContentDecoder
 {
-    Task<string> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default);
+    /// <summary>
+    /// Decodes the document read from <paramref name="stream"/> and returns its chunks.
+    /// </summary>
+    Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default);
 }
+
+/// <summary>
+/// A piece of text extracted from a document.
+/// </summary>
+/// <param name="PageNumber">The page the chunk comes from, or <see langword="null"/> when the decoder does not assign one.</param>
+/// <param name="IndexOnPage">Zero-based position of the chunk within its page, or within the whole document when no page number is assigned.</param>
+/// <param name="Content">The text of the chunk.</param>
+public record class Chunk(int? PageNumber, int IndexOnPage, string Content);
@@ -1,24 +1,50 @@
-using System.Text;
+using SqlDatabaseVectorSearch.TextChunkers;
 using UglyToad.PdfPig;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
+using UglyToad.PdfPig.Content;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;

 namespace SqlDatabaseVectorSearch.ContentDecoders;

-public class PdfContentDecoder : IContentDecoder
+/// <summary>
+/// Decodes PDF documents into text chunks that carry the number of the page they come from.
+/// </summary>
+public class PdfContentDecoder(IServiceProvider serviceProvider) : IContentDecoder
 {
-    public Task<string> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default)
+    /// <summary>
+    /// Extracts the chunks of every page of the PDF document read from <paramref name="stream"/>.
+    /// </summary>
+    public Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default)
    {
-        var content = new StringBuilder();
+        var textChunker = serviceProvider.GetRequiredKeyedService<ITextChunker>(contentType);

        // Read the content of the PDF document.
        using var pdfDocument = PdfDocument.Open(stream);
+        var chunks = new List<Chunk>();

-        foreach (var page in pdfDocument.GetPages().Where(x => x is not null))
-        {
-            var pageContent = ContentOrderTextExtractor.GetText(page) ?? string.Empty;
-            content.AppendLine(pageContent);
-        }
+        // Collect the chunks page by page while the document is still open.
+        foreach (var page in pdfDocument.GetPages())
+        {
+            chunks.AddRange(GetPageParagraphs(page, textChunker));
+        }
+
+        return Task.FromResult<IEnumerable<Chunk>>(chunks);
+    }

-        return Task.FromResult(content.ToString());
+    /// <summary>
+    /// Rebuilds the text of a page from its letters and splits it into chunks numbered from zero within the page.
+    /// </summary>
+    private static IEnumerable<Chunk> GetPageParagraphs(Page pdfPage, ITextChunker textChunker)
+    {
+        var blockSeparator = Environment.NewLine + Environment.NewLine;
+
+        var pageWords = NearestNeighbourWordExtractor.Instance.GetWords(pdfPage.Letters);
+        var blockTexts = DocstrumBoundingBoxes.Instance.GetBlocks(pageWords).Select(block => block.Text.ReplaceLineEndings(" "));
+        var pageText = string.Join(blockSeparator, blockTexts).Trim();
+
+        // Blank pieces are discarded before numbering, so the indexes on a page are contiguous.
+        return textChunker.Split(pageText)
+            .Where(paragraph => !string.IsNullOrWhiteSpace(paragraph))
+            .Select((paragraph, position) => new Chunk(pdfPage.Number, position, paragraph));
    }
 }
@@ -1,12 +1,26 @@
-namespace SqlDatabaseVectorSearch.ContentDecoders;
+using SqlDatabaseVectorSearch.TextChunkers;

-public class TextContentDecoder : IContentDecoder
+namespace SqlDatabaseVectorSearch.ContentDecoders;
+
+/// <summary>
+/// Decodes textual content into text chunks.
+/// </summary>
+public class TextContentDecoder(IServiceProvider serviceProvider) : IContentDecoder
 {
-    public async Task<string> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default)
+    /// <summary>
+    /// Reads the whole stream as text, splits it with the <see cref="ITextChunker"/> registered for
+    /// <paramref name="contentType"/> and returns one <see cref="Chunk"/> per non-blank piece, without page number.
+    /// </summary>
+    public async Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, CancellationToken cancellationToken = default)
    {
+        var textChunker = serviceProvider.GetRequiredKeyedService<ITextChunker>(contentType);
+
        using var readStream = new StreamReader(stream);
        var content = await readStream.ReadToEndAsync(cancellationToken);

-        return content;
+        var paragraphs = textChunker.Split(content.Trim());
+
+        // Blank paragraphs are dropped before numbering, as PdfContentDecoder does, so no empty chunk is stored.
+        return paragraphs.Where(paragraph => !string.IsNullOrWhiteSpace(paragraph)).Select((text, index) => new Chunk(null, index, text)).ToList();
    }
 }
@@ -1,8 +1,8 @@
 using EntityFramework.Exceptions.SqlServer;
 using Microsoft.EntityFrameworkCore;
-using SqlDatabaseVectorSearch.DataAccessLayer.Entities;
+using SqlDatabaseVectorSearch.Data.Entities;

-namespace SqlDatabaseVectorSearch.DataAccessLayer;
+namespace SqlDatabaseVectorSearch.Data;

 public class ApplicationDbContext(DbContextOptions<ApplicationDbContext> options) : DbContext(options)
 {
@@ -1,4 +1,4 @@
-namespace SqlDatabaseVectorSearch.DataAccessLayer.Entities;
+namespace SqlDatabaseVectorSearch.Data.Entities;

 public class Document
 {
@@ -1,4 +1,4 @@
-namespace SqlDatabaseVectorSearch.DataAccessLayer.Entities;
+namespace SqlDatabaseVectorSearch.Data.Entities;

 public class DocumentChunk
 {
@@ -8,6 +8,10 @@ public class DocumentChunk

    public int Index { get; set; }

+    public int? PageNumber { get; set; }
+
+    public int IndexOnPage { get; set; }
+
    public required string Content { get; set; }

    public required float[] Embedding { get; set; }
@@ -5,14 +5,14 @@ using Microsoft.EntityFrameworkCore.Infrastructure;
 using Microsoft.EntityFrameworkCore.Metadata;
 using Microsoft.EntityFrameworkCore.Migrations;
 using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
-using SqlDatabaseVectorSearch.DataAccessLayer;
+using SqlDatabaseVectorSearch.Data;

 #nullable disable

-namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
+namespace SqlDatabaseVectorSearch.Migrations
 {
    [DbContext(typeof(ApplicationDbContext))]
-    [Migration("20250224102351_Initial")]
+    [Migration("20250606091336_Initial")]
    partial class Initial
    {
        /// <inheritdoc />
@@ -20,12 +20,12 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
        {
 #pragma warning disable 612, 618
            modelBuilder
-                .HasAnnotation("ProductVersion", "9.0.2")
+                .HasAnnotation("ProductVersion", "9.0.5")
                .HasAnnotation("Relational:MaxIdentifierLength", 128);

            SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
                {
                    b.Property<Guid>("Id")
                        .ValueGeneratedOnAdd()
@@ -44,7 +44,7 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.ToTable("Documents", (string)null);
                });

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
                {
                    b.Property<Guid>("Id")
                        .ValueGeneratedOnAdd()
@@ -64,6 +64,12 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.Property<int>("Index")
                        .HasColumnType("int");

+                    b.Property<int>("IndexOnPage")
+                        .HasColumnType("int");
+
+                    b.Property<int?>("PageNumber")
+                        .HasColumnType("int");
+
                    b.HasKey("Id");

                    b.HasIndex("DocumentId");
@@ -71,9 +77,9 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.ToTable("DocumentChunks", (string)null);
                });

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
                {
-                    b.HasOne("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", "Document")
+                    b.HasOne("SqlDatabaseVectorSearch.Data.Entities.Document", "Document")
                        .WithMany("Chunks")
                        .HasForeignKey("DocumentId")
                        .OnDelete(DeleteBehavior.Cascade)
@@ -83,7 +89,7 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.Navigation("Document");
                });

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
                {
                    b.Navigation("Chunks");
                });
@@ -3,7 +3,7 @@ using Microsoft.EntityFrameworkCore.Migrations;

 #nullable disable

-namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
+namespace SqlDatabaseVectorSearch.Migrations
 {
    /// <inheritdoc />
    public partial class Initial : Migration
@@ -31,6 +31,8 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    Id = table.Column<Guid>(type: "uniqueidentifier", nullable: false),
                    DocumentId = table.Column<Guid>(type: "uniqueidentifier", nullable: false),
                    Index = table.Column<int>(type: "int", nullable: false),
+                    PageNumber = table.Column<int>(type: "int", nullable: true),
+                    IndexOnPage = table.Column<int>(type: "int", nullable: false),
                    Content = table.Column<string>(type: "nvarchar(max)", nullable: false),
                    Embedding = table.Column<string>(type: "vector(1536)", nullable: false)
                },
@@ -4,11 +4,11 @@ using Microsoft.EntityFrameworkCore;
 using Microsoft.EntityFrameworkCore.Infrastructure;
 using Microsoft.EntityFrameworkCore.Metadata;
 using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
-using SqlDatabaseVectorSearch.DataAccessLayer;
+using SqlDatabaseVectorSearch.Data;

 #nullable disable

-namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
+namespace SqlDatabaseVectorSearch.Migrations
 {
    [DbContext(typeof(ApplicationDbContext))]
    partial class ApplicationDbContextModelSnapshot : ModelSnapshot
@@ -17,12 +17,12 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
        {
 #pragma warning disable 612, 618
            modelBuilder
-                .HasAnnotation("ProductVersion", "9.0.2")
+                .HasAnnotation("ProductVersion", "9.0.5")
                .HasAnnotation("Relational:MaxIdentifierLength", 128);

            SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
                {
                    b.Property<Guid>("Id")
                        .ValueGeneratedOnAdd()
@@ -41,7 +41,7 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.ToTable("Documents", (string)null);
                });

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
                {
                    b.Property<Guid>("Id")
                        .ValueGeneratedOnAdd()
@@ -61,6 +61,12 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.Property<int>("Index")
                        .HasColumnType("int");

+                    b.Property<int>("IndexOnPage")
+                        .HasColumnType("int");
+
+                    b.Property<int?>("PageNumber")
+                        .HasColumnType("int");
+
                    b.HasKey("Id");

                    b.HasIndex("DocumentId");
@@ -68,9 +74,9 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.ToTable("DocumentChunks", (string)null);
                });

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.DocumentChunk", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.DocumentChunk", b =>
                {
-                    b.HasOne("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", "Document")
+                    b.HasOne("SqlDatabaseVectorSearch.Data.Entities.Document", "Document")
                        .WithMany("Chunks")
                        .HasForeignKey("DocumentId")
                        .OnDelete(DeleteBehavior.Cascade)
@@ -80,7 +86,7 @@ namespace SqlDatabaseVectorSearch.DataAccessLayer.Migrations
                    b.Navigation("Document");
                });

-            modelBuilder.Entity("SqlDatabaseVectorSearch.DataAccessLayer.Entities.Document", b =>
+            modelBuilder.Entity("SqlDatabaseVectorSearch.Data.Entities.Document", b =>
                {
                    b.Navigation("Chunks");
                });
@@ -23,7 +23,7 @@ public class AskEndpoints : IEndpointRouteHandlerBuilder
        endpoints.MapPost("/api/ask-streaming", (Question question, VectorSearchService vectorSearchService, CancellationToken cancellationToken,
            [Description("If true, the question will be reformulated taking into account the context of the chat identified by the given ConversationId.")] bool reformulate = true) =>
        {
-            async IAsyncEnumerable<QuestionResponse> Stream()
+            async IAsyncEnumerable<Response> Stream()
            {
                // Requests a streaming response.
                var responseStream = vectorSearchService.AskStreamingAsync(question, reformulate, cancellationToken);
@@ -0,0 +1,25 @@
+namespace SqlDatabaseVectorSearch.Models;
+
+/// <summary>
+/// A reference to the document chunk that backs an answer.
+/// </summary>
+public class Citation
+{
+    /// <summary>Identifier of the cited document.</summary>
+    public Guid DocumentId { get; set; }
+
+    /// <summary>Identifier of the cited chunk.</summary>
+    public Guid ChunkId { get; set; }
+
+    /// <summary>File name of the cited document.</summary>
+    public string FileName { get; set; } = null!;
+
+    /// <summary>Short excerpt quoted from the cited chunk.</summary>
+    public string Quote { get; set; } = null!;
+
+    /// <summary>Page number of the cited chunk, if any.</summary>
+    public int? PageNumber { get; set; }
+
+    /// <summary>Index of the cited chunk on its page.</summary>
+    public int IndexOnPage { get; set; }
+}
@@ -1,10 +0,0 @@
-namespace SqlDatabaseVectorSearch.Models;
-
-// Question and Answer can be null when using response streaming.
-public record class QuestionResponse(string? OriginalQuestion, string? ReformulatedQuestion, string? Answer, StreamState? StreamState = null, TokenUsageResponse? TokenUsage = null)
-{
-    public QuestionResponse(string? token, StreamState streamState, TokenUsageResponse? tokenUsageResponse = null)
-        : this(null, null, token, streamState, tokenUsageResponse)
-    {
-    }
-}
@@ -0,0 +1,16 @@
+namespace SqlDatabaseVectorSearch.Models;
+
+// Question and Answer can be null when using response streaming.
+/// <summary>
+/// The answer to a question, or a fragment of it when streaming, with optional token usage and citations.
+/// </summary>
+public record class Response(string? OriginalQuestion, string? ReformulatedQuestion, string? Answer, StreamState? StreamState = null, TokenUsageResponse? TokenUsage = null, IEnumerable<Citation>? Citations = null)
+{
+    /// <summary>
+    /// Creates a streaming fragment: both questions are left null and <paramref name="token"/> is used as the answer.
+    /// </summary>
+    public Response(string? token, StreamState streamState, TokenUsageResponse? tokenUsageResponse = null, IEnumerable<Citation>? citations = null)
+        : this(null, null, token, streamState, tokenUsageResponse, citations)
+    {
+    }
+}
@@ -5,7 +5,7 @@ using Microsoft.EntityFrameworkCore;
 using Microsoft.SemanticKernel;
 using SqlDatabaseVectorSearch.Components;
 using SqlDatabaseVectorSearch.ContentDecoders;
-using SqlDatabaseVectorSearch.DataAccessLayer;
+using SqlDatabaseVectorSearch.Data;
 using SqlDatabaseVectorSearch.Extensions;
 using SqlDatabaseVectorSearch.Services;
 using SqlDatabaseVectorSearch.Settings;
@@ -63,12 +63,6 @@ builder.Services.AddKernel()
    .AddAzureOpenAIEmbeddingGenerator(aiSettings.Embedding.Deployment, aiSettings.Embedding.Endpoint, aiSettings.Embedding.ApiKey, modelId: aiSettings.Embedding.ModelId, dimensions: aiSettings.Embedding.Dimensions)
    .AddAzureOpenAIChatCompletion(aiSettings.ChatCompletion.Deployment, aiSettings.ChatCompletion.Endpoint, aiSettings.ChatCompletion.ApiKey, modelId: aiSettings.ChatCompletion.ModelId);

-builder.Services.AddSingleton<TokenizerService>();
-builder.Services.AddSingleton<ChatService>();
-
-builder.Services.AddScoped<DocumentService>();
-builder.Services.AddScoped<VectorSearchService>();
-
 builder.Services.AddKeyedSingleton<IContentDecoder, PdfContentDecoder>(MediaTypeNames.Application.Pdf);
 builder.Services.AddKeyedSingleton<IContentDecoder, DocxContentDecoder>("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
 builder.Services.AddKeyedSingleton<IContentDecoder, TextContentDecoder>(MediaTypeNames.Text.Plain);
@@ -77,6 +71,12 @@ builder.Services.AddKeyedSingleton<IContentDecoder, TextContentDecoder>(MediaTyp
 builder.Services.AddKeyedSingleton<ITextChunker, DefaultTextChunker>(KeyedService.AnyKey);
 builder.Services.AddKeyedSingleton<ITextChunker, MarkdownTextChunker>(MediaTypeNames.Text.Markdown);

+builder.Services.AddSingleton<TokenizerService>();
+builder.Services.AddSingleton<ChatService>();
+
+builder.Services.AddScoped<DocumentService>();
+builder.Services.AddScoped<VectorSearchService>();
+
 builder.Services.AddOpenApi(options =>
 {
    options.RemoveServerList();
@@ -7,6 +7,7 @@ using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
 using OpenAI.Chat;
 using SqlDatabaseVectorSearch.Models;
 using SqlDatabaseVectorSearch.Settings;
+using Entities = SqlDatabaseVectorSearch.Data.Entities;

 namespace SqlDatabaseVectorSearch.Services;

@@ -41,7 +42,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
        return new(reformulatedQuestion.Content!, tokenUsage);
    }

-    public async Task<ChatResponse> AskQuestionAsync(Guid conversationId, IEnumerable<string> chunks, string question, CancellationToken cancellationToken = default)
+    public async Task<ChatResponse> AskQuestionAsync(Guid conversationId, IEnumerable<Entities.DocumentChunk> chunks, string question, CancellationToken cancellationToken = default)
    {
        var chat = CreateChatAsync(chunks, question);

@@ -59,7 +60,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
        return new(answer.Content!, tokenUsage);
    }

-    public async IAsyncEnumerable<ChatResponse> AskStreamingAsync(Guid conversationId, IEnumerable<string> chunks, string question, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<ChatResponse> AskStreamingAsync(Guid conversationId, IEnumerable<Entities.DocumentChunk> chunks, string question, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var chat = CreateChatAsync(chunks, question);

@@ -110,19 +111,62 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer
        return null;
    }

-    private ChatHistory CreateChatAsync(IEnumerable<string> chunks, string question)
+    private ChatHistory CreateChatAsync(IEnumerable<Entities.DocumentChunk> chunks, string question)
    {
        var chat = new ChatHistory("""
            You can use only the information provided in this chat to answer questions. If you don't know the answer, reply suggesting to refine the question.
-            For example, if the user asks "What is the capital of France?" and in this chat there isn't information about France, you should reply something like:
+
+            For example, if the user asks "What is the capital of Italy?" and in this chat there isn't information about Italy, you should reply something like:
            - This information isn't available in the given context
            - I'm sorry, I don't know the answer to that question
            - I don't have that information
            - I don't know
            - Given the context, I can't answer that question
-            - I'my sorry, I don't have enough information to answer that question
-            Never answer to questions that are not related to this chat.
-            You must answer in the same language of the user's question. For example, it the user asks a question in English, the answer must be in English.
+            - I'm sorry, I don't have enough information to answer that question
+
+            Never answer questions that are not related to this chat.
+            You must answer in the same language as the user's question. For example, if the user asks a question in English, the answer must be in English, no matter the language of the documents.
+
+            IMPORTANT: Your answer must always end with a period and a space.
+
+            After the answer, you need to include citations following the XML format below ONLY IF you know the answer and are providing information from the context. If you do NOT know the answer, DO NOT include the citations section at all.
+                        
+            【<citation document-id="document_id" chunk-id="chunk_id" filename="string" page-number="page_number" index-on-page="index_on-page">exact quote here</citation>
+            <citation document-id="document_id" chunk-id="chunk_id" filename="string" page-number="page_number" index-on-page="index_on-page">exact quote here</citation>】
+
+            The entire list of XML citations MUST be enclosed between 【 and 】 (U+3010 and U+3011) and must exactly match the above format.
+            The quote in each <citation> MUST be MAXIMUM 5 words, taken word-for-word from the search result.
+
+            IMPORTANT CITATION RULES:
+            1. NEVER put citations inside your answer text.
+            2. ALWAYS provide your complete answer FIRST.
+            3. ONLY AFTER completing your answer, add ALL citations in a block at the very end.
+            4. The citations block MUST be the last thing in your response, with absolutely nothing (no text, no spaces, no newlines, no punctuation, no comments) after it.
+            5. NEVER reference citations by number or mention them in your answer text.
+            6. The citations MUST ALWAYS follow the XML format exactly as shown below. Any other format is NOT ACCEPTED.
+            7. If you add anything after the citations block, your answer will be considered invalid.
+            8. If you do NOT know the answer, DO NOT include the citations block at all.
+
+            ---
+            Example of a correct answer:
+            The capital of Italy is Rome.
+            【<citation document-id="123" chunk-id="456" filename="italy.pdf" page-number="1" index-on-page="1">capital of Italy is Rome</citation>】
+
+            Example of a correct answer when you do NOT know the answer:
+            I'm sorry, I don't know the answer to that question
+
+            Example of an incorrect answer (NOT ACCEPTED):
+            The capital of Italy is Rome.
+            【<citation document-id="123" chunk-id="456" filename="italy.pdf" page-number="1" index-on-page="1">capital of Italy is Rome</citation>】
+            Thank you for your question.
+
+            Another incorrect example (NOT ACCEPTED):
+            The capital of Italy is Rome.
+            【<citation document-id="123" chunk-id="456" filename="italy.pdf" page-number="1" index-on-page="1">capital of Italy is Rome</citation>】
+            [1] italy.pdf, page 1
+            ---
+            Only the correct format is accepted. If you do not follow the XML format exactly, or if you add anything after the citations block, your answer will be considered invalid.
+            If you do NOT know the answer, DO NOT include the citations block at all.
            """);

        var prompt = new StringBuilder($"""
@@ -141,7 +185,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer

        foreach (var chunk in chunks)
        {
-            var text = $"---{Environment.NewLine}{chunk}";
+            var text = $"--- {chunk.Document.Name} (Document ID: {chunk.Document.Id} | Chunk ID: {chunk.Id} | Page Number: {chunk.PageNumber} | Index on Page: {chunk.IndexOnPage}) {Environment.NewLine}{chunk.Content}{Environment.NewLine}";

            var tokenCount = tokenizerService.CountChatCompletionTokens(text);
            if (tokenCount > availableTokens)
@@ -1,6 +1,6 @@
 using System.Data;
 using Microsoft.EntityFrameworkCore;
-using SqlDatabaseVectorSearch.DataAccessLayer;
+using SqlDatabaseVectorSearch.Data;
 using SqlDatabaseVectorSearch.Models;

 namespace SqlDatabaseVectorSearch.Services;
@@ -1,19 +1,20 @@
 using System.Data;
 using System.Runtime.CompilerServices;
+using System.Text;
+using System.Text.RegularExpressions;
 using Microsoft.EntityFrameworkCore;
 using Microsoft.Extensions.AI;
 using Microsoft.Extensions.Options;
 using SqlDatabaseVectorSearch.ContentDecoders;
-using SqlDatabaseVectorSearch.DataAccessLayer;
+using SqlDatabaseVectorSearch.Data;
 using SqlDatabaseVectorSearch.Models;
 using SqlDatabaseVectorSearch.Settings;
-using SqlDatabaseVectorSearch.TextChunkers;
 using ChatResponse = SqlDatabaseVectorSearch.Models.ChatResponse;
-using Entities = SqlDatabaseVectorSearch.DataAccessLayer.Entities;
+using Entities = SqlDatabaseVectorSearch.Data.Entities;

 namespace SqlDatabaseVectorSearch.Services;

-public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDbContext dbContext, DocumentService documentService, IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator, TokenizerService tokenizerService, ChatService chatService, TimeProvider timeProvider, IOptions<AppSettings> appSettingsOptions, ILogger<VectorSearchService> logger)
+public partial class VectorSearchService(IServiceProvider serviceProvider, ApplicationDbContext dbContext, DocumentService documentService, IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator, TokenizerService tokenizerService, ChatService chatService, TimeProvider timeProvider, IOptions<AppSettings> appSettingsOptions, ILogger<VectorSearchService> logger)
 {
    private readonly AppSettings appSettings = appSettingsOptions.Value;

@@ -21,10 +22,11 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
    {
        // Extract the contents of the file.
        var decoder = serviceProvider.GetKeyedService<IContentDecoder>(contentType) ?? throw new NotSupportedException($"Content type '{contentType}' is not supported.");
-        var content = await decoder.DecodeAsync(stream, contentType, cancellationToken);
+        var chunks = await decoder.DecodeAsync(stream, contentType, cancellationToken);
+        var chunkContents = chunks.Select(p => p.Content).ToList();

        // We get the token count of the whole document because it is the total number of tokens used for embedding (it may be needed, for example, for cost analysis).
-        var tokenCount = tokenizerService.CountEmbeddingTokens(content);
+        var tokenCount = tokenizerService.CountEmbeddingTokens(string.Join(" ", chunkContents));

        var strategy = dbContext.Database.CreateExecutionStrategy();
        var document = await strategy.ExecuteAsync(async (cancellationToken) =>
@@ -40,18 +42,33 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
            var document = new Entities.Document { Id = documentId.GetValueOrDefault(), Name = name, CreationDate = timeProvider.GetUtcNow() };
            dbContext.Documents.Add(document);

-            // Split the content into chunks and generate the embeddings for each one.
-            var textChunker = serviceProvider.GetRequiredKeyedService<ITextChunker>(contentType);
-            var paragraphs = textChunker.Split(content);
+            // Process paragraphs in batches.
+            var embeddings = new List<Embedding<float>>();
+            foreach (var batch in chunkContents.Chunk(appSettings.EmbeddingBatchSize))
+            {
+                logger.LogDebug("Processing batch of {Count} chunks for embedding generation...", batch.Length);

-            var embeddings = await embeddingGenerator.GenerateAndZipAsync(paragraphs, cancellationToken: cancellationToken);
+                // Generate embeddings for this batch.
+                var batchEmbeddings = await embeddingGenerator.GenerateAsync(batch, cancellationToken: cancellationToken);
+                embeddings.AddRange(batchEmbeddings);
+            }

            // Save the document chunks and the corresponding embedding in the database.
            foreach (var (index, embedding) in embeddings.Index())
            {
-                logger.LogDebug("Storing a paragraph of {TokenCount} tokens.", tokenizerService.CountChatCompletionTokens(embedding.Value));
+                var chunk = chunks.ElementAt(index);
+                logger.LogDebug("Storing a chunk of {TokenCount} tokens.", tokenizerService.CountChatCompletionTokens(chunk.Content));
+
+                var documentChunk = new Entities.DocumentChunk
+                {
+                    Document = document,
+                    Index = index,
+                    PageNumber = chunk.PageNumber,
+                    IndexOnPage = chunk.IndexOnPage,
+                    Content = chunk.Content,
+                    Embedding = embedding.Vector.ToArray()
+                };

-                var documentChunk = new Entities.DocumentChunk { Document = document, Index = index, Content = embedding.Value, Embedding = embedding.Embedding.Vector.ToArray() };
                dbContext.DocumentChunks.Add(documentChunk);
            }

@@ -64,17 +81,20 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
        return new(document.Id, tokenCount);
    }

-    public async Task<QuestionResponse> AskQuestionAsync(Question question, bool reformulate = true, CancellationToken cancellationToken = default)
+    public async Task<Response> AskQuestionAsync(Question question, bool reformulate = true, CancellationToken cancellationToken = default)
    {
        // If the user doesn't want to reformulate the question, CreateContextAsync returns the original one.
        var (reformulatedQuestion, embeddingTokenCount, chunks) = await CreateContextAsync(question, reformulate, cancellationToken);

-        var (answer, tokenUsage) = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion.Text!, cancellationToken);
+        var (fullAnswer, tokenUsage) = await chatService.AskQuestionAsync(question.ConversationId, chunks, reformulatedQuestion.Text!, cancellationToken);

-        return new(question.Text, reformulatedQuestion.Text!, answer, null, new(reformulatedQuestion.TokenUsage, embeddingTokenCount, tokenUsage));
+        // Extract citations from the answer
+        var (answer, citations) = ExtractCitations(fullAnswer);
+
+        return new(question.Text, reformulatedQuestion.Text!, answer, StreamState.End, new(reformulatedQuestion.TokenUsage, embeddingTokenCount, tokenUsage), citations);
    }

-    public async IAsyncEnumerable<QuestionResponse> AskStreamingAsync(Question question, bool reformulate = true, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<Response> AskStreamingAsync(Question question, bool reformulate = true, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        // If the user doesn't want to reformulate the question, CreateContextAsync returns the original one.
        var (reformulatedQuestion, embeddingTokenCount, chunks) = await CreateContextAsync(question, reformulate, cancellationToken);
@@ -85,23 +105,41 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
        yield return new(question.Text, reformulatedQuestion.Text!, null, StreamState.Start, new(reformulatedQuestion.TokenUsage, embeddingTokenCount, null));

        TokenUsageResponse? tokenUsageResponse = null;
+        var fullAnswer = new StringBuilder();
+        var citationsStarted = false;

-        // Return each token as a partial response.
+        // Returns each token as a partial response.
        await foreach (var (token, tokenUsage) in answerStream)
        {
-            // Token usage is expected in the last message.
-            tokenUsageResponse = tokenUsage is not null ? new(tokenUsage) : null;
-            yield return new(token, tokenUsageResponse is null ? StreamState.Append : StreamState.End, tokenUsageResponse);
+            if (token is not null) // token can be null when the stream ends. 
+            {
+                fullAnswer.Append(token);
+
+                if (token.Contains('【'))
+                {
+                    // Citations start when we encounter a token containing a 【 character.
+                    // We need to track it because we don't want to return the citations in the actual response.
+                    citationsStarted = true;
+                }
+
+                if (!citationsStarted)
+                {
+                    yield return new(token, StreamState.Append);
+                }
+            }
+            else
+            {
+                // Token usage is expected in the last message, when token is null.
+                tokenUsageResponse ??= tokenUsage is not null ? new(tokenUsage) : null;
+            }
        }

-        // If the token usage has not been returned in the last message, we must explicitly tells that the stream is ended.
-        if (tokenUsageResponse is null)
-        {
-            yield return new(null, StreamState.End);
-        }
+        // Extract citations at the end of streaming.
+        var (_, citations) = ExtractCitations(fullAnswer.ToString());
+        yield return new(null, StreamState.End, tokenUsageResponse, citations);
    }

-    private async Task<(ChatResponse ReformulatedQuestion, int EmbeddingTokenCount, IEnumerable<string> Chunks)> CreateContextAsync(Question question, bool reformulate, CancellationToken cancellationToken)
+    private async Task<(ChatResponse ReformulatedQuestion, int EmbeddingTokenCount, IEnumerable<Entities.DocumentChunk> Chunks)> CreateContextAsync(Question question, bool reformulate, CancellationToken cancellationToken)
    {
        // Reformulate the question taking into account the context of the chat to perform keyword search and embeddings.
        var reformulatedQuestion = reformulate ? await chatService.CreateQuestionAsync(question.ConversationId, question.Text, cancellationToken) : new(question.Text);
@@ -112,12 +150,49 @@ public class VectorSearchService(IServiceProvider serviceProvider, ApplicationDb
        // Perform Vector Search on SQL Database.
        var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);

-        var chunks = await dbContext.DocumentChunks
+        var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
                    .OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, questionEmbedding.ToArray()))
-                    .Select(c => c.Content)
                    .Take(appSettings.MaxRelevantChunks)
                    .ToListAsync(cancellationToken);

        return (reformulatedQuestion, embeddingTokenCount, chunks);
    }
+
+    // Collects every CitationRegEx match found in the text as a Citation and returns the text with
+    // all RemoveCitationsRegEx matches (content between 【 and 】) stripped and trailing whitespace trimmed.
+    // A null or empty text yields an empty string (or the text itself) and no citations.
+    // Matches whose document-id or chunk-id is not a valid GUID are skipped instead of throwing.
+    private static (string, IEnumerable<Citation>) ExtractCitations(string? text)
+    {
+        var citations = new List<Citation>();
+
+        if (string.IsNullOrEmpty(text))
+        {
+            return (text ?? string.Empty, citations);
+        }
+
+        // Regex.Matches only yields successful matches, so no per-match Success check is needed.
+        foreach (Match match in CitationRegEx.Matches(text))
+        {
+            // The pattern accepts any unquoted run of characters for the identifiers, so they may be
+            // empty or malformed: a strict Guid.Parse would throw and discard the whole answer.
+            if (!Guid.TryParse(match.Groups["documentId"].Value, out var documentId)
+                || !Guid.TryParse(match.Groups["chunkId"].Value, out var chunkId))
+            {
+                continue;
+            }
+
+            citations.Add(new Citation
+            {
+                DocumentId = documentId,
+                ChunkId = chunkId,
+                FileName = match.Groups["filename"].Value,
+                // Page numbers that are missing, non-numeric or not positive are reported as null.
+                PageNumber = int.TryParse(match.Groups["pageNumber"].Value, out var pageNumber) && pageNumber > 0 ? pageNumber : null,
+                // A missing or non-numeric index falls back to 0.
+                IndexOnPage = int.TryParse(match.Groups["indexOnPage"].Value, out var indexOnPage) ? indexOnPage : 0,
+                Quote = match.Groups["quote"].Value
+            });
+        }
+
+        // Remove all content between 【 and 】.
+        var cleanText = RemoveCitationsRegEx.Replace(text, string.Empty).TrimEnd();
+        return (cleanText, citations);
+    }
+
+    [GeneratedRegex(@"<citation\s+document-id=(?:""|'|)(?<documentId>[^""']*)(?:""|'|)\s+chunk-id=(?:""|'|)(?<chunkId>[^""']*)(?:""|'|)\s+filename=(?:""|'|)(?<filename>[^""']*)(?:""|'|)\s+page-number=(?:""|'|)(?<pageNumber>[^""']*)(?:""|'|)\s+index-on-page=(?:""|'|)(?<indexOnPage>[^""']*)(?:""|'|)>\s*(?<quote>.*?)\s*</citation>", RegexOptions.Singleline)]
+    private static partial Regex CitationRegEx { get; }
+
+    [GeneratedRegex(@"【.*?】", RegexOptions.Singleline)]
+    private static partial Regex RemoveCitationsRegEx { get; }
 }
@@ -2,6 +2,8 @@

 public class AppSettings
 {
+    public int EmbeddingBatchSize { get; init; } = 32;
+
    public int MaxTokensPerLine { get; init; } = 300;

    public int MaxTokensPerParagraph { get; init; } = 1000;
@@ -13,24 +13,24 @@
        <PackageReference Include="EFCore.SqlServer.VectorSearch" Version="9.0.0-preview.2" />
        <PackageReference Include="EntityFrameworkCore.Exceptions.SqlServer" Version="8.1.3" />
        <PackageReference Include="FluentValidation.DependencyInjectionExtensions" Version="12.0.0" />
-        <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="9.0.5" />
-        <PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="9.0.5" />
-        <PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="9.0.5">
+        <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="9.0.6" />
+        <PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="9.0.6" />
+        <PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="9.0.6">
            <PrivateAssets>all</PrivateAssets>
            <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
        </PackageReference>
-        <PackageReference Include="Microsoft.Extensions.Caching.Hybrid" Version="9.5.0" />
-        <PackageReference Include="Microsoft.Extensions.Http.Resilience" Version="9.5.0" />
+        <PackageReference Include="Microsoft.Extensions.Caching.Hybrid" Version="9.6.0" />
+        <PackageReference Include="Microsoft.Extensions.Http.Resilience" Version="9.6.0" />
        <PackageReference Include="Microsoft.ML.Tokenizers" Version="1.0.2" />
        <PackageReference Include="Microsoft.ML.Tokenizers.Data.Cl100kBase" Version="1.0.2" />
        <PackageReference Include="Microsoft.ML.Tokenizers.Data.O200kBase" Version="1.0.2" />
-        <PackageReference Include="Microsoft.SemanticKernel" Version="1.55.0" />
+        <PackageReference Include="Microsoft.SemanticKernel" Version="1.57.0" />
        <PackageReference Include="MimeMapping" Version="3.1.0" />
        <PackageReference Include="MinimalHelpers.FluentValidation" Version="1.1.3" />
        <PackageReference Include="MinimalHelpers.Routing.Analyzers" Version="1.1.3" />
        <PackageReference Include="PdfPig" Version="0.1.10" />
-        <PackageReference Include="Swashbuckle.AspNetCore.SwaggerUI" Version="8.1.3" />
-        <PackageReference Include="TinyHelpers.AspNetCore" Version="4.0.26" />
+        <PackageReference Include="Swashbuckle.AspNetCore.SwaggerUI" Version="9.0.1" />
+        <PackageReference Include="TinyHelpers.AspNetCore" Version="4.0.29" />
    </ItemGroup>

 </Project>
@@ -1,7 +1,7 @@
 using FluentValidation;
 using SqlDatabaseVectorSearch.Models;

-namespace SqlDatabaseVectorSearch.Validators;
+namespace SqlDatabaseVectorSearch.Validations;

 public class QuestionValidator : AbstractValidator<Question>
 {
@@ -6,7 +6,7 @@
        "ChatCompletion": {
            "Endpoint": "",
            "Deployment": "",
-            "ModelId": "", // o1, gpt-4o, gpt-4o-mini, gpt-4, gpt-3.5 
+            "ModelId": "", // gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4o, gpt-4o-mini, gpt-4, gpt-3.5 
            "ApiKey": ""
        },
        "Embedding": {
@@ -20,11 +20,12 @@
        }
    },
    "AppSettings": {
+        "EmbeddingBatchSize": 32,
        "MaxTokensPerLine": 300,
        "MaxTokensPerParagraph": 1000,
        "OverlapTokens": 100,
-        "MaxRelevantChunks": 10,
-        "MaxInputTokens": 16384,
+        "MaxRelevantChunks": 50,
+        "MaxInputTokens": 32768,
        "MaxOutputTokens": 800,
        "MessageExpiration": "00:05:00",
        "MessageLimit": 20
@@ -62,3 +62,9 @@ h1:focus {
    .blazor-error-boundary::after {
        content: "An error has occurred."
    }
+
+.citation-box {
+    width: fit-content;
+    max-width: 100%;
+    background-color: #f8f9fa;
+}