From b155f8eb2ec2b92989fd0f7772ce05c3ff02a917 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 17 Feb 2025 12:40:06 +0100 Subject: [PATCH 1/8] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a62f962..7faa62a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The application is a Minimal API that exposes endpoints to load documents, gener > [!NOTE] > If you prefer to use straight SQL, check out the [sql branch](https://github.com/marcominerva/SqlDatabaseVectorSearch/tree/sql). -![SQL Database Vector Search](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch.png) +![SQL Database Vector Search](https://raw.githubusercontent.com/marcominerva/SqlDatabaseVectorSearch/refs/heads/master/SqlDatabaseVectorSearch.png) ## Setup @@ -143,4 +143,4 @@ When using the `/api/ask-streaming` endpoint, answers will be streamed as happen - each one contains a token - The *streamState* property is set to `Append` - *origianlQuestion*, *reformulatedQuestion* and *tokenUsage* are always `null` -- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer. \ No newline at end of file +- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer. 
From a0c777c138c306a054894ff84588defcfbc653a3 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 17 Feb 2025 12:45:41 +0100 Subject: [PATCH 2/8] Update README.md --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 7faa62a..962f844 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,6 @@ A repository that showcases the native VECTOR type in Azure SQL Database to perf The application is a Minimal API that exposes endpoints to load documents, generate embeddings and save them into the database as Vectors, and perform searches using Vector Search and RAG. Currently, PDF, DOCX, TXT and MD files are supported. Vectors are saved and retrieved with Entity Framework Core using the [EFCore.SqlServer.VectorSearch](https://github.com/efcore/EfCore.SqlServer.VectorSearch) library. Embedding and Chat Completion are integrated with [Semantic Kernel](https://github.com/microsoft/semantic-kernel). -> [!NOTE] -> If you prefer to use straight SQL, check out the [sql branch](https://github.com/marcominerva/SqlDatabaseVectorSearch/tree/sql). 
- ![SQL Database Vector Search](https://raw.githubusercontent.com/marcominerva/SqlDatabaseVectorSearch/refs/heads/master/SqlDatabaseVectorSearch.png) ## Setup From 79e7ddf8b1603c1fc4badd20e623d852e2d4ca5c Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Wed, 19 Feb 2025 17:36:46 +0100 Subject: [PATCH 3/8] Create docs.md --- docs.md | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 docs.md diff --git a/docs.md b/docs.md new file mode 100644 index 0000000..fcf9f37 --- /dev/null +++ b/docs.md @@ -0,0 +1,138 @@ +# SQL Database Vector Search + +## Setup + +- [Create an Azure SQL Database](https://learn.microsoft.com/en-us/azure/azure-sql/database/single-database-create-quickstart) on a server that has the Vector Support feature enabled +- Execute the [Scripts.sql](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/Scripts.sql) file to create the tables needed by the application + - You may need to update the size of the [`VECTOR`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/Scripts.sql#L17) column to match the size of the embedding model. Currently, the maximum allowed value is 1998. +- Open the [appsettings.json](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json) file and set the connection string to the database and the other settings required by Azure OpenAI + - If your embedding model supports shortening, like **text-embedding-3-small** and **text-embedding-3-large**, and you want to use this feature, you need to set the [`Dimensions`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json#L17) property to match the value you have used in the SQL script. 
If your model doesn't provide this feature, or do you want to use the default size, just leave the [`Dimensions`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json#L17) property to NULL. Keep in mind that **text-embedding-3-small** has a dimension of 1536, while **text-embedding-3-large** uses vectors with 3072 elements, so with this latter model it is mandatory to specify a value (that, as said, must be less or equal to 1998). +- Run the application and start importing your documents with `/api/documents` endpoint. +- Ask questions using `/api/ask` or `/api/ask-streaming` endpoints. + +## Supported features + +- Conversation history with question reformulation +- Information about token usage +- Response streaming + +```json +{ + "originalQuestion": "why is mars called the red planet?", + "reformulatedQuestion": "Why is Mars referred to as the Red Planet?", + "answer": "Mars is referred to as the Red Planet due to its characteristic reddish color, which is caused by the abundance of iron oxide (rust) on its surface. This distinctive coloration has also been a significant factor in the cultural and mythological associations of Mars across different civilizations.", + "streamState": null, + "tokenUsage": { + "reformulation": { + "inputTokenCount": 107, + "outputTokenCount": 10, + "totalTokenCount": 117 + }, + "embeddingTokenCount": 10, + "question": { + "inputTokenCount": 9142, + "outputTokenCount": 53, + "totalTokenCount": 9195 + } + } +} +``` + +### How response streaming works + +When using the `/api/ask-streaming` endpoint, answers will be streamed as happens with the typical response from OpenAI. 
The format of the response is the following: + +```json +[ + { + "originalQuestion": "why is mars called the red planet?", + "reformulatedQuestion": "Why is Mars referred to as the Red Planet?", + "answer": null, + "streamState": "Start", + "tokenUsage": { + "reformulation": { + "inputTokenCount": 107, + "outputTokenCount": 10, + "totalTokenCount": 117 + }, + "embeddingTokenCount": 10, + "question": null + } + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": "Mars", + "streamState": "Append", + "tokenUsage": null + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": " is", + "streamState": "Append", + "tokenUsage": null + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": " called", + "streamState": "Append", + "tokenUsage": null + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": " the", + "streamState": "Append", + "tokenUsage": null + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": " Red", + "streamState": "Append", + "tokenUsage": null + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": " Planet", + "streamState": "Append", + "tokenUsage": null + }, + //... 
+ { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": ".", + "streamState": "Append", + "tokenUsage": null + }, + { + "originalQuestion": null, + "reformulatedQuestion": null, + "answer": null, + "streamState": "End", + "tokenUsage": { + "reformulation": null, + "embeddingTokenCount": null, + "question": { + "inputTokenCount": 8986, + "outputTokenCount": 31, + "totalTokenCount": 9017 + } + } + } +] +``` + +- The first piece of the response has the following characteristics: + - the *streamState* property is set to `Start`, + - it contains the question and its reformulation (if not requested, *reformulatedQuestion* will be equals to *originalQuestion*) + - the *tokenUsage* section holds information about token used for reformulation (if done) and for the embedding of the question +- Then, there are as many elements for the actual answer as necessary: + - each one contains a token + - The *streamState* property is set to `Append` + - *origianlQuestion*, *reformulatedQuestion* and *tokenUsage* are always `null` +- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer. From a9028929ebd5433dd1399b1e432546ff0672ccb6 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Wed, 19 Feb 2025 17:37:17 +0100 Subject: [PATCH 4/8] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 962f844..7faa62a 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,9 @@ A repository that showcases the native VECTOR type in Azure SQL Database to perf The application is a Minimal API that exposes endpoints to load documents, generate embeddings and save them into the database as Vectors, and perform searches using Vector Search and RAG. Currently, PDF, DOCX, TXT and MD files are supported. 
Vectors are saved and retrieved with Entity Framework Core using the [EFCore.SqlServer.VectorSearch](https://github.com/efcore/EfCore.SqlServer.VectorSearch) library. Embedding and Chat Completion are integrated with [Semantic Kernel](https://github.com/microsoft/semantic-kernel). +> [!NOTE] +> If you prefer to use straight SQL, check out the [sql branch](https://github.com/marcominerva/SqlDatabaseVectorSearch/tree/sql). + ![SQL Database Vector Search](https://raw.githubusercontent.com/marcominerva/SqlDatabaseVectorSearch/refs/heads/master/SqlDatabaseVectorSearch.png) ## Setup From e1d83f1051d2c3e793415b05e4d15ef6ba0ef33c Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Thu, 20 Feb 2025 09:09:05 +0100 Subject: [PATCH 5/8] Delete docs.md --- docs.md | 138 -------------------------------------------------------- 1 file changed, 138 deletions(-) delete mode 100644 docs.md diff --git a/docs.md b/docs.md deleted file mode 100644 index fcf9f37..0000000 --- a/docs.md +++ /dev/null @@ -1,138 +0,0 @@ -# SQL Database Vector Search - -## Setup - -- [Create an Azure SQL Database](https://learn.microsoft.com/en-us/azure/azure-sql/database/single-database-create-quickstart) on a server that has the Vector Support feature enabled -- Execute the [Scripts.sql](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/Scripts.sql) file to create the tables needed by the application - - You may need to update the size of the [`VECTOR`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/Scripts.sql#L17) column to match the size of the embedding model. Currently, the maximum allowed value is 1998. 
-- Open the [appsettings.json](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json) file and set the connection string to the database and the other settings required by Azure OpenAI - - If your embedding model supports shortening, like **text-embedding-3-small** and **text-embedding-3-large**, and you want to use this feature, you need to set the [`Dimensions`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json#L17) property to match the value you have used in the SQL script. If your model doesn't provide this feature, or do you want to use the default size, just leave the [`Dimensions`](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch/appsettings.json#L17) property to NULL. Keep in mind that **text-embedding-3-small** has a dimension of 1536, while **text-embedding-3-large** uses vectors with 3072 elements, so with this latter model it is mandatory to specify a value (that, as said, must be less or equal to 1998). -- Run the application and start importing your documents with `/api/documents` endpoint. -- Ask questions using `/api/ask` or `/api/ask-streaming` endpoints. - -## Supported features - -- Conversation history with question reformulation -- Information about token usage -- Response streaming - -```json -{ - "originalQuestion": "why is mars called the red planet?", - "reformulatedQuestion": "Why is Mars referred to as the Red Planet?", - "answer": "Mars is referred to as the Red Planet due to its characteristic reddish color, which is caused by the abundance of iron oxide (rust) on its surface. 
This distinctive coloration has also been a significant factor in the cultural and mythological associations of Mars across different civilizations.", - "streamState": null, - "tokenUsage": { - "reformulation": { - "inputTokenCount": 107, - "outputTokenCount": 10, - "totalTokenCount": 117 - }, - "embeddingTokenCount": 10, - "question": { - "inputTokenCount": 9142, - "outputTokenCount": 53, - "totalTokenCount": 9195 - } - } -} -``` - -### How response streaming works - -When using the `/api/ask-streaming` endpoint, answers will be streamed as happens with the typical response from OpenAI. The format of the response is the following: - -```json -[ - { - "originalQuestion": "why is mars called the red planet?", - "reformulatedQuestion": "Why is Mars referred to as the Red Planet?", - "answer": null, - "streamState": "Start", - "tokenUsage": { - "reformulation": { - "inputTokenCount": 107, - "outputTokenCount": 10, - "totalTokenCount": 117 - }, - "embeddingTokenCount": 10, - "question": null - } - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": "Mars", - "streamState": "Append", - "tokenUsage": null - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": " is", - "streamState": "Append", - "tokenUsage": null - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": " called", - "streamState": "Append", - "tokenUsage": null - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": " the", - "streamState": "Append", - "tokenUsage": null - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": " Red", - "streamState": "Append", - "tokenUsage": null - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": " Planet", - "streamState": "Append", - "tokenUsage": null - }, - //... 
- { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": ".", - "streamState": "Append", - "tokenUsage": null - }, - { - "originalQuestion": null, - "reformulatedQuestion": null, - "answer": null, - "streamState": "End", - "tokenUsage": { - "reformulation": null, - "embeddingTokenCount": null, - "question": { - "inputTokenCount": 8986, - "outputTokenCount": 31, - "totalTokenCount": 9017 - } - } - } -] -``` - -- The first piece of the response has the following characteristics: - - the *streamState* property is set to `Start`, - - it contains the question and its reformulation (if not requested, *reformulatedQuestion* will be equals to *originalQuestion*) - - the *tokenUsage* section holds information about token used for reformulation (if done) and for the embedding of the question -- Then, there are as many elements for the actual answer as necessary: - - each one contains a token - - The *streamState* property is set to `Append` - - *origianlQuestion*, *reformulatedQuestion* and *tokenUsage* are always `null` -- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer. From b1aa81e4ecba6d6927dc9a98c73c910475c60191 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Thu, 20 Feb 2025 10:06:11 +0100 Subject: [PATCH 6/8] Enhance question reformulation in ChatService Updated the `embeddingQuestion` string to require that the reformulated question explicitly contains the subject of the original question. The reformulation must also be in the same language as the user's question and should not include phrases like "in this chat" or "search for." 
--- SqlDatabaseVectorSearch/Services/ChatService.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index 172cd96..25da2b6 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -23,6 +23,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer --- {question} --- + The reformulation must always explicitly contain the subject of the question. You must reformulate the question in the same language of the user's question. For example, it the user asks a question in English, the answer must be in English. Never add "in this chat", "in the context of this chat", "in the context of our conversation", "search for" or something like that in your answer. """; From 8472775333f34c88363d683571b7717076befad1 Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Thu, 20 Feb 2025 10:30:55 +0100 Subject: [PATCH 7/8] Add message limit to chat history management Updated `ChatService` to enforce a message limit based on the new `MessageLimit` property in `AppSettings`. Excess messages are removed before updating the cache to optimize performance. Adjusted `appsettings.json` to reflect the new configuration, changing `MaxInputTokens` from 16385 to 16384 and adding `MessageLimit` with a default value of 20. 
--- SqlDatabaseVectorSearch/Services/ChatService.cs | 9 ++++++++- SqlDatabaseVectorSearch/Settings/AppSettings.cs | 2 ++ SqlDatabaseVectorSearch/appsettings.json | 5 +++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index 25da2b6..973fb2d 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -156,7 +156,14 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer } private async Task UpdateCacheAsync(Guid conversationId, ChatHistory chat, CancellationToken cancellationToken) - => await cache.SetAsync(conversationId.ToString(), chat, cancellationToken: cancellationToken); + { + if (chat.Count > appSettings.MessageLimit) + { + chat.RemoveRange(0, chat.Count - appSettings.MessageLimit); + } + + await cache.SetAsync(conversationId.ToString(), chat, cancellationToken: cancellationToken); + } private async Task GetChatHistoryAsync(Guid conversationId, CancellationToken cancellationToken) { diff --git a/SqlDatabaseVectorSearch/Settings/AppSettings.cs b/SqlDatabaseVectorSearch/Settings/AppSettings.cs index 76ed3ad..a75a26e 100644 --- a/SqlDatabaseVectorSearch/Settings/AppSettings.cs +++ b/SqlDatabaseVectorSearch/Settings/AppSettings.cs @@ -15,4 +15,6 @@ public class AppSettings public int MaxOutputTokens { get; init; } = 800; public TimeSpan MessageExpiration { get; init; } + + public int MessageLimit { get; set; } = 20; } diff --git a/SqlDatabaseVectorSearch/appsettings.json b/SqlDatabaseVectorSearch/appsettings.json index 4df5bad..52ff3cf 100644 --- a/SqlDatabaseVectorSearch/appsettings.json +++ b/SqlDatabaseVectorSearch/appsettings.json @@ -24,9 +24,10 @@ "MaxTokensPerParagraph": 1000, "OverlapTokens": 100, "MaxRelevantChunks": 10, - "MaxInputTokens": 16385, + "MaxInputTokens": 16384, "MaxOutputTokens": 800, - "MessageExpiration": "00:05:00" + "MessageExpiration": 
"00:05:00", + "MessageLimit": 20 }, "Logging": { "LogLevel": { From 7922fff402042a8ebf829c28138d1e7a9ec974cb Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Thu, 20 Feb 2025 10:36:58 +0100 Subject: [PATCH 8/8] Enhance question reformulation instructions Updated the line in the `embeddingQuestion` string to emphasize that the reformulated question must explicitly contain the subject. This change clarifies the requirements for the chat service's functionality. --- SqlDatabaseVectorSearch/Services/ChatService.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index 973fb2d..0dc6e3e 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -23,7 +23,7 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer --- {question} --- - The reformulation must always explicitly contain the subject of the question. + The reformulation must always explicitly contain the subject of the question. You must reformulate the question in the same language of the user's question. For example, it the user asks a question in English, the answer must be in English. Never add "in this chat", "in the context of this chat", "in the context of our conversation", "search for" or something like that in your answer. """;