diff --git a/README.md b/README.md index ccb1791..95720a0 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The application is a Minimal API that exposes endpoints to load documents, gener > [!NOTE] > If you prefer to use straight SQL, check out the [sql branch](https://github.com/marcominerva/SqlDatabaseVectorSearch/tree/sql). -![SQL Database Vector Search](https://github.com/marcominerva/SqlDatabaseVectorSearch/blob/master/SqlDatabaseVectorSearch.png) +![SQL Database Vector Search](https://raw.githubusercontent.com/marcominerva/SqlDatabaseVectorSearch/refs/heads/master/SqlDatabaseVectorSearch.png) ## Setup @@ -143,4 +143,4 @@ When using the `/api/ask-streaming` endpoint, answers will be streamed as happen - each one contains a token - The *streamState* property is set to `Append` - *origianlQuestion*, *reformulatedQuestion* and *tokenUsage* are always `null` -- The stream ends when an element with *streamState* equals to `End` is received. This element contains token usage information for the question and the whole answer. \ No newline at end of file +- The stream ends when an element with *streamState* equal to `End` is received. This element contains token usage information for the question and the whole answer. 
diff --git a/SqlDatabaseVectorSearch/Services/ChatService.cs b/SqlDatabaseVectorSearch/Services/ChatService.cs index d830954..0dc6e3e 100644 --- a/SqlDatabaseVectorSearch/Services/ChatService.cs +++ b/SqlDatabaseVectorSearch/Services/ChatService.cs @@ -156,7 +156,14 @@ public class ChatService(IChatCompletionService chatCompletionService, Tokenizer } private async Task UpdateCacheAsync(Guid conversationId, ChatHistory chat, CancellationToken cancellationToken) - => await cache.SetAsync(conversationId.ToString(), chat, cancellationToken: cancellationToken); + { + if (chat.Count > appSettings.MessageLimit) + { + chat.RemoveRange(0, chat.Count - appSettings.MessageLimit); + } + + await cache.SetAsync(conversationId.ToString(), chat, cancellationToken: cancellationToken); + } private async Task GetChatHistoryAsync(Guid conversationId, CancellationToken cancellationToken) { diff --git a/SqlDatabaseVectorSearch/Settings/AppSettings.cs b/SqlDatabaseVectorSearch/Settings/AppSettings.cs index 76ed3ad..a75a26e 100644 --- a/SqlDatabaseVectorSearch/Settings/AppSettings.cs +++ b/SqlDatabaseVectorSearch/Settings/AppSettings.cs @@ -15,4 +15,6 @@ public class AppSettings public int MaxOutputTokens { get; init; } = 800; public TimeSpan MessageExpiration { get; init; } + + public int MessageLimit { get; init; } = 20; } diff --git a/SqlDatabaseVectorSearch/appsettings.json b/SqlDatabaseVectorSearch/appsettings.json index 4df5bad..52ff3cf 100644 --- a/SqlDatabaseVectorSearch/appsettings.json +++ b/SqlDatabaseVectorSearch/appsettings.json @@ -24,9 +24,10 @@ "MaxTokensPerParagraph": 1000, "OverlapTokens": 100, "MaxRelevantChunks": 10, - "MaxInputTokens": 16385, + "MaxInputTokens": 16384, "MaxOutputTokens": 800, - "MessageExpiration": "00:05:00" + "MessageExpiration": "00:05:00", + "MessageLimit": 20 }, "Logging": { "LogLevel": {