mirror of
https://github.com/marcominerva/SqlDatabaseVectorSearch.git
synced 2026-06-20 12:23:10 +00:00
Add Markdown support and refactor text chunking
Updated README.md to include Markdown file support. Introduced a new endpoint for uploading Markdown documents with MIME type handling. Removed TextChunkerService and created the DefaultTextChunker and MarkdownTextChunker classes, both implementing ITextChunker. Updated VectorSearchService to use the new chunking interface. Added a MimeMapping package reference to the project file.
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
using Microsoft.SemanticKernel.Text;
|
||||
using SqlDatabaseVectorSearch.Services;
|
||||
using SqlDatabaseVectorSearch.Settings;
|
||||
|
||||
namespace SqlDatabaseVectorSearch.TextChunkers;
|
||||
|
||||
/// <summary>
/// <see cref="ITextChunker"/> implementation that delegates to the plain-text
/// splitting routines of <see cref="TextChunker"/>.
/// </summary>
/// <param name="tokenizerService">Supplies <c>CountChatCompletionTokens</c>, which is used as the token counter for both splitting passes.</param>
/// <param name="appSettingsOptions">Options wrapper whose <c>Value</c> provides the token limits read by <see cref="Split"/>.</param>
public class DefaultTextChunker(TokenizerService tokenizerService, IOptions<AppSettings> appSettingsOptions) : ITextChunker
{
    // Resolved once at construction time from the options wrapper.
    private readonly AppSettings settings = appSettingsOptions.Value;

    /// <summary>
    /// Splits <paramref name="text"/> in two passes: first into lines bounded by
    /// <c>MaxTokensPerLine</c>, then into paragraphs bounded by <c>MaxTokensPerParagraph</c>,
    /// passing <c>OverlapTokens</c> to the paragraph pass.
    /// </summary>
    /// <param name="text">The plain text to split.</param>
    /// <returns>The paragraphs produced by <c>TextChunker.SplitPlainTextParagraphs</c>.</returns>
    public IList<string> Split(string text)
    {
        var plainTextLines = TextChunker.SplitPlainTextLines(
            text,
            settings.MaxTokensPerLine,
            tokenizerService.CountChatCompletionTokens);

        return TextChunker.SplitPlainTextParagraphs(
            plainTextLines,
            settings.MaxTokensPerParagraph,
            settings.OverlapTokens,
            tokenCounter: tokenizerService.CountChatCompletionTokens);
    }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace SqlDatabaseVectorSearch.TextChunkers;
|
||||
|
||||
/// <summary>
/// Contract for components that break a piece of text into a list of string chunks.
/// </summary>
public interface ITextChunker
{
    /// <summary>
    /// Splits the supplied text into chunks.
    /// </summary>
    /// <param name="text">The text to split.</param>
    /// <returns>The resulting chunks; how the text is divided is up to the implementation.</returns>
    IList<string> Split(string text);
}
|
||||
@@ -0,0 +1,19 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
using Microsoft.SemanticKernel.Text;
|
||||
using SqlDatabaseVectorSearch.Services;
|
||||
using SqlDatabaseVectorSearch.Settings;
|
||||
|
||||
namespace SqlDatabaseVectorSearch.TextChunkers;
|
||||
|
||||
/// <summary>
/// <see cref="ITextChunker"/> implementation that delegates to the Markdown
/// splitting routines of <see cref="TextChunker"/>.
/// </summary>
/// <param name="tokenizerService">Supplies <c>CountChatCompletionTokens</c>, which is used as the token counter for both splitting passes.</param>
/// <param name="appSettingsOptions">Options wrapper whose <c>Value</c> provides the token limits read by <see cref="Split"/>.</param>
public class MarkdownTextChunker(TokenizerService tokenizerService, IOptions<AppSettings> appSettingsOptions) : ITextChunker
{
    // Resolved once at construction time from the options wrapper.
    private readonly AppSettings settings = appSettingsOptions.Value;

    /// <summary>
    /// Splits <paramref name="text"/> in two passes: first into Markdown lines bounded by
    /// <c>MaxTokensPerLine</c>, then into Markdown paragraphs bounded by
    /// <c>MaxTokensPerParagraph</c>, passing <c>OverlapTokens</c> to the paragraph pass.
    /// </summary>
    /// <param name="text">The Markdown text to split.</param>
    /// <returns>The paragraphs produced by <c>TextChunker.SplitMarkdownParagraphs</c>.</returns>
    public IList<string> Split(string text)
    {
        var markdownLines = TextChunker.SplitMarkDownLines(
            text,
            settings.MaxTokensPerLine,
            tokenizerService.CountChatCompletionTokens);

        return TextChunker.SplitMarkdownParagraphs(
            markdownLines,
            settings.MaxTokensPerParagraph,
            settings.OverlapTokens,
            tokenCounter: tokenizerService.CountChatCompletionTokens);
    }
}
|
||||
Reference in New Issue
Block a user