diff --git a/BotSharp.sln b/BotSharp.sln
index e992d26ad..102137084 100644
--- a/BotSharp.sln
+++ b/BotSharp.sln
@@ -149,6 +149,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ExcelHandle
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ImageHandler", "src\Plugins\BotSharp.Plugin.ImageHandler\BotSharp.Plugin.ImageHandler.csproj", "{242F2D93-FCCE-4982-8075-F3052ECCA92C}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.FuzzySharp", "src\Plugins\BotSharp.Plugin.FuzzySharp\BotSharp.Plugin.FuzzySharp.csproj", "{E7C243B9-E751-B3B4-8F16-95C76CA90D31}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -629,6 +631,14 @@ Global
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|Any CPU.Build.0 = Release|Any CPU
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.ActiveCfg = Release|Any CPU
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.Build.0 = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.Build.0 = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.ActiveCfg = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -701,6 +711,7 @@ Global
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{FC63C875-E880-D8BB-B8B5-978AB7B62983} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{242F2D93-FCCE-4982-8075-F3052ECCA92C} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}
diff --git a/Directory.Packages.props b/Directory.Packages.props
index c7ef3907b..53e2bb5be 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -5,6 +5,8 @@
true
+
+
@@ -18,6 +20,7 @@
+
diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs
new file mode 100644
index 000000000..9238e3220
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs
@@ -0,0 +1,7 @@
+namespace BotSharp.Abstraction.Knowledges;
+
+public interface IPhraseCollection
+{
+    Task<Dictionary<string, HashSet<string>>> LoadVocabularyAsync();
+    Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadSynonymMappingAsync();
+}
diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs
new file mode 100644
index 000000000..1ca84024a
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs
@@ -0,0 +1,6 @@
+namespace BotSharp.Abstraction.Knowledges;
+
+public interface IPhraseService
+{
+    Task<List<SearchPhrasesResult>> SearchPhrasesAsync(string term);
+}
\ No newline at end of file
diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/SearchPhrasesResult.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/SearchPhrasesResult.cs
new file mode 100644
index 000000000..64dc0c18f
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/SearchPhrasesResult.cs
@@ -0,0 +1,11 @@
+
+namespace BotSharp.Abstraction.Knowledges.Models;
+
+public class SearchPhrasesResult
+{
+ public string Token { get; set; } = string.Empty;
+    public List<string> Sources { get; set; } = new();
+ public string CanonicalForm { get; set; } = string.Empty;
+ public string MatchType { get; set; } = string.Empty;
+ public double Confidence { get; set; }
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj b/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj
new file mode 100644
index 000000000..8561dc204
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj
@@ -0,0 +1,21 @@
+
+
+
+ $(TargetFramework)
+ enable
+ $(LangVersion)
+ $(BotSharpVersion)
+ $(GeneratePackageOnBuild)
+ $(GenerateDocumentationFile)
+ $(SolutionDir)packages
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs
new file mode 100644
index 000000000..f46b3abf7
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs
@@ -0,0 +1,20 @@
+
+namespace BotSharp.Plugin.FuzzySharp.Constants;
+
+public static class MatchReason
+{
+ ///
+ /// Token matched a synonym term (e.g., HVAC -> Air Conditioning/Heating)
+ ///
+ public const string SynonymMatch = "synonym_match";
+
+ ///
+ /// Token exactly matched a vocabulary entry
+ ///
+ public const string ExactMatch = "exact_match";
+
+ ///
+ /// Token was flagged as a potential typo and a correction was suggested
+ ///
+ public const string TypoCorrection = "typo_correction";
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs
new file mode 100644
index 000000000..a8c749d13
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs
@@ -0,0 +1,29 @@
+
+namespace BotSharp.Plugin.FuzzySharp.Constants;
+
+public static class TextConstants
+{
+ ///
+ /// Characters that need to be separated during tokenization (by adding spaces before and after)
+ /// Includes: parentheses, brackets, braces, punctuation marks, special symbols, etc.
+ /// This ensures "(IH)" is split into "(", "IH", ")"
+ ///
+ public static readonly char[] SeparatorChars =
+ {
+ // Parentheses and brackets
+ '(', ')', '[', ']', '{', '}',
+ // Punctuation marks
+ ',', '.', ';', ':', '!', '?',
+ // Special symbols
+ '=', '@', '#', '$', '%', '^', '&', '*', '+', '-', '\\', '|', '<', '>', '~', '`'
+ };
+
+ ///
+ /// Whitespace characters used as token separators during tokenization.
+ /// Includes: space, tab, newline, and carriage return.
+ ///
+ public static readonly char[] TokenSeparators =
+ {
+ ' ', '\t', '\n', '\r'
+ };
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs
new file mode 100644
index 000000000..bd1288b92
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs
@@ -0,0 +1,59 @@
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Abstraction.Knowledges.Models;
+using Microsoft.AspNetCore.Http;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.Extensions.Logging;
+
+namespace BotSharp.Plugin.FuzzySharp.Controllers;
+
+[ApiController]
+public class FuzzySharpController : ControllerBase
+{
+    private readonly IPhraseService _phraseService;
+    private readonly ILogger<FuzzySharpController> _logger;
+
+    public FuzzySharpController(
+        IPhraseService phraseService,
+        ILogger<FuzzySharpController> logger)
+ {
+ _phraseService = phraseService;
+ _logger = logger;
+ }
+
+ ///
+ /// Analyze text for typos and entities using vocabulary.
+ ///
+ /// Returns:
+ /// - `original`: Original input text
+ /// - `tokens`: Tokenized text (only included if `include_tokens=true`)
+ /// - `flagged`: List of flagged items (each with `match_type`):
+ /// - `synonym_match` - Business abbreviations (confidence=1.0)
+ /// - `exact_match` - Exact vocabulary matches (confidence=1.0)
+ /// - `typo_correction` - Spelling corrections (confidence less than 1.0)
+ /// - `processing_time_ms`: Processing time in milliseconds
+ ///
+ /// Text analysis request
+ /// Text analysis response
+ [HttpPost("fuzzy-sharp/analyze-text")]
+    [ProducesResponseType(typeof(List<SearchPhrasesResult>), StatusCodes.Status200OK)]
+ [ProducesResponseType(StatusCodes.Status400BadRequest)]
+ [ProducesResponseType(StatusCodes.Status500InternalServerError)]
+    public async Task<IActionResult> AnalyzeText([FromBody] string text)
+ {
+ try
+ {
+ if (string.IsNullOrWhiteSpace(text))
+ {
+ return BadRequest(new { error = "Text is required" });
+ }
+
+ var result = await _phraseService.SearchPhrasesAsync(text);
+ return Ok(result);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Error analyzing and searching entities");
+ return StatusCode(500, new { error = $"Error analyzing and searching entities: {ex.Message}" });
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs
new file mode 100644
index 000000000..92bfad905
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs
@@ -0,0 +1,13 @@
+
+namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Arguments;
+
+public class TextAnalysisRequest
+{
+ public string Text { get; set; } = string.Empty;
+ public string? VocabularyFolderName { get; set; }
+ public string? SynonymMappingFile { get; set; }
+ public double Cutoff { get; set; } = 0.82;
+ public int TopK { get; set; } = 5;
+ public int MaxNgram { get; set; } = 5;
+ public bool IncludeTokens { get; set; } = false;
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs
new file mode 100644
index 000000000..90a9a06f1
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs
@@ -0,0 +1,26 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+
+namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;
+
+public interface INgramProcessor
+{
+ ///
+ /// Process tokens and generate all possible n-gram match results
+ ///
+ /// List of tokens to process
+ /// Vocabulary (source -> vocabulary set)
+ /// Synonym term Mapping
+ /// Lookup table (lowercase vocabulary -> (canonical form, source list))
+ /// Maximum n-gram length
+ /// Minimum confidence threshold for fuzzy matching
+ /// Maximum number of matches to return
+ /// List of flagged items
+    List<FlaggedItem> ProcessNgrams(
+        List<string> tokens,
+        Dictionary<string, HashSet<string>> vocabulary,
+        Dictionary<string, (string DbPath, string CanonicalForm)> synonymMapping,
+        Dictionary<string, (string CanonicalForm, List<string> Sources)> lookup,
+        int maxNgram,
+        double cutoff,
+        int topK);
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs
new file mode 100644
index 000000000..c900877bf
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs
@@ -0,0 +1,17 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+
+namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;
+
+///
+/// Result processor interface
+/// Responsible for processing match results, including deduplication and sorting
+///
+public interface IResultProcessor
+{
+ ///
+ /// Process a list of flagged items, removing overlapping duplicates and sorting
+ ///
+ /// List of flagged items to process
+ /// Processed list of flagged items (deduplicated and sorted)
+    List<FlaggedItem> ProcessResults(List<FlaggedItem> flagged);
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs
new file mode 100644
index 000000000..c715a8255
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs
@@ -0,0 +1,39 @@
+namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;
+
+public interface ITokenMatcher
+{
+ ///
+ /// Try to match a content span and return a match result
+ ///
+ /// The matching context containing all necessary information
+ /// Match result if found, null otherwise
+ MatchResult? TryMatch(MatchContext context);
+
+ ///
+ /// Priority of this matcher (higher priority matchers are tried first)
+ ///
+ int Priority { get; }
+}
+
+///
+/// Context information for token matching
+///
+public record MatchContext(
+    string ContentSpan,
+    string ContentLow,
+    int StartIndex,
+    int NgramLength,
+    Dictionary<string, HashSet<string>> Vocabulary,
+    Dictionary<string, (string DbPath, string CanonicalForm)> SynonymMapping,
+    Dictionary<string, (string CanonicalForm, List<string> Sources)> Lookup,
+    double Cutoff,
+    int TopK);
+
+///
+/// Result of a token match
+///
+public record MatchResult(
+    string CanonicalForm,
+    List<string> Sources,
+    string MatchType,
+    double Confidence);
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs
new file mode 100644
index 000000000..67bbd2802
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs
@@ -0,0 +1,13 @@
+
+namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+
+public class FlaggedItem
+{
+ public int Index { get; set; }
+ public string Token { get; set; } = string.Empty;
+    public List<string> Sources { get; set; } = new();
+ public string MatchType { get; set; } = string.Empty;
+ public string CanonicalForm { get; set; } = string.Empty;
+ public double Confidence { get; set; }
+ public int NgramLength { get; set; }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs
new file mode 100644
index 000000000..0a05d9cd1
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs
@@ -0,0 +1,10 @@
+
+namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+
+public class TextAnalysisResponse
+{
+ public string Original { get; set; } = string.Empty;
+    public List<string>? Tokens { get; set; }
+    public List<FlaggedItem> Flagged { get; set; } = new();
+ public double ProcessingTimeMs { get; set; }
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs
new file mode 100644
index 000000000..1a125ea08
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs
@@ -0,0 +1,29 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Abstraction.Plugins;
+using BotSharp.Plugin.FuzzySharp.Services;
+using BotSharp.Plugin.FuzzySharp.Services.Matching;
+using BotSharp.Plugin.FuzzySharp.Services.Processors;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+
+namespace BotSharp.Plugin.FuzzySharp;
+
+public class FuzzySharpPlugin : IBotSharpPlugin
+{
+ public string Id => "379e6f7b-c58c-458b-b8cd-0374e5830711";
+ public string Name => "Fuzzy Sharp";
+ public string Description => "Analyze text for typos and entities using domain-specific vocabulary.";
+ public string IconUrl => "https://cdn-icons-png.flaticon.com/512/9592/9592995.png";
+
+ public void RegisterDI(IServiceCollection services, IConfiguration config)
+ {
+        services.AddScoped<IPhraseService, PhraseService>();
+        services.AddScoped<IPhraseCollection, CsvPhraseCollectionLoader>();
+        services.AddScoped<INgramProcessor, NgramProcessor>();
+        services.AddScoped<IResultProcessor, ResultProcessor>();
+        services.AddScoped<ITokenMatcher, SynonymMatcher>();
+        services.AddScoped<ITokenMatcher, ExactMatcher>();
+        services.AddScoped<ITokenMatcher, FuzzyMatcher>();
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs
new file mode 100644
index 000000000..af471bfee
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs
@@ -0,0 +1,187 @@
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Core.Infrastructures;
+using CsvHelper;
+using CsvHelper.Configuration;
+using Microsoft.Extensions.Logging;
+using System.Globalization;
+using System.IO;
+
+namespace BotSharp.Plugin.FuzzySharp.Services;
+
+public class CsvPhraseCollectionLoader : IPhraseCollection
+{
+    private readonly ILogger<CsvPhraseCollectionLoader> _logger;
+
+    public CsvPhraseCollectionLoader(ILogger<CsvPhraseCollectionLoader> logger)
+ {
+ _logger = logger;
+ }
+
+ [SharpCache(60)]
+    public async Task<Dictionary<string, HashSet<string>>> LoadVocabularyAsync()
+    {
+        string foldername = "";
+        var vocabulary = new Dictionary<string, HashSet<string>>();
+
+ if (string.IsNullOrEmpty(foldername))
+ {
+ return vocabulary;
+ }
+
+ // Load CSV files from the folder
+ var csvFileDict = await LoadCsvFilesFromFolderAsync(foldername);
+ if (csvFileDict.Count == 0)
+ {
+ return vocabulary;
+ }
+
+ // Load each CSV file
+ foreach (var (source, filePath) in csvFileDict)
+ {
+ try
+ {
+ var terms = await LoadCsvFileAsync(filePath);
+ vocabulary[source] = terms;
+ _logger.LogInformation($"Loaded {terms.Count} terms for source '{source}' from {filePath}");
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, $"Error loading CSV file for source '{source}': {filePath}");
+ }
+ }
+
+ return vocabulary;
+ }
+
+ [SharpCache(60)]
+    public async Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadSynonymMappingAsync()
+    {
+        string filename = "";
+        var result = new Dictionary<string, (string DbPath, string CanonicalForm)>();
+ if (string.IsNullOrWhiteSpace(filename))
+ {
+ return result;
+ }
+
+ var searchFolder = Path.Combine(AppContext.BaseDirectory, "data", "plugins", "fuzzySharp");
+ var filePath = Path.Combine(searchFolder, filename);
+
+ if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
+ {
+ return result;
+ }
+
+ try
+ {
+ using var reader = new StreamReader(filePath);
+ using var csv = new CsvReader(reader, CreateCsvConfig());
+
+ await csv.ReadAsync();
+ csv.ReadHeader();
+
+ if (!HasRequiredColumns(csv))
+ {
+ _logger.LogWarning("Synonym mapping file missing required columns: {FilePath}", filePath);
+ return result;
+ }
+
+ while (await csv.ReadAsync())
+ {
+ var term = csv.GetField("term") ?? string.Empty;
+ var dbPath = csv.GetField("dbPath") ?? string.Empty;
+ var canonicalForm = csv.GetField("canonical_form") ?? string.Empty;
+
+ if (term.Length == 0 || dbPath.Length == 0 || canonicalForm.Length == 0)
+ {
+ _logger.LogWarning(
+ "Missing column(s) in CSV at row {Row}: term={Term}, dbPath={DbPath}, canonical_form={CanonicalForm}",
+ csv.Parser.RawRow,
+ term ?? "",
+ dbPath ?? "",
+ canonicalForm ?? "");
+ continue;
+ }
+
+ var key = term.ToLowerInvariant();
+ result[key] = (dbPath, canonicalForm);
+ }
+
+ _logger.LogInformation("Loaded synonym mapping from {FilePath}: {Count} terms", filePath, result.Count);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Error loading synonym mapping file: {FilePath}", filePath);
+ }
+
+ return result;
+ }
+
+    private async Task<HashSet<string>> LoadCsvFileAsync(string filePath)
+    {
+        var terms = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
+
+ if (!File.Exists(filePath))
+ {
+ _logger.LogWarning($"CSV file does not exist: {filePath}");
+ return terms;
+ }
+
+ using var reader = new StreamReader(filePath);
+ using var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
+ {
+ HasHeaderRecord = false // No header in the CSV files
+ });
+
+ while (await csv.ReadAsync())
+ {
+ // Read the first column (assuming it contains the terms)
+ var term = csv.GetField(0);
+ if (!string.IsNullOrWhiteSpace(term))
+ {
+ terms.Add(term.Trim());
+ }
+ }
+
+ _logger.LogInformation($"Loaded {terms.Count} terms from {Path.GetFileName(filePath)}");
+ return terms;
+ }
+
+    private async Task<Dictionary<string, string>> LoadCsvFilesFromFolderAsync(string folderName)
+    {
+        var csvFileDict = new Dictionary<string, string>();
+ var searchFolder = Path.Combine(AppContext.BaseDirectory, "data", "plugins", "fuzzySharp", folderName);
+ if (!Directory.Exists(searchFolder))
+ {
+ _logger.LogWarning($"Folder does not exist: {searchFolder}");
+ return csvFileDict;
+ }
+
+ var csvFiles = Directory.GetFiles(searchFolder, "*.csv");
+ foreach (var file in csvFiles)
+ {
+ var fileName = Path.GetFileNameWithoutExtension(file);
+ csvFileDict[fileName] = file;
+ }
+
+ _logger.LogInformation($"Loaded {csvFileDict.Count} CSV files from {searchFolder}");
+ return await Task.FromResult(csvFileDict);
+ }
+
+ private static CsvConfiguration CreateCsvConfig()
+ {
+ return new CsvConfiguration(CultureInfo.InvariantCulture)
+ {
+ HasHeaderRecord = true,
+ DetectColumnCountChanges = true,
+ MissingFieldFound = null
+ };
+ }
+
+ private static bool HasRequiredColumns(CsvReader csv)
+ {
+ return csv.HeaderRecord is { Length: > 0 } headers
+ && headers.Contains("term")
+ && headers.Contains("dbPath")
+ && headers.Contains("canonical_form");
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs
new file mode 100644
index 000000000..38e562eff
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs
@@ -0,0 +1,23 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Matching;
+
+public class ExactMatcher : ITokenMatcher
+{
+ public int Priority => 2; // Second highest priority
+
+ public MatchResult? TryMatch(MatchContext context)
+ {
+ if (context.Lookup.TryGetValue(context.ContentLow, out var match))
+ {
+ return new MatchResult(
+ CanonicalForm: match.CanonicalForm,
+ Sources: match.Sources,
+ MatchType: MatchReason.ExactMatch,
+ Confidence: 1.0);
+ }
+
+ return null;
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs
new file mode 100644
index 000000000..193e28bc6
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs
@@ -0,0 +1,81 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using System.Text.RegularExpressions;
+using FuzzySharp;
+using FuzzySharp.SimilarityRatio;
+using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Matching;
+
+public class FuzzyMatcher : ITokenMatcher
+{
+ public int Priority => 1; // Lowest priority
+
+ public MatchResult? TryMatch(MatchContext context)
+ {
+ var match = CheckTypoCorrection(context.ContentSpan, context.Lookup, context.Cutoff);
+ if (match == null)
+ {
+ return null;
+ }
+
+ var (canonicalForm, sources, confidence) = match.Value;
+ return new MatchResult(
+ CanonicalForm: canonicalForm,
+ Sources: sources,
+ MatchType: MatchReason.TypoCorrection,
+ Confidence: confidence);
+ }
+
+ ///
+ /// Check typo correction using fuzzy matching
+ ///
+    private (string CanonicalForm, List<string> Sources, double Confidence)? CheckTypoCorrection(
+        string contentSpan,
+        Dictionary<string, (string CanonicalForm, List<string> Sources)> lookup,
+        double cutoff)
+ {
+ // Convert cutoff to 0-100 scale for FuzzySharp
+ var scoreCutoff = (int)(cutoff * 100);
+
+ // Get all candidates from lookup
+ var candidates = lookup.Keys.ToList();
+
+ // Find best match using ExtractOne
+        var scorer = ScorerCache.Get<DefaultRatioScorer>();
+ var result = Process.ExtractOne(
+ contentSpan,
+ candidates,
+ candidate => Normalize(candidate), // Preprocessor function
+ scorer,
+ scoreCutoff // Score cutoff
+ );
+
+ if (result == null)
+ {
+ return null;
+ }
+
+ // Get the canonical form and sources from lookup
+ var match = lookup[result.Value];
+ return (match.CanonicalForm, match.Sources, Math.Round(result.Score / 100.0, 3));
+ }
+
+ ///
+ /// Normalize text for fuzzy matching comparison
+ /// - Replaces all non-word characters (except apostrophes) with spaces
+ /// - Converts to lowercase
+ /// - Collapses multiple spaces into single space
+ /// - Trims leading/trailing whitespace
+ /// Example: "Test-Value (123)" → "test value 123"
+ ///
+ /// Text to normalize
+ /// Normalized text suitable for fuzzy matching
+ private string Normalize(string text)
+ {
+ // Replace non-word characters (except apostrophes) with spaces
+ var normalized = Regex.Replace(text, @"[^\w']+", " ", RegexOptions.IgnoreCase);
+ // Convert to lowercase, collapse multiple spaces, and trim
+ return Regex.Replace(normalized.ToLowerInvariant(), @"\s+", " ").Trim();
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs
new file mode 100644
index 000000000..9f6d8f97d
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs
@@ -0,0 +1,23 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Matching;
+
+public class SynonymMatcher : ITokenMatcher
+{
+ public int Priority => 3; // Highest priority
+
+ public MatchResult? TryMatch(MatchContext context)
+ {
+ if (context.SynonymMapping.TryGetValue(context.ContentLow, out var match))
+ {
+ return new MatchResult(
+ CanonicalForm: match.CanonicalForm,
+                Sources: new List<string> { match.DbPath },
+ MatchType: MatchReason.SynonymMatch,
+ Confidence: 1.0);
+ }
+
+ return null;
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/PhraseService.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/PhraseService.cs
new file mode 100644
index 000000000..cd05ca6a6
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/PhraseService.cs
@@ -0,0 +1,199 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.FuzzSharp.Arguments;
+using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Abstraction.Knowledges.Models;
+using BotSharp.Plugin.FuzzySharp.Utils;
+using Microsoft.Extensions.Logging;
+using System.Diagnostics;
+
+namespace BotSharp.Plugin.FuzzySharp.Services;
+
+public class PhraseService : IPhraseService
+{
+    private readonly ILogger<PhraseService> _logger;
+    private readonly IEnumerable<IPhraseCollection> _phraseLoaderServices;
+    private readonly INgramProcessor _ngramProcessor;
+    private readonly IResultProcessor _resultProcessor;
+
+    public PhraseService(
+        ILogger<PhraseService> logger,
+        IEnumerable<IPhraseCollection> phraseLoaderServices,
+        INgramProcessor ngramProcessor,
+        IResultProcessor resultProcessor)
+ {
+ _logger = logger;
+ _phraseLoaderServices = phraseLoaderServices;
+ _ngramProcessor = ngramProcessor;
+ _resultProcessor = resultProcessor;
+ }
+
+    public Task<List<SearchPhrasesResult>> SearchPhrasesAsync(string term)
+ {
+ var request = BuildTextAnalysisRequest(term);
+ var response = AnalyzeTextAsync(request);
+ return response.ContinueWith(t =>
+ {
+ var results = t.Result.Flagged.Select(f => new SearchPhrasesResult
+ {
+ Token = f.Token,
+ Sources = f.Sources,
+ CanonicalForm = f.CanonicalForm,
+ MatchType = f.MatchType,
+ Confidence = f.Confidence
+ }).ToList();
+ return results;
+ });
+ }
+
+ private TextAnalysisRequest BuildTextAnalysisRequest(string inputText)
+ {
+ return new TextAnalysisRequest
+ {
+ Text = inputText
+ };
+ }
+
+ ///
+ /// Analyze text for typos and entities using domain-specific vocabulary
+ ///
+    private async Task<TextAnalysisResponse> AnalyzeTextAsync(TextAnalysisRequest request)
+ {
+ var stopwatch = Stopwatch.StartNew();
+ try
+ {
+ // Tokenize the text
+ var tokens = TextTokenizer.Tokenize(request.Text);
+
+ // Load vocabulary
+ var vocabulary = await LoadAllVocabularyAsync();
+
+ // Load synonym mapping
+ var synonymMapping = await LoadAllSynonymMappingAsync();
+
+ // Analyze text
+ var flagged = AnalyzeTokens(tokens, vocabulary, synonymMapping, request);
+
+ stopwatch.Stop();
+
+ var response = new TextAnalysisResponse
+ {
+ Original = request.Text,
+ Flagged = flagged,
+ ProcessingTimeMs = Math.Round(stopwatch.Elapsed.TotalMilliseconds, 2)
+ };
+
+ if (request.IncludeTokens)
+ {
+ response.Tokens = tokens;
+ }
+
+ _logger.LogInformation(
+ $"Text analysis completed in {response.ProcessingTimeMs}ms | " +
+ $"Text length: {request.Text.Length} chars | " +
+ $"Flagged items: {flagged.Count}");
+
+ return response;
+ }
+ catch (Exception)
+ {
+ stopwatch.Stop();
+ throw;
+ }
+ }
+
+    public async Task<Dictionary<string, HashSet<string>>> LoadAllVocabularyAsync()
+    {
+        var results = await Task.WhenAll(_phraseLoaderServices.Select(c => c.LoadVocabularyAsync()));
+        var merged = new Dictionary<string, HashSet<string>>();
+
+ foreach (var dict in results)
+ {
+ foreach (var kvp in dict)
+ {
+ if (!merged.TryGetValue(kvp.Key, out var set))
+                merged[kvp.Key] = new HashSet<string>(kvp.Value);
+ else
+ set.UnionWith(kvp.Value);
+ }
+ }
+
+ return merged;
+ }
+
+    public async Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadAllSynonymMappingAsync()
+    {
+        var results = await Task.WhenAll(_phraseLoaderServices.Select(c => c.LoadSynonymMappingAsync()));
+        var merged = new Dictionary<string, (string DbPath, string CanonicalForm)>();
+
+ foreach (var dict in results)
+ {
+ foreach (var kvp in dict)
+ merged[kvp.Key] = kvp.Value; // later entries override earlier ones
+ }
+
+ return merged;
+ }
+
+ ///
+ /// Analyze tokens for typos and entities
+ ///
+    private List<FlaggedItem> AnalyzeTokens(
+        List<string> tokens,
+        Dictionary<string, HashSet<string>> vocabulary,
+        Dictionary<string, (string DbPath, string CanonicalForm)> synonymMapping,
+        TextAnalysisRequest request)
+ {
+ // Build lookup table for O(1) exact match lookups (matching Python's build_lookup)
+ var lookup = BuildLookup(vocabulary);
+
+ // Process n-grams and find matches
+ var flagged = _ngramProcessor.ProcessNgrams(
+ tokens,
+ vocabulary,
+ synonymMapping,
+ lookup,
+ request.MaxNgram,
+ request.Cutoff,
+ request.TopK);
+
+ // Process results: deduplicate and sort
+ return _resultProcessor.ProcessResults(flagged);
+ }
+
+ ///
+ /// Build a lookup dictionary mapping lowercase terms to their canonical form and sources.
+ /// This is a performance optimization - instead of iterating through all sources for each lookup,
+ /// we build a flat dictionary once at the start.
+ ///
+ /// Matches Python's build_lookup() function.
+ ///
+    private Dictionary<string, (string CanonicalForm, List<string> Sources)> BuildLookup(
+        Dictionary<string, HashSet<string>> vocabulary)
+    {
+        var lookup = new Dictionary<string, (string CanonicalForm, List<string> Sources)>();
+
+ foreach (var (source, terms) in vocabulary)
+ {
+ foreach (var term in terms)
+ {
+ var key = term.ToLowerInvariant();
+ if (lookup.TryGetValue(key, out var existing))
+ {
+ // Term already exists - add this source to the list if not already there
+ if (!existing.Sources.Contains(source))
+ {
+ existing.Sources.Add(source);
+ }
+ }
+ else
+ {
+ // New term - create entry with single source in list
+                lookup[key] = (term, new List<string> { source });
+ }
+ }
+ }
+
+ return lookup;
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs
new file mode 100644
index 000000000..86e584067
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs
@@ -0,0 +1,131 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Processors;
+
+public class NgramProcessor : INgramProcessor
+{
+    private readonly List<ITokenMatcher> _matchers;
+
+    public NgramProcessor(IEnumerable<ITokenMatcher> matchers)
+ {
+ // Sort matchers by priority (highest first)
+ _matchers = matchers.OrderByDescending(m => m.Priority).ToList();
+ }
+
+    public List<FlaggedItem> ProcessNgrams(
+        List<string> tokens,
+        Dictionary<string, HashSet<string>> vocabulary,
+        Dictionary<string, (string DbPath, string CanonicalForm)> synonymMapping,
+        Dictionary<string, (string CanonicalForm, List<string> Sources)> lookup,
+        int maxNgram,
+        double cutoff,
+        int topK)
+    {
+        var flagged = new List<FlaggedItem>();
+
+ // Process n-grams from largest to smallest
+ for (int n = maxNgram; n >= 1; n--)
+ {
+ for (int i = 0; i <= tokens.Count - n; i++)
+ {
+ var item = ProcessSingleNgram(
+ tokens,
+ i,
+ n,
+ vocabulary,
+ synonymMapping,
+ lookup,
+ cutoff,
+ topK);
+
+ if (item != null)
+ {
+ flagged.Add(item);
+ }
+ }
+ }
+
+ return flagged;
+ }
+
+ ///
+ /// Process a single n-gram at the specified position
+ ///
+    private FlaggedItem? ProcessSingleNgram(
+        List<string> tokens,
+        int startIdx,
+        int n,
+        Dictionary<string, HashSet<string>> vocabulary,
+        Dictionary<string, (string DbPath, string CanonicalForm)> synonymMapping,
+        Dictionary<string, (string CanonicalForm, List<string> Sources)> lookup,
+        double cutoff,
+        int topK)
+ {
+ // Extract content span
+ var (contentSpan, spanTokens, contentIndices) = ExtractContentSpan(tokens, startIdx, n);
+ if (string.IsNullOrWhiteSpace(contentSpan))
+ {
+ return null;
+ }
+
+ var contentLow = contentSpan.ToLowerInvariant();
+
+ // Try matching in priority order using matchers
+ var context = new MatchContext(
+ contentSpan,
+ contentLow,
+ startIdx,
+ n,
+ vocabulary,
+ synonymMapping,
+ lookup,
+ cutoff,
+ topK);
+
+ foreach (var matcher in _matchers)
+ {
+ var matchResult = matcher.TryMatch(context);
+ if (matchResult != null)
+ {
+ return CreateFlaggedItem(matchResult, startIdx, contentSpan, n);
+ }
+ }
+
+ return null;
+ }
+
+ ///
+ /// Create a FlaggedItem from a MatchResult
+ ///
+ private FlaggedItem CreateFlaggedItem(
+ MatchResult matchResult,
+ int startIndex,
+ string contentSpan,
+ int ngramLength)
+ {
+ return new FlaggedItem
+ {
+ Index = startIndex,
+ Token = contentSpan,
+ Sources = matchResult.Sources,
+ MatchType = matchResult.MatchType,
+ CanonicalForm = matchResult.CanonicalForm,
+ Confidence = matchResult.Confidence,
+ NgramLength = ngramLength
+ };
+ }
+
+ ///
+ /// Extract content span
+ ///
+    private (string ContentSpan, List<string> Tokens, List<int> ContentIndices) ExtractContentSpan(
+        List<string> tokens,
+        int startIdx,
+        int n)
+ {
+ var span = tokens.Skip(startIdx).Take(n).ToList();
+ var indices = Enumerable.Range(startIdx, n).ToList();
+ return (string.Join(" ", span), span, indices);
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs
new file mode 100644
index 000000000..ea402804d
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs
@@ -0,0 +1,102 @@
+using BotSharp.Plugin.FuzzySharp.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Processors;
+
+/// <summary>
+/// Post-processes flagged detections: removes overlapping duplicates with the same
+/// canonical form, then sorts the survivors for presentation.
+/// </summary>
+public class ResultProcessor : IResultProcessor
+{
+ /// <summary>
+ /// Deduplicate overlapping detections, then sort by confidence (descending)
+ /// and match type (alphabetically). Mirrors Python's _sort_and_format_results.
+ /// </summary>
+ /// <param name="flagged">Raw detections, possibly overlapping.</param>
+ /// <returns>Deduplicated detections in presentation order.</returns>
+ public List ProcessResults(List flagged)
+ {
+ // Remove overlapping duplicates
+ var deduped = RemoveOverlappingDuplicates(flagged);
+
+ // Sort by confidence (descending), then match_type (alphabetically)
+ // This matches Python's _sort_and_format_results function
+ return deduped
+ .OrderByDescending(f => f.Confidence)
+ .ThenBy(f => f.MatchType)
+ .ToList();
+ }
+
+ /// <summary>
+ /// Remove overlapping detections with the same canonical form.
+ /// When multiple detections overlap and have the same canonical_form,
+ /// keep only the best one based on:
+ /// 1. Prefer synonym_match over exact_match over typo_correction (matches matcher priority)
+ /// 2. Highest confidence
+ /// 3. Shortest n-gram length
+ /// </summary>
+ /// <param name="flagged">Raw detections to deduplicate.</param>
+ /// <returns>One best detection per overlapping group.</returns>
+ private List RemoveOverlappingDuplicates(List flagged)
+ {
+ var deduped = new List();
+ // Indices already absorbed into an earlier item's overlap group.
+ var skipIndices = new HashSet();
+
+ for (int i = 0; i < flagged.Count; i++)
+ {
+ if (skipIndices.Contains(i))
+ {
+ continue;
+ }
+
+ var item = flagged[i];
+ // Half-open token range [start, start + length) occupied by this detection.
+ var itemRange = (item.Index, item.Index + item.NgramLength);
+
+ // Find all overlapping items with same canonical_form (regardless of match_type)
+ // NOTE(review): overlap is tested against item i's range only, not chained
+ // transitively through group members — confirm this matches the Python behavior.
+ var overlappingGroup = new List { item };
+ for (int j = i + 1; j < flagged.Count; j++)
+ {
+ if (skipIndices.Contains(j))
+ {
+ continue;
+ }
+
+ var other = flagged[j];
+ if (item.CanonicalForm == other.CanonicalForm)
+ {
+ var otherRange = (other.Index, other.Index + other.NgramLength);
+ if (RangesOverlap(itemRange, otherRange))
+ {
+ overlappingGroup.Add(other);
+ skipIndices.Add(j);
+ }
+ }
+ }
+
+ // Keep the best item from the overlapping group
+ // Priority: synonym_match (3) > exact_match (2) > typo_correction (1)
+ // Then highest confidence, then shortest ngram
+ var bestItem = overlappingGroup
+ .OrderByDescending(x => GetMatchTypePriority(x.MatchType))
+ .ThenByDescending(x => x.Confidence)
+ .ThenBy(x => x.NgramLength)
+ .First();
+ deduped.Add(bestItem);
+ }
+
+ return deduped;
+ }
+
+ /// <summary>
+ /// Get priority value for match type (higher is better)
+ /// Matches the priority order in matchers: synonym > exact > fuzzy
+ /// </summary>
+ /// <param name="matchType">One of the MatchReason constants.</param>
+ /// <returns>3 for synonym, 2 for exact, 1 for typo correction, 0 for anything else.</returns>
+ private int GetMatchTypePriority(string matchType)
+ {
+ return matchType switch
+ {
+ MatchReason.SynonymMatch => 3, // Highest priority
+ MatchReason.ExactMatch => 2, // Second priority
+ MatchReason.TypoCorrection => 1, // Lowest priority
+ _ => 0 // Unknown types get lowest priority
+ };
+ }
+
+ /// <summary>
+ /// Check if two token ranges overlap.
+ /// Ranges are half-open [start, end); touching ranges do not overlap.
+ /// </summary>
+ /// <param name="range1">First token range.</param>
+ /// <param name="range2">Second token range.</param>
+ /// <returns>True when the ranges share at least one token position.</returns>
+ private bool RangesOverlap((int start, int end) range1, (int start, int end) range2)
+ {
+ return range1.start < range2.end && range2.start < range1.end;
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs
new file mode 100644
index 000000000..1a0fe1eab
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs
@@ -0,0 +1,5 @@
+global using System;
+global using System.Collections.Generic;
+global using System.Linq;
+global using System.Text;
+global using System.Threading.Tasks;
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs
new file mode 100644
index 000000000..8853733a2
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs
@@ -0,0 +1,63 @@
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Utils;
+
+/// <summary>
+/// Whitespace-based tokenizer: first pads separator characters with spaces,
+/// then splits on the configured token separators.
+/// </summary>
+public static class TextTokenizer
+{
+ /// <summary>
+ /// Preprocess text: add spaces before and after characters that need to be separated
+ /// This allows subsequent simple whitespace tokenization to correctly separate these characters
+ /// Example: "(IH)" -> " ( IH ) " -> ["(", "IH", ")"]
+ /// </summary>
+ /// <param name="text">Text to preprocess</param>
+ /// <returns>Preprocessed text (input returned unchanged when null/whitespace)</returns>
+ public static string PreprocessText(string text)
+ {
+ if (string.IsNullOrWhiteSpace(text))
+ {
+ return text;
+ }
+
+ // Worst case every char is a separator padded by two spaces; 2x is a pragmatic preallocation.
+ var result = new StringBuilder(text.Length * 2);
+
+ foreach (var ch in text)
+ {
+ // If it's a character that needs to be separated, add spaces before and after
+ if (TextConstants.SeparatorChars.Contains(ch))
+ {
+ result.Append(' ');
+ result.Append(ch);
+ result.Append(' ');
+ }
+ else
+ {
+ result.Append(ch);
+ }
+ }
+
+ return result.ToString();
+ }
+
+ /// <summary>
+ /// Simple whitespace tokenization
+ /// Should be called after preprocessing text with PreprocessText
+ /// </summary>
+ /// <param name="text">Text to tokenize</param>
+ /// <returns>List of tokens (empty entries are removed)</returns>
+ public static List SimpleTokenize(string text)
+ {
+ return text.Split(TextConstants.TokenSeparators, StringSplitOptions.RemoveEmptyEntries).ToList();
+ }
+
+ /// <summary>
+ /// Complete tokenization flow: preprocessing + tokenization
+ /// This is the recommended usage
+ /// </summary>
+ /// <param name="text">Text to tokenize</param>
+ /// <returns>List of tokens</returns>
+ public static List Tokenize(string text)
+ {
+ var preprocessed = PreprocessText(text);
+ return SimpleTokenize(preprocessed);
+ }
+}
diff --git a/src/WebStarter/WebStarter.csproj b/src/WebStarter/WebStarter.csproj
index 5a7c6eb7b..082ac578e 100644
--- a/src/WebStarter/WebStarter.csproj
+++ b/src/WebStarter/WebStarter.csproj
@@ -37,6 +37,7 @@
+
diff --git a/src/WebStarter/appsettings.json b/src/WebStarter/appsettings.json
index be70e0d47..5cd9c0d6d 100644
--- a/src/WebStarter/appsettings.json
+++ b/src/WebStarter/appsettings.json
@@ -890,7 +890,8 @@
"BotSharp.Plugin.ExcelHandler",
"BotSharp.Plugin.SqlDriver",
"BotSharp.Plugin.TencentCos",
- "BotSharp.Plugin.PythonInterpreter"
+ "BotSharp.Plugin.PythonInterpreter",
+ "BotSharp.Plugin.FuzzySharp"
]
}
}