using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
using HtmlAgilityPack;
using Microsoft.Recognizers.Text.Number;

namespace Beam.Dynamic
{
    /// <summary>
    /// Text clean-up helpers: decoding HTML character references and extracting
    /// integers from natural-language text.
    /// </summary>
    public static partial class StringCleaner
    {
        /// <summary>
        /// Matches candidate numeric character references of the form
        /// <c>&amp;#NNNN;</c> or <c>&amp;#xHHHH;</c> with one to four payload characters.
        /// </summary>
        /// <remarks>
        /// The character class is intentionally left as in the original pattern, so it also
        /// matches payloads that are not valid numbers (letters, underscores, non-ASCII digits).
        /// Callers must validate the payload themselves.
        /// </remarks>
        [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
        public static partial Regex MochaBlendUnicodeEscapeSequence();

        /// <summary>
        /// Replaces every numeric character reference found by
        /// <see cref="MochaBlendUnicodeEscapeSequence"/> with the character it denotes.
        /// </summary>
        /// <param name="text">Text that may contain numeric character references.</param>
        /// <returns>
        /// <paramref name="text"/> with each parsable reference replaced by a single character.
        /// Matches whose payload is not a valid decimal/hexadecimal number are left unchanged.
        /// </returns>
        private static string UnicodeEscapeSequences(string text)
        {
            return MochaBlendUnicodeEscapeSequence().Replace(text, DecodeCharacterReference);
        }

        /// <summary>
        /// Converts one regex match (<c>&amp;#65;</c> / <c>&amp;#x41;</c>) into its character.
        /// </summary>
        private static string DecodeCharacterReference(Match match)
        {
            string reference = match.Value;

            // Layout is "&#" [x] payload ";" - the payload starts at index 3 for the
            // hexadecimal form and at index 2 for the decimal form, and always ends
            // just before the trailing ';'.
            bool isHex = reference[2] == 'x';
            ReadOnlySpan<char> payload = reference.AsSpan()[(isHex ? 3 : 2)..^1];

            // The regex is looser than the number grammar (e.g. "&#abcd;", "&#x;"),
            // so a failed parse means "not a character reference": keep the text as is
            // instead of throwing FormatException out of the whole replacement.
            NumberStyles style = isHex ? NumberStyles.HexNumber : NumberStyles.Integer;
            if (!int.TryParse(payload, style, CultureInfo.InvariantCulture, out int codeUnit))
            {
                return reference;
            }

            // At most four digits are matched, so the value always fits in one UTF-16 code unit.
            char decoded = (char)codeUnit;

            // A lone surrogate is not a valid character on its own; emit U+FFFD, which is
            // what decoding the raw bytes with Encoding.Unicode produced previously.
            return char.IsSurrogate(decoded) ? "\uFFFD" : decoded.ToString();
        }

        /// <summary>
        /// Extracts the numbers recognised in <paramref name="text"/> as integers.
        /// </summary>
        /// <param name="text">Natural-language text to scan for numbers.</param>
        /// <param name="from">
        /// Value forwarded as the second argument of <c>NumberRecognizer.RecognizeNumber</c>
        /// (presumably the culture code of <paramref name="text"/> - confirm against callers).
        /// </param>
        /// <returns>
        /// The recognised values truncated toward zero, in recognition order. Results without
        /// a parsable <c>"value"</c> resolution, and values that do not fit in an
        /// <see cref="int"/>, are skipped. Never <c>null</c>; empty when nothing qualifies.
        /// </returns>
        public static List<int> ParseNumbers(string text, string from)
        {
            var recognized = NumberRecognizer.RecognizeNumber(text, from, NumberOptions.None, false);
            var numbers = new List<int>();

            foreach (var result in recognized)
            {
                if (!result.Resolution.TryGetValue("value", out var value))
                {
                    continue;
                }

                // NOTE(review): this parse uses the current culture; if the recognizer always
                // emits invariant-formatted numbers, pass CultureInfo.InvariantCulture here.
                if (!double.TryParse(value?.ToString(), out var number))
                {
                    continue;
                }

                // Converting NaN or an out-of-range double to int yields an unspecified value,
                // so only convert what an int can actually represent (NaN fails both tests).
                if (number >= (double)int.MinValue && number <= (double)int.MaxValue)
                {
                    numbers.Add((int)number);
                }
            }

            return numbers;
        }

        /// <summary>
        /// Cleans text obtained from an online source by decoding HTML entities and then
        /// resolving any numeric character references that remain (e.g. double-encoded
        /// input such as <c>&amp;amp;#39;</c>).
        /// </summary>
        /// <param name="onlineText">Raw text; may be <c>null</c>.</param>
        /// <returns>
        /// The decoded text, or an empty string when <paramref name="onlineText"/> is
        /// <c>null</c>, empty, or whitespace only.
        /// </returns>
        public static string Clean(string? onlineText)
        {
            if (string.IsNullOrWhiteSpace(onlineText))
            {
                return "";
            }

            // Feed the HTML-decoded text forward; previously the decoded value was
            // computed and then discarded, so named entities were never decoded.
            var decoded = HttpUtility.HtmlDecode(onlineText);
            return UnicodeEscapeSequences(decoded);
        }
    }
}