using HtmlAgilityPack;
using Microsoft.Recognizers.Text.Number;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
namespace Beam.Dynamic {
    /// <summary>
    /// Helpers for tidying text obtained from online sources: HTML entity decoding,
    /// expansion of leftover numeric character references, and number extraction.
    /// </summary>
    public static partial class OnlineCleaner {
        /// <summary>
        /// Matches an HTML numeric character reference: <c>&amp;#</c> followed by either
        /// <c>x</c>/<c>X</c> and 1-6 hexadecimal digits, or 1-7 decimal digits, and a
        /// terminating <c>;</c>. Named entities such as <c>&amp;amp;</c> are not matched.
        /// </summary>
        [GeneratedRegex("&#(?:[xX][0-9A-Fa-f]{1,6}|[0-9]{1,7});")]
        public static partial Regex MochaBlendUnicodeEscapeSequence();

        /// <summary>
        /// Replaces every numeric character reference in <paramref name="text"/> with the
        /// character it denotes.
        /// </summary>
        /// <param name="text">Text that may contain references such as <c>&amp;#65;</c> or <c>&amp;#x41;</c>.</param>
        /// <returns>The text with all valid references expanded; invalid ones are left as-is.</returns>
        private static string UnicodeEscapeSequences(string text) {
            return MochaBlendUnicodeEscapeSequence().Replace(text, DecodeNumericReference);
        }

        /// <summary>
        /// Converts a single regex match of a numeric character reference to its character.
        /// </summary>
        /// <param name="match">A match produced by <see cref="MochaBlendUnicodeEscapeSequence"/>.</param>
        /// <returns>
        /// The decoded character (one or two UTF-16 code units), or the original matched
        /// text when the number is not a valid Unicode scalar value.
        /// </returns>
        private static string DecodeNumericReference(Match match) {
            // Drop the leading "&#" and the trailing ';' so only the number (with an
            // optional hex marker) remains.
            string body = match.Value[2..^1];

            int codePoint;
            bool parsed;
            if (body[0] is 'x' or 'X') {
                parsed = int.TryParse(body[1..], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint);
            }
            else {
                parsed = int.TryParse(body, NumberStyles.None, CultureInfo.InvariantCulture, out codePoint);
            }

            // Surrogates and values above U+10FFFF cannot be turned into text; keep the
            // reference untouched instead of throwing or emitting a garbage character.
            if (!parsed || !Rune.IsValid(codePoint)) {
                return match.Value;
            }

            return char.ConvertFromUtf32(codePoint);
        }

        /// <summary>
        /// Runs the number recognizer over <paramref name="text"/> and returns the
        /// recognized values as integers, in the order the recognizer reported them.
        /// </summary>
        /// <param name="text">The text to scan for numbers.</param>
        /// <param name="from">The culture identifier handed to the recognizer as its second argument.</param>
        /// <returns>
        /// The recognized numbers truncated toward zero. Results without a numeric
        /// <c>"value"</c> resolution, or whose value does not fit in an <see cref="int"/>,
        /// are skipped. Never <c>null</c>; empty when nothing usable was recognized.
        /// </returns>
        public static List<int> ParseNumbers(string text, string from) {
            var results = NumberRecognizer.RecognizeNumber(text, from, NumberOptions.None, false);

            List<int> numbers = [];
            foreach (var result in results) {
                if (result.Resolution is not { } resolution || !resolution.TryGetValue("value", out var value)) {
                    continue;
                }

                // Convert and parse with the invariant culture so the outcome does not
                // depend on the machine's regional settings.
                // NOTE(review): presumes the recognizer emits invariant-formatted values - confirm.
                string? raw = Convert.ToString(value, CultureInfo.InvariantCulture);
                if (!double.TryParse(raw, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out double number)) {
                    continue;
                }

                // NaN fails both comparisons, so it is rejected together with
                // infinities and anything outside the int range.
                if (number >= int.MinValue && number <= int.MaxValue) {
                    numbers.Add((int)number);
                }
            }

            return numbers;
        }

        /// <summary>
        /// Cleans text from an online source by HTML-decoding it and then expanding any
        /// numeric character references that remain after decoding.
        /// </summary>
        /// <param name="onlineText">The raw text; may be <c>null</c>.</param>
        /// <returns>
        /// The cleaned text, or an empty string when <paramref name="onlineText"/> is
        /// <c>null</c>, empty, or whitespace only.
        /// </returns>
        public static string Clean(string? onlineText) {
            if (string.IsNullOrWhiteSpace(onlineText)) {
                return "";
            }

            // The second pass must run on the decoded text: it picks up references that
            // only become visible once an outer layer of encoding has been removed
            // (e.g. "&amp;#x41;" -> "&#x41;" -> "A").
            string decoded = HttpUtility.HtmlDecode(onlineText);
            return UnicodeEscapeSequences(decoded);
        }
    }
}