7ed05abdb8
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders. - Refactored existing classes into appropriate namespaces and projects. - Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.). - Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility. - Removed deprecated classes like SourceLinkBuilder and StateChangerFactory. - Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. - Consolidated shared logic under Beam.Abstractions.
52 lines
2.0 KiB
C#
52 lines
2.0 KiB
C#
using HtmlAgilityPack;
|
|
using Microsoft.Recognizers.Text.Number;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Globalization;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using System.Web;
|
|
|
|
namespace Beam.Dynamic {
|
|
/// <summary>
/// Clean-up helpers for text taken from web pages: resolves HTML character
/// references and extracts whole numbers from free text.
/// </summary>
public static partial class StringCleaner {
    /// <summary>
    /// Source-generated regex matching <c>&amp;#</c>, an optional <c>x</c>, one to four
    /// word characters and a closing <c>;</c> (for example <c>&amp;#x2019;</c> or
    /// <c>&amp;#8217;</c>).
    /// </summary>
    /// <remarks>
    /// The character class is wider than "digits": it also accepts letters and
    /// underscores, so a match is not guaranteed to be a valid numeric reference.
    /// Consumers must validate the payload themselves.
    /// </remarks>
    [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
    public static partial Regex MochaBlendUnicodeEscapeSequence();

    /// <summary>
    /// Replaces every numeric character reference found by
    /// <see cref="MochaBlendUnicodeEscapeSequence"/> with the character it denotes.
    /// </summary>
    /// <param name="text">Text that may contain <c>&amp;#NNNN;</c> / <c>&amp;#xHHHH;</c> references.</param>
    /// <returns>
    /// <paramref name="text"/> with each valid reference replaced by a single UTF-16
    /// code unit. Matches whose payload is not a valid number are left untouched;
    /// references that name a surrogate code unit become U+FFFD.
    /// </returns>
    private static string UnicodeEscapeSequences(string text) {
        return MochaBlendUnicodeEscapeSequence().Replace(text, static match => {
            string reference = match.Value;

            // Layout is "&#" [x] payload ";" - the payload starts after the optional radix marker.
            bool isHex = reference[2] is 'x' or 'X';
            int payloadStart = isHex ? 3 : 2;
            ReadOnlySpan<char> payload = reference.AsSpan(payloadStart, reference.Length - payloadStart - 1);

            // The regex admits any word character, so parsing can legitimately fail
            // (e.g. "&#xZZ;" or "&#x;"); keep such text as-is instead of throwing.
            NumberStyles style = isHex ? NumberStyles.HexNumber : NumberStyles.None;
            if (!int.TryParse(payload, style, CultureInfo.InvariantCulture, out int codeUnit)) {
                return reference;
            }

            // At most four digits are matched, so the value always fits in one UTF-16 code unit.
            char decoded = (char)codeUnit;

            // A lone surrogate is not a valid character on its own; substitute U+FFFD.
            return char.IsSurrogate(decoded) ? "\uFFFD" : decoded.ToString();
        });
    }

    /// <summary>
    /// Recognizes numbers in <paramref name="text"/> and returns them as integers.
    /// </summary>
    /// <param name="text">Free text to scan for numbers.</param>
    /// <param name="from">
    /// Culture code forwarded to <c>NumberRecognizer.RecognizeNumber</c> (no fallback
    /// to the default culture is requested).
    /// </param>
    /// <returns>
    /// The recognized values in recognition order, truncated toward zero. Results
    /// without a parseable <c>"value"</c> resolution, or whose value does not fit in
    /// an <see cref="int"/>, are skipped. Never <c>null</c>; empty when nothing is found.
    /// </returns>
    public static List<int> ParseNumbers(string text, string from) {
        var recognized = NumberRecognizer.RecognizeNumber(text, from, NumberOptions.None, false);

        List<int> numbers = [];
        foreach (var result in recognized) {
            if (!result.Resolution.TryGetValue("value", out var raw)) {
                continue;
            }

            // Parse with the invariant culture so the result does not depend on the
            // machine's regional settings (e.g. "3.5" being read as 35).
            if (!double.TryParse(raw?.ToString(), NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out double number)) {
                continue;
            }

            // Casting NaN, infinities or out-of-range doubles to int yields an unspecified value.
            if (number >= int.MinValue && number <= int.MaxValue) {
                numbers.Add((int)number);
            }
        }

        return numbers;
    }

    /// <summary>
    /// Normalizes text obtained from a web page.
    /// </summary>
    /// <param name="onlineText">Raw text; may be <c>null</c>.</param>
    /// <returns>
    /// An empty string when <paramref name="onlineText"/> is <c>null</c>, empty or
    /// whitespace only. Otherwise the HTML-decoded text, with any numeric character
    /// references still present after decoding (e.g. double-encoded input) resolved.
    /// </returns>
    public static string Clean(string? onlineText) {
        if (string.IsNullOrWhiteSpace(onlineText)) {
            return "";
        }

        // Feed the decoded text forward; previously the decode result was discarded.
        var decoded = HttpUtility.HtmlDecode(onlineText);
        return UnicodeEscapeSequences(decoded);
    }
}
|
|
}
|