using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Beam.Dynamic
{
    /// <summary>
    /// Extension helpers for walking down an <see cref="HtmlNode"/> tree one direct
    /// child at a time and for extracting text from a node's children.
    /// </summary>
    public static partial class HtmlNodeExtensions
    {
        /// <summary>
        /// Descends one level per entry in <paramref name="classes"/>, each time taking the
        /// first direct child that has the given CSS class.
        /// </summary>
        /// <param name="node">Node to start from.</param>
        /// <param name="classes">Class names to follow, outermost first.</param>
        /// <returns>The node reached, or <c>null</c> if any step finds no matching child.</returns>
        public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes)
        {
            return node.DescendCollectionTree(ThenByClass, classes);
        }

        /// <summary>
        /// Takes the first direct child having class <paramref name="class"/>, repeating the
        /// step on the result until it has been applied <paramref name="count"/> times.
        /// </summary>
        /// <param name="node">Node to start from.</param>
        /// <param name="class">CSS class the child must have.</param>
        /// <param name="count">Number of levels to descend; values of 1 or less descend once.</param>
        /// <returns>The node reached, or <c>null</c> if a step finds no matching child.</returns>
        public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1)
        {
            return node.ThenByFunc(
                parent => parent.ChildNodes.FirstOrDefault(child => child.HasClass(@class)),
                count);
        }

        /// <summary>
        /// Takes the first direct child whose <see cref="HtmlNode.Name"/> equals
        /// <paramref name="name"/>, repeating the step on the result until it has been
        /// applied <paramref name="count"/> times.
        /// </summary>
        /// <param name="node">Node to start from.</param>
        /// <param name="name">Node name the child must have (compared with <c>==</c>).</param>
        /// <param name="count">Number of levels to descend; values of 1 or less descend once.</param>
        /// <returns>The node reached, or <c>null</c> if a step finds no matching child.</returns>
        public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1)
        {
            return node.ThenByFunc(
                parent => parent.ChildNodes.FirstOrDefault(child => child.Name == name),
                count);
        }

        /// <summary>
        /// Descends one level per entry in <paramref name="name"/>, each time taking the
        /// first direct child with the given node name.
        /// </summary>
        /// <param name="node">Node to start from.</param>
        /// <param name="name">Node names to follow, outermost first.</param>
        /// <returns>The node reached, or <c>null</c> if any step finds no matching child.</returns>
        public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name)
        {
            return node.DescendCollectionTree(ThenByName, name);
        }

        /// <summary>
        /// Adapter for step functions that take a repeat count: applies
        /// <paramref name="func"/> once per value with the count fixed to 1.
        /// </summary>
        /// <typeparam name="T">Type of the values driving each step.</typeparam>
        /// <param name="node">Node to start from.</param>
        /// <param name="func">Step function taking (current node, value, count).</param>
        /// <param name="values">Values to apply in order.</param>
        /// <returns>The node reached, or <c>null</c> if any step returns <c>null</c>.</returns>
        public static HtmlNode? DescendCollectionTree<T>(
            this HtmlNode node,
            Func<HtmlNode, T, int, HtmlNode?> func,
            params T[] values)
        {
            return node.DescendCollectionTree((current, value) => func(current, value, 1), values);
        }

        /// <summary>
        /// Folds <paramref name="values"/> over the tree: each value is handed to
        /// <paramref name="func"/> together with the node produced by the previous step.
        /// </summary>
        /// <typeparam name="T">Type of the values driving each step.</typeparam>
        /// <param name="node">Node to start from.</param>
        /// <param name="func">Step function taking (current node, value).</param>
        /// <param name="values">Values to apply in order.</param>
        /// <returns>
        /// The node reached after the last value, <paramref name="node"/> itself when
        /// <paramref name="values"/> is empty, or <c>null</c> as soon as a step returns <c>null</c>.
        /// </returns>
        public static HtmlNode? DescendCollectionTree<T>(
            this HtmlNode node,
            Func<HtmlNode, T, HtmlNode?> func,
            params T[] values)
        {
            HtmlNode? current = node;
            foreach (var value in values)
            {
                // A failed step ends the walk; later values are not evaluated.
                if (current is null)
                {
                    return null;
                }

                current = func(current, value);
            }

            return current;
        }

        /// <summary>
        /// Applies <paramref name="func"/> to <paramref name="node"/>, then to its own
        /// result, until it has been applied <paramref name="count"/> times or yields <c>null</c>.
        /// </summary>
        /// <param name="node">Node to start from.</param>
        /// <param name="func">Step function mapping a node to the next node (or <c>null</c>).</param>
        /// <param name="count">Number of applications; values of 1 or less apply once.</param>
        /// <returns>The final node, or <c>null</c> if any application returned <c>null</c>.</returns>
        public static HtmlNode? ThenByFunc(this HtmlNode node, Func<HtmlNode, HtmlNode?> func, int count = 1)
        {
            // The first application always happens, even when count <= 1.
            HtmlNode? current = func(node);
            for (int remaining = count - 1; remaining > 0 && current is not null; remaining--)
            {
                current = func(current);
            }

            return current;
        }

        /// <summary>
        /// Splits the node's <see cref="HtmlNode.InnerText"/> on <paramref name="separators"/>,
        /// dropping empty entries.
        /// </summary>
        /// <param name="node">Node to read; may be <c>null</c>.</param>
        /// <param name="separators">
        /// Delimiter passed to <see cref="string.Split(string, StringSplitOptions)"/>; that
        /// overload treats the whole string as one delimiter, not as a set of characters.
        /// </param>
        /// <returns>The non-empty pieces, or <c>null</c> when <paramref name="node"/> is <c>null</c>.</returns>
        public static string[]? SplitInnerText(this HtmlNode? node, string separators) =>
            node?.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        /// <summary>
        /// Predicate that accepts nodes whose name is exactly <c>p</c> or <c>h</c> followed by
        /// a single digit. Usable as the <c>filter</c> of <see cref="InnerLineSeparatedText"/>.
        /// </summary>
        /// <param name="n">Node to test.</param>
        /// <returns><c>true</c> when the node name matches; otherwise <c>false</c>.</returns>
        public static bool TextNodesOnly(HtmlNode n) => TextNodesOnlyRegex().IsMatch(n.Name);

        /// <summary>
        /// Joins the inner text of the node's direct children with <c>\n</c>, skipping
        /// children whose text is null, empty or whitespace.
        /// </summary>
        /// <param name="node">Node whose children are read; may be <c>null</c>.</param>
        /// <param name="filter">Optional predicate selecting which children to include; all when <c>null</c>.</param>
        /// <returns>
        /// The joined text, or an empty string when the node is <c>null</c>, has no children,
        /// or no child contributes any text.
        /// </returns>
        public static string InnerLineSeparatedText(this HtmlNode? node, Func<HtmlNode, bool>? filter = null)
        {
            var children = node?.ChildNodes;
            if (children is null || children.Count == 0)
            {
                return "";
            }

            IEnumerable<HtmlNode> selected = filter is null ? children : children.Where(filter);

            var lines = selected
                .Select(child => child.InnerText)
                .Where(text => !string.IsNullOrWhiteSpace(text));

            // string.Join yields "" for an empty sequence, so no separate fallback is needed.
            return string.Join("\n", lines);
        }

        // Anchored so that only the whole name matches; an unanchored "p|h\d" would also
        // accept any name merely containing "p" (e.g. "span", "pre", "input").
        [GeneratedRegex("^(?:p|h\\d)$")]
        private static partial Regex TextNodesOnlyRegex();
    }
}