// File viewer metadata (not part of the program): 71 lines, 2.7 KiB, C#
using HtmlAgilityPack;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace Beam.Dynamic {
|
|
public static partial class HtmlNodeExtensions {
|
|
/// <summary>
/// Descends from <paramref name="node"/> one level per entry in <paramref name="classes"/>,
/// moving at each level to the first child that has the corresponding CSS class.
/// </summary>
/// <param name="node">Node the descent starts from.</param>
/// <param name="classes">Class names to follow, one per level, in the order they are visited.</param>
/// <returns>
/// The node reached after the last class name, or <c>null</c> if some level has no matching child.
/// With no class names, <paramref name="node"/> itself is returned.
/// </returns>
public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes)
    => DescendCollectionTree<string>(node, ThenByClass, classes);
|
|
|
|
/// <summary>
/// Steps from <paramref name="node"/> to its first child that has the CSS class
/// <paramref name="class"/>, then repeats the same step from each node found.
/// </summary>
/// <param name="node">Node the search starts from.</param>
/// <param name="class">CSS class the child must have at every step.</param>
/// <param name="count">Number of steps to take; a value of 1 or less takes a single step.</param>
/// <returns>
/// The node reached after the final step, or <c>null</c> as soon as a step finds no matching child.
/// </returns>
public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1) {
    // One step: look only at the ChildNodes of the current node, not at deeper descendants.
    HtmlNode? FirstChildWithClass(HtmlNode parent)
        => parent.ChildNodes.FirstOrDefault(child => child.HasClass(@class));

    return node.ThenByFunc(FirstChildWithClass, count);
}
|
|
|
|
/// <summary>
/// Steps from <paramref name="node"/> to its first child whose <c>Name</c> equals
/// <paramref name="name"/>, then repeats the same step from each node found.
/// </summary>
/// <param name="node">Node the search starts from.</param>
/// <param name="name">Node name the child must have at every step (compared ordinally, case-sensitive).</param>
/// <param name="count">Number of steps to take; a value of 1 or less takes a single step.</param>
/// <returns>
/// The node reached after the final step, or <c>null</c> as soon as a step finds no matching child.
/// </returns>
public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1) {
    // One step: look only at the ChildNodes of the current node, not at deeper descendants.
    HtmlNode? FirstChildNamed(HtmlNode parent)
        => parent.ChildNodes.FirstOrDefault(child => string.Equals(child.Name, name, StringComparison.Ordinal));

    return node.ThenByFunc(FirstChildNamed, count);
}
|
|
|
|
/// <summary>
/// Descends from <paramref name="node"/> one level per entry in <paramref name="name"/>,
/// moving at each level to the first child whose <c>Name</c> equals the corresponding entry.
/// </summary>
/// <param name="node">Node the descent starts from.</param>
/// <param name="name">Node names to follow, one per level, in the order they are visited.</param>
/// <returns>
/// The node reached after the last name, or <c>null</c> if some level has no matching child.
/// With no names, <paramref name="node"/> itself is returned.
/// </returns>
public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name)
    => DescendCollectionTree<string>(node, ThenByName, name);
|
|
|
|
/// <summary>
/// Overload for step functions that also accept a repeat count. Each value in
/// <paramref name="values"/> is applied with the count fixed at 1, and the descent itself is
/// delegated to the overload taking a two-argument step function.
/// </summary>
/// <typeparam name="T">Type of the per-level values handed to <paramref name="func"/>.</typeparam>
/// <param name="node">Node the descent starts from.</param>
/// <param name="func">Step function receiving the current node, the current value and a count.</param>
/// <param name="values">Values to apply, one per level, in order.</param>
/// <returns>The node reached after the last value, or <c>null</c> if any step returns <c>null</c>.</returns>
public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, int, HtmlNode?> func, params T[] values) {
    // Pin the count argument to 1 so the three-argument step fits the two-argument overload.
    Func<HtmlNode, T, HtmlNode?> singleStep = (current, value) => func(current, value, 1);

    return node.DescendCollectionTree(singleStep, values);
}
|
|
|
|
/// <summary>
/// Starting at <paramref name="node"/>, applies <paramref name="func"/> once per entry in
/// <paramref name="values"/>, feeding the node returned by one call into the next.
/// </summary>
/// <typeparam name="T">Type of the per-level values handed to <paramref name="func"/>.</typeparam>
/// <param name="node">Node the descent starts from.</param>
/// <param name="func">Step function receiving the current node and the current value.</param>
/// <param name="values">Values to apply, one per level, in order.</param>
/// <returns>
/// The node produced by the last call, or <c>null</c> as soon as the current node becomes
/// <c>null</c> (remaining values are then skipped). With no values, <paramref name="node"/> is returned.
/// </returns>
public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, HtmlNode?> func, params T[] values) {
    HtmlNode? current = node;

    // The loop condition doubles as the early exit: once a step yields null, nothing further is called.
    for (int index = 0; index < values.Length && current is not null; index++) {
        current = func(current, values[index]);
    }

    return current;
}
|
|
|
|
/// <summary>
/// Applies <paramref name="func"/> to <paramref name="node"/>, then keeps applying it to each
/// result until it has been called <paramref name="count"/> times or a call returns <c>null</c>.
/// </summary>
/// <param name="node">Node passed to the first call.</param>
/// <param name="func">Step function mapping a node to the next node, or <c>null</c> when there is none.</param>
/// <param name="count">
/// Number of calls to make. <paramref name="func"/> is always called at least once, so any value
/// of 1 or less behaves like 1.
/// </param>
/// <returns>The result of the last call made, which is <c>null</c> if the chain broke early.</returns>
public static HtmlNode? ThenByFunc(this HtmlNode node, Func<HtmlNode, HtmlNode?> func, int count = 1) {
    // The first application is unconditional.
    HtmlNode? current = func(node);

    // Each pass performs one further application; stop early once there is no node to continue from.
    for (int remaining = count; remaining > 1 && current is not null; remaining--) {
        current = func(current);
    }

    return current;
}
|
|
|
|
/// <summary>
/// Splits the <c>InnerText</c> of <paramref name="node"/> on <paramref name="separators"/>,
/// dropping empty entries.
/// </summary>
/// <param name="node">Node whose text is split; may be <c>null</c>.</param>
/// <param name="separators">Delimiter passed to <c>string.Split</c>.</param>
/// <returns>The non-empty pieces, or <c>null</c> when <paramref name="node"/> is <c>null</c>.</returns>
public static string[]? SplitInnerText(this HtmlNode? node, string separators) {
    if (node is null) {
        return null;
    }

    // NOTE(review): the string overload of Split treats `separators` as ONE delimiter string,
    // not as a set of separator characters - confirm this matches what callers expect.
    return node.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);
}
|
|
|
|
/// <summary>
/// Predicate that reports whether the <c>Name</c> of <paramref name="n"/> is matched by
/// <see cref="TextNodesOnlyRegex"/>. Its signature fits the <c>filter</c> parameter of
/// <c>InnerLineSeparatedText</c>.
/// </summary>
/// <param name="n">Node whose name is tested.</param>
/// <returns><c>true</c> when the regex finds a match in the node name; otherwise <c>false</c>.</returns>
public static bool TextNodesOnly(HtmlNode n) {
    string nodeName = n.Name;
    return TextNodesOnlyRegex().IsMatch(nodeName);
}
|
|
|
|
/// <summary>
/// Collects the <c>InnerText</c> of the direct children of <paramref name="node"/> and joins the
/// non-blank ones with a line feed (<c>"\n"</c>).
/// </summary>
/// <param name="node">Node whose children are read; may be <c>null</c>.</param>
/// <param name="filter">
/// Optional predicate selecting which children contribute text. When <c>null</c>, every child is considered.
/// </param>
/// <returns>
/// The joined text, or an empty string when <paramref name="node"/> is <c>null</c>, has no children,
/// or none of the selected children has non-whitespace text.
/// </returns>
public static string InnerLineSeparatedText(this HtmlNode? node, Func<HtmlNode, bool>? filter = null) {
    // One pattern covers all three "nothing to read" cases: null node, null collection, empty collection.
    if (node?.ChildNodes is not { Count: > 0 } children) {
        return "";
    }

    IEnumerable<HtmlNode> selected = filter is null ? children : children.Where(filter);

    var lines = selected
        .Select(child => child.InnerText)
        .Where(text => !string.IsNullOrWhiteSpace(text));

    // string.Join builds the result in a single pass and returns "" for an empty sequence, so the
    // DefaultIfEmpty/Aggregate combination (quadratic string concatenation) is not needed.
    return string.Join("\n", lines);
}
|
|
|
|
/// <summary>
/// Source-generated regex that matches a node name which is exactly <c>p</c>, or <c>h</c>
/// followed by a single digit (for example <c>h1</c>).
/// </summary>
/// <remarks>
/// The pattern is anchored on purpose. Without the anchors the alternation matches any name that
/// merely contains "p" or "h" plus a digit, such as "span", "script", "pre" or "option".
/// </remarks>
[GeneratedRegex("^(?:p|h\\d)$")]
private static partial Regex TextNodesOnlyRegex();
|
|
}
|
|
}
|