Refactor data providers and update abstractions
- Removed obsolete data providers: `AnchorCollectionDataProvider`, `ContentsDataProvider`, and others, consolidating logic into new composable providers. - Added `ComposeDataProviders`, `SelectDataProvider`, and `RelationalDataProvider` for improved flexibility and reusability. - Introduced `IManySelectionComposableDataProvider` interface to support multiple-node selection. - Enhanced `UnitDownloader` with more robust progress tracking. - Updated package references and project dependencies for consistency. - Improved error handling in `StealthConfig` initialization for better fallback on browser drivers. - Incremented project version to 2.4.5.
This commit is contained in:
@@ -0,0 +1,83 @@
|
||||
using Beam.Abstractions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic;
|
||||
|
||||
/// <summary>
/// Composes several data providers into one: a chain of selecting providers narrows the input down
/// to a single node, and a final provider extracts the value from that node.
/// </summary>
/// <typeparam name="T">Type of the value produced by <see cref="GetWith"/>.</typeparam>
public class ComposeDataProviders<T> : IComposableDataProvider<T> {
    /// <summary>
    /// Selection chain, applied in order. The first provider selects from the input; every following
    /// provider selects from the node returned by the provider before it.
    /// </summary>
    public required IComposableDataProvider<object>[] SelectWith { get; init; }

    /// <summary>
    /// Provider that extracts the final value from the selected node.
    /// </summary>
    public required IComposableDataProvider<T> GetWith { get; init; }

    // Instances are only built through the Create factories, which validate their arguments.
    private ComposeDataProviders() {}

    /// <summary>
    /// Creates a composition with a single selecting provider.
    /// </summary>
    /// <param name="selectWith">Provider used to select the node.</param>
    /// <param name="getWith">Provider used to read the value from the selected node.</param>
    /// <returns>The composed data provider.</returns>
    /// <exception cref="ArgumentNullException">Thrown when an argument is <c>null</c>.</exception>
    public static ComposeDataProviders<T> Create(IComposableDataProvider<object> selectWith, IComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWith);
        return Create(new[] { selectWith }, getWith);
    }

    /// <summary>
    /// Creates a composition with a chain of selecting providers.
    /// </summary>
    /// <param name="selectWiths">Providers applied in order to narrow the selection; must contain at least one provider.</param>
    /// <param name="getWith">Provider used to read the value from the selected node.</param>
    /// <returns>The composed data provider.</returns>
    /// <exception cref="ArgumentNullException">Thrown when an argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">Thrown when <paramref name="selectWiths"/> is empty or contains a <c>null</c> entry.</exception>
    public static ComposeDataProviders<T> Create(IComposableDataProvider<object>[] selectWiths, IComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWiths);
        ArgumentNullException.ThrowIfNull(getWith);

        // Fail here rather than with an IndexOutOfRangeException on the first Select call.
        if (selectWiths.Length == 0)
            throw new ArgumentException("At least one selecting data provider is required.", nameof(selectWiths));
        if (Array.Exists(selectWiths, provider => provider is null))
            throw new ArgumentException("Selecting data providers must not be null.", nameof(selectWiths));

        return new ComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = selectWiths
        };
    }

    /// <summary>
    /// Composes the data providers, first selecting a node with <see cref="SelectWith"/>, then getting the data with <see cref="GetWith"/>.
    /// </summary>
    /// <param name="document">Document to select from.</param>
    /// <returns>The value read by <see cref="GetWith"/> from the selected node.</returns>
    /// <exception cref="InvalidOperationException">Thrown when the <see cref="SelectWith"/> chain selects no node.</exception>
    public T Get(HtmlDocument document) {
        var selected = Select(document);
        if (selected is null)
            throw new InvalidOperationException("Selection operation failed.");
        return GetWith.Get(selected);
    }

    /// <summary>
    /// Uses the <see cref="GetWith"/> data provider to get the data from the supplied node.
    /// The <see cref="SelectWith"/> chain is not applied.
    /// </summary>
    /// <param name="node">Node to read the value from.</param>
    /// <returns>The value read by <see cref="GetWith"/>.</returns>
    public T Get(HtmlNode node) {
        return GetWith.Get(node);
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain against the supplied document.
    /// </summary>
    /// <param name="doc">Document to select from.</param>
    /// <returns>The node selected by the last provider, or <c>null</c> as soon as any provider selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) {
        return ContinueSelection(SelectWith[0].Select(doc));
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain against the supplied node.
    /// </summary>
    /// <param name="node">Node to select from.</param>
    /// <returns>The node selected by the last provider, or <c>null</c> as soon as any provider selects nothing.</returns>
    public HtmlNode? Select(HtmlNode node) {
        return ContinueSelection(SelectWith[0].Select(node));
    }

    // Feeds the first provider's result through the remaining providers, stopping at the first null.
    private HtmlNode? ContinueSelection(HtmlNode? selected) {
        foreach (var provider in SelectWith.Skip(1)) {
            if (selected is null)
                return null;
            selected = provider.Select(selected);
        }

        return selected;
    }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
using Beam.Abstractions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic;
|
||||
|
||||
/// <summary>
/// Exposes <see cref="AnchorDataProvider"/> through the multi-node
/// <see cref="IManySelectionComposableDataProvider{T}"/> contract by applying the inherited
/// single-node <c>Get</c>/<c>Select</c> operations to each node and collecting the results in arrays.
/// </summary>
public class ManyAnchorsDataProvider : AnchorDataProvider, IManySelectionComposableDataProvider<string[]> {
    /// <summary>
    /// Gets the value the base provider reads from the document, wrapped in a one-element array.
    /// </summary>
    /// <param name="document">Document passed to the base provider.</param>
    /// <returns>An array holding exactly the base provider's result.</returns>
    public new string[] Get(HtmlDocument document) => new string[] { base.Get(document) };

    /// <summary>
    /// Gets the base provider's value for every supplied node, in input order.
    /// </summary>
    /// <param name="node">Nodes to read from.</param>
    /// <returns>One value per input node.</returns>
    public string[] ManyGet(HtmlNode[] node) {
        var values = node.Select(current => base.Get(current));
        return values.ToArray();
    }

    /// <summary>
    /// Selects a node from the document with the inherited <c>Select</c>.
    /// </summary>
    /// <param name="doc">Document to select from.</param>
    /// <returns>A one-element array with the selected node, or <c>null</c> when nothing was selected.</returns>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        var anchor = Select(doc);
        if (anchor == null)
            return null;
        return new HtmlNode[] { anchor };
    }

    /// <summary>
    /// Applies the inherited <c>Select</c> to each supplied node and keeps the non-null results.
    /// </summary>
    /// <param name="node">Nodes to select from.</param>
    /// <returns>The selected nodes in input order; nodes that yielded nothing are skipped.</returns>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        // OfType drops null entries, leaving only the nodes that were actually selected.
        return node.Select(current => Select(current)).OfType<HtmlNode>().ToArray();
    }
}
|
||||
@@ -0,0 +1,80 @@
|
||||
using Beam.Abstractions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic;
|
||||
|
||||
|
||||
/// <summary>
/// Multi-node counterpart of a composed data provider: a chain of selecting providers narrows the
/// input down to a set of nodes, and a final provider extracts the value from that set.
/// </summary>
/// <typeparam name="T">Type of the value produced by <see cref="GetWith"/>.</typeparam>
public class ManyComposeDataProviders<T> : IManySelectionComposableDataProvider<T> {
    /// <summary>
    /// Selection chain, applied in order. The first provider selects from the input; every following
    /// provider selects from the nodes returned by the provider before it.
    /// </summary>
    public required IManySelectionComposableDataProvider<object>[] SelectWith { get; init; }

    /// <summary>
    /// Provider that extracts the final value from the selected nodes.
    /// </summary>
    public required IManySelectionComposableDataProvider<T> GetWith { get; init; }

    // Instances are only built through the Create factories, which validate their arguments.
    private ManyComposeDataProviders() {}

    /// <summary>
    /// Creates a composition with a single selecting provider.
    /// </summary>
    /// <param name="selectWith">Provider used to select the nodes.</param>
    /// <param name="getWith">Provider used to read the value from the selected nodes.</param>
    /// <returns>The composed data provider.</returns>
    /// <exception cref="ArgumentNullException">Thrown when an argument is <c>null</c>.</exception>
    public static ManyComposeDataProviders<T> Create(IManySelectionComposableDataProvider<object> selectWith, IManySelectionComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWith);
        return Create(new[] { selectWith }, getWith);
    }

    /// <summary>
    /// Creates a composition with a chain of selecting providers.
    /// </summary>
    /// <param name="selectWiths">Providers applied in order to narrow the selection; must contain at least one provider.</param>
    /// <param name="getWith">Provider used to read the value from the selected nodes.</param>
    /// <returns>The composed data provider.</returns>
    /// <exception cref="ArgumentNullException">Thrown when an argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">Thrown when <paramref name="selectWiths"/> is empty or contains a <c>null</c> entry.</exception>
    public static ManyComposeDataProviders<T> Create(IManySelectionComposableDataProvider<object>[] selectWiths, IManySelectionComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWiths);
        ArgumentNullException.ThrowIfNull(getWith);

        // Fail here rather than with an IndexOutOfRangeException on the first SelectMany call.
        if (selectWiths.Length == 0)
            throw new ArgumentException("At least one selecting data provider is required.", nameof(selectWiths));
        if (Array.Exists(selectWiths, provider => provider is null))
            throw new ArgumentException("Selecting data providers must not be null.", nameof(selectWiths));

        return new ManyComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = selectWiths
        };
    }

    /// <summary>
    /// Composes the data providers, first selecting nodes with <see cref="SelectWith"/>, then getting the data with <see cref="GetWith"/>.
    /// </summary>
    /// <param name="document">Document to select from.</param>
    /// <returns>The value read by <see cref="GetWith"/> from the selected nodes.</returns>
    /// <exception cref="InvalidOperationException">Thrown when the <see cref="SelectWith"/> chain returns <c>null</c>.</exception>
    public T Get(HtmlDocument document) {
        var selected = SelectMany(document);
        if (selected is null)
            throw new InvalidOperationException("Selection operation failed.");
        return GetWith.ManyGet(selected);
    }

    /// <summary>
    /// Uses the <see cref="GetWith"/> data provider to get the data from the supplied nodes.
    /// The <see cref="SelectWith"/> chain is not applied.
    /// </summary>
    /// <param name="node">Nodes to read the value from.</param>
    /// <returns>The value read by <see cref="GetWith"/>.</returns>
    public T ManyGet(HtmlNode[] node) {
        return GetWith.ManyGet(node);
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain against the supplied document.
    /// </summary>
    /// <param name="doc">Document to select from.</param>
    /// <returns>The nodes selected by the last provider, or <c>null</c> as soon as any provider returns <c>null</c>.</returns>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        return ContinueSelection(SelectWith[0].SelectMany(doc));
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain against the supplied nodes.
    /// </summary>
    /// <param name="node">Nodes to select from.</param>
    /// <returns>The nodes selected by the last provider, or <c>null</c> as soon as any provider returns <c>null</c>.</returns>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return ContinueSelection(SelectWith[0].SelectMany(node));
    }

    // Feeds the first provider's result through the remaining providers, stopping at the first null.
    private HtmlNode[]? ContinueSelection(HtmlNode[]? selected) {
        foreach (var provider in SelectWith.Skip(1)) {
            if (selected is null)
                return null;
            selected = provider.SelectMany(selected);
        }

        return selected;
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using Beam.Abstractions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic;
|
||||
|
||||
/// <summary>
/// Structural relation used to move from a node to one of its relatives.
/// </summary>
public enum RelationType {
    /// <summary>Move to the node's parent.</summary>
    Parent = 0,

    /// <summary>Move to the node's first child.</summary>
    Child = 1
}
|
||||
|
||||
/// <summary>
/// Data provider that steps from a starting node to a structurally related node
/// (its parent or its first child, depending on <see cref="RelationType"/>).
/// </summary>
public class RelationalDataProvider : IComposableDataProvider<HtmlNode?> {
    /// <summary>
    /// Which relative of the starting node is returned. Defaults to <see cref="Beam.Dynamic.RelationType.Parent"/>.
    /// </summary>
    public RelationType RelationType { get; set; } = RelationType.Parent;

    /// <summary>
    /// Optional binding used to pick the starting node inside a document. When it is not set, or
    /// selects nothing, the document node is the starting node.
    /// </summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Same as <see cref="Select(HtmlDocument)"/>.
    /// </summary>
    /// <param name="document">Document to navigate in.</param>
    /// <returns>The related node, or <c>null</c> when there is none.</returns>
    public HtmlNode? Get(HtmlDocument document) => Select(document);

    /// <summary>
    /// Same as <see cref="Select(HtmlNode)"/>.
    /// </summary>
    /// <param name="node">Starting node.</param>
    /// <returns>The related node, or <c>null</c> when there is none.</returns>
    public HtmlNode? Get(HtmlNode node) => Select(node);

    /// <summary>
    /// Resolves the starting node through <see cref="Content"/> (falling back to the document node)
    /// and returns its relative.
    /// </summary>
    /// <param name="doc">Document to navigate in.</param>
    /// <returns>The related node, or <c>null</c> when there is none.</returns>
    public HtmlNode? Select(HtmlDocument doc) {
        var origin = Content?.Select(doc) ?? doc.DocumentNode;
        return Select(origin);
    }

    /// <summary>
    /// Returns the relative of <paramref name="node"/> chosen by <see cref="RelationType"/>.
    /// </summary>
    /// <param name="node">Starting node.</param>
    /// <returns>The parent or first child of <paramref name="node"/>, or <c>null</c> when it has none.</returns>
    /// <exception cref="NotSupportedException">Thrown when <see cref="RelationType"/> holds an unknown value.</exception>
    public HtmlNode? Select(HtmlNode node) {
        switch (RelationType) {
            case RelationType.Parent:
                return node.ParentNode;
            case RelationType.Child:
                return node.FirstChild;
            default:
                throw new NotSupportedException();
        }
    }
}
|
||||
@@ -0,0 +1,155 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Beam.Abstractions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic;
|
||||
|
||||
/// <summary>
/// Order in which a node's subtree is traversed during a search.
/// </summary>
public enum SearchStrategy {
    /// <summary>A node's children are visited before the node's remaining siblings.</summary>
    DepthFirst = 0,

    /// <summary>A node's children are queued behind all nodes that are already waiting to be visited.</summary>
    BreadthFirst = 1
}
|
||||
|
||||
/// <summary>
/// Chooses the node that is returned as a fallback when a search finds no matching node.
/// </summary>
public enum SearchStringDefaultSelection {
    /// <summary>Fall back to the first node visited, i.e. the node the search started from.</summary>
    First = 0,

    /// <summary>Fall back to the last node visited by the traversal.</summary>
    Last = 1,

    /// <summary>Fall back to the visited node with the most direct child nodes (the earliest one on ties).</summary>
    GreatestChildren = 2,

    /// <summary>No preference; the search currently falls back to the node it started from.</summary>
    Any = 3,

    /// <summary>No fallback: a search without matches yields no node.</summary>
    None = 4
}
|
||||
|
||||
/// <summary>
/// Options that control how a search string is matched against nodes.
/// </summary>
public class SearchStringOptions {
    /// <summary>
    /// When <c>true</c> the search string is interpreted as a regular expression;
    /// otherwise it is compared using <see cref="UseComparer"/>.
    /// </summary>
    public required bool SearchStringIsRegex { get; set; }

    /// <summary>
    /// When <c>true</c> the search string is matched against a node's inner text.
    /// </summary>
    public required bool SearchInBody { get; set; }

    /// <summary>
    /// Name of the attribute whose value is matched against the search string,
    /// or <c>null</c> to skip attribute matching.
    /// </summary>
    public required string? SearchInAttribute { get; set; }

    /// <summary>
    /// Fallback used when no node satisfies the search criteria, which includes the case where
    /// <see cref="SearchInBody"/> is <c>false</c> and <see cref="SearchInAttribute"/> is <c>null</c>.
    /// Defaults to <see cref="SearchStringDefaultSelection.First"/>.
    /// </summary>
    public SearchStringDefaultSelection DefaultSelection { get; set; } =
        SearchStringDefaultSelection.First;

    /// <summary>
    /// Comparer for literal matching; only consulted when <see cref="SearchStringIsRegex"/> is <c>false</c>.
    /// Defaults to <see cref="StringComparer.CurrentCulture"/>.
    /// </summary>
    public IEqualityComparer<string> UseComparer { get; set; } =
        StringComparer.CurrentCulture;
}
|
||||
|
||||
/// <summary>
/// Data provider that traverses a node's subtree and selects the nodes whose inner text and/or
/// attribute value match <see cref="SearchString"/>, as configured by <see cref="SearchStringOptions"/>.
/// </summary>
public class SelectDataProvider : IComposableDataProvider<HtmlNode?>, IManySelectionComposableDataProvider<HtmlNode?> {
    /// <summary>
    /// Order in which the subtree is traversed. Defaults to <see cref="SearchStrategy.DepthFirst"/>.
    /// </summary>
    public SearchStrategy SearchStrategyType { get; set; } = SearchStrategy.DepthFirst;

    /// <summary>
    /// Matching options. By default the search string is matched literally against inner text only.
    /// </summary>
    public SearchStringOptions SearchStringOptions { get; set; } = new SearchStringOptions() {
        SearchStringIsRegex = false,
        SearchInBody = true,
        SearchInAttribute = null
    };

    /// <summary>
    /// Text or regular expression to search for; <c>null</c> is treated as an empty string.
    /// </summary>
    public string? SearchString { get; set; }

    /// <summary>
    /// Optional binding used to pick the start node inside a document. When it is not set, or
    /// selects nothing, the search starts at the document node.
    /// </summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the first node that matches the search criteria, or the configured fallback node.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The selected node, or <c>null</c> when nothing matched and no fallback is configured.</returns>
    public HtmlNode? Get(HtmlDocument document) {
        return Select(document);
    }

    /// <summary>
    /// Returns the first node in the subtree of <paramref name="node"/> that matches the search
    /// criteria, or the configured fallback node.
    /// </summary>
    /// <param name="node">Root of the subtree to search.</param>
    /// <returns>The selected node, or <c>null</c> when nothing matched and no fallback is configured.</returns>
    public HtmlNode? Get(HtmlNode node) {
        return Select(node);
    }

    /// <summary>
    /// Not supported by this provider.
    /// </summary>
    /// <param name="node">Ignored.</param>
    /// <returns>Never returns.</returns>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public HtmlNode? Get(HtmlNode[] node) {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Traverses the subtree rooted at <paramref name="node"/> (the root included) in
    /// <see cref="SearchStrategyType"/> order and collects every node that matches the search criteria.
    /// </summary>
    /// <remarks>
    /// A node matches when its inner text matches (if <see cref="SearchStringOptions.SearchInBody"/> is set)
    /// or the value of the configured attribute matches (if <see cref="SearchStringOptions.SearchInAttribute"/>
    /// is set). Each matching node is reported once. When nothing matches, the node chosen by
    /// <see cref="SearchStringOptions.DefaultSelection"/> is returned instead, unless it is
    /// <see cref="SearchStringDefaultSelection.None"/>.
    /// </remarks>
    /// <param name="node">Root of the subtree to search.</param>
    /// <returns>The matching nodes in traversal order, the single fallback node, or an empty array.</returns>
    /// <exception cref="NotSupportedException">Thrown when <see cref="SearchStrategyType"/> holds an unknown value.</exception>
    public HtmlNode[]? _Select(HtmlNode node) {
        var options = SearchStringOptions;
        var pattern = SearchString ?? "";

        // Work list of nodes still to visit; the traversal strategy decides where children are inserted.
        LinkedList<HtmlNode> searchSet = new();
        HashSet<HtmlNode> visited = [node];

        // Breadth-first: children wait behind everything that is already queued.
        void EnqueueBreadthFirst(LinkedListNode<HtmlNode> position) {
            foreach (var child in position.Value.ChildNodes) {
                if (visited.Add(child)) {
                    searchSet.AddLast(child);
                }
            }
        }

        // Depth-first: children go directly after the current node. They are inserted in reverse
        // so that they end up in document order.
        void EnqueueDepthFirst(LinkedListNode<HtmlNode> position) {
            foreach (var child in position.Value.ChildNodes.Reverse()) {
                if (visited.Add(child)) {
                    searchSet.AddAfter(position, child);
                }
            }
        }

        Action<LinkedListNode<HtmlNode>> enqueueChildren = SearchStrategyType switch {
            SearchStrategy.BreadthFirst => EnqueueBreadthFirst,
            SearchStrategy.DepthFirst => EnqueueDepthFirst,
            _ => throw new NotSupportedException()
        };

        // The comparer is consulted only in literal mode; a failed regex match is simply "no match".
        bool MatchesBody(HtmlNode candidate) {
            if (!options.SearchInBody) {
                return false;
            }

            return options.SearchStringIsRegex
                ? Regex.IsMatch(candidate.InnerText ?? "", pattern)
                : options.UseComparer.Equals(candidate.InnerText, pattern);
        }

        bool MatchesAttribute(HtmlNode candidate) {
            if (options.SearchInAttribute is null) {
                return false;
            }

            // null signals that the attribute is absent on this node.
            string? value = candidate.GetAttributeValue(options.SearchInAttribute, null);

            return options.SearchStringIsRegex
                ? value is not null && Regex.IsMatch(value, pattern)
                : options.UseComparer.Equals(value, pattern);
        }

        var fallback = node;
        List<HtmlNode> selected = [];

        LinkedListNode<HtmlNode>? cursor = searchSet.AddLast(node);
        while (cursor is not null) {
            var current = cursor.Value;

            // A node that matches on both body and attribute is still reported only once.
            if (MatchesBody(current) || MatchesAttribute(current)) {
                selected.Add(current);
            }

            // Keep track of the fallback candidate while walking, so no second pass is needed.
            switch (options.DefaultSelection) {
                case SearchStringDefaultSelection.GreatestChildren:
                    if (current.ChildNodes.Count > fallback.ChildNodes.Count) {
                        fallback = current;
                    }
                    break;
                case SearchStringDefaultSelection.Last:
                    fallback = current;
                    break;
                case SearchStringDefaultSelection.Any:
                case SearchStringDefaultSelection.First:
                case SearchStringDefaultSelection.None:
                default:
                    // The start node stays the fallback.
                    break;
            }

            enqueueChildren(cursor);
            cursor = cursor.Next;
        }

        if (selected.Count == 0 && options.DefaultSelection != SearchStringDefaultSelection.None) {
            selected.Add(fallback);
        }

        return selected.ToArray();
    }

    /// <summary>
    /// Searches the document, starting at the node chosen by <see cref="Content"/> or at the document node.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The first selected node, or <c>null</c> when nothing was selected.</returns>
    public HtmlNode? Select(HtmlDocument document) {
        return Select(Content?.Select(document) ?? document.DocumentNode);
    }

    /// <summary>
    /// Searches the subtree rooted at <paramref name="node"/>.
    /// </summary>
    /// <param name="node">Root of the subtree to search.</param>
    /// <returns>The first selected node, or <c>null</c> when nothing was selected.</returns>
    public HtmlNode? Select(HtmlNode node) {
        return _Select(node)?.FirstOrDefault();
    }

    /// <summary>
    /// Not supported by this provider.
    /// </summary>
    /// <param name="node">Ignored.</param>
    /// <returns>Never returns.</returns>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public HtmlNode? ManyGet(HtmlNode[] node) {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Searches the document, starting at the node chosen by <see cref="Content"/> or at the
    /// document node, and returns every selected node.
    /// </summary>
    /// <param name="doc">Document to search.</param>
    /// <returns>All selected nodes in traversal order.</returns>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        return _Select(Content?.Select(doc) ?? doc.DocumentNode);
    }

    /// <summary>
    /// Searches the subtree of every supplied node and concatenates the results in input order.
    /// </summary>
    /// <param name="node">Roots of the subtrees to search.</param>
    /// <returns>All selected nodes.</returns>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return node.SelectMany(x => _Select(x) ?? []).ToArray();
    }
}
|
||||
Reference in New Issue
Block a user