Refactor data providers and update abstractions

- Removed obsolete data providers: `AnchorCollectionDataProvider`, `ContentsDataProvider`, and others, consolidating logic into new composable providers.
- Added `ComposeDataProviders`, `SelectDataProvider`, and `RelationalDataProvider` for improved flexibility and reusability.
- Introduced `IManySelectionComposableDataProvider` interface to support multiple-node selection.
- Enhanced `UnitDownloader` with more robust progress tracking.
- Updated package references and project dependencies for consistency.
- Improved error handling in `StealthConfig` initialization for better fallback on browser drivers.
- Incremented project version to 2.4.5.
This commit is contained in:
qwsdcvghyu89
2025-11-14 03:41:13 +11:00
parent 2958a26e4f
commit 18c5ad83da
27 changed files with 510 additions and 248 deletions
@@ -0,0 +1,12 @@
using HtmlAgilityPack;
namespace Beam.Abstractions;
/// <summary>
/// Non-generic shorthand for <see cref="IComposableDataProvider{T}"/> with <see cref="object"/> as the value type.
/// </summary>
public interface IComposableDataProvider : IComposableDataProvider<object>
{
}

/// <summary>
/// A data provider whose node-selection step and value-extraction step are exposed
/// as separate members, on top of whatever <see cref="IDataProvider{T}"/> declares.
/// </summary>
/// <typeparam name="T">Type of the value produced (covariant).</typeparam>
public interface IComposableDataProvider<out T> : IDataProvider<T>
{
    /// <summary>Produces a value from the supplied node.</summary>
    /// <param name="node">Node to read the value from.</param>
    T Get(HtmlNode node);

    /// <summary>Selects a node starting from a whole document.</summary>
    /// <param name="doc">Document to select from.</param>
    /// <returns>The selected node; may be <c>null</c>.</returns>
    HtmlNode? Select(HtmlDocument doc);

    /// <summary>Selects a node starting from another node.</summary>
    /// <param name="node">Node to select from.</param>
    /// <returns>The selected node; may be <c>null</c>.</returns>
    HtmlNode? Select(HtmlNode node);
}
@@ -0,0 +1,9 @@
using HtmlAgilityPack;
namespace Beam.Abstractions;
/// <summary>
/// Multi-node counterpart of a composable data provider: selection yields an array
/// of nodes and the value is produced from an array of nodes.
/// </summary>
/// <typeparam name="T">Type of the value produced (covariant).</typeparam>
public interface IManySelectionComposableDataProvider<out T> : IDataProvider<T>
{
    /// <summary>Produces a value from the supplied nodes.</summary>
    /// <param name="node">Nodes to read the value from.</param>
    T ManyGet(HtmlNode[] node);

    /// <summary>Selects nodes starting from a whole document.</summary>
    /// <param name="doc">Document to select from.</param>
    /// <returns>The selected nodes; may be <c>null</c>.</returns>
    HtmlNode[]? SelectMany(HtmlDocument doc);

    /// <summary>Selects nodes starting from a set of already-selected nodes.</summary>
    /// <param name="node">Nodes to select from.</param>
    /// <returns>The selected nodes; may be <c>null</c>.</returns>
    HtmlNode[]? SelectMany(HtmlNode[] node);
}
+3 -2
View File
@@ -3,12 +3,14 @@ using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Net.Http;
using System.Net.Http.Json; using System.Net.Http.Json;
using System.Text; using System.Text;
using System.Text.Json; using System.Text.Json;
using System.Threading;
using System.Threading.Tasks; using System.Threading.Tasks;
namespace Beam { namespace Beam.Api;
public class ApiCall(HttpClient client, string uri, HttpMethod method, KeyValuePair<string, string[]>[] headers, object? requestData, object? body, params HashSet<HttpStatusCode> successCodes) { public class ApiCall(HttpClient client, string uri, HttpMethod method, KeyValuePair<string, string[]>[] headers, object? requestData, object? body, params HashSet<HttpStatusCode> successCodes) {
public HttpClient Client { get; } = client; public HttpClient Client { get; } = client;
public object? RequestData { get; } = requestData; public object? RequestData { get; } = requestData;
@@ -63,4 +65,3 @@ namespace Beam {
public static async Task<ApiResponse> Get(HttpClient client, string url, ILoggerFactory factory) public static async Task<ApiResponse> Get(HttpClient client, string url, ILoggerFactory factory)
=> await new ApiCall(client, url, HttpMethod.Get, [], null, null).GetResponse(factory.CreateLogger<ApiResponse>()); => await new ApiCall(client, url, HttpMethod.Get, [], null, null).GetResponse(factory.CreateLogger<ApiResponse>());
} }
}
+3 -2
View File
@@ -2,11 +2,12 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Net.Http;
using System.Reflection.PortableExecutable; using System.Reflection.PortableExecutable;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
namespace Beam { namespace Beam.Api;
public class ApiCallBuilder(HttpClient client) { public class ApiCallBuilder(HttpClient client) {
HttpClient Client = client; HttpClient Client = client;
string Uri; string Uri;
@@ -77,4 +78,4 @@ namespace Beam {
return new ApiCall(Client, Uri, Method, Headers.Select((x) => new KeyValuePair<string, string[]>(x.Key, x.Value.ToArray())).ToArray(), Data, Body, SuccessCodes); return new ApiCall(Client, Uri, Method, Headers.Select((x) => new KeyValuePair<string, string[]>(x.Key, x.Value.ToArray())).ToArray(), Data, Body, SuccessCodes);
} }
} }
}
+8 -2
View File
@@ -1,9 +1,15 @@
// ApiCalls.cs // ApiCalls.cs
using System;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Net; using System.Net;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace Beam { namespace Beam.Api;
/// <summary> /// <summary>
/// Executes a batch of <see cref="ApiCall"/>s using either sequential or parallel strategy. /// Executes a batch of <see cref="ApiCall"/>s using either sequential or parallel strategy.
/// </summary> /// </summary>
@@ -45,4 +51,4 @@ namespace Beam {
return bag.OrderBy(x => x.idx).Select(x => x.res).ToList(); return bag.OrderBy(x => x.idx).Select(x => x.res).ToList();
} }
} }
}
+4 -2
View File
@@ -1,7 +1,10 @@
// ApiCallsBuilder.cs // ApiCallsBuilder.cs
using System;
using System.Collections.Generic;
using System.Net; using System.Net;
namespace Beam { namespace Beam.Api;
/// <summary> /// <summary>
/// Fluent builder for <see cref="ApiCalls"/>. /// Fluent builder for <see cref="ApiCalls"/>.
/// </summary> /// </summary>
@@ -44,4 +47,3 @@ namespace Beam {
return new ApiCalls(_calls, _parallelism); return new ApiCalls(_calls, _parallelism);
} }
} }
}
+7 -3
View File
@@ -1,10 +1,15 @@
using Microsoft.Extensions.Logging; using System;
using System.IO;
using Microsoft.Extensions.Logging;
using System.Net; using System.Net;
using System.Net.Http;
using System.Net.Http.Json; using System.Net.Http.Json;
using System.Text; using System.Text;
using System.Text.Json; using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
namespace Beam { namespace Beam.Api;
/// <summary> /// <summary>
/// Wrapper that lets the response body be read any number of times (even concurrently). /// Wrapper that lets the response body be read any number of times (even concurrently).
/// </summary> /// </summary>
@@ -78,5 +83,4 @@ namespace Beam {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content."); if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
return Task.FromResult<Stream>(new MemoryStream(_buffer, writable: false)); return Task.FromResult<Stream>(new MemoryStream(_buffer, writable: false));
} }
}
} }
+1 -1
View File
@@ -7,7 +7,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging" Version="10.0.0-rc.1.25451.107" /> <PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
+7 -1
View File
@@ -44,8 +44,14 @@ namespace Beam.Downloaders {
//Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri); //Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri);
links.Add(new Ordered<string>(LinksEnumerator.Current, LastOrder++)); links.Add(new Ordered<string>(LinksEnumerator.Current, LastOrder++));
while (LinksEnumerator.MoveNext() && !string.IsNullOrWhiteSpace(LinksEnumerator.Current) && links.Count < idealLinkCount) while (links.Count < idealLinkCount && LinksEnumerator.MoveNext()) {
if (string.IsNullOrWhiteSpace(LinksEnumerator.Current)) {
return false;
}
links.Add(new Ordered<string>(LinksEnumerator.Current, LastOrder++)); links.Add(new Ordered<string>(LinksEnumerator.Current, LastOrder++));
}
//Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count); //Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count);
if (links.Count == 0) { if (links.Count == 0) {
Logger?.LogInformation("Out of links!"); Logger?.LogInformation("Out of links!");
+11 -1
View File
@@ -28,12 +28,22 @@ namespace Beam.Downloaders {
byte[] buffer = new byte[bufferSize]; byte[] buffer = new byte[bufferSize];
int inBuffer = 0; int inBuffer = 0;
long downloaded = 0; long downloaded = 0;
long? remaining() {
try {
return stream.Length - downloaded;
}
catch {
return null;
}
}
while ((inBuffer = stream.Read(buffer)) > 0) { while ((inBuffer = stream.Read(buffer)) > 0) {
downloaded += inBuffer; downloaded += inBuffer;
await destinationStream.WriteAsync(buffer.AsMemory(0, inBuffer), ct); await destinationStream.WriteAsync(buffer.AsMemory(0, inBuffer), ct);
progress?.Report(new DownloadReport() { progress?.Report(new DownloadReport() {
BytesDownloaded = inBuffer, BytesDownloaded = inBuffer,
BytesRemaining = stream.Length - downloaded BytesRemaining = remaining()
}); });
ct.ThrowIfCancellationRequested(); ct.ThrowIfCancellationRequested();
@@ -1,40 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Collects the non-blank <c>href</c> attribute values of every descendant of the bound node.
/// </summary>
public class AnchorCollectionDataProvider : IDataProvider<string[]> {
    /// <summary>Binding that locates the container node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base address; not consulted by <see cref="Get"/> in this class.</summary>
    public string? RelativeTo { get; set; }

    // Joins a base and a relative part with exactly one '/' between them.
    // Returns the relative part untouched when there is no base.
    private string GetAbsolute(string? @base, string relative) {
        if (@base is null)
            return relative;

        var head = @base.EndsWith('/') ? @base[..^1] : @base;
        var tail = relative.StartsWith('/') ? relative[1..] : relative;
        return head + '/' + tail;
    }

    /// <summary>
    /// Returns the <c>href</c> values found on the descendants of the bound node.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// All non-blank <c>href</c> values in descendant order; an empty array when
    /// <see cref="Content"/> is unset or selects nothing.
    /// </returns>
    public string[] Get(HtmlDocument document) {
        var container = Content?.Select(document);
        if (container is null)
            return [];

        return container.Descendants()
            .Select(descendant => descendant.GetAttributeValue("href", ""))
            .Where(href => !string.IsNullOrWhiteSpace(href))
            .ToArray();
    }
}
}
-21
View File
@@ -1,21 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Reads the <c>href</c> attribute of the node located by <see cref="Content"/>.
/// </summary>
public class AnchorDataProvider : IDataProvider<string> {
    /// <summary>Binding that locates the anchor node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the <c>href</c> value of the bound node.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The attribute value, or an empty string when <see cref="Content"/> is unset,
    /// selects nothing, or the attribute is absent.
    /// </returns>
    public string Get(HtmlDocument document)
        => Content?.Select(document)?.GetAttributeValue("href", "") ?? "";
}
}
-16
View File
@@ -1,16 +0,0 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic {
/// <summary>
/// Splits the inner text of the bound node into an array using <see cref="ArrayDelimiters"/>.
/// </summary>
public class ContentsArrayDataProvider : ContentsDataProvider, IDataProvider<string[]> {
    /// <summary>Separators used to split the text; defaults to a single <c>";"</c>.</summary>
    public string[] ArrayDelimiters { get; set; } = [";"];

    /// <summary>
    /// Returns the non-empty pieces of the bound node's inner text.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The split text with empty entries removed; an empty array when the binding is
    /// unset, selects nothing, or the text is <c>null</c>.
    /// </returns>
    string[] IDataProvider<string[]>.Get(HtmlDocument document) {
        var text = Content?.Select(document)?.InnerText;
        if (text is null)
            return [];

        return text.Split(ArrayDelimiters, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
-21
View File
@@ -1,21 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Reads the inner text of the node located by <see cref="Content"/>.
/// </summary>
public class ContentsDataProvider : IDataProvider<string> {
    /// <summary>Binding that locates the node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the inner text of the bound node.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The inner text, or an empty string when <see cref="Content"/> is unset,
    /// selects nothing, or the text is <c>null</c>.
    /// </returns>
    public string Get(HtmlDocument document)
        => Content?.Select(document)?.InnerText ?? "";
}
}
@@ -28,7 +28,7 @@ public class DataProviderJsonTypeInfoResolver : DefaultJsonTypeInfoResolver {
new JsonDerivedType(typeof(ContentsDataProvider), "single"), new JsonDerivedType(typeof(ContentsDataProvider), "single"),
new JsonDerivedType(typeof(DropDownDataProvider), "dropdown"), new JsonDerivedType(typeof(DropDownDataProvider), "dropdown"),
new JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list"), new JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list"),
new JsonDerivedType(typeof(AnchorDataProvider), "anchor") new JsonDerivedType(typeof(AnchorDataProvider), "anchor"),
} }
}; };
} }
@@ -0,0 +1,83 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Allows composition of different data providers to adapt to different types of data.
/// </summary>
/// <typeparam name="T"></typeparam>
public class ComposeDataProviders<T> : IComposableDataProvider<T> {
    /// <summary>
    /// Selectors applied in order: the first one runs against the document (or start node),
    /// each following one runs against the node the previous one returned.
    /// </summary>
    public required IComposableDataProvider<object>[] SelectWith { get; init; }

    /// <summary>Provider that extracts the final value from the selected node.</summary>
    public required IComposableDataProvider<T> GetWith { get; init; }

    // Instances are only built through the Create factories.
    private ComposeDataProviders() {}

    /// <summary>Creates a composition with a single selector.</summary>
    /// <param name="selectWith">Selector that picks the node.</param>
    /// <param name="getWith">Provider that extracts the value from the picked node.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static ComposeDataProviders<T> Create(IComposableDataProvider<object> selectWith, IComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWith);
        ArgumentNullException.ThrowIfNull(getWith);
        return new ComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = [selectWith]
        };
    }

    /// <summary>Creates a composition with a chain of selectors.</summary>
    /// <param name="selectWiths">Selectors, applied in array order.</param>
    /// <param name="getWith">Provider that extracts the value from the picked node.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static ComposeDataProviders<T> Create(IComposableDataProvider<object>[] selectWiths, IComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWiths);
        ArgumentNullException.ThrowIfNull(getWith);
        return new ComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = selectWiths
        };
    }

    /// <summary>
    /// Composes the data providers, first selecting a node with <see cref="SelectWith"/>, then getting the data with <see cref="GetWith"/>.
    /// </summary>
    /// <param name="document">Document to select from.</param>
    /// <returns>The value <see cref="GetWith"/> produces for the selected node.</returns>
    /// <exception cref="InvalidOperationException">
    /// The selector chain returned <c>null</c>, or <see cref="SelectWith"/> is empty.
    /// </exception>
    public T Get(HtmlDocument document) {
        var selected = Select(document);
        if (selected is null)
            throw new InvalidOperationException("Selection operation failed.");

        return GetWith.Get(selected);
    }

    /// <summary>
    /// Uses the <see cref="GetWith"/> data provider to get the data from the supplied node.
    /// No selection is performed.
    /// </summary>
    /// <param name="node">Node handed to <see cref="GetWith"/> as-is.</param>
    /// <returns>The value <see cref="GetWith"/> produces for <paramref name="node"/>.</returns>
    public T Get(HtmlNode node) {
        return GetWith.Get(node);
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain, starting from the supplied document.
    /// </summary>
    /// <param name="doc">Document the first selector runs against.</param>
    /// <returns>The node the last selector returned, or <c>null</c> as soon as any selector returns <c>null</c>.</returns>
    /// <exception cref="InvalidOperationException"><see cref="SelectWith"/> is empty.</exception>
    public HtmlNode? Select(HtmlDocument doc) {
        return ApplyRemainingSelectors(FirstSelector().Select(doc));
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain, starting from the supplied node.
    /// </summary>
    /// <param name="node">Node the first selector runs against.</param>
    /// <returns>The node the last selector returned, or <c>null</c> as soon as any selector returns <c>null</c>.</returns>
    /// <exception cref="InvalidOperationException"><see cref="SelectWith"/> is empty.</exception>
    public HtmlNode? Select(HtmlNode node) {
        return ApplyRemainingSelectors(FirstSelector().Select(node));
    }

    // Returns the head of the chain, failing with a descriptive error instead of an
    // IndexOutOfRangeException when no selector was configured.
    private IComposableDataProvider<object> FirstSelector() {
        if (SelectWith.Length == 0)
            throw new InvalidOperationException("At least one selector is required in SelectWith.");

        return SelectWith[0];
    }

    // Feeds the result of the first selector through selectors 1..n-1,
    // stopping at the first null.
    private HtmlNode? ApplyRemainingSelectors(HtmlNode? selected) {
        for (var i = 1; i < SelectWith.Length; i++) {
            if (selected is null)
                return null;

            selected = SelectWith[i].Select(selected);
        }

        return selected;
    }
}
@@ -0,0 +1,20 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Array-producing variant of <see cref="AnchorDataProvider"/> that can work on several nodes at once.
/// </summary>
public class ManyAnchorsDataProvider : AnchorDataProvider, IManySelectionComposableDataProvider<string[]> {
    /// <summary>Wraps the single value of the base provider in a one-element array.</summary>
    /// <param name="document">Document passed to the base provider.</param>
    public new string[] Get(HtmlDocument document)
        => [base.Get(document)];

    /// <summary>Runs the base provider's node-level <c>Get</c> on every supplied node.</summary>
    /// <param name="node">Nodes to read from.</param>
    /// <returns>One value per input node, in input order.</returns>
    public string[] ManyGet(HtmlNode[] node)
        => (from current in node select base.Get(current)).ToArray();

    /// <summary>Selects from the document and wraps the hit in a one-element array.</summary>
    /// <param name="doc">Document to select from.</param>
    /// <returns>A one-element array, or <c>null</c> when nothing was selected.</returns>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        var hit = Select(doc);
        if (hit is null)
            return null;

        return [hit];
    }

    /// <summary>Selects once per supplied node and keeps the non-null results.</summary>
    /// <param name="node">Nodes to select from.</param>
    /// <returns>The non-null selections, in input order.</returns>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        // OfType drops null entries, so only successful selections remain.
        return node.Select(current => Select(current)).OfType<HtmlNode>().ToArray();
    }
}
@@ -0,0 +1,80 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Composes multi-node data providers: a chain of selectors narrows down to a set of nodes,
/// then a final provider extracts the value from that set.
/// </summary>
/// <typeparam name="T">Type of the value produced by <see cref="GetWith"/>.</typeparam>
public class ManyComposeDataProviders<T> : IManySelectionComposableDataProvider<T> {
    /// <summary>
    /// Selectors applied in order: the first one runs against the document (or start nodes),
    /// each following one runs against the nodes the previous one returned.
    /// </summary>
    public required IManySelectionComposableDataProvider<object>[] SelectWith { get; init; }

    /// <summary>Provider that extracts the final value from the selected nodes.</summary>
    public required IManySelectionComposableDataProvider<T> GetWith { get; init; }

    // Instances are only built through the Create factories.
    private ManyComposeDataProviders() {}

    /// <summary>Creates a composition with a single selector.</summary>
    /// <param name="selectWith">Selector that picks the nodes.</param>
    /// <param name="getWith">Provider that extracts the value from the picked nodes.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static ManyComposeDataProviders<T> Create(IManySelectionComposableDataProvider<object> selectWith, IManySelectionComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWith);
        ArgumentNullException.ThrowIfNull(getWith);
        return new ManyComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = [selectWith]
        };
    }

    /// <summary>Creates a composition with a chain of selectors.</summary>
    /// <param name="selectWiths">Selectors, applied in array order.</param>
    /// <param name="getWith">Provider that extracts the value from the picked nodes.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static ManyComposeDataProviders<T> Create(IManySelectionComposableDataProvider<object>[] selectWiths, IManySelectionComposableDataProvider<T> getWith) {
        ArgumentNullException.ThrowIfNull(selectWiths);
        ArgumentNullException.ThrowIfNull(getWith);
        return new ManyComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = selectWiths
        };
    }

    /// <summary>
    /// Composes the data providers, first selecting nodes with <see cref="SelectWith"/>, then getting the data with <see cref="GetWith"/>.
    /// </summary>
    /// <param name="document">Document to select from.</param>
    /// <returns>The value <see cref="GetWith"/> produces for the selected nodes.</returns>
    /// <exception cref="InvalidOperationException">
    /// The selector chain returned <c>null</c>, or <see cref="SelectWith"/> is empty.
    /// </exception>
    public T Get(HtmlDocument document) {
        var selected = SelectMany(document);
        if (selected is null)
            throw new InvalidOperationException("Selection operation failed.");

        return GetWith.ManyGet(selected);
    }

    /// <summary>
    /// Uses the <see cref="GetWith"/> data provider to get the data from the supplied nodes.
    /// No selection is performed.
    /// </summary>
    /// <param name="node">Nodes handed to <see cref="GetWith"/> as-is.</param>
    /// <returns>The value <see cref="GetWith"/> produces for <paramref name="node"/>.</returns>
    public T ManyGet(HtmlNode[] node) {
        return GetWith.ManyGet(node);
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain, starting from the supplied document.
    /// </summary>
    /// <param name="doc">Document the first selector runs against.</param>
    /// <returns>The nodes the last selector returned, or <c>null</c> as soon as any selector returns <c>null</c>.</returns>
    /// <exception cref="InvalidOperationException"><see cref="SelectWith"/> is empty.</exception>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        return ApplyRemainingSelectors(FirstSelector().SelectMany(doc));
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain, starting from the supplied nodes.
    /// </summary>
    /// <param name="node">Nodes the first selector runs against.</param>
    /// <returns>The nodes the last selector returned, or <c>null</c> as soon as any selector returns <c>null</c>.</returns>
    /// <exception cref="InvalidOperationException"><see cref="SelectWith"/> is empty.</exception>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return ApplyRemainingSelectors(FirstSelector().SelectMany(node));
    }

    // Returns the head of the chain, failing with a descriptive error instead of an
    // IndexOutOfRangeException when no selector was configured.
    private IManySelectionComposableDataProvider<object> FirstSelector() {
        if (SelectWith.Length == 0)
            throw new InvalidOperationException("At least one selector is required in SelectWith.");

        return SelectWith[0];
    }

    // Feeds the result of the first selector through selectors 1..n-1,
    // stopping at the first null.
    private HtmlNode[]? ApplyRemainingSelectors(HtmlNode[]? selected) {
        for (var i = 1; i < SelectWith.Length; i++) {
            if (selected is null)
                return null;

            selected = SelectWith[i].SelectMany(selected);
        }

        return selected;
    }
}
@@ -0,0 +1,32 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Which relative of a node <see cref="RelationalDataProvider"/> navigates to.
/// </summary>
public enum RelationType {
    /// <summary>Navigate to the node's parent.</summary>
    Parent = 0,

    /// <summary>Navigate to the node's first child.</summary>
    Child = 1,
}
/// <summary>
/// Navigates from a node to one of its relatives, as chosen by <see cref="RelationType"/>.
/// </summary>
public class RelationalDataProvider : IComposableDataProvider<HtmlNode?> {
    /// <summary>Relative to navigate to; defaults to the parent.</summary>
    public RelationType RelationType { get; set; } = RelationType.Parent;

    /// <summary>Optional binding that picks the start node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Same as <see cref="Select(HtmlDocument)"/>.</summary>
    public HtmlNode? Get(HtmlDocument document) => Select(document);

    /// <summary>Same as <see cref="Select(HtmlNode)"/>.</summary>
    public HtmlNode? Get(HtmlNode node) => Select(node);

    /// <summary>
    /// Navigates from the node picked by <see cref="Content"/>, or from the document's
    /// root node when the binding is unset or selects nothing.
    /// </summary>
    /// <param name="doc">Document to start from.</param>
    public HtmlNode? Select(HtmlDocument doc) {
        var start = Content?.Select(doc);
        return Select(start ?? doc.DocumentNode);
    }

    /// <summary>Returns the relative of <paramref name="node"/> chosen by <see cref="RelationType"/>.</summary>
    /// <param name="node">Node to navigate from.</param>
    /// <exception cref="NotSupportedException"><see cref="RelationType"/> holds an unknown value.</exception>
    public HtmlNode? Select(HtmlNode node) {
        switch (RelationType) {
            case RelationType.Parent:
                return node.ParentNode;
            case RelationType.Child:
                return node.FirstChild;
            default:
                throw new NotSupportedException();
        }
    }
}
@@ -0,0 +1,155 @@
using System.Text.RegularExpressions;
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Order in which <see cref="SelectDataProvider"/> walks a node's subtree.
/// </summary>
public enum SearchStrategy {
    /// <summary>A node's children are visited immediately after the node itself.</summary>
    DepthFirst = 0,

    /// <summary>A node's children are queued behind everything already pending.</summary>
    BreadthFirst = 1,
}
/// <summary>
/// Fallback node that <see cref="SelectDataProvider"/> returns when no node matches the search.
/// </summary>
public enum SearchStringDefaultSelection {
    /// <summary>Fall back to the node the search started from.</summary>
    First = 0,

    /// <summary>Fall back to the last node visited.</summary>
    Last = 1,

    /// <summary>Fall back to the visited node with the most child nodes.</summary>
    GreatestChildren = 2,

    /// <summary>No preference; currently handled the same way as <see cref="First"/>.</summary>
    Any = 3,

    /// <summary>No fallback: an unsuccessful search yields no node.</summary>
    None = 4,
}
/// <summary>
/// Settings that control how <see cref="SelectDataProvider"/> compares its search string against nodes.
/// </summary>
public class SearchStringOptions {
/// <summary>
/// When true, the search string is used as a regular-expression pattern; otherwise it is compared with <see cref="UseComparer"/>.
/// </summary>
public required bool SearchStringIsRegex { get; set; }
/// <summary>
/// When true, a node's inner text is compared against the search string.
/// </summary>
public required bool SearchInBody { get; set; }
/// <summary>
/// Name of the attribute whose value is compared against the search string; null disables attribute matching.
/// </summary>
public required string? SearchInAttribute { get; set; }
/// <summary>
/// Fallback used when both <see cref="SearchInBody"/> and <see cref="SearchInAttribute"/> are false/null, or when no node matches the search criteria.
/// </summary>
public SearchStringDefaultSelection DefaultSelection { get; set; } = SearchStringDefaultSelection.First;
/// <summary>
/// Equality comparer for literal (non-regex) comparison, i.e. intended for when <see cref="SearchStringIsRegex"/> is false.
/// Defaults to the current-culture comparer.
/// </summary>
public IEqualityComparer<string> UseComparer { get; set; } = StringComparer.CurrentCulture;
}
/// <summary>
/// Walks a node's subtree and selects the nodes whose inner text and/or attribute value
/// matches <see cref="SearchString"/>, falling back to a default candidate when nothing matches.
/// </summary>
public class SelectDataProvider : IComposableDataProvider<HtmlNode?>, IManySelectionComposableDataProvider<HtmlNode?> {
    /// <summary>Traversal order used by <see cref="_Select"/>; depth-first by default.</summary>
    public SearchStrategy SearchStrategyType { get; set; } = SearchStrategy.DepthFirst;

    /// <summary>Matching options; by default a literal comparison against the inner text.</summary>
    public SearchStringOptions SearchStringOptions { get; set; } = new SearchStringOptions() {
        SearchStringIsRegex = false,
        SearchInBody = true,
        SearchInAttribute = null
    };

    /// <summary>Text or pattern to look for; <c>null</c> is treated as an empty string.</summary>
    public string? SearchString { get; set; }

    /// <summary>Optional binding that picks the start node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the first node that matches the search criteria, searching from the node picked by
    /// <see cref="Content"/> (or the document root).
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>See <see cref="Select(HtmlNode)"/>.</returns>
    public HtmlNode? Get(HtmlDocument document) {
        return Select(document);
    }

    /// <summary>
    /// Returns the first node that matches the search criteria, searching <paramref name="node"/>
    /// itself and its subtree.
    /// </summary>
    /// <param name="node">Node to start from.</param>
    /// <returns>See <see cref="Select(HtmlNode)"/>.</returns>
    public HtmlNode? Get(HtmlNode node) {
        return Select(node);
    }

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public HtmlNode? Get(HtmlNode[] node) {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Visits <paramref name="node"/> and every node beneath it in the order given by
    /// <see cref="SearchStrategyType"/> and collects the matching nodes.
    /// </summary>
    /// <param name="node">Node to start from; it is tested for a match as well.</param>
    /// <returns>
    /// The matching nodes in visiting order, each at most once. When nothing matches, a single
    /// fallback node chosen per <see cref="SearchStringOptions.DefaultSelection"/>, or an empty
    /// array when that is <see cref="SearchStringDefaultSelection.None"/>.
    /// </returns>
    /// <exception cref="NotSupportedException"><see cref="SearchStrategyType"/> holds an unknown value.</exception>
    public HtmlNode[]? _Select(HtmlNode node) {
        // Work list of nodes still to visit; currentNode is the cursor into it.
        LinkedList<HtmlNode> searchSet = new();
        LinkedListNode<HtmlNode> currentNode = searchSet.AddLast(node);
        HashSet<HtmlNode> visited = [node];

        // Breadth-first: children go to the end of the work list.
        void breadthFirst(HtmlNode parent) {
            foreach (var child in parent.ChildNodes) {
                if (visited.Contains(child))
                    continue;
                searchSet.AddLast(child);
                visited.Add(child);
            }
        }

        // Depth-first: children go directly behind the cursor. Inserting them in reverse
        // leaves them in document order in the work list.
        void depthFirst(HtmlNode parent) {
            foreach (var child in parent.ChildNodes.Reverse()) {
                if (visited.Contains(child))
                    continue;
                searchSet.AddAfter(currentNode, child);
                visited.Add(child);
            }
        }

        Action<HtmlNode> enqueueStrategy = SearchStrategyType switch {
            SearchStrategy.BreadthFirst => breadthFirst,
            SearchStrategy.DepthFirst => depthFirst,
            _ => throw new NotSupportedException()
        };

        // The start node is the fallback unless DefaultSelection picks another one below.
        var bestCandidate = currentNode.Value;
        List<HtmlNode> selected = [];
        do {
            var n = currentNode.Value;

            // One Add per node, even when both the body and the attribute match.
            if (IsMatch(n))
                selected.Add(n);

            switch (SearchStringOptions.DefaultSelection) {
                case SearchStringDefaultSelection.GreatestChildren:
                    if (n.ChildNodes.Count > bestCandidate.ChildNodes.Count)
                        bestCandidate = n;
                    break;
                case SearchStringDefaultSelection.Last:
                    bestCandidate = n;
                    break;
                case SearchStringDefaultSelection.Any:
                case SearchStringDefaultSelection.First:
                case SearchStringDefaultSelection.None:
                default:
                    break;
            }

            enqueueStrategy(n);
        } while ((currentNode = currentNode.Next!) != null);

        if (selected.Count == 0 && SearchStringOptions.DefaultSelection != SearchStringDefaultSelection.None)
            selected.Add(bestCandidate);

        return selected.ToArray();
    }

    // Tests one node against the search string. In regex mode only the regex decides;
    // the comparer is used exclusively in literal mode.
    private bool IsMatch(HtmlNode n) {
        var pattern = SearchString ?? "";
        var useRegex = SearchStringOptions.SearchStringIsRegex;

        if (SearchStringOptions.SearchInBody) {
            var bodyMatches = useRegex
                ? Regex.IsMatch(n.InnerText ?? "", pattern)
                : SearchStringOptions.UseComparer.Equals(n.InnerText, pattern);
            if (bodyMatches)
                return true;
        }

        if (SearchStringOptions.SearchInAttribute is not null) {
            // null means the attribute is absent; an absent attribute never matches a regex.
            string? attributeValue = n.GetAttributeValue(SearchStringOptions.SearchInAttribute, null);
            return useRegex
                ? attributeValue is not null && Regex.IsMatch(attributeValue, pattern)
                : SearchStringOptions.UseComparer.Equals(attributeValue, pattern);
        }

        return false;
    }

    /// <summary>
    /// Searches from the node picked by <see cref="Content"/>, or from the document's root node
    /// when the binding is unset or selects nothing.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>See <see cref="Select(HtmlNode)"/>.</returns>
    public HtmlNode? Select(HtmlDocument document) {
        return Select(Content?.Select(document) ?? document.DocumentNode);
    }

    /// <summary>Returns the first entry of <see cref="_Select"/> for <paramref name="node"/>.</summary>
    /// <param name="node">Node to start from.</param>
    /// <returns>The first selected node, or <c>null</c> when the selection is empty.</returns>
    public HtmlNode? Select(HtmlNode node) {
        return _Select(node)?.FirstOrDefault();
    }

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public HtmlNode? ManyGet(HtmlNode[] node) {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Returns every selected node, searching from the node picked by <see cref="Content"/>
    /// or from the document's root node.
    /// </summary>
    /// <param name="doc">Document to search.</param>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        return _Select(Content?.Select(doc) ?? doc.DocumentNode);
    }

    /// <summary>
    /// Runs <see cref="_Select"/> on each supplied node and concatenates the results in input order.
    /// </summary>
    /// <param name="node">Nodes to search from.</param>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return node.SelectMany(x => _Select(x) ?? []).ToArray();
    }
}
-53
View File
@@ -1,53 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.Marshalling;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Reads the <c>value</c> attributes of the <c>option</c> children of the bound node and
/// returns those that form absolute URIs (optionally resolved against <see cref="RelativeTo"/>).
/// </summary>
public class DropDownDataProvider
    : IDataProvider<string>,
    IDataProvider<string[]> {
    /// <summary>Binding that locates the drop-down node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base address prepended to each option value; <c>null</c> leaves values untouched.</summary>
    public string? RelativeTo { get; set; }

    // Joins a base and a relative part with exactly one '/' between them.
    // Returns the relative part untouched when there is no base.
    private string GetAbsolute(string? @base, string relative) {
        if (@base is null)
            return relative;

        if (@base.EndsWith('/'))
            @base = @base[..^1];
        if (relative.StartsWith('/'))
            relative = relative[1..];

        return @base + '/' + relative;
    }

    /// <summary>
    /// Returns the option values that resolve to absolute URIs.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The resolved values in document order; an empty array when <see cref="Content"/> is
    /// unset or selects nothing. Options without a <c>value</c> attribute and values that
    /// do not form an absolute URI are skipped.
    /// </returns>
    public string[] Get(HtmlDocument document) {
        if (Content is null)
            return [];

        var node = Content.Select(document);
        if (node is null)
            return [];

        List<string> links = [];
        foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) {
            string? childValue = child.GetAttributeValue("value", null);

            // An option without a value attribute cannot yield a link; skipping it here
            // also keeps GetAbsolute from dereferencing null when RelativeTo is set.
            if (childValue is null)
                continue;

            var absolute = GetAbsolute(RelativeTo, childValue);
            if (!Uri.TryCreate(absolute, UriKind.Absolute, out _))
                continue;

            links.Add(absolute);
        }

        return links.ToArray();
    }

    /// <summary>Returns the result of <see cref="Get(HtmlDocument)"/> serialized as a JSON array.</summary>
    /// <param name="document">Document to search.</param>
    string IDataProvider<string>.Get(HtmlDocument document) {
        return JsonSerializer.Serialize(this.Get(document));
    }
}
}
-28
View File
@@ -1,28 +0,0 @@
using HtmlAgilityPack;
using System.Text;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Flattens the <c>li</c> children of the bound node into one <c>;</c>-separated string.
/// </summary>
public class ListContentDataProvider : IDataProvider<string> {
    /// <summary>Binding that locates the list node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the trimmed inner texts of the list items, each followed by <c>;</c>,
    /// with the trimmed inner text of the last child appended at the end.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The joined text; an empty string when <see cref="Content"/> is unset, selects nothing,
    /// or the selected node has no child nodes.
    /// </returns>
    public string Get(HtmlDocument document) {
        if (Content is null)
            return "";

        var node = Content.Select(document);
        if (node is null)
            return "";

        // Last() below throws on an empty sequence, so a childless node yields "".
        if (node.ChildNodes.Count == 0)
            return "";

        StringBuilder content = new();
        foreach (var childNode in node.ChildNodes.SkipLast(1)) {
            if (childNode.Name != "li")
                continue;

            content.Append(childNode.InnerText.Trim() + ";");
        }

        // NOTE(review): the last child is appended whatever its tag is (e.g. a trailing
        // whitespace text node contributes an empty string) - confirm this is intended.
        content.Append(node.ChildNodes.Last().InnerText.Trim());
        return content.ToString();
    }
}
}
@@ -1,31 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Concatenates the inner text of the <c>p</c> children of the bound node, one per line.
/// </summary>
public class ParagraphedContentDataProvider : IDataProvider<string> {
    /// <summary>Binding that locates the container node inside a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the paragraphs' inner texts, each terminated by a line break.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The combined text; an empty string when <see cref="Content"/> is unset,
    /// selects nothing, or the node has no <c>p</c> children.
    /// </returns>
    public string Get(HtmlDocument document) {
        var container = Content?.Select(document);
        if (container is null)
            return "";

        var text = new StringBuilder();
        foreach (var paragraph in container.ChildNodes.Where(child => child.Name == "p")) {
            text.AppendLine(paragraph.InnerText);
        }

        return text.ToString();
    }
}
}
+2
View File
@@ -14,5 +14,7 @@ public interface IContextStage<RawType, OutType> {
IContextStage<RawType, OutType> UseFragments(); IContextStage<RawType, OutType> UseFragments();
IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator); IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator);
IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config); IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure);
DownloadEnumerable<OutType> Build(); DownloadEnumerable<OutType> Build();
} }
+32 -13
View File
@@ -8,10 +8,10 @@ using OpenQA.Selenium.Edge;
namespace Beam.Stealth { namespace Beam.Stealth {
public enum Browser { public enum Browser {
Firefox, Firefox = 0,
Chrome, Chrome = 1,
Chromium, Chromium = 2,
Edge Edge = 3
} }
public sealed class StealthConfig : IDisposable { public sealed class StealthConfig : IDisposable {
@@ -73,22 +73,41 @@ namespace Beam.Stealth {
bool showBrowser = false, bool showBrowser = false,
string? downloadDir = null, string? downloadDir = null,
TimeSpan? timeOut = null, TimeSpan? timeOut = null,
Browser browser = Browser.Firefox, Browser preferredBrowser = Browser.Firefox,
ILogger? logger = null) { ILogger? logger = null) {
// pick or create a dedicated download folder // pick or create a dedicated download folder
downloadDir ??= Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); downloadDir ??= Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
Directory.CreateDirectory(downloadDir); Directory.CreateDirectory(downloadDir);
bool headless = !showBrowser; bool headless = !showBrowser;
IWebDriver? driver = null;
List<Exception>? errors = [];
IWebDriver driver = browser switch { while (preferredBrowser <= Browser.Edge) {
Browser.Chrome or Browser.Chromium try {
=> new ChromeDriver(GetChromeOptions(downloadDir, headless)), driver = preferredBrowser switch {
Browser.Edge Browser.Chrome or Browser.Chromium
=> new EdgeDriver(GetEdgeOptions(downloadDir, headless)), => new ChromeDriver(GetChromeOptions(downloadDir, headless)),
Browser.Firefox or _ Browser.Edge
=> new FirefoxDriver(GetFirefoxOptions(downloadDir, headless)), => new EdgeDriver(GetEdgeOptions(downloadDir, headless)),
}; Browser.Firefox or _
=> new FirefoxDriver(GetFirefoxOptions(downloadDir, headless)),
};
}
catch (Exception e) {
if (preferredBrowser != Browser.Edge)
logger?.LogWarning(e, "Failed to create driver for browser {Browser}, falling back to {Fallback}", preferredBrowser, Enum.GetName(preferredBrowser + 1));
else {
logger?.LogCritical(e, "Failed to create driver for browser {Browser}, no more fallback remaining!", preferredBrowser);
}
preferredBrowser++;
errors.Add(e);
}
}
if (driver is null)
throw new AggregateException(errors);
return new StealthConfig(downloadDir) { return new StealthConfig(downloadDir) {
ShowBrowser = showBrowser, ShowBrowser = showBrowser,
+19 -4
View File
@@ -7,20 +7,29 @@
<Title>Beam</Title> <Title>Beam</Title>
<Authors>aeqw89</Authors> <Authors>aeqw89</Authors>
<Company>qwsdcvghyu</Company> <Company>qwsdcvghyu</Company>
<Version>2.2.0</Version> <Version>2.4.5</Version>
<Description>A library for downloading internet resources</Description> <Description>A library for downloading internet resources</Description>
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl> <PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl> <RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
<PackageId>aeqw89.Beam</PackageId> <PackageId>aeqw89.Beam</PackageId>
<PackageVersion>2.2.0</PackageVersion> <PackageVersion>2.4.5</PackageVersion>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Beam.Api\Beam.Api.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Data\Beam.Data.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj"> <ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj"> <ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\Beam.Fluent\Beam.Fluent.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj"> <ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
@@ -33,6 +42,9 @@
<ProjectReference Include="..\Beam\Beam.csproj"> <ProjectReference Include="..\Beam\Beam.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9">
<Transitive>true</Transitive>
</PackageReference>
<PackageReference Include="aeqw89.DataKeys" Version="2.1.1"> <PackageReference Include="aeqw89.DataKeys" Version="2.1.1">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
@@ -45,10 +57,13 @@
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13"> <PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="Microsoft.Playwright" Version="1.52.0"> <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9"> <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9">
<Transitive>true</Transitive>
</PackageReference>
<PackageReference Include="Microsoft.Playwright" Version="1.52.0">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="Selenium.WebDriver" Version="4.34.0"> <PackageReference Include="Selenium.WebDriver" Version="4.34.0">
+20 -5
View File
@@ -7,20 +7,29 @@
<Title>Beam</Title> <Title>Beam</Title>
<Authors>aeqw89</Authors> <Authors>aeqw89</Authors>
<Company>qwsdcvghyu</Company> <Company>qwsdcvghyu</Company>
<Version>2.1.6</Version> <Version>2.4.4</Version>
<Description>A library for downloading internet resources</Description> <Description>A library for downloading internet resources</Description>
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl> <PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl> <RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
<PackageId>aeqw89.Beam</PackageId> <PackageId>aeqw89.Beam</PackageId>
<PackageVersion>2.1.6</PackageVersion> <PackageVersion>2.4.4</PackageVersion>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Beam.Api\Beam.Api.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Data\Beam.Data.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj"> <ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj"> <ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\Beam.Fluent\Beam.Fluent.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj"> <ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
@@ -33,6 +42,9 @@
<ProjectReference Include="..\Beam\Beam.csproj"> <ProjectReference Include="..\Beam\Beam.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9">
<Transitive>true</Transitive>
</PackageReference>
<PackageReference Include="aeqw89.DataKeys" Version="2.1.1"> <PackageReference Include="aeqw89.DataKeys" Version="2.1.1">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
@@ -45,16 +57,19 @@
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13"> <PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="Microsoft.Playwright" Version="1.52.0"> <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9"> <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9">
<Transitive>true</Transitive>
</PackageReference>
<PackageReference Include="Microsoft.Playwright" Version="1.52.0">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="Selenium.WebDriver" Version="4.34.0"> <PackageReference Include="Selenium.WebDriver" Version="4.34.0">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="System.Linq.Async" Version="6.0.1"> <PackageReference Include="System.Linq.Async" Version="6.0.3">
<Transitive>true</Transitive> <Transitive>true</Transitive>
</PackageReference> </PackageReference>
<PackageReference Include="EntityFramework" Version="6.5.1"> <PackageReference Include="EntityFramework" Version="6.5.1">