diff --git a/Beam.Abstractions/IComposableDataProvider.cs b/Beam.Abstractions/IComposableDataProvider.cs new file mode 100644 index 0000000..9867c4b --- /dev/null +++ b/Beam.Abstractions/IComposableDataProvider.cs @@ -0,0 +1,12 @@ +using HtmlAgilityPack; + +namespace Beam.Abstractions; + +public interface IComposableDataProvider : IComposableDataProvider { } + + +public interface IComposableDataProvider : IDataProvider { + public T Get(HtmlNode node); + public HtmlNode? Select(HtmlDocument doc); + public HtmlNode? Select(HtmlNode node); +} \ No newline at end of file diff --git a/Beam.Abstractions/IManySelectionComposableDataProvider.cs b/Beam.Abstractions/IManySelectionComposableDataProvider.cs new file mode 100644 index 0000000..02c45a5 --- /dev/null +++ b/Beam.Abstractions/IManySelectionComposableDataProvider.cs @@ -0,0 +1,9 @@ +using HtmlAgilityPack; + +namespace Beam.Abstractions; + +public interface IManySelectionComposableDataProvider : IDataProvider { + public T ManyGet(HtmlNode[] node); + public HtmlNode[]? SelectMany(HtmlDocument doc); + public HtmlNode[]? SelectMany(HtmlNode[] node); +} \ No newline at end of file diff --git a/Beam.Api/ApiCall.cs b/Beam.Api/ApiCall.cs index 2a09e57..13608f5 100644 --- a/Beam.Api/ApiCall.cs +++ b/Beam.Api/ApiCall.cs @@ -3,12 +3,14 @@ using System; using System.Collections.Generic; using System.Linq; using System.Net; +using System.Net.Http; using System.Net.Http.Json; using System.Text; using System.Text.Json; +using System.Threading; using System.Threading.Tasks; -namespace Beam { +namespace Beam.Api; public class ApiCall(HttpClient client, string uri, HttpMethod method, KeyValuePair[] headers, object? requestData, object? body, params HashSet successCodes) { public HttpClient Client { get; } = client; public object? RequestData { get; } = requestData; @@ -63,4 +65,3 @@ namespace Beam { public static async Task Get(HttpClient client, string url, ILoggerFactory factory) => await new ApiCall(client, url, HttpMethod.Get, [], null, null).GetResponse(factory.CreateLogger()); } -} diff --git a/Beam.Api/ApiCallBuilder.cs b/Beam.Api/ApiCallBuilder.cs index 67745fc..665a5b9 100644 --- a/Beam.Api/ApiCallBuilder.cs +++ b/Beam.Api/ApiCallBuilder.cs @@ -2,11 +2,12 @@ using System.Collections.Generic; using System.Linq; using System.Net; +using System.Net.Http; using System.Reflection.PortableExecutable; using System.Text; using System.Threading.Tasks; -namespace Beam { +namespace Beam.Api; public class ApiCallBuilder(HttpClient client) { HttpClient Client = client; string Uri; @@ -77,4 +78,4 @@ namespace Beam { return new ApiCall(Client, Uri, Method, Headers.Select((x) => new KeyValuePair(x.Key, x.Value.ToArray())).ToArray(), Data, Body, SuccessCodes); } } -} + diff --git a/Beam.Api/ApiCalls.cs b/Beam.Api/ApiCalls.cs index 845cb0d..d45f6ed 100644 --- a/Beam.Api/ApiCalls.cs +++ b/Beam.Api/ApiCalls.cs @@ -1,9 +1,15 @@ // ApiCalls.cs + +using System; using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; using System.Net; +using System.Threading; +using System.Threading.Tasks; using Microsoft.Extensions.Logging; -namespace Beam { +namespace Beam.Api; /// /// Executes a batch of s using either sequential or parallel strategy. /// @@ -45,4 +51,4 @@ namespace Beam { return bag.OrderBy(x => x.idx).Select(x => x.res).ToList(); } } -} + diff --git a/Beam.Api/ApiCallsBuilder.cs b/Beam.Api/ApiCallsBuilder.cs index 760a55b..3724765 100644 --- a/Beam.Api/ApiCallsBuilder.cs +++ b/Beam.Api/ApiCallsBuilder.cs @@ -1,7 +1,10 @@ // ApiCallsBuilder.cs + +using System; +using System.Collections.Generic; using System.Net; -namespace Beam { +namespace Beam.Api; /// /// Fluent builder for . /// @@ -43,5 +46,4 @@ namespace Beam { throw new InvalidOperationException("At least one ApiCall is required."); return new ApiCalls(_calls, _parallelism); } - } -} + } \ No newline at end of file diff --git a/Beam.Api/ApiResponse.cs b/Beam.Api/ApiResponse.cs index 3fc2548..55a22d8 100644 --- a/Beam.Api/ApiResponse.cs +++ b/Beam.Api/ApiResponse.cs @@ -1,10 +1,15 @@ -using Microsoft.Extensions.Logging; +using System; +using System.IO; +using Microsoft.Extensions.Logging; using System.Net; +using System.Net.Http; using System.Net.Http.Json; using System.Text; using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; -namespace Beam { +namespace Beam.Api; /// /// Wrapper that lets the response body be read any number of times (even concurrently). /// @@ -78,5 +83,4 @@ namespace Beam { if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content."); return Task.FromResult(new MemoryStream(_buffer, writable: false)); } - } } diff --git a/Beam.Api/Beam.Api.csproj b/Beam.Api/Beam.Api.csproj index bcbfc5d..85abac3 100644 --- a/Beam.Api/Beam.Api.csproj +++ b/Beam.Api/Beam.Api.csproj @@ -7,7 +7,7 @@ - + diff --git a/Beam.Downloaders/SequentialDownloader.cs b/Beam.Downloaders/SequentialDownloader.cs index bc35f16..378000b 100644 --- a/Beam.Downloaders/SequentialDownloader.cs +++ b/Beam.Downloaders/SequentialDownloader.cs @@ -44,8 +44,14 @@ namespace Beam.Downloaders { //Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri); links.Add(new Ordered(LinksEnumerator.Current, LastOrder++)); - while (LinksEnumerator.MoveNext() && !string.IsNullOrWhiteSpace(LinksEnumerator.Current) && links.Count < idealLinkCount) + while (links.Count < idealLinkCount && LinksEnumerator.MoveNext()) { + if (string.IsNullOrWhiteSpace(LinksEnumerator.Current)) { + return false; + } + links.Add(new Ordered(LinksEnumerator.Current, LastOrder++)); + } + //Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count); if (links.Count == 0) { Logger?.LogInformation("Out of links!"); diff --git a/Beam.Downloaders/UnitDownloader.cs b/Beam.Downloaders/UnitDownloader.cs index 2d1c774..400f47a 100644 --- a/Beam.Downloaders/UnitDownloader.cs +++ b/Beam.Downloaders/UnitDownloader.cs @@ -28,12 +28,22 @@ namespace Beam.Downloaders { byte[] buffer = new byte[bufferSize]; int inBuffer = 0; long downloaded = 0; + + long? remaining() { + try { + return stream.Length - downloaded; + } + catch { + return null; + } + } + while ((inBuffer = stream.Read(buffer)) > 0) { downloaded += inBuffer; await destinationStream.WriteAsync(buffer.AsMemory(0, inBuffer), ct); progress?.Report(new DownloadReport() { BytesDownloaded = inBuffer, - BytesRemaining = stream.Length - downloaded + BytesRemaining = remaining() }); ct.ThrowIfCancellationRequested(); diff --git a/Beam.Dynamic/AnchorCollectionDataProvider.cs b/Beam.Dynamic/AnchorCollectionDataProvider.cs deleted file mode 100644 index 339c6ee..0000000 --- a/Beam.Dynamic/AnchorCollectionDataProvider.cs +++ /dev/null @@ -1,40 +0,0 @@ -using HtmlAgilityPack; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Beam.Abstractions; - -namespace Beam.Dynamic { - public class AnchorCollectionDataProvider : IDataProvider { - public IBinding? Content { get; set; } - public string? RelativeTo { get; set; } - - private string GetAbsolute(string? @base, string relative) { - if (@base is null) - return relative; - - if (@base.EndsWith('/')) - @base = @base[..^1]; - if (relative.StartsWith('/')) - relative = relative[1..]; - return @base + '/' + relative; - } - - public string[] Get(HtmlDocument document) { - if (Content is null) - return []; - - var node = Content.Select(document); - if (node is null) - return []; - - List links = []; - foreach (var child in node.Descendants()) - links.Add(child.GetAttributeValue("href", "")); - - return links.Where(x => !string.IsNullOrWhiteSpace(x)).ToArray(); - } - } -} diff --git a/Beam.Dynamic/AnchorDataProvider.cs b/Beam.Dynamic/AnchorDataProvider.cs deleted file mode 100644 index d4b91e6..0000000 --- a/Beam.Dynamic/AnchorDataProvider.cs +++ /dev/null @@ -1,21 +0,0 @@ -using HtmlAgilityPack; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Beam.Abstractions; - -namespace Beam.Dynamic { - public class AnchorDataProvider : IDataProvider { - public IBinding? Content { get; set; } - - public string Get(HtmlDocument document) { - if (Content is null) - return ""; - - return Content.Select(document)?.GetAttributeValue("href", "") ?? ""; - - } - } -} diff --git a/Beam.Dynamic/ContentsArrayDataProvider.cs b/Beam.Dynamic/ContentsArrayDataProvider.cs deleted file mode 100644 index 6bdd4ed..0000000 --- a/Beam.Dynamic/ContentsArrayDataProvider.cs +++ /dev/null @@ -1,16 +0,0 @@ -using Beam.Abstractions; -using HtmlAgilityPack; - -namespace Beam.Dynamic { - public class ContentsArrayDataProvider : ContentsDataProvider, IDataProvider { - public string[] ArrayDelimiters { get; set; } = [";"]; - - string[] IDataProvider.Get(HtmlDocument document) { - if (Content is null) - return []; - - return Content.Select(document)?.InnerText?.Split(ArrayDelimiters, StringSplitOptions.RemoveEmptyEntries) ?? []; - } - } - -} diff --git a/Beam.Dynamic/ContentsDataProvider.cs b/Beam.Dynamic/ContentsDataProvider.cs deleted file mode 100644 index 0ecb361..0000000 --- a/Beam.Dynamic/ContentsDataProvider.cs +++ /dev/null @@ -1,21 +0,0 @@ -using HtmlAgilityPack; -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Beam.Abstractions; - -namespace Beam.Dynamic { - public class ContentsDataProvider : IDataProvider { - public IBinding? Content { get; set; } - - public string Get(HtmlDocument document) { - if (Content is null) - return ""; - - return Content.Select(document)?.InnerText ?? ""; - } - } -} diff --git a/Beam.Dynamic/DataProviderJsonTypeInfoResolver.cs b/Beam.Dynamic/DataProviderJsonTypeInfoResolver.cs index 620ab38..909f7ce 100644 --- a/Beam.Dynamic/DataProviderJsonTypeInfoResolver.cs +++ b/Beam.Dynamic/DataProviderJsonTypeInfoResolver.cs @@ -28,7 +28,7 @@ public class DataProviderJsonTypeInfoResolver : DefaultJsonTypeInfoResolver { new JsonDerivedType(typeof(ContentsDataProvider), "single"), new JsonDerivedType(typeof(DropDownDataProvider), "dropdown"), new JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list"), - new JsonDerivedType(typeof(AnchorDataProvider), "anchor") + new JsonDerivedType(typeof(AnchorDataProvider), "anchor"), } }; } diff --git a/Beam.Dynamic/DataProviders/ComposeDataProviders.cs b/Beam.Dynamic/DataProviders/ComposeDataProviders.cs new file mode 100644 index 0000000..82bb210 --- /dev/null +++ b/Beam.Dynamic/DataProviders/ComposeDataProviders.cs @@ -0,0 +1,83 @@ +using Beam.Abstractions; +using HtmlAgilityPack; + +namespace Beam.Dynamic; + +/// +/// Allows composition of different data providers to adapt to different types of data. +/// +/// +public class ComposeDataProviders : IComposableDataProvider { + public required IComposableDataProvider[] SelectWith { get; init; } + public required IComposableDataProvider GetWith { get; init; } + + private ComposeDataProviders() {} + + public static ComposeDataProviders Create(IComposableDataProvider selectWith, IComposableDataProvider getWith) { + return new ComposeDataProviders() { + GetWith = getWith, + SelectWith = [selectWith] + }; + } + + public static ComposeDataProviders Create(IComposableDataProvider[] selectWiths, IComposableDataProvider getWith) { + return new ComposeDataProviders() { + GetWith = getWith, + SelectWith = selectWiths + }; + } + + /// + /// Composes the data providers, first selecting a node with , then getting the data with . + /// + /// Throws when returns a null value. + /// + /// + public T Get(HtmlDocument document) { + var selected = Select(document); + if (selected is null) + throw new Exception("Selection operation failed."); + return GetWith.Get(selected); + } + + /// + /// Uses the data provider to get the data from the supplied node. + /// + /// + /// + public T Get(HtmlNode node) { + return GetWith.Get(node); + } + + /// + /// Uses the data provider to select a node from the supplied document. + /// + /// + /// + public HtmlNode? Select(HtmlDocument doc) { + var selected = SelectWith[0].Select(doc); + foreach(var provider in SelectWith.Skip(1)) { + if (selected is null) + return null; + selected = provider.Select(selected); + } + + return selected; + } + + /// + /// Uses the data provider to select a node from the supplied document. + /// + /// + /// + public HtmlNode? Select(HtmlNode node) { + var selected = SelectWith[0].Select(node); + foreach(var provider in SelectWith.Skip(1)) { + if (selected is null) + return null; + selected = provider.Select(selected); + } + + return selected; + } +} \ No newline at end of file diff --git a/Beam.Dynamic/DataProviders/ManyAnchorsDataProvider.cs b/Beam.Dynamic/DataProviders/ManyAnchorsDataProvider.cs new file mode 100644 index 0000000..be14e77 --- /dev/null +++ b/Beam.Dynamic/DataProviders/ManyAnchorsDataProvider.cs @@ -0,0 +1,20 @@ +using Beam.Abstractions; +using HtmlAgilityPack; + +namespace Beam.Dynamic; + +public class ManyAnchorsDataProvider : AnchorDataProvider, IManySelectionComposableDataProvider { + public new string[] Get(HtmlDocument document) { + return [base.Get(document)]; + } + public string[] ManyGet(HtmlNode[] node) { + return node.Select(x => base.Get(x)).ToArray(); + } + public HtmlNode[]? SelectMany(HtmlDocument doc) { + var k = Select(doc); + return k == null ? null : [k]; + } + public HtmlNode[]? SelectMany(HtmlNode[] node) { + return node.Select(x => Select(x) ?? null).Where(x => x is not null).Cast().ToArray(); + } +} \ No newline at end of file diff --git a/Beam.Dynamic/DataProviders/ManyComposeDataProviders.cs b/Beam.Dynamic/DataProviders/ManyComposeDataProviders.cs new file mode 100644 index 0000000..80834b2 --- /dev/null +++ b/Beam.Dynamic/DataProviders/ManyComposeDataProviders.cs @@ -0,0 +1,80 @@ +using Beam.Abstractions; +using HtmlAgilityPack; + +namespace Beam.Dynamic; + + +public class ManyComposeDataProviders : IManySelectionComposableDataProvider { + public required IManySelectionComposableDataProvider[] SelectWith { get; init; } + public required IManySelectionComposableDataProvider GetWith { get; init; } + + private ManyComposeDataProviders() {} + + public static ManyComposeDataProviders Create(IManySelectionComposableDataProvider selectWith, IManySelectionComposableDataProvider getWith) { + return new ManyComposeDataProviders() { + GetWith = getWith, + SelectWith = [selectWith] + }; + } + + public static ManyComposeDataProviders Create(IManySelectionComposableDataProvider[] selectWiths, IManySelectionComposableDataProvider getWith) { + return new ManyComposeDataProviders() { + GetWith = getWith, + SelectWith = selectWiths + }; + } + + /// + /// Composes the data providers, first selecting a node with , then getting the data with . + /// + /// Throws when returns a null value. + /// + /// + public T Get(HtmlDocument document) { + var selected = SelectMany(document); + if (selected is null) + throw new Exception("Selection operation failed."); + return GetWith.ManyGet(selected); + } + + /// + /// Uses the data provider to get the data from the supplied node. + /// + /// + /// + public T ManyGet(HtmlNode[] node) { + return GetWith.ManyGet(node); + } + + /// + /// Uses the data provider to select a node from the supplied document. + /// + /// + /// + public HtmlNode[]? SelectMany(HtmlDocument doc) { + var selected = SelectWith[0].SelectMany(doc); + foreach(var provider in SelectWith.Skip(1)) { + if (selected is null) + return null; + selected = provider.SelectMany(selected); + } + + return selected; + } + + /// + /// Uses the data provider to select a node from the supplied document. + /// + /// + /// + public HtmlNode[]? SelectMany(HtmlNode[] node) { + var selected = SelectWith[0].SelectMany(node); + foreach(var provider in SelectWith.Skip(1)) { + if (selected is null) + return null; + selected = provider.SelectMany(selected); + } + + return selected; + } +} \ No newline at end of file diff --git a/Beam.Dynamic/DataProviders/RelationalDataProvider.cs b/Beam.Dynamic/DataProviders/RelationalDataProvider.cs new file mode 100644 index 0000000..6068412 --- /dev/null +++ b/Beam.Dynamic/DataProviders/RelationalDataProvider.cs @@ -0,0 +1,32 @@ +using Beam.Abstractions; +using HtmlAgilityPack; + +namespace Beam.Dynamic; + +public enum RelationType { + Parent, + Child, +} + +public class RelationalDataProvider : IComposableDataProvider { + + public RelationType RelationType { get; set; } = RelationType.Parent; + public IBinding? Content { get; set; } + + public HtmlNode? Get(HtmlDocument document) { + return Select(document); + } + public HtmlNode? Get(HtmlNode node) { + return Select(node); + } + public HtmlNode? Select(HtmlDocument doc) { + return Select(Content?.Select(doc) ?? doc.DocumentNode); + } + public HtmlNode? Select(HtmlNode node) { + return RelationType switch { + RelationType.Parent => node.ParentNode, + RelationType.Child => node.FirstChild, + _ => throw new NotSupportedException() + }; + } +} \ No newline at end of file diff --git a/Beam.Dynamic/DataProviders/SelectDataProvider.cs b/Beam.Dynamic/DataProviders/SelectDataProvider.cs new file mode 100644 index 0000000..184c241 --- /dev/null +++ b/Beam.Dynamic/DataProviders/SelectDataProvider.cs @@ -0,0 +1,155 @@ +using System.Text.RegularExpressions; +using Beam.Abstractions; +using HtmlAgilityPack; + +namespace Beam.Dynamic; + +public enum SearchStrategy { + DepthFirst, + BreadthFirst, +} + +public enum SearchStringDefaultSelection { + First, + Last, + GreatestChildren, + Any, + None, +} + +public class SearchStringOptions { + public required bool SearchStringIsRegex { get; set; } + public required bool SearchInBody { get; set; } + public required string? SearchInAttribute { get; set; } + + /// + /// Only used when both and are false/null, or no match is found for the search criteria. + /// + public SearchStringDefaultSelection DefaultSelection { get; set; } = SearchStringDefaultSelection.First; + + /// + /// Only used when is false. + /// + public IEqualityComparer UseComparer { get; set; } = StringComparer.CurrentCulture; +} + +public class SelectDataProvider : IComposableDataProvider, IManySelectionComposableDataProvider { + public SearchStrategy SearchStrategyType { get; set; } = SearchStrategy.DepthFirst; + public SearchStringOptions SearchStringOptions { get; set; } = new SearchStringOptions() { + SearchStringIsRegex = false, + SearchInBody = true, + SearchInAttribute = null + }; + + public string? SearchString { get; set; } + public IBinding? Content { get; set; } + + /// + /// Returns the first node that matches the search criteria. + /// + /// + /// + public HtmlNode? Get(HtmlDocument document) { + return Select(document); + } + /// + /// Returns the first child node that matches the search criteria. + /// + /// + /// + public HtmlNode? Get(HtmlNode node) { + return Select(node); + } + + public HtmlNode? Get(HtmlNode[] node) { + throw new NotSupportedException(); + } + + public HtmlNode[]? _Select(HtmlNode node) { + LinkedList searchSet = new(); + LinkedListNode currentNode = searchSet.AddLast(node); + HashSet visited = [node]; + + void breadthFirst(HtmlNode node) { + foreach (var child in node.ChildNodes) { + if (visited.Contains(child)) + continue; + searchSet.AddLast(child); + visited.Add(child); + } + } + + void depthFirst(HtmlNode node) { + foreach (var child in node.ChildNodes.Reverse()) { + if (visited.Contains(child)) + continue; + searchSet.AddAfter(currentNode, child); + visited.Add(child); + } + } + + Action enqueueStartegy = SearchStrategyType switch { + SearchStrategy.BreadthFirst => breadthFirst, + SearchStrategy.DepthFirst => depthFirst, + _ => throw new NotSupportedException() + }; + + var bestCandidate = currentNode.Value; + List selected = []; + + do { + var n = currentNode.Value; + if (SearchStringOptions.SearchInBody) + if (SearchStringOptions.SearchStringIsRegex && Regex.IsMatch(n.InnerText ?? "", SearchString ?? "")) + selected.Add(n); + else if (SearchStringOptions.UseComparer.Equals(n.InnerText, SearchString ?? "")) + selected.Add(n); + if (SearchStringOptions.SearchInAttribute is not null) + if (SearchStringOptions.SearchStringIsRegex && n.GetAttributeValue(SearchStringOptions.SearchInAttribute, null) != null && + Regex.IsMatch(n.GetAttributeValue(SearchStringOptions.SearchInAttribute, ""), SearchString ?? "")) + selected.Add(n); + else if (SearchStringOptions.UseComparer.Equals(n.GetAttributeValue(SearchStringOptions.SearchInAttribute, null), SearchString ?? "")) + selected.Add(n); + + switch (SearchStringOptions.DefaultSelection) { + case SearchStringDefaultSelection.GreatestChildren: + if (n.ChildNodes.Count > bestCandidate.ChildNodes.Count) + bestCandidate = n; + break; + case SearchStringDefaultSelection.Last: + bestCandidate = n; + break; + case SearchStringDefaultSelection.Any: + case SearchStringDefaultSelection.First: + case SearchStringDefaultSelection.None: + default: + break; + } + enqueueStartegy(n); + + } while ((currentNode = currentNode.Next!) != null); + + if (selected.Count == 0 && SearchStringOptions.DefaultSelection != SearchStringDefaultSelection.None) + selected.Add(bestCandidate); + + return selected.ToArray(); + } + + public HtmlNode? Select(HtmlDocument document) { + return Select(Content?.Select(document) ?? document.DocumentNode); + } + + public HtmlNode? Select(HtmlNode node) { + return _Select(node)?.FirstOrDefault(); + } + + public HtmlNode? ManyGet(HtmlNode[] node) { + throw new NotSupportedException(); + } + public HtmlNode[]? SelectMany(HtmlDocument doc) { + return _Select(Content?.Select(doc) ?? doc.DocumentNode); + } + public HtmlNode[]? SelectMany(HtmlNode[] node) { + return node.SelectMany(x => _Select(x) ?? []).ToArray(); + } +} \ No newline at end of file diff --git a/Beam.Dynamic/DropDownDataProvider.cs b/Beam.Dynamic/DropDownDataProvider.cs deleted file mode 100644 index 45e21b5..0000000 --- a/Beam.Dynamic/DropDownDataProvider.cs +++ /dev/null @@ -1,53 +0,0 @@ -using HtmlAgilityPack; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Runtime.InteropServices; -using System.Runtime.InteropServices.Marshalling; -using System.Text; -using System.Text.Json; -using System.Threading.Tasks; -using Beam.Abstractions; - -namespace Beam.Dynamic { - public class DropDownDataProvider - : IDataProvider, - IDataProvider { - public IBinding? Content { get; set; } - public string? RelativeTo { get; set; } - - private string GetAbsolute(string? @base, string relative) { - if (@base is null) - return relative; - - if (@base.EndsWith('/')) - @base = @base[..^1]; - if (relative.StartsWith('/')) - relative = relative[1..]; - return @base + '/' + relative; - } - - public string[] Get(HtmlDocument document) { - if (Content is null) - return []; - var node = Content.Select(document); - if (node is null) - return []; - List links = []; - foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) { - var childValue = child.GetAttributeValue("value", null); - if (!Uri.TryCreate(GetAbsolute(RelativeTo, childValue), UriKind.Absolute, out _)) - continue; - links.Add((GetAbsolute(RelativeTo, childValue))); - } - - return links.ToArray(); - } - - - - string IDataProvider.Get(HtmlDocument document) { - return JsonSerializer.Serialize(this.Get(document)); - } - } -} diff --git a/Beam.Dynamic/ListContentDataProvider.cs b/Beam.Dynamic/ListContentDataProvider.cs deleted file mode 100644 index 997aadd..0000000 --- a/Beam.Dynamic/ListContentDataProvider.cs +++ /dev/null @@ -1,28 +0,0 @@ -using HtmlAgilityPack; -using System.Text; -using Beam.Abstractions; - -namespace Beam.Dynamic { - public class ListContentDataProvider : IDataProvider { - public IBinding? Content { get; set; } - - public string Get(HtmlDocument document) { - if (Content is null) - return ""; - - var node = Content.Select(document); - if (node is null) - return ""; - - StringBuilder content = new(); - foreach(var childNode in node.ChildNodes.SkipLast(1)) { - if (childNode.Name != "li") - continue; - content.Append(childNode.InnerText.Trim() + ";"); - } - - content.Append(node.ChildNodes.Last().InnerText.Trim()); - return content.ToString(); - } - } -} diff --git a/Beam.Dynamic/ParagraphedContentDataProvider.cs b/Beam.Dynamic/ParagraphedContentDataProvider.cs deleted file mode 100644 index ef81fab..0000000 --- a/Beam.Dynamic/ParagraphedContentDataProvider.cs +++ /dev/null @@ -1,31 +0,0 @@ -using HtmlAgilityPack; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Beam.Abstractions; - -namespace Beam.Dynamic { - public class ParagraphedContentDataProvider : IDataProvider { - public IBinding? Content { get; set; } - - public string Get(HtmlDocument document) { - if (Content is null) - return ""; - - var node = Content.Select(document); - if (node is null) - return ""; - - StringBuilder content = new(); - foreach(var childNode in node.ChildNodes) { - if (childNode.Name != "p") - continue; - content.AppendLine(childNode.InnerText); - } - - return content.ToString(); - } - } -} diff --git a/Beam.Fluent/Core/IContextStage.cs b/Beam.Fluent/Core/IContextStage.cs index c10728f..5ad4486 100644 --- a/Beam.Fluent/Core/IContextStage.cs +++ b/Beam.Fluent/Core/IContextStage.cs @@ -14,5 +14,7 @@ public interface IContextStage { IContextStage UseFragments(); IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator); IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config); + IContextStage ConfigureUnitDownloaderOptions( + Action> configure); DownloadEnumerable Build(); } \ No newline at end of file diff --git a/Beam.Stealth/StealthConfig.cs b/Beam.Stealth/StealthConfig.cs index feefeb7..8aa20c9 100644 --- a/Beam.Stealth/StealthConfig.cs +++ b/Beam.Stealth/StealthConfig.cs @@ -8,10 +8,10 @@ using OpenQA.Selenium.Edge; namespace Beam.Stealth { public enum Browser { - Firefox, - Chrome, - Chromium, - Edge + Firefox = 0, + Chrome = 1, + Chromium = 2, + Edge = 3 } public sealed class StealthConfig : IDisposable { @@ -73,23 +73,42 @@ namespace Beam.Stealth { bool showBrowser = false, string? downloadDir = null, TimeSpan? timeOut = null, - Browser browser = Browser.Firefox, + Browser preferredBrowser = Browser.Firefox, ILogger? logger = null) { // pick or create a dedicated download folder downloadDir ??= Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); Directory.CreateDirectory(downloadDir); bool headless = !showBrowser; + IWebDriver? driver = null; + List? errors = []; - IWebDriver driver = browser switch { - Browser.Chrome or Browser.Chromium - => new ChromeDriver(GetChromeOptions(downloadDir, headless)), - Browser.Edge - => new EdgeDriver(GetEdgeOptions(downloadDir, headless)), - Browser.Firefox or _ - => new FirefoxDriver(GetFirefoxOptions(downloadDir, headless)), - }; + while (preferredBrowser <= Browser.Edge) { + try { + driver = preferredBrowser switch { + Browser.Chrome or Browser.Chromium + => new ChromeDriver(GetChromeOptions(downloadDir, headless)), + Browser.Edge + => new EdgeDriver(GetEdgeOptions(downloadDir, headless)), + Browser.Firefox or _ + => new FirefoxDriver(GetFirefoxOptions(downloadDir, headless)), + }; + } + catch (Exception e) { + if (preferredBrowser != Browser.Edge) + logger?.LogWarning(e, "Failed to create driver for browser {Browser}, falling back to {Fallback}", preferredBrowser, Enum.GetName(preferredBrowser + 1)); + else { + logger?.LogCritical(e, "Failed to create driver for browser {Browser}, no more fallback remaining!", preferredBrowser); + } + + preferredBrowser++; + errors.Add(e); + } + } + if (driver is null) + throw new AggregateException(errors); + return new StealthConfig(downloadDir) { ShowBrowser = showBrowser, TimeOut = timeOut ?? Timeout.InfiniteTimeSpan, diff --git a/aeqw89.Beam/aeqw89.Beam.csproj b/aeqw89.Beam/aeqw89.Beam.csproj index b36423c..933028d 100644 --- a/aeqw89.Beam/aeqw89.Beam.csproj +++ b/aeqw89.Beam/aeqw89.Beam.csproj @@ -7,20 +7,29 @@ Beam aeqw89 qwsdcvghyu - 2.2.0 + 2.4.5 A library for downloading internet resources https://github.com/qwsdcvghyu89/Beam https://github.com/qwsdcvghyu89/Beam aeqw89.Beam - 2.2.0 + 2.4.5 + + all + + + all + all all + + all + all @@ -33,6 +42,9 @@ all + + true + true @@ -45,10 +57,13 @@ true - + true - + + true + + true diff --git a/aeqw89.Beam/aeqw89.Beam.csproj.bak b/aeqw89.Beam/aeqw89.Beam.csproj.bak index 20f0e68..6244b80 100644 --- a/aeqw89.Beam/aeqw89.Beam.csproj.bak +++ b/aeqw89.Beam/aeqw89.Beam.csproj.bak @@ -7,20 +7,29 @@ Beam aeqw89 qwsdcvghyu - 2.1.6 + 2.4.4 A library for downloading internet resources https://github.com/qwsdcvghyu89/Beam https://github.com/qwsdcvghyu89/Beam aeqw89.Beam - 2.1.6 + 2.4.4 + + all + + + all + all all + + all + all @@ -33,6 +42,9 @@ all + + true + true @@ -45,16 +57,19 @@ true - + true - + + true + + true true - + true