diff --git a/.idea/.idea.Beam/.idea/workspace.xml b/.idea/.idea.Beam/.idea/workspace.xml
index e82588c..b703e10 100644
--- a/.idea/.idea.Beam/.idea/workspace.xml
+++ b/.idea/.idea.Beam/.idea/workspace.xml
@@ -8,6 +8,10 @@
+
+
+
+
@@ -44,11 +48,13 @@
+
+
@@ -121,6 +127,8 @@
+
+
diff --git a/Beam.Api/ApiCall.cs b/Beam.Api/ApiCall.cs
index 13608f5..d559725 100644
--- a/Beam.Api/ApiCall.cs
+++ b/Beam.Api/ApiCall.cs
@@ -22,7 +22,7 @@ namespace Beam.Api;
private string? ContentType = "application/json";
- public async Task GetResponse(ILogger? logger, (int @try, int max)? tries = null, CancellationToken ct = default) {
+ public async Task GetResponse(ILogger? logger, (int @try, int max)? tries = null, bool readToBuffer = true, CancellationToken ct = default) {
SanitizeHeaders();
var request = new HttpRequestMessage(Method, Uri);
@@ -40,10 +40,10 @@ namespace Beam.Api;
if (tries is not null && tries?.@try < tries?.max && !SuccessCodes.Contains(response.StatusCode)) {
await Task.Delay((int)Math.Min(Math.Pow(2, tries.Value.@try), 60) * 1000, ct);
- return await GetResponse(logger, (tries.Value.@try + 1, tries.Value.max), ct);
+ return await GetResponse(logger, (tries.Value.@try + 1, tries.Value.max), readToBuffer, ct);
}
- return await ApiResponse.CreateAsync(response, logger, RequestData, ct);
+ return await ApiResponse.CreateAsync(response, logger, RequestData, readToBuffer, ct);
}
private void SanitizeHeaders() {
diff --git a/Beam.Api/ApiCalls.cs b/Beam.Api/ApiCalls.cs
index d45f6ed..93b28fd 100644
--- a/Beam.Api/ApiCalls.cs
+++ b/Beam.Api/ApiCalls.cs
@@ -33,7 +33,7 @@ namespace Beam.Api;
// sequential
var sequential = new List(_calls.Count);
foreach (var call in _calls)
- sequential.Add(await call.GetResponse(logger, tries, ct));
+ sequential.Add(await call.GetResponse(logger, tries, true, ct));
return sequential;
}
@@ -43,7 +43,7 @@ namespace Beam.Api;
_calls.Select((c, i) => (call: c, idx: i)),
new ParallelOptions { MaxDegreeOfParallelism = _maxDegree, CancellationToken = ct },
async (item, token) => {
- var response = await item.call.GetResponse(logger, tries, token);
+ var response = await item.call.GetResponse(logger, tries, true, token);
bag.Add((item.idx, response));
});
diff --git a/Beam.Api/ApiResponse.cs b/Beam.Api/ApiResponse.cs
index 55a22d8..b0f7fb5 100644
--- a/Beam.Api/ApiResponse.cs
+++ b/Beam.Api/ApiResponse.cs
@@ -14,11 +14,13 @@ namespace Beam.Api;
/// Wrapper that lets the response body be read any number of times (even concurrently).
///
public sealed class ApiResponse {
- private readonly byte[] _buffer;
+ private byte[] _buffer;
+ private bool _read_has_been_deferred;
private ApiResponse(HttpResponseMessage response, byte[] buffer, ILogger? logger, object? requestData = null) {
Response = response;
_buffer = buffer;
+ _read_has_been_deferred = _buffer.Length == 0; // NOTE(review): an empty buffer is taken to mean "body not read yet", so an already-buffered empty body is flagged as deferred too — confirm this is intended
Logger = logger;
RequestData = requestData;
}
@@ -33,8 +35,10 @@ namespace Beam.Api;
HttpResponseMessage response,
ILogger? logger = null,
object? requestData = null,
+ bool readToBuffer = true,
CancellationToken ct = default) {
if (response is null) throw new ArgumentNullException(nameof(response));
+ if (!readToBuffer) return new ApiResponse(response, [], logger, requestData);
var buffer = response.Content is null
? []
@@ -55,32 +59,54 @@ namespace Beam.Api;
if (!Is200) errorHandler(Response.StatusCode);
return this;
}
-
/* ---------- content helpers ---------- */
- public Task AsSerializedObject(CancellationToken ct = default) {
+ // Fills _buffer from the response content the first time it is needed after a deferred read;
+ // does nothing once the deferred flag has been cleared.
+ private async Task ReadToBuffer(CancellationToken ct = default) {
+     if (_read_has_been_deferred) {
+         var content = Response.Content;
+         // A response without content is represented by an empty buffer.
+         _buffer = content is null
+             ? []
+             : await content.ReadAsByteArrayAsync(ct).ConfigureAwait(false);
+         _read_has_been_deferred = false;
+     }
+ }
+
+ public async Task AsSerializedObject(CancellationToken ct = default) {
if (!Is200) throw new InvalidOperationException();
if (Response.Content?.Headers.ContentType?.MediaType != "application/json")
Logger?.LogWarning("Content-Type is not JSON, yet JSON deserialization was requested.");
- return Task.FromResult(JsonSerializer.Deserialize(_buffer));
+ if (_read_has_been_deferred) {
+ return await JsonSerializer.DeserializeAsync(await Response.Content!.ReadAsStreamAsync(ct), (JsonSerializerOptions?)null, ct);
+ } else {
+ return JsonSerializer.Deserialize(_buffer);
+ }
}
public Task AsDynamicObject(T _, CancellationToken ct = default)
=> AsSerializedObject(ct);
- public Task AsString(CancellationToken ct = default) {
+ /// <summary>
+ /// Returns the response body decoded as UTF-8, buffering it first if the read was deferred.
+ /// A warning is logged (but the read still proceeds) when Is200 is false.
+ /// </summary>
+ /// <param name="ct">Token forwarded to the deferred body read.</param>
+ public async Task AsString(CancellationToken ct = default) {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
- return Task.FromResult(Encoding.UTF8.GetString(_buffer));
+ // ReadToBuffer returns immediately when nothing was deferred, so no extra guard is needed here.
+ await ReadToBuffer(ct).ConfigureAwait(false);
+
+ return Encoding.UTF8.GetString(_buffer);
}
- public Task AsBinary(CancellationToken ct = default) {
+ /// <summary>
+ /// Returns the response body as the internal byte buffer, buffering it first if the read was deferred.
+ /// A warning is logged (but the read still proceeds) when Is200 is false.
+ /// </summary>
+ /// <param name="ct">Token forwarded to the deferred body read.</param>
+ public async Task AsBinary(CancellationToken ct = default) {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
- return Task.FromResult(_buffer);
+ // ReadToBuffer returns immediately when nothing was deferred, so no extra guard is needed here.
+ await ReadToBuffer(ct).ConfigureAwait(false);
+ return _buffer;
}
- public Task AsStream(CancellationToken ct = default) {
+ /// <summary>
+ /// Returns the response body as a stream: a read-only MemoryStream over the buffered bytes, or,
+ /// when the body read was deferred, the stream obtained from the response content itself.
+ /// A warning is logged (but the read still proceeds) when Is200 is false.
+ /// </summary>
+ /// <param name="ct">Token forwarded to the content stream request on the deferred path.</param>
+ public async Task AsStream(CancellationToken ct = default) {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
- return Task.FromResult(new MemoryStream(_buffer, writable: false));
+ if (!_read_has_been_deferred) return new MemoryStream(_buffer, writable: false);
+
+ // NOTE(review): the deferred path hands out the content's own stream without filling _buffer;
+ // presumably the caller owns it and must dispose it — confirm.
+ return await Response.Content!.ReadAsStreamAsync(ct).ConfigureAwait(false);
}
}
diff --git a/Beam.Dynamic/DataProviders/TableDataProvider.cs b/Beam.Dynamic/DataProviders/TableDataProvider.cs
index c0693a6..6dfaf59 100644
--- a/Beam.Dynamic/DataProviders/TableDataProvider.cs
+++ b/Beam.Dynamic/DataProviders/TableDataProvider.cs
@@ -1,5 +1,154 @@
+
+using System.Text.Json;
+using Beam.Abstractions;
+using HtmlAgilityPack;
+
namespace Beam.Dynamic;
-public class TableDataProvider {
-
-}
\ No newline at end of file
+public class TableDataProvider // Reads the tr rows under a selected node into string[][], or into the JSON text of that array.
+ : IComposableDataProvider,
+ IComposableDataProvider // NOTE(review): both interface entries read identically here; generic type arguments were presumably lost — confirm against the real file
+{
+ public IBinding? Content { get; set; } // Binding used by Select(HtmlDocument) to locate the node whose rows are read.
+
+ /// <summary>
+ /// One provider per column. Each provider is executed per row.
+ /// Missing columns are filled with defaults that return the td/th text at that column index.
+ /// </summary>
+ public IDataProvider[]? ColumnProviders { get; set; }
+
+ public string[][] Get(HtmlDocument document) // Returns [] when Content is unset or no node is selected; otherwise the rows of the selected node.
+ {
+ if (Content is null)
+ return [];
+
+ var node = Select(document);
+ if (node is null)
+ return [];
+
+ return Get(node);
+ }
+
+ string IDataProvider.Get(HtmlDocument document) // Explicit interface form: "" when no node is selected, otherwise the string result of the explicit Get(HtmlNode).
+ {
+ var node = Select(document);
+ return node is null ? "" : (this as IComposableDataProvider).Get(node);
+ }
+
+ public string[][] Get(HtmlNode node) // One string[] per tr found under node, each with columnCount entries.
+ {
+ var rows = node.Descendants("tr").ToList(); // NOTE(review): Descendants presumably also matches rows of nested tables — confirm this is wanted
+ if (rows.Count == 0)
+ return [];
+
+ // Determine how many columns we should output:
+ // max of provided providers length and max cell count across rows.
+ var maxCellsInAnyRow = rows
+ .Select(r => r.ChildNodes.Count(n => n.Name == "td" || n.Name == "th")) // only direct td/th children of the row are counted
+ .DefaultIfEmpty(0)
+ .Max();
+
+ var providedCount = ColumnProviders?.Length ?? 0;
+ var columnCount = Math.Max(providedCount, maxCellsInAnyRow);
+
+ if (columnCount == 0)
+ return [];
+
+ var effectiveProviders = BuildEffectiveProviders(columnCount);
+
+ var result = new string[rows.Count][];
+ for (int r = 0; r < rows.Count; r++)
+ {
+ var rowNode = rows[r];
+ var rowOut = new string[columnCount];
+
+ for (int c = 0; c < columnCount; c++)
+ {
+ var provider = effectiveProviders[c];
+
+ if (provider is IComposableDataProvider composable)
+ {
+ // Execute with row context: the provider receives the tr node itself.
+ rowOut[c] = composable.Get(rowNode);
+ }
+ else
+ {
+ // Fallback to document context: the provider receives the row's owning document, not the row.
+ rowOut[c] = provider.Get(rowNode.OwnerDocument);
+ }
+
+ rowOut[c] ??= ""; // a null provider result is stored as an empty string
+ }
+
+ result[r] = rowOut;
+ }
+
+ return result;
+ }
+
+ string IComposableDataProvider.Get(HtmlNode node) // Explicit interface form: the string[][] result serialized as JSON text.
+ {
+ return JsonSerializer.Serialize(Get(node));
+ }
+
+ public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc); // null when Content is unset
+
+ HtmlNode? IComposableDataProvider.Select(HtmlNode node) => node; // the given node is used as-is
+ HtmlNode? IComposableDataProvider.Select(HtmlNode node) => node; // NOTE(review): reads as a duplicate of the line above; generic type arguments were presumably lost — confirm
+
+ private IDataProvider[] BuildEffectiveProviders(int columnCount) // Returns exactly columnCount providers: supplied ones first (null entries replaced), defaults for the rest.
+ {
+ var effective = new IDataProvider[columnCount];
+
+ if (ColumnProviders is null || ColumnProviders.Length == 0)
+ {
+ for (int i = 0; i < columnCount; i++)
+ effective[i] = new ColumnCellContentsProvider(i);
+ return effective;
+ }
+
+ var maxCopy = Math.Min(ColumnProviders.Length, columnCount);
+ for (int i = 0; i < maxCopy; i++)
+ effective[i] = ColumnProviders[i] ?? new ColumnCellContentsProvider(i);
+
+ for (int i = maxCopy; i < columnCount; i++)
+ effective[i] = new ColumnCellContentsProvider(i);
+
+ return effective;
+ }
+
+ /// <summary>
+ /// Default column provider: for a given row, returns text of td/th at ColumnIndex.
+ /// </summary>
+ private sealed class ColumnCellContentsProvider : IComposableDataProvider
+ {
+ public int ColumnIndex { get; }
+
+ public ColumnCellContentsProvider(int columnIndex)
+ {
+ ColumnIndex = columnIndex;
+ }
+
+ public string Get(HtmlDocument document) // Document form: the document node itself is passed on as the "row".
+ {
+ var node = Select(document);
+ return node is null ? "" : Get(node);
+ }
+
+ public string Get(HtmlNode rowNode) // InnerText of the ColumnIndex-th direct td/th child, or "" when there is no such cell.
+ {
+ var cells = rowNode
+ .ChildNodes
+ .Where(n => n.Name == "td" || n.Name == "th")
+ .ToList();
+
+ if (ColumnIndex < 0 || ColumnIndex >= cells.Count)
+ return "";
+
+ return cells[ColumnIndex].InnerText;
+ }
+
+ public HtmlNode? Select(HtmlDocument doc) => doc.DocumentNode;
+ public HtmlNode? Select(HtmlNode node) => node;
+ }
+}
diff --git a/Beam.Stealth/StealthConfig.cs b/Beam.Stealth/StealthConfig.cs
index a53e119..006cf71 100644
--- a/Beam.Stealth/StealthConfig.cs
+++ b/Beam.Stealth/StealthConfig.cs
@@ -41,6 +41,8 @@ namespace Beam.Stealth {
o.SetPreference("pdfjs.disabled", true); // open PDFs externally
o.SetPreference("browser.download.manager.showWhenStarting", false);
+ o.PageLoadTimeout = TimeSpan.FromSeconds(10);
+
return o;
}
@@ -56,6 +58,8 @@ namespace Beam.Stealth {
// common stability flags
o.AddArgument("--no-sandbox");
o.AddArgument("--disable-dev-shm-usage");
+
+ o.PageLoadTimeout = TimeSpan.FromSeconds(10);
return o;
}
@@ -68,6 +72,8 @@ namespace Beam.Stealth {
o.AddUserProfilePreference("download.prompt_for_download", false);
o.AddUserProfilePreference("safebrowsing.enabled", false);
+ o.PageLoadTimeout = TimeSpan.FromSeconds(10);
+
return o;
}
@@ -80,7 +86,8 @@ namespace Beam.Stealth {
Browser preferredBrowser = Browser.Firefox,
string? remoteAddress = null,
Addon[]? utilityAddons = null,
- ILogger? logger = null) {
+ ILogger? logger = null,
+ IWebDriver? copyCookiesFrom = null) {
// pick or create a dedicated download folder
downloadDir ??= Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
Directory.CreateDirectory(downloadDir);
@@ -130,6 +137,11 @@ namespace Beam.Stealth {
if (driver is null)
throw new AggregateException(errors);
+
+ if (copyCookiesFrom != null) {
+ foreach (var cookie in copyCookiesFrom.Manage().Cookies.AllCookies)
+ driver.Manage().Cookies.AddCookie(new Cookie(cookie.Name, cookie.Value, cookie.Domain, cookie.Path, cookie.Expiry));
+ }
return new StealthConfig(downloadDir) {
ShowBrowser = showBrowser,
@@ -139,7 +151,7 @@ namespace Beam.Stealth {
Driver = driver
};
}
-
+
public void Dispose() {
Driver.Dispose();
}
diff --git a/Beam.Stealth/StealthUnitDownloader.cs b/Beam.Stealth/StealthUnitDownloader.cs
index b489c41..8792eb2 100644
--- a/Beam.Stealth/StealthUnitDownloader.cs
+++ b/Beam.Stealth/StealthUnitDownloader.cs
@@ -10,6 +10,8 @@ using Beam.Abstractions;
using Beam.Downloaders;
using Beam.Models;
using Beam.Stealth.Strategies;
+using OpenQA.Selenium;
+using OpenQA.Selenium.Firefox;
namespace Beam.Stealth {
using File = System.IO.File;
@@ -35,9 +37,15 @@ namespace Beam.Stealth {
protected override async Task DownloadToStream(string url, int bufferSize, Stream destinationStream,
IProgress progress, CancellationToken ct) {
var driver = Config.Driver;
- await driver.Navigate().GoToUrlAsync(url);
- await Manipulator(driver);
+ try {
+     await driver.Navigate().GoToUrlAsync(url);
+ }
+ catch (WebDriverTimeoutException ex) {
+     // Best effort: a timeout during navigation is tolerated and the download is still attempted.
+     // The exception is handed to the logger so the timeout details are not lost.
+     Logger?.LogWarning(ex, "Timeout navigating to {url}", url);
+ }
+ await Manipulator(driver);
+
await _downloadStrategy.DownloadToStream(url, bufferSize, destinationStream, progress, Config, Logger, ct);
}
diff --git a/aeqw89.Beam/aeqw89.Beam.csproj b/aeqw89.Beam/aeqw89.Beam.csproj
index 4ea2ed4..47c070c 100644
--- a/aeqw89.Beam/aeqw89.Beam.csproj
+++ b/aeqw89.Beam/aeqw89.Beam.csproj
@@ -7,12 +7,12 @@
Beam
aeqw89
qwsdcvghyu
- 2.7.0
+ 2.9.0
A library for downloading internet resources
https://github.com/qwsdcvghyu89/Beam
https://github.com/qwsdcvghyu89/Beam
aeqw89.Beam
- 2.7.0
+ 2.9.0
diff --git a/aeqw89.Beam/aeqw89.Beam.csproj.bak b/aeqw89.Beam/aeqw89.Beam.csproj.bak
index f3ff4e3..25703e6 100644
--- a/aeqw89.Beam/aeqw89.Beam.csproj.bak
+++ b/aeqw89.Beam/aeqw89.Beam.csproj.bak
@@ -7,12 +7,12 @@
Beam
aeqw89
qwsdcvghyu
- 2.6.3
+ 2.8.4
A library for downloading internet resources
https://github.com/qwsdcvghyu89/Beam
https://github.com/qwsdcvghyu89/Beam
aeqw89.Beam
- 2.6.3
+ 2.8.4