feat: add deferred response buffering, TableDataProvider, and stealth improvements
- ApiResponse: add readToBuffer option to defer/stream body instead of eagerly buffering
- TableDataProvider: implement HTML table parser with per-column provider support
- StealthConfig: add 10s page load timeout and copyCookiesFrom parameter for cookie sharing
- StealthUnitDownloader: catch WebDriverTimeoutException on navigation, log warning instead of throwing
- Bump version to 2.9.0
This commit is contained in:
@@ -1,5 +1,154 @@
|
||||
|
||||
using System.Text.Json;
|
||||
using Beam.Abstractions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic;
|
||||
|
||||
// Declares TableDataProvider with an empty body: no members, base types, or interfaces.
// NOTE(review): a second, full declaration of TableDataProvider follows later in this text;
// this empty one looks like the removed side of a diff — confirm before compiling both.
public class TableDataProvider {
|
||||
|
||||
}
|
||||
/// <summary>
/// Reads the <c>tr</c> descendants of a selected node into a jagged array of strings
/// (one inner array per row, one entry per column). Through the
/// <see cref="IComposableDataProvider{T}"/> of <see cref="string"/> view the same data
/// is returned serialized as JSON.
/// </summary>
public class TableDataProvider
    : IComposableDataProvider<string>,
      IComposableDataProvider<string[][]>
{
    /// <summary>
    /// Binding used to locate the node to read from a document. When it is
    /// <c>null</c>, the document-level reads return an empty result.
    /// </summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// One provider per column. Each provider is executed per row.
    /// Missing columns are filled with defaults that return the td/th text at that column index.
    /// </summary>
    public IDataProvider<string>[]? ColumnProviders { get; set; }

    /// <summary>
    /// Locates the node through <see cref="Content"/> and extracts its rows.
    /// </summary>
    /// <param name="document">Document handed to the <see cref="Content"/> binding.</param>
    /// <returns>
    /// The extracted rows, or an empty array when <see cref="Content"/> is unset or selects nothing.
    /// </returns>
    public string[][] Get(HtmlDocument document)
    {
        if (Content is null || Select(document) is not { } tableNode)
        {
            return [];
        }

        return Get(tableNode);
    }

    /// <summary>
    /// String view of the document-level read: the JSON produced by the node-level string read,
    /// or an empty string (not <c>"[]"</c>) when no node is selected.
    /// </summary>
    string IDataProvider<string>.Get(HtmlDocument document)
    {
        if (Select(document) is not { } tableNode)
        {
            return "";
        }

        // Dispatch through the interface so the node-level string implementation is the one used.
        return ((IComposableDataProvider<string>)this).Get(tableNode);
    }

    /// <summary>
    /// Extracts one string array per <c>tr</c> descendant of <paramref name="node"/>.
    /// </summary>
    /// <param name="node">Node whose <c>tr</c> descendants are treated as rows.</param>
    /// <returns>
    /// An array with one entry per row; every row has the same length, namely the larger of
    /// the number of <see cref="ColumnProviders"/> and the highest td/th child count of any row.
    /// Empty when there are no rows or that width is zero.
    /// </returns>
    public string[][] Get(HtmlNode node)
    {
        // NOTE(review): Descendants also yields rows of any nested table — confirm that is intended.
        var tableRows = node.Descendants("tr").ToArray();
        if (tableRows.Length == 0)
        {
            return [];
        }

        // Output width: max of configured provider count and the widest row's direct td/th count.
        // NOTE(review): colspan is not taken into account here — confirm that is acceptable.
        var widestRow = tableRows.Max(row => row.ChildNodes.Count(IsCell));
        var width = Math.Max(ColumnProviders?.Length ?? 0, widestRow);
        if (width == 0)
        {
            return [];
        }

        var providers = BuildEffectiveProviders(width);
        return tableRows
            .Select(row => ExtractRow(row, providers))
            .ToArray();
    }

    /// <summary>
    /// Node-level string read: the rows of <paramref name="node"/> serialized as JSON.
    /// </summary>
    string IComposableDataProvider<string>.Get(HtmlNode node) =>
        JsonSerializer.Serialize(Get(node));

    /// <summary>
    /// Resolves the node to read via <see cref="Content"/>; <c>null</c> when no binding is set.
    /// </summary>
    public HtmlNode? Select(HtmlDocument doc)
    {
        return Content?.Select(doc);
    }

    // Node-level selection is the identity for both interface views.
    HtmlNode? IComposableDataProvider<string[][]>.Select(HtmlNode node) => node;

    HtmlNode? IComposableDataProvider<string>.Select(HtmlNode node) => node;

    /// <summary>
    /// Builds the provider used for every output column: the configured provider where one
    /// exists (and is non-null), otherwise a default that reads the cell at that index.
    /// </summary>
    /// <param name="columnCount">Number of output columns.</param>
    private IDataProvider<string>[] BuildEffectiveProviders(int columnCount)
    {
        var configured = ColumnProviders;
        var providers = new IDataProvider<string>[columnCount];

        for (var index = 0; index < providers.Length; index++)
        {
            var custom = configured is not null && index < configured.Length
                ? configured[index]
                : null;

            providers[index] = custom ?? new ColumnCellContentsProvider(index);
        }

        return providers;
    }

    /// <summary>
    /// Produces the values of a single row by running each column's provider.
    /// </summary>
    private static string[] ExtractRow(HtmlNode rowNode, IDataProvider<string>[] providers)
    {
        var values = new string[providers.Length];

        for (var column = 0; column < values.Length; column++)
        {
            var value = providers[column] switch
            {
                // Composable providers are executed with the row as context.
                IComposableDataProvider<string> rowAware => rowAware.Get(rowNode),
                // Anything else only understands documents, so it gets the row's owner document.
                var documentOnly => documentOnly.Get(rowNode.OwnerDocument),
            };

            values[column] = value ?? "";
        }

        return values;
    }

    /// <summary>True for nodes named <c>td</c> or <c>th</c>.</summary>
    private static bool IsCell(HtmlNode candidate) => candidate.Name is "td" or "th";

    /// <summary>
    /// Default column provider: for a given row, returns text of td/th at ColumnIndex.
    /// </summary>
    private sealed class ColumnCellContentsProvider(int columnIndex) : IComposableDataProvider<string>
    {
        /// <summary>Zero-based position of the td/th child this provider reads.</summary>
        public int ColumnIndex { get; } = columnIndex;

        /// <summary>
        /// Document-level read: applies the row read to the document's root node.
        /// </summary>
        public string Get(HtmlDocument document) =>
            Select(document) is { } root ? Get(root) : "";

        /// <summary>
        /// Returns the <c>InnerText</c> of the td/th child at <see cref="ColumnIndex"/>,
        /// or an empty string when the row has no such cell.
        /// </summary>
        public string Get(HtmlNode rowNode)
        {
            // ElementAtOrDefault covers both a negative index and an index past the last cell.
            var cell = rowNode.ChildNodes
                .Where(IsCell)
                .ElementAtOrDefault(ColumnIndex);

            // NOTE(review): InnerText is returned as-is; presumably HTML entities stay encoded — verify.
            return cell is null ? "" : cell.InnerText;
        }

        /// <summary>Selects the document's root node.</summary>
        public HtmlNode? Select(HtmlDocument doc) => doc.DocumentNode;

        /// <summary>Node-level selection is the identity.</summary>
        public HtmlNode? Select(HtmlNode node) => node;
    }
}
|
||||
|
||||
Reference in New Issue
Block a user