Files
qwsdcvghyu89 2965270928 feat: add deferred response buffering, TableDataProvider, and stealth improvements
- ApiResponse: add readToBuffer option to defer/stream body instead of eagerly buffering
- TableDataProvider: implement HTML table parser with per-column provider support
- StealthConfig: add 10s page load timeout and copyCookiesFrom parameter for cookie sharing
- StealthUnitDownloader: catch WebDriverTimeoutException on navigation, log warning instead of throwing
- Bump version to 2.9.0
2026-04-03 11:51:19 +11:00

155 lines
4.5 KiB
C#

using System.Text.Json;
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Reads the rows of an HTML table into a jagged string array, or, through the
/// <c>string</c> provider interface, into the JSON serialization of that array.
/// </summary>
/// <remarks>
/// Every <c>tr</c> descendant of the selected node becomes one output row. All rows have the
/// same number of columns: the larger of the <see cref="ColumnProviders"/> length and the
/// highest number of direct <c>td</c>/<c>th</c> children found in any row.
/// NOTE(review): rows of a table nested inside a cell are <c>tr</c> descendants too, so they
/// are emitted as rows of their own. Confirm that this is the intended behavior.
/// </remarks>
public class TableDataProvider
    : IComposableDataProvider<string>,
      IComposableDataProvider<string[][]>
{
    /// <summary>
    /// Binding used to pick the node whose rows are read. While it is <c>null</c>,
    /// <see cref="Select(HtmlDocument)"/> returns <c>null</c> and the document-level getters
    /// return an empty result.
    /// </summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// One provider per column. Each provider is executed per row.
    /// Columns without a provider (a null entry, or an index past the end of this array)
    /// default to the text of the td/th at that column index.
    /// </summary>
    public IDataProvider<string>[]? ColumnProviders { get; set; }

    /// <summary>
    /// Selects the table node from <paramref name="document"/> and reads its rows.
    /// </summary>
    /// <param name="document">Document handed to the <see cref="Content"/> binding.</param>
    /// <returns>One string array per row, or an empty array when no node is selected.</returns>
    public string[][] Get(HtmlDocument document)
    {
        // Select returns null both when Content is unset and when the binding returns null.
        var node = Select(document);
        if (node is null)
            return [];

        return Get(node);
    }

    /// <summary>
    /// Selects the table node from <paramref name="document"/> and returns its rows as JSON.
    /// </summary>
    /// <returns>
    /// The JSON text, or an empty string (not <c>"[]"</c>) when no node is selected.
    /// </returns>
    string IDataProvider<string>.Get(HtmlDocument document)
    {
        var node = Select(document);
        return node is null ? "" : ((IComposableDataProvider<string>)this).Get(node);
    }

    /// <summary>
    /// Reads every <c>tr</c> descendant of <paramref name="node"/> into a row of strings.
    /// </summary>
    /// <param name="node">Node whose descendant rows are read.</param>
    /// <returns>
    /// One array per row, each of the same length; an empty array when there are no rows or
    /// when there are neither cells nor column providers. Entries are never <c>null</c>.
    /// </returns>
    public string[][] Get(HtmlNode node)
    {
        var rows = node.Descendants("tr").ToList();
        if (rows.Count == 0)
            return [];

        // Filter each row's cells exactly once. The same lists drive both the column count
        // and the default per-column lookup, instead of re-filtering the row for every column.
        var cellsByRow = rows
            .Select(row => row.ChildNodes.Where(IsCell).ToList())
            .ToList();

        // Output width: max of the provided providers length and the widest row.
        var providers = ColumnProviders ?? Array.Empty<IDataProvider<string>>();
        var widestRow = cellsByRow.Max(cells => cells.Count);
        var columnCount = Math.Max(providers.Length, widestRow);
        if (columnCount == 0)
            return [];

        var result = new string[rows.Count][];
        for (int r = 0; r < rows.Count; r++)
        {
            var rowNode = rows[r];
            var cells = cellsByRow[r];
            var rowOut = new string[columnCount];

            for (int c = 0; c < columnCount; c++)
            {
                IDataProvider<string>? provider = c < providers.Length ? providers[c] : null;

                string? value;
                if (provider is null)
                {
                    // Default column: text of the td/th at this index, "" for short rows.
                    value = c < cells.Count ? cells[c].InnerText : "";
                }
                else if (provider is IComposableDataProvider<string> composable)
                {
                    // Execute with row context.
                    value = composable.Get(rowNode);
                }
                else
                {
                    // Fallback to document context: the provider cannot be scoped to the row.
                    value = provider.Get(rowNode.OwnerDocument);
                }

                rowOut[c] = value ?? "";
            }

            result[r] = rowOut;
        }

        return result;
    }

    /// <summary>
    /// Reads the rows under <paramref name="node"/> and serializes them with
    /// <see cref="JsonSerializer"/>.
    /// </summary>
    string IComposableDataProvider<string>.Get(HtmlNode node)
    {
        return JsonSerializer.Serialize(Get(node));
    }

    /// <summary>
    /// Applies the <see cref="Content"/> binding to <paramref name="doc"/>; returns
    /// <c>null</c> when <see cref="Content"/> is not set.
    /// </summary>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    /// <summary>Node-level selection is the identity: the given node is used as is.</summary>
    HtmlNode? IComposableDataProvider<string[][]>.Select(HtmlNode node) => node;

    /// <summary>Node-level selection is the identity: the given node is used as is.</summary>
    HtmlNode? IComposableDataProvider<string>.Select(HtmlNode node) => node;

    /// <summary>True when <paramref name="candidate"/> is a <c>td</c> or <c>th</c> element.</summary>
    private static bool IsCell(HtmlNode candidate) => candidate.Name is "td" or "th";
}