2965270928
- ApiResponse: add readToBuffer option to defer/stream body instead of eagerly buffering - TableDataProvider: implement HTML table parser with per-column provider support - StealthConfig: add 10s page load timeout and copyCookiesFrom parameter for cookie sharing - StealthUnitDownloader: catch WebDriverTimeoutException on navigation, log warning instead of throwing - Bump version to 2.9.0
155 lines
4.5 KiB
C#
155 lines
4.5 KiB
C#
|
|
using System.Text.Json;
|
|
using Beam.Abstractions;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace Beam.Dynamic;
|
|
|
|
/// <summary>
/// Reads the rows of an HTML table and exposes them either as a jagged
/// <c>string[][]</c> (one inner array per <c>tr</c>) or as the JSON
/// serialization of that array.
/// </summary>
public class TableDataProvider
    : IComposableDataProvider<string>,
      IComposableDataProvider<string[][]>
{
    /// <summary>
    /// Binding used to locate the node whose rows are read. When it is
    /// <c>null</c>, document-level lookups produce an empty result.
    /// </summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// One provider per column. Each provider is executed per row.
    /// Missing columns are filled with defaults that return the td/th text at that column index.
    /// </summary>
    public IDataProvider<string>[]? ColumnProviders { get; set; }

    /// <summary>
    /// Selects the content node in <paramref name="document"/> and returns its rows.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>
    /// The extracted rows, or an empty array when <see cref="Content"/> is unset
    /// or selects nothing.
    /// </returns>
    public string[][] Get(HtmlDocument document)
    {
        // Select already yields null when Content is null, so one check covers both cases.
        var node = Select(document);
        return node is null ? [] : Get(node);
    }

    /// <summary>
    /// String flavour of the document-level lookup: the rows serialized as JSON,
    /// or an empty string when no content node is selected.
    /// </summary>
    string IDataProvider<string>.Get(HtmlDocument document)
    {
        var node = Select(document);
        return node is null ? "" : ((IComposableDataProvider<string>)this).Get(node);
    }

    /// <summary>
    /// Extracts every <c>tr</c> found beneath <paramref name="node"/> as one output row.
    /// </summary>
    /// <param name="node">Node whose descendant rows are read.</param>
    /// <returns>
    /// One array per row. All rows have the same length: the larger of the number of
    /// configured <see cref="ColumnProviders"/> and the widest row's td/th child count.
    /// Returns an empty array when there are no rows or no columns.
    /// </returns>
    public string[][] Get(HtmlNode node)
    {
        // NOTE: Descendants("tr") collects rows at any depth below the node,
        // not only rows that are direct children of a single table.
        var rows = node.Descendants("tr").ToList();
        if (rows.Count == 0)
        {
            return [];
        }

        // rows is non-empty here, so Max needs no empty-sequence fallback.
        var widestRow = rows.Max(row => row.ChildNodes.Count(n => IsCell(n)));
        var columnCount = Math.Max(ColumnProviders?.Length ?? 0, widestRow);
        if (columnCount == 0)
        {
            return [];
        }

        var providers = BuildEffectiveProviders(columnCount);
        var result = new string[rows.Count][];

        for (var r = 0; r < rows.Count; r++)
        {
            var rowNode = rows[r];
            var values = new string[columnCount];

            for (var c = 0; c < columnCount; c++)
            {
                values[c] = Evaluate(providers[c], rowNode);
            }

            result[r] = values;
        }

        return result;
    }

    /// <summary>
    /// String flavour of the node-level lookup: the extracted rows serialized as JSON.
    /// </summary>
    string IComposableDataProvider<string>.Get(HtmlNode node)
    {
        return JsonSerializer.Serialize(Get(node));
    }

    /// <summary>
    /// Resolves the content node through <see cref="Content"/>; <c>null</c> when
    /// the binding is unset.
    /// </summary>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    /// <summary>Node-level selection is the identity: the given node is used as-is.</summary>
    HtmlNode? IComposableDataProvider<string[][]>.Select(HtmlNode node) => node;

    /// <summary>Node-level selection is the identity: the given node is used as-is.</summary>
    HtmlNode? IComposableDataProvider<string>.Select(HtmlNode node) => node;

    /// <summary>
    /// Produces exactly <paramref name="columnCount"/> providers: the configured one
    /// where present and non-null, otherwise a default that reads the cell text at
    /// that column index.
    /// </summary>
    private IDataProvider<string>[] BuildEffectiveProviders(int columnCount)
    {
        var configured = ColumnProviders;
        var effective = new IDataProvider<string>[columnCount];

        for (var i = 0; i < columnCount; i++)
        {
            var custom = configured is not null && i < configured.Length
                ? configured[i]
                : null;

            effective[i] = custom ?? new ColumnCellContentsProvider(i);
        }

        return effective;
    }

    /// <summary>
    /// Runs one column provider for one row. Composable providers receive the row node;
    /// any other provider is evaluated against the row's owner document instead.
    /// A null result is normalized to an empty string.
    /// </summary>
    private static string Evaluate(IDataProvider<string> provider, HtmlNode rowNode)
    {
        var value = provider is IComposableDataProvider<string> composable
            ? composable.Get(rowNode)
            : provider.Get(rowNode.OwnerDocument);

        return value ?? "";
    }

    /// <summary>True for table cell elements (<c>td</c> or <c>th</c>).</summary>
    private static bool IsCell(HtmlNode node) => node.Name is "td" or "th";

    /// <summary>
    /// Default column provider: for a given row, returns text of td/th at ColumnIndex.
    /// </summary>
    private sealed class ColumnCellContentsProvider : IComposableDataProvider<string>
    {
        /// <summary>Zero-based position among the row's td/th children.</summary>
        public int ColumnIndex { get; }

        public ColumnCellContentsProvider(int columnIndex) => ColumnIndex = columnIndex;

        /// <summary>
        /// Document-level lookup: treats the document node as the row.
        /// </summary>
        public string Get(HtmlDocument document)
        {
            var node = Select(document);
            return node is null ? "" : Get(node);
        }

        /// <summary>
        /// Returns the inner text of the td/th child at <see cref="ColumnIndex"/>,
        /// or an empty string when the row has no cell at that position.
        /// </summary>
        public string Get(HtmlNode rowNode)
        {
            // Lazy lookup: stops at the requested cell instead of materializing
            // the whole cell list for every (row, column) pair. ElementAtOrDefault
            // yields null for negative or out-of-range indexes.
            var cell = rowNode.ChildNodes
                .Where(n => IsCell(n))
                .ElementAtOrDefault(ColumnIndex);

            return cell?.InnerText ?? "";
        }

        public HtmlNode? Select(HtmlDocument doc) => doc.DocumentNode;

        public HtmlNode? Select(HtmlNode node) => node;
    }
}
|