feat: introduce new composable data providers and increment version

- Added `AnchorDataProvider`, `AnchorCollectionDataProvider`, `ContentsDataProvider`, `ContentsArrayDataProvider`, `DropDownDataProvider`, `ListContentDataProvider`, and `ParagraphedContentDataProvider` for enhanced data extraction flexibility.
- Updated project version to 2.5.0.
This commit is contained in:
qwsdcvghyu89
2025-11-15 20:51:18 +11:00
parent b5faf58b1a
commit 647b2b0f37
11 changed files with 279 additions and 4 deletions
@@ -0,0 +1,46 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Collects the <c>href</c> attribute values found beneath a node and returns them
/// as absolute URI strings, resolving relative values against <see cref="RelativeTo"/>.
/// </summary>
public class AnchorCollectionDataProvider : IComposableDataProvider<string[]> {
    /// <summary>Binding used to pick the container node out of a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base URI that relative <c>href</c> values are resolved against.</summary>
    public Uri? RelativeTo { get; set; }

    /// <summary>
    /// Selects the container node through <see cref="Content"/> and extracts its links.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The resolved links, or an empty array when no node is selected.</returns>
    public string[] Get(HtmlDocument document) {
        var node = Select(document);
        return node is null ? [] : Get(node);
    }

    /// <summary>
    /// Gathers the <c>href</c> of every descendant of <paramref name="node"/> that has one.
    /// </summary>
    /// <param name="node">Container whose descendants are inspected.</param>
    /// <returns>Absolute URI strings for each href that could be resolved.</returns>
    public string[] Get(HtmlNode node) {
        List<string> links = [];
        foreach (var child in node.Descendants()) {
            var href = child.GetAttributeValue("href", "");
            // Skip descendants without a usable href up front: an empty relative
            // reference resolves to RelativeTo itself, which would otherwise add
            // one bogus copy of the base URI per href-less descendant.
            if (string.IsNullOrWhiteSpace(href)) {
                continue;
            }
            if (Uri.TryCreate(RelativeTo, href, out var uri)) {
                links.Add(uri.AbsoluteUri);
            }
        }
        return links.ToArray();
    }

    /// <summary>Selects the container node by delegating to <see cref="Content"/>.</summary>
    /// <returns>The selected node, or <c>null</c> when <see cref="Content"/> is unset or selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) {
        return Content?.Select(doc);
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) {
        return node;
    }

    /// <summary>Not implemented.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        throw new NotImplementedException();
    }
}
}
@@ -0,0 +1,32 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Reads the <c>href</c> attribute of a single node and returns it as an absolute
/// URI string, resolving relative values against <see cref="RelativeTo"/>.
/// </summary>
public class AnchorDataProvider : IComposableDataProvider<string> {
    /// <summary>Binding used to pick the anchor node out of a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base URI that a relative <c>href</c> is resolved against.</summary>
    public Uri? RelativeTo { get; set; }

    /// <summary>
    /// Selects the node through <see cref="Content"/> and resolves its link.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The resolved link, or an empty string when no node is selected.</returns>
    public string Get(HtmlDocument document) {
        var node = Select(document);
        return node is null ? "" : Get(node);
    }

    /// <summary>
    /// Resolves the <c>href</c> attribute of <paramref name="node"/>.
    /// </summary>
    /// <param name="node">Node carrying the href attribute.</param>
    /// <returns>
    /// The absolute URI string, or an empty string when the attribute is missing,
    /// blank, or cannot be resolved.
    /// </returns>
    public virtual string Get(HtmlNode node) {
        var href = node.GetAttributeValue("href", "");
        // A missing/blank href must not be resolved: an empty relative reference
        // resolves to RelativeTo itself and would be reported as a real link.
        if (string.IsNullOrWhiteSpace(href)) {
            return "";
        }
        return Uri.TryCreate(RelativeTo, href, out var uri) ? uri.AbsoluteUri : "";
    }

    /// <summary>Selects the node by delegating to <see cref="Content"/>.</summary>
    /// <returns>The selected node, or <c>null</c> when <see cref="Content"/> is unset or selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) {
        return Content?.Select(doc);
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) {
        return node;
    }
}
}
@@ -0,0 +1,18 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic {
/// <summary>
/// Variant of <see cref="ContentsDataProvider"/> that splits a node's inner text
/// into an array using <see cref="ArrayDelimiters"/>.
/// </summary>
public class ContentsArrayDataProvider : ContentsDataProvider, IComposableDataProvider<string[]> {
    /// <summary>Separators used to split the inner text; defaults to a single <c>";"</c>.</summary>
    public string[] ArrayDelimiters { get; set; } = [";"];

    /// <summary>
    /// Selects a node from <paramref name="document"/> and splits its text.
    /// </summary>
    /// <returns>The split entries, or an empty array when no node is selected.</returns>
    string[] IDataProvider<string[]>.Get(HtmlDocument document) {
        if (Select(document) is { } selected) {
            return Get(selected);
        }
        return [];
    }

    /// <summary>
    /// Splits the inner text of <paramref name="node"/> on <see cref="ArrayDelimiters"/>,
    /// discarding empty entries.
    /// </summary>
    /// <returns>The non-empty entries, or an empty array when the inner text is <c>null</c>.</returns>
    public new string[] Get(HtmlNode node) {
        var text = node.InnerText;
        if (text is null) {
            return [];
        }
        return text.Split(ArrayDelimiters, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
@@ -0,0 +1,30 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Data provider that yields the inner text of the node chosen by <see cref="Content"/>.
/// </summary>
public class ContentsDataProvider : IComposableDataProvider<string> {
    /// <summary>Binding that picks the node to read out of a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Reads the inner text of the node selected from <paramref name="document"/>.
    /// </summary>
    /// <returns>The node's inner text, or an empty string when no node is selected.</returns>
    public string Get(HtmlDocument document) {
        if (Select(document) is { } selected) {
            return Get(selected);
        }
        return "";
    }

    /// <summary>Returns the inner text of <paramref name="node"/>.</summary>
    public string Get(HtmlNode node) => node.InnerText;

    /// <summary>Selects the node by delegating to <see cref="Content"/>.</summary>
    /// <returns>The selected node, or <c>null</c> when <see cref="Content"/> is unset or selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) => node;
}
}
@@ -0,0 +1,59 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.Marshalling;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic;
/// <summary>
/// Extracts the <c>value</c> attributes of the <c>option</c> children of a node as
/// absolute URI strings. Exposed both as an array and, through the
/// <c>string</c> provider interfaces, as a JSON-serialized array.
/// </summary>
public class DropDownDataProvider : IComposableDataProvider<string>, IComposableDataProvider<string[]> {
    /// <summary>Binding used to pick the drop-down node out of a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base URI that relative option values are resolved against.</summary>
    public Uri? RelativeTo { get; set; }

    /// <summary>
    /// Selects the drop-down node through <see cref="Content"/> and extracts its option links.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The resolved links, or an empty array when no node is selected.</returns>
    public string[] Get(HtmlDocument document) {
        // Select already yields null when Content is unset, so no separate check is needed.
        var node = Select(document);
        return node is null ? [] : Get(node);
    }

    /// <summary>
    /// Selects the drop-down node and returns its option links serialized as JSON.
    /// </summary>
    /// <returns>The JSON text, or an empty string when no node is selected.</returns>
    string IDataProvider<string>.Get(HtmlDocument document) {
        var node = Select(document);
        return node is null ? "" : ((IComposableDataProvider<string>)this).Get(node);
    }

    /// <summary>
    /// Resolves the <c>value</c> attribute of each direct <c>option</c> child of <paramref name="node"/>.
    /// </summary>
    /// <param name="node">Node whose direct children are inspected.</param>
    /// <returns>Absolute URI strings for each option value that could be resolved.</returns>
    public string[] Get(HtmlNode node) {
        List<string> links = [];
        foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) {
            var childValue = child.GetAttributeValue("value", "");
            // Placeholder options (missing or empty value) must be skipped: an empty
            // relative reference resolves to RelativeTo itself and would be
            // reported as a real link.
            if (string.IsNullOrWhiteSpace(childValue)) {
                continue;
            }
            if (Uri.TryCreate(RelativeTo, childValue, out var uri)) {
                links.Add(uri.AbsoluteUri);
            }
        }
        return links.ToArray();
    }

    /// <summary>Returns the option links of <paramref name="node"/> serialized as a JSON array.</summary>
    string IComposableDataProvider<string>.Get(HtmlNode node) {
        return JsonSerializer.Serialize(Get(node));
    }

    /// <summary>Selects the drop-down node by delegating to <see cref="Content"/>.</summary>
    /// <returns>The selected node, or <c>null</c> when <see cref="Content"/> is unset or selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) {
        return Content?.Select(doc);
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    HtmlNode? IComposableDataProvider<string[]>.Select(HtmlNode node) {
        return node;
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    HtmlNode? IComposableDataProvider<string>.Select(HtmlNode node) {
        return node;
    }
}
@@ -0,0 +1,37 @@
using HtmlAgilityPack;
using System.Text;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Flattens the <c>li</c> children of a node into a single semicolon-separated string.
/// </summary>
public class ListContentDataProvider : IComposableDataProvider<string> {
    /// <summary>Binding used to pick the list node out of a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Selects the list node through <see cref="Content"/> and flattens its items.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The joined item text, or an empty string when no node is selected.</returns>
    public string Get(HtmlDocument document) {
        // Select already yields null when Content is unset, so no separate check is needed.
        var node = Select(document);
        return node is null ? "" : Get(node);
    }

    /// <summary>
    /// Joins the trimmed inner text of each direct <c>li</c> child of <paramref name="node"/> with <c>";"</c>.
    /// </summary>
    /// <param name="node">List node whose direct children are inspected.</param>
    /// <returns>
    /// The joined text without a trailing separator; an empty string when the node
    /// has no <c>li</c> children.
    /// </returns>
    public string Get(HtmlNode node) {
        // Filter every child uniformly instead of special-casing the last one:
        // the last child is often a whitespace text node rather than an <li>,
        // and a node without children must yield "" rather than throw.
        var items = node.ChildNodes
            .Where(child => child.Name == "li")
            .Select(child => child.InnerText.Trim());
        return string.Join(";", items);
    }

    /// <summary>Selects the list node by delegating to <see cref="Content"/>.</summary>
    /// <returns>The selected node, or <c>null</c> when <see cref="Content"/> is unset or selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) {
        return Content?.Select(doc);
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) {
        return node;
    }
}
}
@@ -0,0 +1,39 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Data provider that concatenates the text of a node's direct <c>p</c> children,
/// one paragraph per line.
/// </summary>
public class ParagraphedContentDataProvider : IComposableDataProvider<string> {
    /// <summary>Binding that picks the container node out of a document.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Selects the container through <see cref="Content"/> and returns its paragraph text.
    /// </summary>
    /// <returns>
    /// The paragraph text, or an empty string when <see cref="Content"/> is unset
    /// or selects nothing.
    /// </returns>
    public string Get(HtmlDocument document) {
        if (Content?.Select(document) is { } selected) {
            return Get(selected);
        }
        return "";
    }

    /// <summary>
    /// Appends the inner text of each direct <c>p</c> child of <paramref name="node"/>,
    /// each followed by a line terminator.
    /// </summary>
    public string Get(HtmlNode node) {
        var buffer = new StringBuilder();
        var paragraphs = node.ChildNodes.Where(child => child.Name == "p");
        foreach (var paragraph in paragraphs) {
            buffer.AppendLine(paragraph.InnerText);
        }
        return buffer.ToString();
    }

    /// <summary>Selects the container node by delegating to <see cref="Content"/>.</summary>
    /// <returns>The selected node, or <c>null</c> when <see cref="Content"/> is unset or selects nothing.</returns>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) => node;
}
}