Add project files.
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and framework settings. -->
  <PropertyGroup>
    <TargetFramework>net9.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <!-- Sibling project in the same solution tree. -->
  <ItemGroup>
    <ProjectReference Include="..\Beam\Beam.csproj" />
  </ItemGroup>

  <!--
    Pre-built assemblies referenced by relative path.
    NOTE(review): DataKeys points at a Debug output while PersistentData points
    at a Release output; confirm this mix is intentional.
  -->
  <ItemGroup>
    <Reference Include="aeqw89.DataKeys">
      <HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
    </Reference>
    <Reference Include="aeqw89.PersistentData">
      <HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
    </Reference>
  </ItemGroup>

</Project>
|
||||
@@ -0,0 +1,68 @@
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using HtmlAgilityPack;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Describes how to pull one value out of an HTML document. The lookup source is
/// chosen in this order: <see cref="XPath"/>, then <see cref="CssPath"/>
/// (a '/'-separated chain of class names), then <see cref="Provider"/>.
/// </summary>
public class Binding(DataKey<Binding> key) : IKeyed<Binding> {
    /// <summary>Creates a binding whose key is built from <paramref name="key"/>.</summary>
    public Binding(string key) : this(new DataKey<Binding>(key)) { }

    /// <summary>Creates a binding with an empty-string key.</summary>
    public Binding() : this("") { }

    /// <summary>Identifier of this binding.</summary>
    [JsonRequired]
    public DataKey<Binding> Key { get; set; } = key;

    /// <summary>Selects which shape <see cref="Resolve"/> produces.</summary>
    [JsonRequired]
    public BindingType Type { get; set; }

    /// <summary>
    /// Separator handed to <c>string.Split</c> by <see cref="ResolveArray"/>. It is passed
    /// as one separator string, not as a set of characters.
    /// </summary>
    public string? ArrayDelimiters { get; set; }

    /// <summary>XPath expression evaluated against the document node; takes precedence over the other sources.</summary>
    public string? XPath { get; set; }

    /// <summary>'/'-separated class names walked from the document node via <c>ThenByClasses</c>.</summary>
    public string? CssPath { get; set; }

    private IDataProvider? _provider;

    /// <summary>
    /// Custom provider used when neither <see cref="XPath"/> nor <see cref="CssPath"/> is set.
    /// Assigning <c>null</c> is ignored and keeps the current provider.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The assigned provider's runtime type has no public parameterless constructor.
    /// </exception>
    public IDataProvider? Provider {
        get => _provider;
        set {
            if (value is null) {
                return;
            }

            // GetConstructor(Type[]) only finds public instance constructors, so a
            // null result covers both "missing" and "not public".
            if (value.GetType().GetConstructor([]) is null) {
                throw new InvalidOperationException(
                    $"Provider type '{value.GetType()}' must have a public parameterless constructor.");
            }

            _provider = value;
        }
    }

    /// <summary>Finds the node this binding points at, or <c>null</c> when no source is configured or nothing matches.</summary>
    public HtmlNode? ResolveNode(HtmlDocument doc) {
        if (XPath is not null) {
            return doc.DocumentNode.SelectSingleNode(XPath);
        }

        if (CssPath is not null) {
            return doc.DocumentNode.ThenByClasses(CssPath.Split('/'));
        }

        if (Provider is not null) {
            return Provider.GetNode(doc);
        }

        return null;
    }

    /// <summary>
    /// Returns the inner text of the XPath/CssPath match (empty string when nothing matches),
    /// or the provider's <c>Get</c> result, or an empty string when no source is configured.
    /// </summary>
    public string ResolveString(HtmlDocument doc) {
        if (XPath is not null) {
            return doc.DocumentNode.SelectSingleNode(XPath)?.InnerText ?? "";
        }

        if (CssPath is not null) {
            return doc.DocumentNode.ThenByClasses(CssPath.Split('/'))?.InnerText ?? "";
        }

        if (Provider is not null) {
            return Provider.Get(doc);
        }

        return "";
    }

    /// <summary>
    /// Splits <see cref="ResolveString"/> on <see cref="ArrayDelimiters"/>.
    /// Returns an empty array unless <see cref="Type"/> is <see cref="BindingType.Array"/>.
    /// </summary>
    public string[] ResolveArray(HtmlDocument doc) {
        if (Type is not BindingType.Array) {
            return [];
        }

        string text = ResolveString(doc);
        return text.Split(ArrayDelimiters);
    }

    /// <summary>
    /// Resolves according to <see cref="Type"/>: a <c>string</c> for Single, a <c>string[]</c>
    /// for Array, the provider's <c>Get</c> result (or <c>null</c> without a provider) for
    /// UseProvider, and <c>null</c> for any other value.
    /// </summary>
    public dynamic? Resolve(HtmlDocument doc) => Type switch {
        BindingType.Single => ResolveString(doc),
        BindingType.Array => ResolveArray(doc),
        BindingType.UseProvider => Provider?.Get(doc),
        _ => null
    };
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>Shape of the value a binding resolves to.</summary>
public enum BindingType {
    /// <summary>Resolve to one string.</summary>
    Single = 0,

    /// <summary>Resolve to a string array.</summary>
    Array = 1,

    /// <summary>Resolve by asking the binding's provider.</summary>
    UseProvider = 2
}
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// The set of optional bindings used to extract document metadata and content.
/// </summary>
public class DataBindings {
    public Binding? Title { get; set; }
    public Binding? Authors { get; set; }
    public Binding? Description { get; set; }
    public Binding? Content { get; set; }
    public Binding? Language { get; set; }
    public Binding? Tags { get; set; }

    /// <summary>
    /// Resolves every configured binding against <paramref name="doc"/>.
    /// Unset text bindings yield <c>null</c>; unset list bindings yield an empty array.
    /// </summary>
    /// <param name="doc">Document to extract from.</param>
    /// <returns>A new <see cref="ResolvedBindings"/> holding the extracted values.</returns>
    public ResolvedBindings Resolve(HtmlDocument doc) {
        return new ResolvedBindings() {
            Title = ResolveText(Title, doc),
            Authors = ResolveList(Authors, doc),
            Language = ResolveList(Language, doc),
            Content = ResolveText(Content, doc),
            Description = ResolveText(Description, doc),
            Tags = ResolveList(Tags, doc)
        };
    }

    // Resolves a binding for a string-typed target. The dynamic result is inspected as
    // object so a mismatched BindingType cannot raise a runtime binder error.
    private static string? ResolveText(Binding? binding, HtmlDocument doc) {
        if (binding is null) {
            return null;
        }

        object? value = binding.Resolve(doc);
        return value switch {
            string text => text,
            // An Array binding is ResolveString split on its delimiter, so the
            // unsplit text is the scalar form of the same value.
            string[] => binding.ResolveString(doc),
            _ => null
        };
    }

    // Resolves a binding for a string[]-typed target; a scalar result becomes a
    // one-element array and a missing result becomes an empty array.
    private static string[] ResolveList(Binding? binding, HtmlDocument doc) {
        if (binding is null) {
            return Array.Empty<string>();
        }

        object? value = binding.Resolve(doc);
        return value switch {
            string[] items => items,
            string single => new[] { single },
            _ => Array.Empty<string>()
        };
    }
}
|
||||
|
||||
/// <summary>
/// Plain container for the values produced by <c>DataBindings.Resolve</c>.
/// Every property is optional and freely settable.
/// </summary>
public class ResolvedBindings {
    /// <summary>Value resolved from the Title binding.</summary>
    public string? Title { get; set; }

    /// <summary>Values resolved from the Authors binding.</summary>
    public string[]? Authors { get; set; }

    /// <summary>Value resolved from the Description binding.</summary>
    public string? Description { get; set; }

    /// <summary>Value resolved from the Content binding.</summary>
    public string? Content { get; set; }

    /// <summary>Values resolved from the Language binding.</summary>
    public string[]? Language { get; set; }

    /// <summary>Values resolved from the Tags binding.</summary>
    public string[]? Tags { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Helpers for walking an HtmlAgilityPack node tree by class or tag name and for
/// extracting text from child nodes.
/// </summary>
public static partial class HtmlNodeExtensions {
    /// <summary>
    /// Descends one level per entry in <paramref name="classes"/>, each time taking the
    /// first direct child carrying that class. Returns <c>null</c> as soon as a step fails.
    /// </summary>
    public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes) {
        return node.DescendCollectionTree(ThenByClass, classes);
    }

    /// <summary>
    /// Takes the first direct child with class <paramref name="class"/>, repeating the
    /// step <paramref name="count"/> times (at least once).
    /// </summary>
    public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1) {
        return node.ThenByFunc(
            current => current.ChildNodes.FirstOrDefault(child => child.HasClass(@class)),
            count);
    }

    /// <summary>
    /// Takes the first direct child whose tag name equals <paramref name="name"/>, repeating
    /// the step <paramref name="count"/> times (at least once).
    /// </summary>
    public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1) {
        return node.ThenByFunc(
            current => current.ChildNodes.FirstOrDefault(child => child.Name == name),
            count);
    }

    /// <summary>
    /// Descends one level per entry in <paramref name="name"/>, each time taking the first
    /// direct child with that tag name. Returns <c>null</c> as soon as a step fails.
    /// </summary>
    public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name) {
        return node.DescendCollectionTree(ThenByName, name);
    }

    /// <summary>
    /// Adapter for step functions that take a repeat count: calls
    /// <paramref name="func"/> with a count of 1 for every value.
    /// </summary>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, int, HtmlNode?> func, params T[] values) {
        return node.DescendCollectionTree((a, b) => func(a, b, 1), values);
    }

    /// <summary>
    /// Applies <paramref name="func"/> once per value, feeding each result into the next
    /// call. Stops and returns <c>null</c> when a step yields <c>null</c>; returns
    /// <paramref name="node"/> itself when <paramref name="values"/> is empty.
    /// </summary>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, HtmlNode?> func, params T[] values) {
        HtmlNode? current = node;
        foreach (var value in values) {
            if (current is null) {
                return null;
            }

            current = func(current, value);
        }

        return current;
    }

    /// <summary>
    /// Applies <paramref name="func"/> to <paramref name="node"/> and then to each result,
    /// <paramref name="count"/> times in total. A count of 1 or less applies it once.
    /// Returns <c>null</c> as soon as a step yields <c>null</c>.
    /// </summary>
    public static HtmlNode? ThenByFunc(this HtmlNode node, Func<HtmlNode, HtmlNode?> func, int count = 1) {
        HtmlNode? current = func(node);
        for (int remaining = count - 1; remaining > 0 && current is not null; remaining--) {
            current = func(current);
        }

        return current;
    }

    /// <summary>
    /// Splits the node's inner text on <paramref name="separators"/> (treated as one
    /// separator string), dropping empty entries. Returns <c>null</c> for a null node.
    /// </summary>
    public static string[]? SplitInnerText(this HtmlNode? node, string separators)
        => node?.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    /// <summary>
    /// Filter that accepts only nodes whose tag name is exactly <c>p</c> or <c>h</c> followed
    /// by a single digit. The pattern is anchored so names that merely contain those
    /// characters (for example <c>span</c> or <c>pre</c>) are rejected.
    /// </summary>
    public static bool TextNodesOnly(HtmlNode n) => TextNodesOnlyRegex().IsMatch(n.Name);

    /// <summary>
    /// Joins the inner text of the node's direct children with '\n', skipping children
    /// rejected by <paramref name="filter"/> and children whose text is null or whitespace.
    /// Returns an empty string for a null node or a node without children.
    /// </summary>
    public static string InnerLineSeparatedText(this HtmlNode? node, Func<HtmlNode, bool>? filter = null) {
        if (node is null || node.ChildNodes is null || node.ChildNodes.Count == 0) {
            return "";
        }

        IEnumerable<HtmlNode> children = node.ChildNodes;
        if (filter is not null) {
            children = children.Where(filter);
        }

        var lines = children
            .Select(child => child.InnerText)
            .Where(text => !string.IsNullOrWhiteSpace(text));

        return string.Join("\n", lines);
    }

    [GeneratedRegex("^(?:p|h\\d)$")]
    private static partial Regex TextNodesOnlyRegex();
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Custom extraction strategy a binding can delegate to. The implementations listed
/// in the attributes below are registered as JSON-polymorphic derived types with
/// numeric discriminators 20 and 21.
/// </summary>
[System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20),
 System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)]
public interface IDataProvider {
    /// <summary>Extracts this provider's text from <paramref name="document"/>.</summary>
    string Get(HtmlDocument document);

    /// <summary>Returns the node this provider reads from, or <c>null</c> if there is none.</summary>
    HtmlNode? GetNode(HtmlDocument document);
}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Text;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Provider that flattens the <c>li</c> children of a list node into a single
/// ';'-separated string.
/// </summary>
public class ListContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the list container node.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Returns the trimmed inner text of every direct <c>li</c> child of the resolved node,
    /// joined with ';'. Returns an empty string when <see cref="Content"/> is unset, the
    /// node cannot be resolved, or it has no <c>li</c> children.
    /// </summary>
    /// <remarks>
    /// Only <c>li</c> children contribute, including the last child. A trailing non-<c>li</c>
    /// node (for example whitespace text) no longer produces a dangling ';', and a node
    /// without children no longer throws.
    /// </remarks>
    public string Get(HtmlDocument document) {
        HtmlNode? list = Content?.ResolveNode(document);
        if (list is null) {
            return "";
        }

        var items = list.ChildNodes
            .Where(child => child.Name == "li")
            .Select(child => child.InnerText.Trim());

        return string.Join(";", items);
    }

    /// <summary>Returns the node located by <see cref="Content"/>, or <c>null</c> when it is unset or unresolved.</summary>
    public HtmlNode? GetNode(HtmlDocument document) {
        return Content?.ResolveNode(document);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using System.Web;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Cleans text scraped from web pages: HTML-decodes it and then replaces any
/// remaining numeric character references with the character they denote.
/// </summary>
public static partial class OnlineCleaner {
    /// <summary>
    /// Matches candidate numeric character references: <c>&amp;#</c>, an optional <c>x</c>,
    /// one to four word characters, and a closing <c>;</c>. The pattern is deliberately
    /// loose; candidates that do not parse as a number are left untouched by the cleaner.
    /// </summary>
    [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
    public static partial Regex MochaBlendUnicodeEscapeSequence();

    // Replaces each matched reference with its single UTF-16 code unit. Matches whose
    // payload is not a valid decimal/hex number are returned unchanged instead of throwing.
    private static string UnicodeEscapeSequences(string text) {
        return MochaBlendUnicodeEscapeSequence().Replace(text, match => {
            string raw = match.Value;
            bool isHex = raw[2] == 'x';

            // Payload sits between "&#" (or "&#x") and the trailing ';'.
            string digits = isHex ? raw[3..^1] : raw[2..^1];
            var style = isHex
                ? System.Globalization.NumberStyles.HexNumber
                : System.Globalization.NumberStyles.Integer;

            if (!int.TryParse(digits, style, System.Globalization.CultureInfo.InvariantCulture, out int codeUnit)) {
                return raw;
            }

            if (codeUnit < char.MinValue || codeUnit > char.MaxValue) {
                return raw;
            }

            // Direct cast avoids the endianness-dependent byte round-trip. A lone
            // surrogate is not a valid character on its own, so emit U+FFFD for it.
            char decodedChar = (char)codeUnit;
            return char.IsSurrogate(decodedChar) ? "\uFFFD" : decodedChar.ToString();
        });
    }

    /// <summary>
    /// Returns <paramref name="onlineText"/> with HTML entities decoded and any numeric
    /// character references that survive decoding (for example double-encoded ones such
    /// as <c>&amp;amp;#65;</c>) replaced by their character.
    /// </summary>
    /// <param name="onlineText">Raw text; may be null.</param>
    /// <returns>The cleaned text, or an empty string for null, empty or whitespace-only input.</returns>
    public static string Clean(string? onlineText) {
        if (string.IsNullOrWhiteSpace(onlineText)) {
            return "";
        }

        // The decoded text, not the raw input, must feed the second pass.
        string decoded = HttpUtility.HtmlDecode(onlineText);
        return UnicodeEscapeSequences(decoded);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Provider that concatenates the text of the <c>p</c> children of a container node,
/// one paragraph per line.
/// </summary>
public class ParagraphedContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the container node.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Returns the inner text of every direct <c>p</c> child of the resolved node, each
    /// followed by the environment's line terminator. Returns an empty string when
    /// <see cref="Content"/> is unset or the node cannot be resolved.
    /// </summary>
    public string Get(HtmlDocument document) {
        HtmlNode? container = Content?.ResolveNode(document);
        if (container is null) {
            return "";
        }

        var paragraphLines = container.ChildNodes
            .Where(child => child.Name == "p")
            .Select(child => child.InnerText + Environment.NewLine);

        return string.Concat(paragraphLines);
    }

    /// <summary>Returns the node located by <see cref="Content"/>, or <c>null</c> when it is unset or unresolved.</summary>
    public HtmlNode? GetNode(HtmlDocument document) => Content?.ResolveNode(document);
}
|
||||
}
|
||||
Reference in New Issue
Block a user