Add project files.
This commit is contained in:
@@ -0,0 +1,22 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<!-- Build settings: targets net9.0 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<!-- Source-level dependency on the sibling Beam project. -->
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<!-- Prebuilt assemblies referenced by file path, resolved relative to this project directory. -->
<!-- NOTE(review): DataKeys is taken from a Debug output folder and PersistentData from a Release one; -->
<!-- both paths only resolve after those repositories have been built locally. Confirm this mix is intended. -->
<ItemGroup>
|
||||||
|
<Reference Include="aeqw89.DataKeys">
|
||||||
|
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
||||||
|
</Reference>
|
||||||
|
<Reference Include="aeqw89.PersistentData">
|
||||||
|
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
|
||||||
|
</Reference>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// Describes where one piece of data lives inside an <see cref="HtmlDocument"/>.
/// The location is tried in a fixed order: <see cref="XPath"/>, then <see cref="CssPath"/>
/// (a '/'-separated chain of class names), then <see cref="Provider"/>.
/// </summary>
/// <param name="key">The key that identifies this binding.</param>
public class Binding(DataKey<Binding> key) : IKeyed<Binding> {
    /// <summary>Creates a binding whose key is built from <paramref name="key"/>.</summary>
    public Binding(string key) : this(new DataKey<Binding>(key)) { }

    /// <summary>Creates a binding with an empty key.</summary>
    public Binding() : this("") { }

    /// <summary>The key identifying this binding; required when deserialising from JSON.</summary>
    [JsonRequired]
    public DataKey<Binding> Key { get; set; } = key;

    /// <summary>Selects which shape <see cref="Resolve"/> produces; required when deserialising from JSON.</summary>
    [JsonRequired]
    public BindingType Type { get; set; }

    /// <summary>
    /// Separator passed to <see cref="string.Split(string?, StringSplitOptions)"/> by
    /// <see cref="ResolveArray"/>. It is used as ONE multi-character separator, not a set of characters.
    /// </summary>
    public string? ArrayDelimiters { get; set; }

    /// <summary>XPath expression evaluated against the document node; takes priority over <see cref="CssPath"/>.</summary>
    public string? XPath { get; set; }

    /// <summary>'/'-separated class names followed one child level at a time from the document node.</summary>
    public string? CssPath { get; set; }

    private IDataProvider? _provider;

    /// <summary>
    /// Fallback resolver used when neither <see cref="XPath"/> nor <see cref="CssPath"/> is set.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The assigned provider's runtime type has no public parameterless constructor.
    /// </exception>
    public IDataProvider? Provider {
        get => _provider;
        set {
            // Assigning null is ignored on purpose-preserving grounds: the original setter
            // never cleared an existing provider, so callers cannot reset it this way.
            if (value is null) {
                return;
            }

            // Type.GetConstructor(Type[]) only finds public instance constructors, so a
            // non-null result already implies the constructor is public.
            var providerType = value.GetType();
            if (providerType.GetConstructor(Type.EmptyTypes) is null) {
                throw new InvalidOperationException(
                    $"Data provider type '{providerType}' must have a public parameterless constructor.");
            }

            _provider = value;
        }
    }

    /// <summary>Locates the node this binding points at.</summary>
    /// <param name="doc">The document to search.</param>
    /// <returns>The matching node, or <c>null</c> when nothing is configured or nothing matches.</returns>
    public HtmlNode? ResolveNode(HtmlDocument doc) {
        if (XPath is not null) {
            return doc.DocumentNode.SelectSingleNode(XPath);
        }

        if (CssPath is not null) {
            return doc.DocumentNode.ThenByClasses(CssPath.Split('/'));
        }

        return Provider?.GetNode(doc);
    }

    /// <summary>Resolves this binding to text.</summary>
    /// <param name="doc">The document to search.</param>
    /// <returns>
    /// The inner text of the XPath/CSS match, or the provider's text when only a provider
    /// is configured; an empty string when nothing is configured or nothing matches.
    /// </returns>
    public string ResolveString(HtmlDocument doc) {
        // XPath and CssPath share ResolveNode's lookup; only the provider path differs,
        // because a provider supplies its own text rather than a node's InnerText.
        if (XPath is not null || CssPath is not null) {
            return ResolveNode(doc)?.InnerText ?? "";
        }

        return Provider?.Get(doc) ?? "";
    }

    /// <summary>Resolves this binding to text and splits it on <see cref="ArrayDelimiters"/>.</summary>
    /// <param name="doc">The document to search.</param>
    /// <returns>The split parts, or an empty array when <see cref="Type"/> is not <see cref="BindingType.Array"/>.</returns>
    public string[] ResolveArray(HtmlDocument doc) {
        if (Type is not BindingType.Array) {
            return [];
        }

        return ResolveString(doc).Split(ArrayDelimiters);
    }

    /// <summary>Resolves this binding in the shape selected by <see cref="Type"/>.</summary>
    /// <param name="doc">The document to search.</param>
    /// <returns>
    /// A <see cref="string"/> for <see cref="BindingType.Single"/> and
    /// <see cref="BindingType.UseProvider"/> (null when no provider is set), a
    /// <c>string[]</c> for <see cref="BindingType.Array"/>, and <c>null</c> otherwise.
    /// </returns>
    public dynamic? Resolve(HtmlDocument doc) => Type switch {
        BindingType.Single => ResolveString(doc),
        BindingType.Array => ResolveArray(doc),
        BindingType.UseProvider => Provider?.Get(doc),
        _ => null
    };
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// The shape of value a <c>Binding</c> produces when it is resolved.
/// </summary>
public enum BindingType {
    /// <summary>The binding resolves to one string.</summary>
    Single = 0,

    /// <summary>The binding resolves to an array of strings.</summary>
    Array = 1,

    /// <summary>The binding's value comes from its data provider.</summary>
    UseProvider = 2
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// The set of bindings that describe where an article's fields live in a page.
/// Every binding is optional; an unset binding yields <c>null</c> (text fields) or an
/// empty array (list fields) in the resolved result.
/// </summary>
public class DataBindings {
    /// <summary>Binding for the title text.</summary>
    public Binding? Title { get; set; }

    /// <summary>Binding for the author list.</summary>
    public Binding? Authors { get; set; }

    /// <summary>Binding for the description text.</summary>
    public Binding? Description { get; set; }

    /// <summary>Binding for the main content text.</summary>
    public Binding? Content { get; set; }

    /// <summary>Binding for the language list.</summary>
    public Binding? Language { get; set; }

    /// <summary>Binding for the tag list.</summary>
    public Binding? Tags { get; set; }

    /// <summary>Resolves every configured binding against <paramref name="doc"/>.</summary>
    /// <param name="doc">The document to read from.</param>
    /// <returns>The resolved values.</returns>
    /// <remarks>
    /// <see cref="Binding.Resolve"/> returns <c>dynamic</c>. Assigning that directly to a
    /// typed property throws a runtime binder exception whenever the binding's type does
    /// not match the property (for example a single-string binding used for
    /// <see cref="Authors"/>), so each result is converted explicitly here.
    /// </remarks>
    public ResolvedBindings Resolve(HtmlDocument doc) {
        return new ResolvedBindings {
            Title = ResolveText(Title, doc),
            Authors = ResolveList(Authors, doc),
            Language = ResolveList(Language, doc),
            Content = ResolveText(Content, doc),
            Description = ResolveText(Description, doc),
            Tags = ResolveList(Tags, doc)
        };
    }

    // Resolves a binding for a text field; array bindings contribute their unsplit text.
    private static string? ResolveText(Binding? binding, HtmlDocument doc) {
        if (binding is null) {
            return null;
        }

        if (binding.Type is BindingType.Array) {
            return binding.ResolveString(doc);
        }

        object? value = binding.Resolve(doc);
        return value as string;
    }

    // Resolves a binding for a list field; a single string becomes a one-element array.
    private static string[] ResolveList(Binding? binding, HtmlDocument doc) {
        object? value = binding?.Resolve(doc);
        return value switch {
            string[] items => items,
            string text => new[] { text },
            _ => Array.Empty<string>()
        };
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Plain value holder for the outcome of resolving a set of data bindings.
/// All members are optional and default to <c>null</c>.
/// </summary>
public class ResolvedBindings {
    /// <summary>The resolved title text.</summary>
    public string? Title { get; set; }

    /// <summary>The resolved authors.</summary>
    public string[]? Authors { get; set; }

    /// <summary>The resolved description text.</summary>
    public string? Description { get; set; }

    /// <summary>The resolved main content text.</summary>
    public string? Content { get; set; }

    /// <summary>The resolved languages.</summary>
    public string[]? Language { get; set; }

    /// <summary>The resolved tags.</summary>
    public string[]? Tags { get; set; }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// Helpers for walking an HtmlAgilityPack node tree one child level at a time and for
/// extracting text from a node's children.
/// </summary>
public static partial class HtmlNodeExtensions {
    /// <summary>
    /// Descends from <paramref name="node"/> one level per entry in <paramref name="classes"/>,
    /// each time taking the first direct child that has that class.
    /// </summary>
    /// <returns>The node reached, or <c>null</c> as soon as a level has no match.</returns>
    public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes) {
        return node.DescendCollectionTree(ThenByClass, classes);
    }

    /// <summary>
    /// Takes the first direct child with class <paramref name="class"/>, repeating the step
    /// <paramref name="count"/> times (values below 1 behave like 1).
    /// </summary>
    public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1) {
        return node.ThenByFunc(
            (parent) => parent.ChildNodes.FirstOrDefault((child) => child.HasClass(@class)),
            count);
    }

    /// <summary>
    /// Takes the first direct child whose element name equals <paramref name="name"/>, repeating
    /// the step <paramref name="count"/> times (values below 1 behave like 1).
    /// </summary>
    public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1) {
        return node.ThenByFunc(
            (parent) => parent.ChildNodes.FirstOrDefault((child) => child.Name == name),
            count);
    }

    /// <summary>
    /// Descends from <paramref name="node"/> one level per entry in <paramref name="name"/>,
    /// each time taking the first direct child with that element name.
    /// </summary>
    public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name) {
        return node.DescendCollectionTree(ThenByName, name);
    }

    /// <summary>
    /// Adapter for step functions that also take a repeat count; each step runs with a count of 1.
    /// </summary>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, int, HtmlNode?> func, params T[] values) {
        return node.DescendCollectionTree((current, value) => func(current, value, 1), values);
    }

    /// <summary>
    /// Applies <paramref name="func"/> once per entry of <paramref name="values"/>, feeding each
    /// result into the next step.
    /// </summary>
    /// <returns>
    /// The final node; <c>null</c> if any step returned <c>null</c>; <paramref name="node"/> itself
    /// when <paramref name="values"/> is empty.
    /// </returns>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, HtmlNode?> func, params T[] values) {
        HtmlNode? current = node;
        foreach (var value in values) {
            if (current is null) {
                break;
            }

            current = func(current, value);
        }

        return current;
    }

    /// <summary>
    /// Applies <paramref name="func"/> to <paramref name="node"/>, then to its own result, until it
    /// has run <paramref name="count"/> times or produced <c>null</c>. It always runs at least once.
    /// </summary>
    public static HtmlNode? ThenByFunc(this HtmlNode node, Func<HtmlNode, HtmlNode?> func, int count = 1) {
        HtmlNode? current = func(node);
        for (int remaining = count - 1; remaining > 0 && current is not null; remaining--) {
            current = func(current);
        }

        return current;
    }

    /// <summary>
    /// Splits the node's inner text on <paramref name="separators"/> (treated as one separator
    /// string) and drops empty entries.
    /// </summary>
    /// <returns>The parts, or <c>null</c> when <paramref name="node"/> is <c>null</c>.</returns>
    public static string[]? SplitInnerText(this HtmlNode? node, string separators)
        => node?.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    /// <summary>
    /// Filter that accepts only nodes named exactly <c>p</c> or <c>h</c> followed by one digit.
    /// </summary>
    public static bool TextNodesOnly(HtmlNode n) => TextNodesOnlyRegex().IsMatch(n.Name);

    /// <summary>
    /// Joins the inner text of the node's direct children with '\n', skipping children rejected
    /// by <paramref name="filter"/> and children whose text is null or whitespace.
    /// </summary>
    /// <param name="node">The parent node; may be <c>null</c>.</param>
    /// <param name="filter">Optional child filter; every child is accepted when omitted.</param>
    /// <returns>The joined text, or an empty string when there is nothing to join.</returns>
    public static string InnerLineSeparatedText(this HtmlNode? node, Func<HtmlNode, bool>? filter = null) {
        if (node?.ChildNodes is not { Count: > 0 } children) {
            return "";
        }

        var lines = children
            .Where(filter ?? ((child) => true))
            .Select((child) => child.InnerText)
            .Where((text) => !string.IsNullOrWhiteSpace(text));

        return string.Join('\n', lines);
    }

    // Anchored so that only whole names match; the unanchored form also accepted any
    // element name merely containing "p" (span, script, input, ...).
    [GeneratedRegex("^(?:p|h\\d)$")]
    private static partial Regex TextNodesOnlyRegex();
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// A pluggable strategy that extracts data from an <see cref="HtmlDocument"/>.
/// The two known implementations are registered for polymorphic JSON (de)serialisation
/// with the numeric discriminators 20 and 21.
/// </summary>
[System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20)]
[System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)]
public interface IDataProvider {
    /// <summary>Extracts this provider's data from <paramref name="document"/> as text.</summary>
    /// <param name="document">The document to read from.</param>
    /// <returns>The extracted text.</returns>
    string Get(HtmlDocument document);

    /// <summary>Locates the node this provider reads from.</summary>
    /// <param name="document">The document to search.</param>
    /// <returns>The node, or <c>null</c> when it cannot be found.</returns>
    HtmlNode? GetNode(HtmlDocument document);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// Data provider that flattens an HTML list into a single ';'-separated string.
/// </summary>
public class ListContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the list container node; nothing is produced while unset.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Joins the trimmed inner text of every direct <c>li</c> child of the bound node with ';'.
    /// </summary>
    /// <param name="document">The document to read from.</param>
    /// <returns>
    /// The joined items, or an empty string when <see cref="Content"/> is unset, the node is
    /// not found, or it has no <c>li</c> children.
    /// </returns>
    public string Get(HtmlDocument document) {
        var node = Content?.ResolveNode(document);
        if (node is null) {
            return "";
        }

        // Only <li> children count. Parsed markup usually ends with a whitespace text node,
        // so treating "the last child" specially produced a trailing ';' (and threw on an
        // element with no children at all).
        var items = node.ChildNodes
            .Where((child) => child.Name == "li")
            .Select((child) => child.InnerText.Trim());

        return string.Join(';', items);
    }

    /// <summary>Locates the list container node.</summary>
    /// <param name="document">The document to search.</param>
    /// <returns>The node, or <c>null</c> when <see cref="Content"/> is unset or nothing matches.</returns>
    public HtmlNode? GetNode(HtmlDocument document) {
        return Content?.ResolveNode(document);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Web;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// Cleans text scraped from web pages by decoding HTML entities and leftover numeric
/// character references.
/// </summary>
public static partial class OnlineCleaner {
    /// <summary>
    /// Matches a numeric character reference such as <c>&amp;#8217;</c> or <c>&amp;#x2019;</c>
    /// with a payload of one to four word characters.
    /// </summary>
    [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
    public static partial Regex MochaBlendUnicodeEscapeSequence();

    // Replaces every numeric character reference in the text with the character it denotes.
    // References whose payload is not a valid number are left untouched instead of throwing.
    private static string UnicodeEscapeSequences(string text) {
        return MochaBlendUnicodeEscapeSequence().Replace(text, static (match) => {
            // Payload is everything between the leading "&#" and the trailing ';'.
            string payload = match.Value[2..^1];
            bool isHex = payload.StartsWith('x');
            string digits = isHex ? payload[1..] : payload;
            var style = isHex
                ? System.Globalization.NumberStyles.HexNumber
                : System.Globalization.NumberStyles.None;

            if (!int.TryParse(digits, style, System.Globalization.CultureInfo.InvariantCulture, out int codeUnit)) {
                return match.Value;
            }

            // At most four digits, so the value always fits one UTF-16 code unit. A lone
            // surrogate is not a valid character and becomes U+FFFD, as the byte round trip did.
            char decoded = (char)codeUnit;
            return char.IsSurrogate(decoded) ? "\uFFFD" : decoded.ToString();
        });
    }

    /// <summary>
    /// HTML-decodes <paramref name="onlineText"/> and then resolves any numeric character
    /// references that remain (for example from double-encoded input).
    /// </summary>
    /// <param name="onlineText">The raw text; may be <c>null</c>.</param>
    /// <returns>The cleaned text, or an empty string for null, empty or whitespace-only input.</returns>
    public static string Clean(string? onlineText) {
        if (string.IsNullOrWhiteSpace(onlineText)) {
            return "";
        }

        // The decoded text must feed the second pass; previously it was computed and dropped.
        var decoded = HttpUtility.HtmlDecode(onlineText);
        return UnicodeEscapeSequences(decoded);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Dynamic {
|
||||||
|
/// <summary>
/// Data provider that collects the text of the paragraph children of a bound node.
/// </summary>
public class ParagraphedContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the container node; nothing is produced while unset.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Concatenates the inner text of every direct <c>p</c> child of the bound node, each
    /// followed by the platform line terminator.
    /// </summary>
    /// <param name="document">The document to read from.</param>
    /// <returns>
    /// The collected text, or an empty string when <see cref="Content"/> is unset or the node
    /// is not found.
    /// </returns>
    public string Get(HtmlDocument document) {
        var container = Content?.ResolveNode(document);
        if (container is null) {
            return "";
        }

        var paragraphs = container.ChildNodes
            .Where((child) => child.Name == "p")
            .Select((child) => child.InnerText);

        var builder = new StringBuilder();
        foreach (var paragraph in paragraphs) {
            builder.Append(paragraph).Append(Environment.NewLine);
        }

        return builder.ToString();
    }

    /// <summary>Locates the container node.</summary>
    /// <param name="document">The document to search.</param>
    /// <returns>The node, or <c>null</c> when <see cref="Content"/> is unset or nothing matches.</returns>
    public HtmlNode? GetNode(HtmlDocument document) => Content?.ResolveNode(document);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<!-- Build settings: targets net9.0 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<!-- The only dependency is the sibling Beam project. -->
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Exports {
|
||||||
|
/// <summary>
/// Exports a document's text representation to a file, synchronously or asynchronously.
/// Derived exporters customise the output by overriding <see cref="Convert"/> and
/// <see cref="ConvertAsync"/>.
/// </summary>
public class PlainTextExporter : IExporter, IAsyncExporter {
    /// <summary>Creates an exporter for <paramref name="document"/>.</summary>
    public PlainTextExporter(IDocument document) {
        Document = document;
    }

    /// <summary>The document being exported.</summary>
    public IDocument Document { get; }

    /// <summary>Produces the text to write; the base implementation uses the document's <c>ToString()</c>.</summary>
    protected virtual string Convert() {
        return Document.ToString();
    }

    /// <summary>Asynchronous counterpart of <see cref="Convert"/>; the base implementation completes synchronously.</summary>
    protected virtual Task<string> ConvertAsync() {
        return Task.FromResult(Document.ToString());
    }

    /// <summary>Writes the converted text to <paramref name="path"/> as UTF-16 (<see cref="Encoding.Unicode"/>).</summary>
    /// <param name="path">The destination file path.</param>
    /// <exception cref="ArgumentException">The directory part of <paramref name="path"/> does not exist.</exception>
    public virtual void Write(string path) {
        var text = Convert();
        ThrowIfDirectoryMissing(path);
        File.WriteAllText(path, text, Encoding.Unicode);
    }

    /// <summary>Asynchronously writes the converted text to <paramref name="path"/> as UTF-16 (<see cref="Encoding.Unicode"/>).</summary>
    /// <param name="path">The destination file path.</param>
    /// <returns>A task that completes when the file has been written.</returns>
    /// <exception cref="ArgumentException">The directory part of <paramref name="path"/> does not exist.</exception>
    public virtual async Task WriteAsync(string path) {
        var text = await ConvertAsync();

        // Same validation and encoding as Write. The previous check tested the file path
        // itself with Directory.Exists, which rejected every valid destination.
        ThrowIfDirectoryMissing(path);
        await File.WriteAllTextAsync(path, text, Encoding.Unicode);
    }

    // Validates that the directory containing the destination file exists.
    private static void ThrowIfDirectoryMissing(string path) {
        var directory = Path.GetDirectoryName(path);

        // A bare file name has no directory part and resolves against the current
        // directory, so there is nothing to check in that case.
        if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory)) {
            throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path));
        }
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Beam.Exports {
|
||||||
|
/// <summary>
/// Exports a document as HTML: every line of the document text becomes a paragraph, with an
/// optional title, navigation links and trailing markup.
/// </summary>
public class HtmlExporter : PlainTextExporter {
    /// <summary>Creates an HTML exporter.</summary>
    /// <param name="document">The document to export.</param>
    /// <param name="meta">Optional metadata; its name becomes the page heading.</param>
    /// <param name="linkButtons">Optional links, keyed by link text with the target URL as value.</param>
    /// <param name="eofHtml">Optional raw HTML appended after the content; emitted verbatim.</param>
    public HtmlExporter(IDocument document,
        ArticleData? meta = null,
        Dictionary<string, string>? linkButtons = null,
        string? eofHtml = null) : base(document) {
        Meta = meta;
        LinkButtons = linkButtons;
        EofHtml = eofHtml;
    }

    /// <summary>Metadata whose name is rendered as the heading; may be <c>null</c>.</summary>
    public ArticleData? Meta { get; }

    /// <summary>Navigation links (text → URL) rendered above and below the content; may be <c>null</c>.</summary>
    public Dictionary<string, string>? LinkButtons { get; }

    /// <summary>Raw HTML appended after the content; may be <c>null</c>.</summary>
    public string? EofHtml { get; }

    /// <summary>Builds the HTML for the document.</summary>
    /// <returns>
    /// Paragraph markup only when <see cref="Meta"/> is <c>null</c>; heading plus paragraphs when
    /// there are no link buttons; otherwise a complete page with the links above and below the
    /// content followed by <see cref="EofHtml"/>.
    /// </returns>
    /// <remarks>
    /// NOTE(review): the doctype wrapper and <see cref="EofHtml"/> are only emitted when link
    /// buttons are present. That is preserved from the original; confirm it is intended.
    /// </remarks>
    protected override string Convert() {
        // The document text, title, link labels and URLs are data, not markup, so they are
        // HTML-encoded before being placed in the page. EofHtml is markup by contract.
        var text = System.Net.WebUtility.HtmlEncode(Document.ToString());

        // Convert newlines to <p></p> tags
        text = "<p>" + text.Replace("\n", "</p><p>") + "</p>";
        if (Meta is null) {
            return text;
        }

        text = $"<h1>{System.Net.WebUtility.HtmlEncode(Meta.Name)}</h1>" + text;
        if (LinkButtons is null || LinkButtons.Count == 0) {
            return text;
        }

        StringBuilder buttons = new();
        foreach (var (label, target) in LinkButtons) {
            var href = System.Net.WebUtility.HtmlEncode(target);
            var caption = System.Net.WebUtility.HtmlEncode(label);
            buttons.AppendLine($"<a href=\"{href}\">{caption}</a>");
        }

        var buttonsDiv = $"<div class=\"controls\">{buttons}</div>";
        text = buttonsDiv + text + buttonsDiv;
        text += EofHtml ?? "";
        return "<!DOCTYPE html>\n<html>" + text + "</html>";
    }

    /// <summary>Asynchronous counterpart of <see cref="Convert"/>; completes synchronously.</summary>
    protected override Task<string> ConvertAsync() {
        return Task.FromResult(Convert());
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
namespace Beam.Exports {
|
||||||
|
/// <summary>
/// An exporter that can also write its output without blocking the caller.
/// </summary>
public interface IAsyncExporter : IExporter {
    /// <summary>
    /// Asynchronously writes the object to the desired path, creating it if it does not exist.
    /// </summary>
    /// <param name="path">The path of the exported object</param>
    /// <returns>A task that represents the pending write.</returns>
    Task WriteAsync(string path);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
namespace Beam.Exports {
|
||||||
|
/// <summary>
/// Something that can write itself to a location on disk.
/// </summary>
public interface IExporter {
    /// <summary>
    /// Synchronously writes the object to the desired path, creating it if it does not exist.
    /// </summary>
    /// <param name="path">The path of the exported object</param>
    void Write(string path);

    /// <summary>
    /// Prepares <paramref name="path"/> for writing: an existing file at that path is deleted;
    /// otherwise a directory is created at that path unless one already exists.
    /// </summary>
    /// <param name="path">The path to prepare.</param>
    /// <remarks>
    /// NOTE(review): when no file exists, the directory is created AT <paramref name="path"/>,
    /// not at its parent. That suits exporters that treat the path as an output folder;
    /// confirm before using it with a single-file destination.
    /// </remarks>
    protected void EnsurePathExists(string path) {
        bool fileAlreadyThere = File.Exists(path);
        if (fileAlreadyThere) {
            File.Delete(path);
        } else if (!Directory.Exists(path)) {
            Directory.CreateDirectory(path);
        }
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Beam.Exports {
|
||||||
|
/// <summary>
/// An exporter that writes its output as several files and reports each one as it is written.
/// </summary>
public interface IStreamExporter : IAsyncExporter {
    /// <summary>
    /// Asynchronously writes the object to the desired path in many parts, returning the path
    /// of each written file as a stream
    /// </summary>
    /// <param name="path">The path of the exported object</param>
    /// <returns>The async enumerator of each written file</returns>
    IAsyncEnumerator<string> WriteAsyncStream(string path);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Exports {
|
||||||
|
/// <summary>
/// Container for the string constants used by the exporters.
/// </summary>
internal static class S {
    /// <summary>
    /// Messages used when raising exceptions.
    /// </summary>
    internal static class M {
        /// <summary>Message for a destination whose directory part is missing.</summary>
        internal const string FileDirectoryDoesNotExist =
            "Part of the path supplied does not exist.";
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
using aeqw89.DataKeys;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// <para>
/// Central place for the application's architecture-specific choices: how metadata and text
/// records are obtained for a stored resource, and which keys identify chapter and book
/// metadata. Other parts of the application depend on this interface instead of duplicating
/// those choices.
/// </para>
/// </summary>
partial interface IArchitecture {
    /// <summary>
    /// Builds the download context for the metadata of a <see cref="TextResource"/>.
    /// </summary>
    /// <param name="web">Web client used for downloading.</param>
    /// <param name="pieceKey">Key of the <see cref="TextResource"/> inside <paramref name="sdd"/>.</param>
    /// <param name="sdd">The <see cref="SharedDataDictionary"/> that information is retrieved from.</param>
    /// <param name="logger">Optional logger for debug output.</param>
    /// <returns>
    /// A <see cref="DownloadContext{T}"/> holding what is needed to perform the download, or
    /// <c>null</c> when no context can be produced.
    /// </returns>
    DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, ILogger? logger = null);

    /// <summary>
    /// Builds the download context for the text record of a <see cref="TextResource"/>.
    /// </summary>
    /// <param name="web">Web client used for downloading.</param>
    /// <param name="pieceKey">Key of the <see cref="TextResource"/> inside <paramref name="sdd"/>.</param>
    /// <param name="sdd">The <see cref="SharedDataDictionary"/> that information is retrieved from.</param>
    /// <param name="metadata">Optional book metadata to attach to the resulting text record.</param>
    /// <param name="logger">Optional logger for debug output.</param>
    /// <returns>
    /// A <see cref="DownloadContext{T}"/> holding what is needed to perform the download, or
    /// <c>null</c> when no context can be produced.
    /// </returns>
    DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, IDocumentMetaData? metadata = null, ILogger? logger = null);

    /// <summary>
    /// Key under which chapter metadata is looked up.
    /// </summary>
    DataKey<IDocumentMetaData> ChapterKey { get; set; }

    /// <summary>
    /// Key under which book metadata is looked up.
    /// </summary>
    DataKey<IDocumentMetaData> BookKey { get; set; }

    /// <summary>
    /// The default architecture. Every read of this property constructs a new instance, so
    /// changes made to one returned object are not visible through a later read.
    /// </summary>
    static IArchitecture Default => new MainArchitecture();
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<!-- Build settings: an executable targeting net9.0 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<!-- NuGet packages: logging, console UI and async LINQ. -->
<!-- NOTE(review): Spectre.Console is pinned to a preview build (0.49.2-preview.0.70); confirm that is deliberate. -->
<ItemGroup>
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
||||||
|
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
|
||||||
|
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<!-- Source-level dependencies on the sibling projects in this solution. -->
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
|
||||||
|
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj" />
|
||||||
|
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<!-- Prebuilt assemblies referenced by file path, resolved relative to this project directory. -->
<!-- NOTE(review): DataKeys is taken from a Debug output folder and PersistentData from a Release one; -->
<!-- both paths only resolve after those repositories have been built locally. Confirm this mix is intended. -->
<ItemGroup>
|
||||||
|
<Reference Include="aeqw89.DataKeys">
|
||||||
|
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
||||||
|
</Reference>
|
||||||
|
<Reference Include="aeqw89.PersistentData">
|
||||||
|
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
|
||||||
|
</Reference>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// Colour and font settings for generated pages. Every value is stored as a string and
/// has a default.
/// </summary>
public class CssData {
    /// <summary>Primary background color (e.g., for the body).</summary>
    public string PrimaryColor { get; set; } = "#f5f5f5";

    /// <summary>Secondary color (e.g., for header background).</summary>
    public string SecondaryColor { get; set; } = "#e0e0e0";

    /// <summary>Tertiary color (e.g., for content sections).</summary>
    public string TertiaryColor { get; set; } = "#ffffff";

    /// <summary>Button background color.</summary>
    public string ButtonColor { get; set; } = "#007bff";

    /// <summary>Foreground text color.</summary>
    public string ForegroundColor { get; set; } = "#333333";

    /// <summary>Font family for main content.</summary>
    public string ContentFont { get; set; } = "Arial, sans-serif";

    /// <summary>Font size for main content.</summary>
    public string ContentFontSize { get; set; } = "16px";

    /// <summary>Font family for titles.</summary>
    public string TitleFont { get; set; } = "Georgia, serif";

    /// <summary>Font size for titles.</summary>
    public string TitleFontSize { get; set; } = "32px";
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
|
||||||
|
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// Helpers for deriving new data keys from existing ones by changing the ':'-separated
/// namespace prefix or appending a suffix.
/// </summary>
internal static class DataKeyExtensions {
    /// <summary>
    /// Returns a key whose identifier lives under <paramref name="namespace"/>.
    /// </summary>
    /// <param name="dk">The key to re-home.</param>
    /// <param name="namespace">The target namespace, with segments separated by ':'.</param>
    /// <returns>
    /// If the identifier already starts with the namespace, or with a leading part of it (whole
    /// segments only, longest first), that prefix is replaced by the full namespace. Otherwise
    /// the namespace and a ':' are prepended to the identifier.
    /// </returns>
    public static DataKey WithNamespace(this DataKey dk, string @namespace) {
        string[] segments = @namespace.Split(':');
        string identifier = dk.Identifier;

        for (int dropped = 0; dropped < segments.Length; dropped++) {
            string prefix = string.Join(':', segments[..^dropped]);

            // Ordinal comparison: identifiers are machine keys, not linguistic text.
            if (!identifier.StartsWith(prefix, StringComparison.Ordinal)) {
                continue;
            }

            // The match must end on a segment boundary, so "ab:x" is not treated as being
            // inside namespace "a".
            bool endsOnBoundary = identifier.Length == prefix.Length || identifier[prefix.Length] == ':';
            if (!endsOnBoundary) {
                continue;
            }

            // Swap only the leading prefix; string.Replace would also rewrite any later
            // occurrence of the same text inside the identifier.
            return new DataKey(@namespace + identifier[prefix.Length..]);
        }

        return new DataKey(@namespace + ":" + identifier);
    }

    /// <summary>Typed overload of <see cref="WithNamespace(DataKey, string)"/>.</summary>
    public static DataKey<T> WithNamespace<T>(this DataKey<T> dk, string @namespace) {
        return ((DataKey)dk).WithNamespace(@namespace).As<T>();
    }

    /// <summary>Returns a key whose identifier is the original followed directly by <paramref name="suffix"/>.</summary>
    public static DataKey<T> WithSuffix<T>(this DataKey<T> dk, string suffix) {
        return new DataKey<T>(dk.Identifier + suffix);
    }

    /// <summary>Re-homes the key under the aggregators namespace.</summary>
    public static DataKey ToAggregator(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:aggregators");

    /// <summary>Re-homes the key under the auxiliaries namespace.</summary>
    /// <remarks>
    /// NOTE(review): the namespace literal is spelled "auxillaries". It is kept byte-for-byte
    /// because existing keys may already use it; confirm before correcting the spelling.
    /// </remarks>
    public static DataKey ToAuxiliary(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:auxillaries");

    /// <summary>Creates a typed key with the same identifier as <paramref name="dk"/>.</summary>
    public static DataKey<T> As<T>(this DataKey dk) => new DataKey<T>(dk.Identifier);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// A file path together with the tags attached to it.
/// </summary>
/// <remarks>
/// Inside this namespace the simple name <c>File</c> refers to this class, so the framework
/// type has to be written as <c>System.IO.File</c>.
/// </remarks>
internal class File {
    /// <summary>Creates an entry for <paramref name="path"/> carrying <paramref name="tags"/>.</summary>
    /// <param name="path">The file path.</param>
    /// <param name="tags">Zero or more tags.</param>
    public File(string path, params string[] tags) {
        Path = path;
        Tags = tags;
    }

    /// <summary>The file path.</summary>
    public string Path { get; set; }

    /// <summary>The tags attached to the file.</summary>
    public string[] Tags { get; set; }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,132 @@
|
|||||||
|
//using aeqw89.DataKeys;
|
||||||
|
//using System;
|
||||||
|
//using System.Collections.Generic;
|
||||||
|
//using System.Linq;
|
||||||
|
//using System.Text;
|
||||||
|
//using System.Threading.Tasks;
|
||||||
|
|
||||||
|
//namespace Beam.Temporary.Cli {
|
||||||
|
// internal class HtmlBook : Document {
|
||||||
|
// public class Keys {
|
||||||
|
// public static DataKey<File> ContentPage => new DataKey<File>("content_page");
|
||||||
|
// public static DataKey<File> NoContentPage => new DataKey<File>("no_content_page");
|
||||||
|
// public static DataKey<File> TitlePage => new DataKey<File>("title_page");
|
||||||
|
// public static DataKey<File> StylesPage => new DataKey<File>("styles_page");
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public List<Tracked<IDocument>> Documents { get; set; }
|
||||||
|
// public IReadOnlyList<string> Pages => _Pages;
|
||||||
|
// private List<string> _Pages { get; set; } = [];
|
||||||
|
|
||||||
|
// private const string EMTPY_PAGE = "EMPTY";
|
||||||
|
|
||||||
|
// public CssData CssData { get; }
|
||||||
|
// public ArticleData BookData { get; set; }
|
||||||
|
// public HtmlBookTemplates Templates { get; set; }
|
||||||
|
|
||||||
|
// public HtmlBook(string bookname, CssData cssData, ArticleData bookData, HtmlBookTemplates templates, List<IDocument>? documents = null, Encoding? encoding = null)
|
||||||
|
// : base(bookname, encoding) {
|
||||||
|
// Documents = [];
|
||||||
|
// CssData = cssData;
|
||||||
|
// BookData = bookData;
|
||||||
|
// Templates = templates;
|
||||||
|
// if (documents is not null)
|
||||||
|
// Documents = documents.Select((x) => new Tracked<IDocument>(x)).ToList();
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public void Update(bool ignoreDirty = false) {
|
||||||
|
// if (!Directory.Exists(Filename))
|
||||||
|
// Directory.CreateDirectory(Filename);
|
||||||
|
|
||||||
|
// //System.IO.File.WriteAllLines(Path.Combine(Filename, "styles.css"), Format())
|
||||||
|
|
||||||
|
// List<string> newpages = [];
|
||||||
|
// if (Pages.Count < Documents.Count)
|
||||||
|
// _Pages.AddRange(Enumerable.Repeat(EMTPY_PAGE, Documents.Count - Pages.Count));
|
||||||
|
// foreach (var (doc, page) in Documents.Zip(Pages)) {
|
||||||
|
// if (!doc.IsDirty)
|
||||||
|
// newpages.Add(page);
|
||||||
|
// else if (doc.TrackedObject.MetaData.Count == 0)
|
||||||
|
// newpages.Add(PlainPage(doc.TrackedObject));
|
||||||
|
// else if (doc.TrackedObject.MetaData.TryGetValue(Program.Architecture.ChapterKey, out var meta) && meta is ArticleData articleData)
|
||||||
|
// newpages.Add(ArticlePage(doc.TrackedObject, articleData));
|
||||||
|
// else {
|
||||||
|
// Console.WriteLine("Unhandlable Metadata detected!");
|
||||||
|
// newpages.Add(PlainPage(doc.TrackedObject));
|
||||||
|
// }
|
||||||
|
|
||||||
|
// System.IO.File.WriteAllText(Path.Combine(Filename, Path.GetRandomFileName() + ".html"), newpages[^1]);
|
||||||
|
// doc.IsDirty = false;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// _Pages = newpages;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public void UpdateCss() {
|
||||||
|
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public void UpateTitle() {
|
||||||
|
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private string Format(string template, Dictionary<string, string> table) {
|
||||||
|
// ArgumentNullException.ThrowIfNull(template);
|
||||||
|
// ArgumentNullException.ThrowIfNull(table);
|
||||||
|
|
||||||
|
// foreach (var kvp in table) {
|
||||||
|
// template = template.Replace(kvp.Key, kvp.Value);
|
||||||
|
// }
|
||||||
|
// return template;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private Dictionary<string, string> GetDocumentTable(IDocument doc, bool keepPlaceholders = false) {
|
||||||
|
// var table = new Dictionary<string, string>() {
|
||||||
|
// { "{" + nameof(doc.Filename) + "}", doc.Filename },
|
||||||
|
// { "{Content}", doc.ToString() }
|
||||||
|
// };
|
||||||
|
|
||||||
|
// return SolvePlaceholders(table, keepPlaceholders);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private Dictionary<string, string> GetArticleDataTable(IDocument doc, ArticleData ad, bool keepPlaceholders = false) {
|
||||||
|
// var table = new Dictionary<string, string>() {
|
||||||
|
// { "{" + nameof(ad.Language) + "}", ad.Language ?? "" },
|
||||||
|
// { "{" + nameof(ad.Authors) + "}", ad.Authors.Aggregate("; ")},
|
||||||
|
// { "{" + nameof(ad.Categories) + "}", ad.Categories.Aggregate("; ") },
|
||||||
|
// { "{" + nameof(ad.Version) + "}", ad.Version ?? "" },
|
||||||
|
// { "{" + nameof(ad.Description) + "}", ad.Description ?? "" },
|
||||||
|
// { "{" + nameof(ad.Name) + "}", ad.Name ?? "" },
|
||||||
|
// { "{" + nameof(doc.Filename) + "}", doc.Filename },
|
||||||
|
// { "{Content}", doc.ToString() }
|
||||||
|
// };
|
||||||
|
|
||||||
|
// return SolvePlaceholders(table, keepPlaceholders);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private Dictionary<string, string> SolvePlaceholders(Dictionary<string, string> table, bool keepPlaceholders) {
|
||||||
|
// if (keepPlaceholders)
|
||||||
|
// return table.Select(
|
||||||
|
// (x) => new KeyValuePair<string, string>(x.Key, x.Value == "" ? $"{x.Key}" : x.Value))
|
||||||
|
// .ToDictionary();
|
||||||
|
// return table;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private string PlainPage(IDocument doc, bool keepPlaceholders = false) {
|
||||||
|
// return Format(Templates.ContentPageTemplate, GetDocumentTable(doc, keepPlaceholders));
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private string ArticlePage(IDocument doc, ArticleData data, bool keepPlaceholders = false) {
|
||||||
|
// return Format(Templates.ContentPageTemplate, GetArticleDataTable(doc, data, keepPlaceholders));
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public override byte[] ToBytes() {
|
||||||
|
// throw new NotImplementedException();
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public override string ToString() {
|
||||||
|
// throw new NotImplementedException();
|
||||||
|
// }
|
||||||
|
|
||||||
|
// }
|
||||||
|
//}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// Groups the template strings used for an HTML book. As a struct, a default instance has
/// every template set to <c>null</c>.
/// </summary>
internal struct HtmlBookTemplates {
    /// <summary>Template for the title page.</summary>
    public string TitlePageTemplate { get; set; }

    /// <summary>Template for a content page.</summary>
    public string ContentPageTemplate { get; set; }

    /// <summary>Template for the stylesheet.</summary>
    public string CssTemplate { get; set; }

    /// <summary>Template for the no-content case.</summary>
    public string NoContentTemplate { get; set; }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
using aeqw89.DataKeys;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
partial interface IArchitecture {
|
||||||
|
/// <summary>
/// Default <see cref="IArchitecture"/> implementation. Builds the <see cref="DownloadContext{T}"/>
/// objects used to fetch a novel's metadata page and its chapter pages from the resources
/// registered in a <see cref="SharedDataDictionary"/>.
/// </summary>
private class MainArchitecture : IArchitecture {
    public MainArchitecture() { }

    /// <summary>Key under which each downloaded chapter's <see cref="ArticleData"/> is stored in the document's metadata map.</summary>
    public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");

    /// <summary>Key under which the optional book-level metadata is stored in the document's metadata map.</summary>
    public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");

    /// <summary>
    /// Builds the download context that fetches and parses the metadata page of a novel.
    /// </summary>
    /// <param name="web">HTML client handed to the download context.</param>
    /// <param name="pieceKey">Key of the novel inside <c>sdd.Novels</c>.</param>
    /// <param name="sdd">Dictionary holding novels, resources, templates and bindings.</param>
    /// <param name="logger">Optional logger forwarded to the download context.</param>
    /// <returns>
    /// The context, or <c>null</c> when the novel has no associated meta source or no
    /// <c>MetaTemplateInitialData</c>.
    /// </returns>
    /// <remarks>The dictionary indexers used here throw when a referenced key is not registered.</remarks>
    public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, ILogger? logger = null) {
        var piece = sdd.Novels[pieceKey].ToRecord(sdd); // novel data plus its resolved resources
        var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // auxiliary (metadata) source, if any

        // the auxiliary source is required to locate and parse the metadata page
        if (auxiliary is null)
            return null;

        // without initial data the metadata link cannot be generated
        var initialData = piece.Resource.MetaTemplateInitialData;
        if (initialData is null)
            return null;

        // generates the link to the novel's metadata page from the auxiliary source's template
        var link = sdd.Templates[auxiliary.Resource.Key].GenerateLink(initialData!);
        var binding = auxiliary.Bindings;

        return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
            return new ArticleData() {
                Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
                Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
                // Drop empty entries so a page without tags yields an empty array instead of [""],
                // and trim the whitespace left around each ';'-separated tag.
                Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "")
                    .Split(';', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries),
                Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
            };
        });
    }

    /// <summary>
    /// Builds the download context that fetches the chapters of a novel and turns each page into
    /// a <see cref="StringDocument"/> carrying chapter (and optionally book) metadata.
    /// </summary>
    /// <param name="web">HTML client handed to the download context.</param>
    /// <param name="resKey">Key of the novel inside <c>sdd.Novels</c>.</param>
    /// <param name="sdd">Dictionary holding novels, resources, templates and bindings.</param>
    /// <param name="metaData">Optional book-level metadata attached to every document under <see cref="BookKey"/>.</param>
    /// <param name="logger">Optional logger forwarded to the download context.</param>
    /// <returns>The context, or <c>null</c> when the novel has no associated aggregator source.</returns>
    /// <remarks>The dictionary indexers used here throw when a referenced key is not registered.</remarks>
    public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, SharedDataDictionary sdd, IDocumentMetaData? metaData = null, ILogger? logger = null) {
        var res = sdd.Novels[resKey].ToRecord(sdd); // novel data plus its resolved resources
        var aggregator = res.AssociatedSource?.ToRecord(sdd); // aggregator (novel web source), if any

        // the aggregator is required to generate chapter links and parse chapter pages
        if (aggregator is null)
            return null;

        var template = sdd.Templates[aggregator.Resource.Key]; // link generator data for this aggregator

        // creates a generative enumerable of links from 'template'
        var sle = SourceLinkEnumerable.FromGenerator(new DataBackedSourceLinkGenerator(
            template, res.Resource.TemplateInitialData));

        return new DownloadContext<IDocument>(web, sle,
            transformer: (x) => {
                var resolved = aggregator.Bindings.Resolve(x);
                var articleData = new ArticleData() {
                    Name = OnlineCleaner.Clean(resolved.Title),
                };
                Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
                meta.Add(ChapterKey, articleData);
                if (metaData is not null)
                    meta.Add(BookKey, metaData);
                return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) {
                    MetaData = meta
                };
            },
            retryReporter: new Progress<int>((x) => Console.WriteLine($"Retrying download ({x})")),
            downloadReporter: new Progress<IDocument>((x) => Console.WriteLine($"Downloaded ({x.Filename})")),
            asyncFailurePredicates: [
                // a page without the chapter container element is treated as a failed download
                (x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
            ],
            timeOut: TimeSpan.FromSeconds(15),
            downloadLogger: logger
        );
    }
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
|
||||||
|
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
|
||||||
|
internal static class NovelStatics {
|
||||||
|
/// <summary>
/// Registers the novel "The Legendary Mechanic" in <paramref name="sdd"/> and links it to the
/// Light Novel World aggregator and auxiliary resources.
/// </summary>
/// <param name="sdd">Dictionary that receives the novel and the aggregator-to-novel link.</param>
public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) {
    var lnwAggregator = new DataKey<WebResource>("aeqw89:document:aggregators:light_novel_world");
    var lnwAuxiliary = new DataKey<WebResource>("aeqw89:document:auxillaries:light_novel_world");

    var novel = new TextResource() {
        Key = new DataKey<TextResource>("novels:the_legendary_mechanic"),
        AssociatedSource = lnwAggregator,
        AssociatedMetaSource = lnwAuxiliary,
        TemplateInitialData = ["the-legendary-mechanic-245", "1"],
        MetaTemplateInitialData = ["the-legendary-mechanic"]
    };
    sdd.Novels.TryAdd(novel.Key, novel);

    // Add to the aggregator's existing set when there is one. TryAdd with a fresh one-element
    // set is a no-op once the aggregator already has a set, which silently dropped this novel.
    var aggregatorNovels = sdd.AggregatorNovels;
    if (!aggregatorNovels.TryGetValue(lnwAggregator, out var registered)) {
        registered = [];
        aggregatorNovels.Add(lnwAggregator, registered);
    }
    registered.Add(novel.Key);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Registers the novel "I Alone Level Up" in <paramref name="sdd"/> and links it to the
/// Light Novel World aggregator and auxiliary resources.
/// </summary>
/// <param name="sdd">Dictionary that receives the novel and the aggregator-to-novel link.</param>
/// <remarks>The method name (including its spelling) is kept as-is because existing callers use it.</remarks>
public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) {
    var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As<WebResource>();
    var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As<WebResource>();

    var novel = new TextResource() {
        Key = new DataKey<TextResource>("novels:i_alone_level_up"),
        AssociatedSource = lnwAggregator,
        AssociatedMetaSource = lnwAuxiliary,
        TemplateInitialData = ["i-alone-level-up-236", "1"],
        MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"]
    };

    sdd.Novels.TryAdd(novel.Key, novel);

    // Add to the aggregator's existing set when there is one. TryAdd with a fresh one-element
    // set is a no-op once another novel has been registered for the same aggregator, which
    // silently dropped this novel.
    var aggregatorNovels = sdd.AggregatorNovels;
    if (!aggregatorNovels.TryGetValue(lnwAggregator, out var registered)) {
        registered = [];
        aggregatorNovels.Add(lnwAggregator, registered);
    }
    registered.Add(novel.Key);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Registers an (empty) link template for the Novel Full aggregator key in <paramref name="sdd"/>.
/// </summary>
/// <param name="sdd">Dictionary that receives the template entry.</param>
/// <remarks>
/// NOTE(review): the two <see cref="WebResource"/> objects built here are never added to
/// <paramref name="sdd"/>, the bindings key names "light_novel_world" rather than Novel Full, and
/// the registered template string is empty. This looks unfinished; confirm the intended behaviour.
/// </remarks>
public static void Define_NovelFull(SharedDataDictionary sdd) {
    const string documentNamespace = "aeqw89:document";

    var aggregatorKey = new DataKey<WebResource>("aggregators:novel_full").WithNamespace(documentNamespace);
    var auxiliaryKey = new DataKey<WebResource>("auxillaries:novel_full").WithNamespace(documentNamespace);
    var bindingsKey = new DataKey<DataBindings>("aeqw89:bindings:light_novel_world");

    // Built but currently unused (see the review note above).
    var aggregatorResource = new WebResource(aggregatorKey) {
        Name = "Novel Full",
        Description = "A novel aggregator site",
        Domain = "https://novelfull.net",
        Bindings = bindingsKey
    };

    // Built but currently unused (see the review note above).
    var auxiliaryResource = new WebResource(auxiliaryKey) {
        Name = "Novel Full",
        Description = "A novel aggregator site",
        Domain = "https://novelfull.net",
        Bindings = bindingsKey.WithSuffix("_aux")
    };

    sdd.Templates.TryAdd(aggregatorKey, new() { Template = "" });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Registers everything needed to read from Light Novel World in <paramref name="sdd"/>: the link
/// templates, the aggregator and auxiliary <see cref="WebResource"/> entries, and the data bindings
/// (XPath expressions / providers) for chapter pages and for the novel's metadata page.
/// </summary>
/// <param name="sdd">Dictionary that receives the entries; existing entries are left untouched (TryAdd).</param>
public static void Define_LightNovelWorld(SharedDataDictionary sdd) {
    const string chapterBindingsName = "aeqw89:bindings:light_novel_world";
    const string metaBindingsName = "aeqw89:bindings:light_novel_world_aux";

    var aggregatorKey = new DataKey<WebResource>("aeqw89:document:aggregators:light_novel_world");
    var auxiliaryKey = new DataKey<WebResource>("aeqw89:document:auxillaries:light_novel_world");

    // Link templates: the auxiliary one addresses the novel page, the aggregator one a chapter page.
    sdd.Templates.TryAdd(auxiliaryKey, new PackagedSourceLinkGenerationData {
        Template = "https://www.lightnovelworld.co/novel/{0}",
        IndexOfChapterIndex = -1
    });
    sdd.Templates.TryAdd(aggregatorKey, new PackagedSourceLinkGenerationData {
        Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}",
        IndexOfChapterIndex = 1
    });

    // Web resources pointing at their respective bindings.
    var aggregatorResource = new WebResource(aggregatorKey) {
        Name = "Light Novel World",
        Description = "A novel aggregator site maintained by NetherClaw",
        Domain = "https://www.lightnovelworld.co",
        Bindings = new DataKey<DataBindings>(chapterBindingsName)
    };
    var auxiliaryResource = new WebResource(auxiliaryKey) {
        Name = "Light Novel World",
        Description = "A novel aggregator site maintained by NetherClaw",
        Domain = "https://www.lightnovelworld.co",
        Bindings = new DataKey<DataBindings>(metaBindingsName)
    };
    sdd.Aggregators.TryAdd(aggregatorResource.Key, aggregatorResource);
    sdd.Auxillaries.TryAdd(auxiliaryResource.Key, auxiliaryResource);

    var chapterBindingsKey = new DataKey<DataBindings>(chapterBindingsName);
    var metaBindingsKey = new DataKey<DataBindings>(metaBindingsName);

    // Bindings for a chapter page: title and paragraphed content.
    sdd.Bindings.TryAdd(chapterBindingsKey, new DataBindings() {
        Title = new Binding("aeqw89:binding:light_novel_world:title") {
            XPath = "/html/body/main/article/section/div[1]/h1/span[2]",
            Type = BindingType.Single
        },
        Content = new("aeqw89:binding:light_novel_world:content") {
            Provider = new ParagraphedContentDataProvider() {
                Content = new Binding() {
                    XPath = "//*[@id=\"chapter-container\"]"
                }
            },
            Type = BindingType.UseProvider
        },
    });

    // Bindings for the novel's metadata page: title, authors, description and tags.
    sdd.Bindings.TryAdd(metaBindingsKey, new DataBindings() {
        Title = new("aeqw89:binding:light_novel_world_aux:title") {
            XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1",
            Type = BindingType.Single
        },
        Authors = new("aeqw89:binding:light_novel_world_aux:authors") {
            XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a",
            Type = BindingType.Single
        },
        Description = new("aeqw89:binding:light_novel_world_aux:description") {
            Provider = new ParagraphedContentDataProvider() {
                Content = new() {
                    XPath = "/html/body/main/article/div/section/div[1]/div"
                }
            },
            Type = BindingType.UseProvider
        },
        Tags = new("aeqw89:binding:light_novel_world_aux:tags") {
            Provider = new ListContentDataProvider() {
                Content = new() {
                    XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul"
                }
            },
            Type = BindingType.UseProvider
        }
    });
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
using aeqw89.PersistentData;
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
using System.Text.Json.Serialization.Metadata;
|
||||||
|
using Beam.Temporary.Cli.Templates.Classic;
|
||||||
|
using Beam.Exports;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// Entry point of the temporary CLI: registers the Light Novel World definitions, downloads the
/// metadata and the first chapters of one novel and exports each chapter as an HTML file.
/// </summary>
internal class Program {

    /// <summary>JSON options used for the persistent shared data.</summary>
    public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();

    /// <summary>Shared data dictionary; replaced in <see cref="Main"/> by the persisted instance.</summary>
    public static SharedDataDictionary Shared { get; set; } = [];

    public static IArchitecture Architecture = IArchitecture.Default;

    const string SharedDataPath = "data/.dat";

    /// <summary>
    /// Runs the download-and-export pipeline for the novel "novels:i_alone_level_up".
    /// Logs an error and returns early when no download context can be built for the novel.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args) {
        ConversionOptions.Converters.AddPersistentDataRequiredConverters();
        ConversionOptions.WriteIndented = true;

        var web = new HtmlWeb();

        // ILoggerFactory is IDisposable; dispose it on exit so its providers are released.
        using var lf = LoggerFactory.Create((x) => {
            x.AddConsole();
        });

        ILogger logger = lf
            .CreateLogger("Program");

        await using var sharedContext = await DataDictionaryContext<SharedDataDictionary>.Create(
            SharedDataPath,
            DataKind.Shared,
            logger,
            ConversionOptions
        );

        Shared = sharedContext.Data;

        // Start from a clean dictionary and re-register the static definitions.
        Shared.Clear();
        NovelStatics.Define_LightNovelWorld(Shared);
        NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared);
        NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared);
        ClassicTemplates.Register(Shared);

        var novel = new DataKey<TextResource>("novels:i_alone_level_up");

        // GetMeta returns null when the novel has no usable metadata source.
        var context_aux = Architecture.GetMeta(web, novel, Shared);
        if (context_aux is null) {
            logger.LogError("No metadata download context could be built for {Novel}.", novel);
            return;
        }

        var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
            new SequentialFragmentDownloader<IDocumentMetaData>(
                context_aux,
                (c) => new UnitFragmentDownloader<IDocumentMetaData>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                logger)
            .UnwrapFragmented());
        var metadata = (await metaDownloader.FirstAsync());

        // GetTextRecord returns null when the novel has no aggregator source.
        var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data);
        if (context is null) {
            logger.LogError("No chapter download context could be built for {Novel}.", novel);
            return;
        }

        context.DownloadReporter = new Progress<IDocument>((x) => Console.WriteLine(x.Filename));
        var downloader = new DownloadEnumerable<IDocument>(
            new SequentialFragmentDownloader<IDocument>(
                context,
                (c) => new UnitFragmentDownloader<IDocument>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                logger)
            .UnwrapFragmented());

        List<Ordered<IDocument>> documents = [];

        // Only the first 20 downloads are taken; documents without chapter metadata are skipped.
        await foreach (var download in downloader.Take(20)) {
            if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
                continue;
            if (meta is not ArticleData articleMetaData)
                continue;
            //Console.WriteLine($"Title: {data.Name}");
            //Console.WriteLine($"Description: {data.Description}");
            //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
            //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
            Console.WriteLine($"Chapter title: {articleMetaData.Name}");
            //Console.WriteLine($"Content: {download}");

            documents.Add(download);
        }

        string testDir = Path.Combine("txt", Path.GetRandomFileName());
        Directory.CreateDirectory(testDir);

        // Highest order among the kept documents (-1 when there are none); the document with this
        // order gets no "Next" link, and the document with order 0 gets no "Previous" link.
        int lastOrder = documents.MaxBy((x) => x.Order)?.Order ?? -1;
        foreach (var document in documents.OrderBy((x) => x.Order)) {
            document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
            Dictionary<string, string> linkButtons = new();
            if (document.Order != 0)
                linkButtons.Add("Previous", $"{document.Order - 1}.html");
            if (document.Order != lastOrder)
                linkButtons.Add("Next", $"{document.Order + 1}.html");
            new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
        }

        Console.ReadKey();

        //foreach (var download in documents.OrderBy((x) => x.Order)) {
        //    if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
        //        Console.WriteLine($"{download.Order}:{meta.Name}");
        //}

        //string[] templates = new DataKey<File>[] {
        //    HtmlBook.Keys.ContentPage,
        //    HtmlBook.Keys.NoContentPage,
        //    HtmlBook.Keys.TitlePage,
        //    HtmlBook.Keys.StylesPage,
        //}.Select(
        //    (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
        //).ToArray();

        //HtmlBook book = new(
        //    bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
        //    new CssData(),
        //    new ArticleData(),
        //    new HtmlBookTemplates() {
        //        ContentPageTemplate = templates[0],
        //        NoContentTemplate = templates[1],
        //        TitlePageTemplate = templates[2],
        //        CssTemplate = templates[3],
        //    },
        //    documents: documents.Select((x) => x.Data).ToList()
        //);

        //book.Update();
        //Console.WriteLine("One variable!");
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
using aeqw89.PersistentData;
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// Typed view over a <see cref="BaseDataDictionary"/>: each property reads its dictionary through
/// <c>GetOrCreateDictionary</c> under the property's own name and writes it back into <c>Data</c>
/// under that same name.
/// </summary>
public class SharedDataDictionary : BaseDataDictionary {
    /// <summary>Link-generation data per web resource key.</summary>
    public Dictionary<DataKey<WebResource>, PackagedSourceLinkGenerationData> Templates {
        get { return GetOrCreateDictionary<DataKey<WebResource>, PackagedSourceLinkGenerationData>(nameof(Templates)); }
        set { Data[nameof(Templates)] = value; }
    }

    /// <summary>Aggregator web resources by key.</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Aggregators {
        get { return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Aggregators)); }
        set { Data[nameof(Aggregators)] = value; }
    }

    /// <summary>Auxiliary web resources by key.</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Auxillaries {
        get { return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Auxillaries)); }
        set { Data[nameof(Auxillaries)] = value; }
    }

    /// <summary>Data bindings by key.</summary>
    public Dictionary<DataKey<DataBindings>, DataBindings> Bindings {
        get { return GetOrCreateDictionary<DataKey<DataBindings>, DataBindings>(nameof(Bindings)); }
        set { Data[nameof(Bindings)] = value; }
    }

    /// <summary>Set of novel keys per aggregator key.</summary>
    public Dictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>> AggregatorNovels {
        get { return GetOrCreateDictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>>(nameof(AggregatorNovels)); }
        set { Data[nameof(AggregatorNovels)] = value; }
    }

    /// <summary>Novels (text resources) by key.</summary>
    public Dictionary<DataKey<TextResource>, TextResource> Novels {
        get { return GetOrCreateDictionary<DataKey<TextResource>, TextResource>(nameof(Novels)); }
        set { Data[nameof(Novels)] = value; }
    }

    /// <summary>Registered files by key.</summary>
    internal Dictionary<DataKey<File>, File> Files {
        get { return GetOrCreateDictionary<DataKey<File>, File>(nameof(Files)); }
        set { Data[nameof(Files)] = value; }
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>String helpers for sequences.</summary>
public static class StringExtensions {
    /// <summary>
    /// Concatenates the strings of <paramref name="str"/>, inserting <paramref name="separator"/>
    /// between consecutive elements.
    /// </summary>
    /// <param name="str">Strings to join; enumerated exactly once.</param>
    /// <param name="separator">Text placed between elements.</param>
    /// <returns>The joined text, or <see cref="string.Empty"/> for an empty sequence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    public static string Aggregate(this IEnumerable<string> str, string separator) {
        // string.Join walks the sequence once and builds the result in linear time, whereas the
        // previous Any() + Aggregate(...) enumerated twice and re-copied the text on every step.
        return string.Join(separator, str);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli.Templates.Classic {
|
||||||
|
/// <summary>Registers the "classic" HTML book template files in a <see cref="SharedDataDictionary"/>.</summary>
internal class ClassicTemplates {
    // Directory the template paths were previously hard-coded to; still the default so existing
    // callers keep registering the same paths.
    // NOTE(review): this is a machine-specific absolute path — pass templateDirectory elsewhere.
    private const string DefaultTemplateDirectory = "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic";

    /// <summary>
    /// Adds the content, title, styles and no-content template files to <c>sdd.Files</c>.
    /// Keys that are already present are left untouched (TryAdd).
    /// </summary>
    /// <param name="sdd">Dictionary that receives the file entries.</param>
    /// <param name="templateDirectory">
    /// Directory containing the four template files; when <c>null</c>, the original hard-coded
    /// directory is used.
    /// </param>
    public static void Register(SharedDataDictionary sdd, string? templateDirectory = null) {
        var directory = templateDirectory ?? DefaultTemplateDirectory;

        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:content_page"),
            new(System.IO.Path.Combine(directory, "Content.template.html"), "htmlpage", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:title_page"),
            new(System.IO.Path.Combine(directory, "Title.template.html"), "htmlpage", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:styles_page"),
            new(System.IO.Path.Combine(directory, "Styles.template.css"), "styles", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:no_content_page"),
            new(System.IO.Path.Combine(directory, "NoContent.template.html"), "htmlpage", "templates"));
    }
}
|
||||||
|
|
||||||
|
/// <summary>Convenience helpers for dictionaries whose values are <see cref="File"/> entries.</summary>
internal static class DictionaryOfFileExtensions {
    /// <summary>
    /// Looks up <paramref name="key"/> in <paramref name="dict"/> and returns the full text of the
    /// file at the entry's <c>Path</c>.
    /// </summary>
    /// <param name="dict">Dictionary of file entries.</param>
    /// <param name="key">Key of the entry to read; the indexer throws when it is missing.</param>
    /// <returns>The file's content as a string.</returns>
    public static string ReadToString<T>(this Dictionary<T, File> dict, T key) where T: notnull {
        var entry = dict[key];
        return System.IO.File.ReadAllText(entry.Path);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>{Name}</title>
|
||||||
|
<link rel="stylesheet" href="styles.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>{Name}</h1>
|
||||||
|
<p><em>{Description}</em></p>
|
||||||
|
<div>
|
||||||
|
<span><strong>Authors:</strong> {Authors}</span> |
|
||||||
|
<span><strong>Language:</strong> {Language}</span> |
|
||||||
|
<span><strong>Categories:</strong> {Categories}</span> |
|
||||||
|
<span><strong>Version:</strong> {Version}</span>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
<article>
|
||||||
|
{Content}
|
||||||
|
</article>
|
||||||
|
<div class="navigation">
|
||||||
|
<button id="prev">Previous</button>
|
||||||
|
<button id="next">Next</button>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>404 - Not Found</title>
|
||||||
|
<link rel="stylesheet" href="styles.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="error-container">
|
||||||
|
<h1>404 - Content Not Found</h1>
|
||||||
|
<p>The file <strong>{Filename}</strong> was not found.</p>
|
||||||
|
<p>{Content}</p>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
/* styles.css */
|
||||||
|
/* Placeholders:
|
||||||
|
{PrimaryColor}, {SecondaryColor}, {TertiaryColor}, {ButtonColor},
|
||||||
|
{ForegroundColor}, {ContentFont}, {ContentFontSize}, {TitleFont}, {TitleFontSize}
|
||||||
|
*/
|
||||||
|
body {
|
||||||
|
font-family: {ContentFont};
|
||||||
|
font-size: {ContentFontSize};
|
||||||
|
background-color: {PrimaryColor};
|
||||||
|
color: {ForegroundColor};
|
||||||
|
margin: 0;
|
||||||
|
padding: 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
header {
|
||||||
|
background-color: {SecondaryColor};
|
||||||
|
padding: 20px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
header h1 {
|
||||||
|
font-family: {TitleFont};
|
||||||
|
font-size: {TitleFontSize};
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
header p {
|
||||||
|
font-style: italic;
|
||||||
|
margin: 5px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
section, article, nav {
|
||||||
|
background: {TertiaryColor};
|
||||||
|
padding: 15px;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
margin: 20px auto;
|
||||||
|
max-width: 800px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.navigation {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
max-width: 800px;
|
||||||
|
margin: 20px auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
button {
|
||||||
|
background-color: {ButtonColor};
|
||||||
|
color: {ForegroundColor};
|
||||||
|
border: none;
|
||||||
|
padding: 10px 20px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: {ContentFontSize};
|
||||||
|
border-radius: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
nav h2 {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>{Name}</title>
|
||||||
|
<link rel="stylesheet" href="styles.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>{Name}</h1>
|
||||||
|
<p><em>{Description}</em></p>
|
||||||
|
</header>
|
||||||
|
<section>
|
||||||
|
<div><strong>Authors:</strong> {Authors}</div>
|
||||||
|
<div><strong>Language:</strong> {Language}</div>
|
||||||
|
<div><strong>Categories:</strong> {Categories}</div>
|
||||||
|
<div><strong>Version:</strong> {Version}</div>
|
||||||
|
</section>
|
||||||
|
<nav>
|
||||||
|
<h2>Table of Contents</h2>
|
||||||
|
<ul>
|
||||||
|
{TOC} <!-- Expected to be a list of items (e.g. <li>Chapter 1</li>, etc.) -->
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// A text (novel) entry: its key, the optional keys of the web resources it is read from, and the
/// initial data fed to the link templates.
/// </summary>
public class TextResource : IKeyed<TextResource> {
    /// <summary>Key identifying this text resource.</summary>
    public required DataKey<TextResource> Key { get; set; }

    /// <summary>Key of the aggregator resource (looked up in <c>Aggregators</c>), if any.</summary>
    public DataKey<WebResource>? AssociatedSource { get; set; }

    /// <summary>Key of the metadata resource (looked up in <c>Auxillaries</c>), if any.</summary>
    public DataKey<WebResource>? AssociatedMetaSource { get; set; }

    /// <summary>Initial data for the chapter link template.</summary>
    public required string[] TemplateInitialData { get; set; }

    /// <summary>Initial data for the metadata link template, if any.</summary>
    public string?[]? MetaTemplateInitialData { get; set; }

    /// <summary>
    /// Resolves the associated resource keys against <paramref name="sdd"/> and packs the result
    /// into a <see cref="TextResourceRecord"/>.
    /// </summary>
    /// <param name="sdd">Dictionary providing <c>Aggregators</c> and <c>Auxillaries</c>.</param>
    /// <returns>A record holding this instance and the resolved resources (<c>null</c> where no key is set).</returns>
    /// <remarks>A key that is set but not registered makes the dictionary indexer throw.</remarks>
    public TextResourceRecord ToRecord(SharedDataDictionary sdd) {
        WebResource? source = null;
        if (AssociatedSource is not null)
            source = sdd.Aggregators[AssociatedSource];

        WebResource? metaSource = null;
        if (AssociatedMetaSource is not null)
            metaSource = sdd.Auxillaries[AssociatedMetaSource];

        return new TextResourceRecord(this, source, metaSource);
    }
}
|
||||||
|
|
||||||
|
/// <summary>A <see cref="TextResource"/> together with its resolved web resources.</summary>
/// <param name="Resource">The text resource itself.</param>
/// <param name="AssociatedSource">Resolved aggregator resource, or <c>null</c> when none is set.</param>
/// <param name="AssociatedMetaSource">Resolved metadata resource, or <c>null</c> when none is set.</param>
public record TextResourceRecord(
    TextResource Resource,
    WebResource? AssociatedSource,
    WebResource? AssociatedMetaSource);
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>Wraps an object together with a "dirty" flag.</summary>
/// <typeparam name="T">Type of the wrapped object.</typeparam>
/// <param name="obj">Object to wrap.</param>
internal class Tracked<T>(T obj) {
    /// <summary>The wrapped object.</summary>
    public T TrackedObject { get; set; } = obj;

    /// <summary>Dirty flag; a new instance starts out dirty.</summary>
    public bool IsDirty { get; set; } = true;

    /// <summary>Sets <see cref="IsDirty"/> to <c>true</c>.</summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public Tracked<T> SetDirty() {
        this.IsDirty = true;
        return this;
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
using aeqw89.PersistentData;
|
||||||
|
using aeqw89.DataKeys;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
|
||||||
|
/// <summary>
/// Describes a web site content is read from: its key, the key of the data bindings used to parse
/// its pages, and descriptive fields.
/// </summary>
/// <param name="key">Key identifying this resource.</param>
public class WebResource(DataKey<WebResource> key) : IKeyed<WebResource> {
    /// <summary>Creates a resource with an empty-string key.</summary>
    public WebResource() : this(new DataKey<WebResource>(string.Empty)) { }

    /// <summary>Key identifying this resource.</summary>
    public DataKey<WebResource> Key { get; set; } = key;

    /// <summary>Key of the bindings entry (looked up in <c>Bindings</c>) for this resource.</summary>
    public required DataKey<DataBindings> Bindings { get; set; }

    /// <summary>Display name.</summary>
    public string? Name { get; set; }

    /// <summary>Site domain / base URL.</summary>
    public string? Domain { get; set; }

    /// <summary>Free-text description.</summary>
    public string? Description { get; set; }

    /// <summary>
    /// Resolves <see cref="Bindings"/> against <paramref name="sdd"/> and packs the result into a
    /// <see cref="WebResourceRecord"/>.
    /// </summary>
    /// <param name="sdd">Dictionary providing <c>Bindings</c>; its indexer throws for an unregistered key.</param>
    /// <returns>A record holding this instance and its resolved bindings.</returns>
    public WebResourceRecord ToRecord(SharedDataDictionary sdd) {
        var resolvedBindings = sdd.Bindings[Bindings];
        return new WebResourceRecord(this, resolvedBindings);
    }
}
|
||||||
|
|
||||||
|
/// <summary>A <see cref="WebResource"/> together with its resolved data bindings.</summary>
/// <param name="Resource">The web resource itself.</param>
/// <param name="Bindings">The bindings resolved from the resource's bindings key.</param>
public record WebResourceRecord(
    WebResource Resource,
    DataBindings Bindings);
|
||||||
|
}
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Version 17
|
||||||
|
VisualStudioVersion = 17.12.35506.116
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam", "Beam\Beam.csproj", "{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Temporary.Cli", "Beam.Temporary.Cli\Beam.Temporary.Cli.csproj", "{8F650BBA-3800-4B5E-A6FF-9057633601EE}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Dynamic", "Beam.Dynamic\Beam.Dynamic.csproj", "{DDEABE82-096C-4799-87F1-56F494D35FAA}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Exports\Beam.Exports.csproj", "{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
Release|Any CPU = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>Descriptive metadata of an article (name, authors, language, categories, version, description).</summary>
public class ArticleData : IDocumentMetaData {
    /// <summary>Article name / title.</summary>
    public string? Name { get; set; }

    /// <summary>Author names; empty by default.</summary>
    public string[] Authors { get; set; } = [];

    /// <summary>Language of the article.</summary>
    public string? Language { get; set; }

    /// <summary>Category names; empty by default.</summary>
    public string[] Categories { get; set; } = [];

    /// <summary>Version text.</summary>
    public string? Version { get; set; }

    /// <summary>Free-text description.</summary>
    public string? Description { get; set; }

    /// <summary>Serializes this instance to JSON with <see cref="JsonSerializer"/>.</summary>
    /// <param name="options">Serializer options; <c>null</c> uses the serializer's defaults.</param>
    /// <returns>The JSON text.</returns>
    public string AsJson(JsonSerializerOptions? options = null)
        => JsonSerializer.Serialize(this, options);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="FluentBuilder" Version="0.10.0">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||||
|
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<Reference Include="aeqw89.DataKeys">
|
||||||
|
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
||||||
|
</Reference>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A <see cref="Document"/> whose content is kept as a raw byte array.
/// </summary>
internal class ByteDocument(string filename, byte[] content, Encoding? encoding = null)
    : Document(filename, encoding) {
    /// <summary>The raw bytes of the document.</summary>
    public byte[] Content { get; set; } = content;

    /// <summary>Returns <see cref="Content"/> itself (no copy is made).</summary>
    public override byte[] ToBytes() => Content;

    /// <summary>Decodes <see cref="Content"/> using the document's encoding.</summary>
    public override string ToString() => Encoding.GetString(Content);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A <see cref="DelegateBackedSourceLinkGenerator"/> whose link generator and
/// incrementation behaviour are both taken from a
/// <see cref="PackagedSourceLinkGenerationData"/> value.
/// </summary>
public class DataBackedSourceLinkGenerator(
    PackagedSourceLinkGenerationData data,
    params object[] initialState)
    : DelegateBackedSourceLinkGenerator(data.GenerateLink, data.GetBehaviour(), initialState) {
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>Builds a link from the current generator state.</summary>
public delegate DocumentSourceLink LinkGenerator(params object[] ps);

/// <summary>Produces the value of a state slot after moving it by <paramref name="amount"/>.</summary>
public delegate object Incrementor(object obj, int amount);

/// <summary>
/// Enumerator that produces links by repeatedly advancing a state array with an
/// <see cref="IncrementationBehaviour"/> and handing it to a <see cref="LinkGenerator"/>.
/// </summary>
public class DelegateBackedSourceLinkGenerator : IEnumerator<DocumentSourceLink> {
    // Snapshot of the state supplied at construction; Reset() starts again from it.
    private object[] InitialState;

    /// <summary>Delegate that turns the current state into a link.</summary>
    public LinkGenerator Generator { get; set; }

    /// <summary>Rules used to advance the state on every step.</summary>
    public IncrementationBehaviour Behaviour { get; }

    /// <summary>The mutable state passed to <see cref="Generator"/>.</summary>
    public object[] State { get; set; }

    /// <summary>The link generated by the latest <see cref="MoveNext"/> or <see cref="Reset"/>.</summary>
    public DocumentSourceLink Current { get; private set; }

    object IEnumerator.Current => Current;

    /// <summary>
    /// Creates the generator and immediately resets it, so the first
    /// <see cref="MoveNext"/> yields the link for <paramref name="initialState"/>.
    /// </summary>
    public DelegateBackedSourceLinkGenerator(
        LinkGenerator generator,
        IncrementationBehaviour behaviour,
        params object[] initialState) {
        Generator = generator;
        Behaviour = behaviour;
        // Both arrays are private copies so the caller's array is never mutated.
        InitialState = (object[])initialState.Clone();
        State = (object[])initialState.Clone();

        Reset();
    }

    /// <summary>Nothing to release.</summary>
    public void Dispose() { }

    /// <summary>
    /// Advances the state by one step and generates the matching link.
    /// </summary>
    /// <returns><c>true</c> when the generated link has a value.</returns>
    public bool MoveNext() {
        Behaviour.Apply(State, 1);
        var generated = Generator(State);
        Current = generated;
        return generated.HasValue;
    }

    /// <summary>
    /// Restores the initial state and steps it back by one, so that the next
    /// <see cref="MoveNext"/> lands on the initial state again.
    /// </summary>
    public void Reset() {
        var restored = (object[])InitialState.Clone();
        State = restored;
        Behaviour.Apply(restored, -1);
        Current = Generator(restored);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
using aeqw89.DataKeys;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Base class for documents: carries the file name, text encoding and metadata,
/// and leaves the byte/string representations to subclasses.
/// </summary>
public abstract class Document(string filename, Encoding? encoding = null) : IDocument {
    /// <summary>File name of the document.</summary>
    public string Filename { get; set; } = filename;

    /// <summary>Text encoding of the document; UTF-8 when none was supplied.</summary>
    public Encoding Encoding { get; set; } = encoding is null ? Encoding.UTF8 : encoding;

    /// <summary>Metadata entries attached to the document, keyed by data key.</summary>
    public Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; set; } = new();

    /// <summary>Returns the binary representation of the document.</summary>
    public abstract byte[] ToBytes();

    /// <summary>Returns the textual representation of the document.</summary>
    public abstract override string ToString();
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Holds a collection of <see cref="IDocument"/> objects in memory to facilitate lazy loading.
/// </summary>
public class DocumentCache : Dictionary<object, IDocument>, IDisposable {
    private bool disposedValue;

    /// <summary>
    /// Checks whether the estimated memory used by the cached documents is
    /// strictly below the given limit.
    /// </summary>
    /// <param name="allocatedSpaceInBytes">The memory limit, in bytes.</param>
    /// <returns>
    /// <c>true</c> when <see cref="CalculateMemorySpaceUsage"/> is less than
    /// <paramref name="allocatedSpaceInBytes"/>; otherwise <c>false</c>.
    /// </returns>
    public bool IsCapacityLessThan(int allocatedSpaceInBytes) {
        // Compare bytes with bytes: the entry count is unrelated to the limit.
        return CalculateMemorySpaceUsage() < allocatedSpaceInBytes;
    }

    /// <summary>
    /// Gets an estimate of the space used by the <see cref="IDocument"/> objects
    /// (disregarding metadata) in bytes.
    /// </summary>
    /// <returns>Estimated memory usage in bytes; <c>0</c> for an empty cache.</returns>
    public long CalculateMemorySpaceUsage() {
        // Sum (unlike a seedless Aggregate) is well defined for an empty sequence.
        return Values.Sum(document => document.ToBytes().LongLength);
    }

    /// <summary>
    /// Releases the cached documents. Only managed state is held, so the work
    /// happens when <paramref name="disposing"/> is <c>true</c>.
    /// </summary>
    protected virtual void Dispose(bool disposing) {
        if (disposedValue)
            return;

        if (disposing)
            Clear();

        disposedValue = true;
    }

    /// <summary>Clears the cache; safe to call more than once.</summary>
    public void Dispose() {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A link to a document source. A link whose text is null, empty or whitespace
/// (including <c>default</c>) is "value-less": <see cref="HasValue"/> is false
/// and <see cref="Link"/> must not be read.
/// </summary>
public readonly struct DocumentSourceLink(string link) : IEquatable<DocumentSourceLink> {
    private readonly string Link_ { get; } = link;

    /// <summary>
    /// The link as a <see cref="Uri"/>. A new instance is built on every read;
    /// the <see cref="Uri"/> constructor throws when the text is null or malformed.
    /// </summary>
    public readonly Uri Link => new(Link_);

    /// <summary>True when the link text is not null, empty or whitespace.</summary>
    public bool HasValue => !string.IsNullOrWhiteSpace(Link_);

    /// <summary>Sentinel link (<c>https://invalid.link</c>) used to mark an invalid entry.</summary>
    public static DocumentSourceLink InvalidLink { get; } = new("https://invalid.link");

    /// <summary>
    /// Two links are equal when both are value-less, or when both have a value
    /// and their <see cref="Uri"/> forms are equal.
    /// </summary>
    public bool Equals(DocumentSourceLink other) {
        // Never build a Uri from a blank link: that would throw.
        if (!HasValue || !other.HasValue)
            return HasValue == other.HasValue;
        return Link == other.Link;
    }

    public static bool operator ==(DocumentSourceLink lhs, DocumentSourceLink rhs) {
        return lhs.Equals(rhs);
    }

    public static bool operator !=(DocumentSourceLink lhs, DocumentSourceLink rhs) {
        return !lhs.Equals(rhs);
    }

    /// <summary>
    /// Type-checked equality; objects of any other type are never equal
    /// (hash codes are not compared, since unequal values may share a hash).
    /// </summary>
    public override bool Equals(object? obj) {
        return obj is DocumentSourceLink other && Equals(other);
    }

    /// <summary>Hash of the <see cref="Uri"/> form, or <c>0</c> for a value-less link.</summary>
    public override int GetHashCode() {
        return HasValue ? Link.GetHashCode() : 0;
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using FluentBuilder;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>Synchronously converts a downloaded HTML document into a <typeparamref name="T"/>.</summary>
public delegate T HtmlTransformer<out T>(HtmlDocument doc);

/// <summary>Asynchronously converts a downloaded HTML document into a <typeparamref name="T"/>.</summary>
public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);

/// <summary>
/// Bundles everything a downloader needs: the web client, the links to fetch,
/// the transformers applied to each page, progress reporters, failure
/// predicates, a timeout, a logger and a document cache.
/// </summary>
public class DownloadContext<T> : IDisposable {
    private bool disposedValue;

    /// <summary>Web client used to load pages.</summary>
    public HtmlWeb Web { get; }

    /// <summary>Synchronous transformer; derived from the async one when only that was supplied.</summary>
    public HtmlTransformer<T> Transformer { get; }

    /// <summary>Asynchronous transformer; derived from the sync one when only that was supplied.</summary>
    public AsyncHtmlTransformer<T> AsyncTranformer { get; }

    /// <summary>Optional reporter notified with downloaded items.</summary>
    public IProgress<T>? DownloadReporter { get; set; }

    /// <summary>Optional reporter passed to unit downloaders as their try-progress sink.</summary>
    public IProgress<int>? RetryReporter { get; set; }

    /// <summary>Optional predicates handed to unit downloaders to detect failed downloads.</summary>
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }

    /// <summary>Timeout for downloads; one minute unless specified.</summary>
    public TimeSpan TimeOut { get; set; }

    /// <summary>The links to download.</summary>
    public IEnumerable<DocumentSourceLink> Links { get; }

    /// <summary>Token used to cancel downloads.</summary>
    public CancellationToken CancellationToken { get; }

    /// <summary>Document cache owned by this context; emptied when the context is disposed.</summary>
    public DocumentCache Cache { get; private set; } = new();

    /// <summary>Optional logger for download diagnostics.</summary>
    public ILogger? DownloadLogger { get; set; }

    /// <summary>Creates a context.</summary>
    /// <param name="web">Web client; required.</param>
    /// <param name="links">Links to download; required.</param>
    /// <param name="cancellationToken">Token used to cancel downloads.</param>
    /// <param name="transformer">Synchronous page transformer.</param>
    /// <param name="asyncTransformer">Asynchronous page transformer.</param>
    /// <param name="downloadReporter">Optional download progress sink.</param>
    /// <param name="retryReporter">Optional retry progress sink.</param>
    /// <param name="asyncFailurePredicates">Optional failure predicates.</param>
    /// <param name="timeOut">Timeout; defaults to one minute.</param>
    /// <param name="downloadLogger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="web"/> or <paramref name="links"/> is null.</exception>
    /// <exception cref="ArgumentException">Both transformers are null.</exception>
    public DownloadContext(HtmlWeb web,
                           IEnumerable<DocumentSourceLink> links,
                           CancellationToken cancellationToken = default,
                           HtmlTransformer<T>? transformer = null,
                           AsyncHtmlTransformer<T>? asyncTransformer = null,
                           IProgress<T>? downloadReporter = null,
                           IProgress<int>? retryReporter = null,
                           AsyncDownloadFailurePredicate<HtmlDocument>?[]? asyncFailurePredicates = null,
                           TimeSpan? timeOut = null,
                           ILogger? downloadLogger = null) {
        ArgumentNullException.ThrowIfNull(web);
        ArgumentNullException.ThrowIfNull(links);
        if (transformer is null && asyncTransformer is null)
            throw new ArgumentException($"Either {nameof(transformer)} or {nameof(asyncTransformer)} must be not null.");

        Web = web;
        Links = links;
        CancellationToken = cancellationToken;

        // At least one transformer is non-null here; the missing one is bridged
        // from the other. NOTE: the sync bridge blocks on the async transformer.
        Transformer = transformer ?? (doc => asyncTransformer!(doc).Result);
        AsyncTranformer = asyncTransformer ?? (doc => Task.FromResult(transformer!(doc)));

        DownloadReporter = downloadReporter;
        RetryReporter = retryReporter;
        AsyncFailurePredicates = asyncFailurePredicates;
        TimeOut = timeOut ?? TimeSpan.FromMinutes(1);
        DownloadLogger = downloadLogger;
    }

    /// <summary>
    /// Releases the owned <see cref="Cache"/>. The property keeps referring to
    /// the (now empty) cache instead of becoming null, so it stays consistent
    /// with its non-nullable type.
    /// </summary>
    protected virtual void Dispose(bool disposing) {
        if (disposedValue)
            return;

        if (disposing)
            Cache.Dispose();

        disposedValue = true;
    }

    /// <summary>Disposes the context; safe to call more than once.</summary>
    public void Dispose() {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Adapts an existing async enumerator so it can be consumed with
/// <c>await foreach</c>. Every call to <see cref="GetAsyncEnumerator"/> hands
/// back the same wrapped enumerator.
/// </summary>
public class DownloadEnumerable<T>(IAsyncEnumerator<Ordered<T>> download) : IAsyncEnumerable<Ordered<T>> {
    /// <summary>The wrapped enumerator.</summary>
    public IAsyncEnumerator<Ordered<T>> Download { get; } = download;

    /// <summary>Returns <see cref="Download"/>; the token is not used.</summary>
    public IAsyncEnumerator<Ordered<T>> GetAsyncEnumerator(CancellationToken cancellationToken = default) {
        return Download;
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Diagnostics.CodeAnalysis;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A bag of items filled by a single updater at a time and drained with
/// <see cref="TryTake"/>.
/// </summary>
public sealed class Fragment<T>(int size) {
    private ConcurrentBag<T> FragmentBag { get; set; } = new();

    // Explicit completion flag; the size-based fallback in IsComplete only
    // applies while this is null.
    private bool? Complete = false;

    // True while an updater delegate is handed out.
    private bool UpdaterLocked = false;

    /// <summary>The size the fragment was created with.</summary>
    public int MaxSize { get; } = size;

    /// <summary>Number of items currently held.</summary>
    public int Size => FragmentBag.Count;

    /// <summary>The completion flag, or <c>Size == MaxSize</c> when the flag is unset.</summary>
    public bool IsComplete => Complete ?? (Size == MaxSize);

    /// <summary>Removes one item from the fragment.</summary>
    /// <param name="shard">The removed item when the call succeeds.</param>
    /// <returns><c>true</c> when a non-null item was removed.</returns>
    public bool TryTake([NotNullWhen(true)] out T? shard) {
        if (!FragmentBag.TryTake(out shard))
            return false;
        return shard != null;
    }

    /// <summary>
    /// Atomically claims the fragment's updater.
    /// </summary>
    /// <param name="fragment">The fragment to claim.</param>
    /// <param name="updater">Delegate that adds an item to the fragment, when claimed.</param>
    /// <returns><c>false</c> when the updater is already held.</returns>
    public static bool TryAcquireUpdater(Fragment<T> fragment, [NotNullWhen(true)] out Action<T>? updater) {
        // Flips the flag to true and tells us what it was before the flip.
        bool alreadyHeld = Interlocked.CompareExchange(ref fragment.UpdaterLocked, true, false);
        if (alreadyHeld) {
            updater = null;
            return false;
        }
        updater = fragment.FragmentBag.Add;
        return true;
    }

    /// <summary>
    /// Gives the updater back, provided it is this fragment's own add delegate.
    /// </summary>
    /// <returns><c>true</c> when the lock was released.</returns>
    public static bool TryReleaseUpdater(Fragment<T> fragment, Action<T> updater) {
        Action<T> own = fragment.FragmentBag.Add;
        if (updater != own)
            return false;
        Interlocked.Exchange(ref fragment.UpdaterLocked, false);
        return true;
    }

    /// <summary>Sets the explicit completion flag of <paramref name="fragment"/>.</summary>
    public static void SetComplete(Fragment<T> fragment, bool status) => fragment.Complete = status;
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
namespace Beam {
|
||||||
|
public static class IAsyncEnumeratorExtensions {
    /// <summary>
    /// Flattens an enumerator of fragments into an enumerator of their items.
    /// Enumeration stops at the first fragment that is null or not complete.
    /// The source enumerator is disposed when the returned one finishes or is disposed
    /// after enumeration has started.
    /// </summary>
    /// <param name="fragmented">The fragment enumerator to unwrap.</param>
    /// <returns>An enumerator over the items taken from each complete fragment.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="fragmented"/> is null. Thrown at call time rather than
    /// on the first <c>MoveNextAsync</c>.
    /// </exception>
    public static IAsyncEnumerator<T> UnwrapFragmented<T>(this IAsyncEnumerator<Fragment<T>> fragmented) {
        // Validate outside the iterator so the failure surfaces immediately.
        ArgumentNullException.ThrowIfNull(fragmented);
        return Unwrap(fragmented);

        static async IAsyncEnumerator<T> Unwrap(IAsyncEnumerator<Fragment<T>> source) {
            try {
                while (await source.MoveNextAsync().ConfigureAwait(false)) {
                    var fragment = source.Current;
                    if (fragment is null || !fragment.IsComplete)
                        yield break;

                    // TryTake only succeeds with a non-null item.
                    while (fragment.TryTake(out var item))
                        yield return item;
                }
            } finally {
                await source.DisposeAsync().ConfigureAwait(false);
            }
        }
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
using aeqw89.DataKeys;
|
||||||
|
using System.Diagnostics.CodeAnalysis;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A document that has a file name, metadata, and binary and textual forms.
/// </summary>
public interface IDocument {
    /// <summary>
    /// File name of the document. It has to be a valid name on <c>UNIX</c>,
    /// <c>WINDOWS</c>, <c>APPLE</c> and <c>ANDROID</c> file systems alike.
    /// </summary>
    string Filename { get; }

    /// <summary>
    /// Extra descriptive data attached to the document.
    /// </summary>
    Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; }

    /// <summary>
    /// Gets the <see cref="IDocument"/> in binary form.
    /// </summary>
    /// <returns>The bytes representing the <see cref="IDocument"/>.</returns>
    byte[] ToBytes();

    /// <summary>
    /// Gets the <see cref="IDocument"/> in textual form.
    /// </summary>
    /// <returns>The string representing the <see cref="IDocument"/>.</returns>
    string ToString();
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
using System.Diagnostics.CodeAnalysis;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Placeholder for <see cref="IDocument"/> extension methods; currently empty.
/// </summary>
public static class IDocumentExtensions { }
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Metadata that can be attached to a document and rendered as JSON.
/// </summary>
public interface IDocumentMetaData {
    /// <summary>Renders the metadata as JSON text.</summary>
    /// <param name="options">Optional serializer options.</param>
    /// <returns>The JSON representation.</returns>
    string AsJson(JsonSerializerOptions? options = null);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Produces the links that neighbour a given link.
/// </summary>
internal interface IDocumentSourceLinkFactory {
    /// <summary>Returns the link that follows <paramref name="current"/>.</summary>
    DocumentSourceLink GetNextLink(DocumentSourceLink current);

    /// <summary>Returns the link that precedes <paramref name="current"/>.</summary>
    DocumentSourceLink GetPrecedingLink(DocumentSourceLink current);

    /// <summary>
    /// Returns the link <paramref name="offset"/> steps away from
    /// <paramref name="current"/>: forwards for a positive offset, backwards
    /// for a negative one, and <paramref name="current"/> itself for zero.
    /// </summary>
    /// <remarks>
    /// Walks step by step in a loop, so the stack depth does not grow with the
    /// size of the offset.
    /// </remarks>
    DocumentSourceLink GetArbitraryLink(DocumentSourceLink current, int offset) {
        var link = current;
        for (; offset > 0; offset--)
            link = GetNextLink(link);
        for (; offset < 0; offset++)
            link = GetPrecedingLink(link);
        return link;
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Downloads one unit of type <typeparamref name="T"/> from a batch of ordered links.
/// </summary>
public interface IUnitDownloader<T> {
    /// <summary>Number of links the downloader would like to receive per call.</summary>
    int LinksPerDownload { get; }

    /// <summary>Attempts to download a unit from the given links.</summary>
    /// <param name="link">The ordered links that make up the unit.</param>
    /// <param name="ct">Token used to cancel the attempt.</param>
    /// <param name="maximumRetryCount">Upper bound on retries; 7 by default.</param>
    /// <param name="tryProgress">Optional sink for try/retry progress.</param>
    /// <returns>A success flag together with the downloaded unit, if any.</returns>
    Task<(bool, T?)> TryDownload(
        Ordered<string>[] link,
        CancellationToken ct,
        int maximumRetryCount = 7,
        IProgress<int>? tryProgress = null);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Defines how a url template should be updated: which argument slots change
/// and which <see cref="Incrementor"/> computes each new value.
/// </summary>
public struct IncrementationBehaviour {
    /// <summary>
    /// Maps an index into the argument array to the incrementor for that slot.
    /// May be null (for example on a <c>default</c> instance), meaning "no slots".
    /// </summary>
    public Dictionary<int, Incrementor> Map { get; set; }

    /// <summary>
    /// Replaces every mapped slot of <paramref name="objects"/> with the string
    /// form of its incremented value.
    /// </summary>
    /// <param name="objects">The argument array to update in place.</param>
    /// <param name="amount">The amount handed to each incrementor.</param>
    /// <exception cref="ArgumentNullException"><paramref name="objects"/> is null.</exception>
    /// <exception cref="S.MapException">
    /// The map refers to an index outside <paramref name="objects"/>. All
    /// indices are checked first, so the array is untouched when this is thrown.
    /// </exception>
    public readonly void Apply(object[] objects, int amount) {
        ArgumentNullException.ThrowIfNull(objects);
        if (Map is null)
            return;

        // Validate up front so a bad index cannot leave the array half-updated.
        foreach (var index in Map.Keys) {
            if (index < 0 || index >= objects.Length)
                throw new S.MapException(S.M.MapDoesNotMatchArgs);
        }

        foreach (var (index, incrementor) in Map) {
            // An incrementor returning null stores null in the slot, as before.
            objects[index] = incrementor(objects[index], amount)?.ToString()!;
        }
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A format-string template plus the index of the argument that holds the
/// chapter number; supplies both the link generator and the incrementation
/// behaviour for that template.
/// </summary>
public struct PackagedSourceLinkGenerationData {
    /// <summary>Composite format string used to build links.</summary>
    public string Template { get; set; }

    /// <summary>Index of the format argument that is incremented.</summary>
    public int IndexOfChapterIndex { get; set; }

    /// <summary>Formats <see cref="Template"/> with <paramref name="ps"/> and wraps the result in a link.</summary>
    public readonly DocumentSourceLink GenerateLink(params object[] ps) {
        return new DocumentSourceLink(string.Format(Template, ps));
    }

    /// <summary>
    /// Builds a behaviour that parses the argument at
    /// <see cref="IndexOfChapterIndex"/> as an integer and adds the step to it.
    /// </summary>
    public IncrementationBehaviour GetBehaviour() {
        Incrementor addToChapter = (value, step) => {
            var text = value.ToString() ?? throw new ArgumentException();
            return int.Parse(text) + step;
        };

        var map = new Dictionary<int, Incrementor>();
        map.Add(IndexOfChapterIndex, addToChapter);

        return new IncrementationBehaviour { Map = map };
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System.Collections;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>A value tagged with its position in a sequence.</summary>
public record Ordered<T>(T Data, int Order);

/// <summary>
/// Async enumerator that downloads links in parallel batches into an internal
/// bag and hands the results out one at a time (not necessarily in order).
/// </summary>
[Obsolete("Use chunk downloader instead.")]
public class ParallelDownloader<T>(DownloadContext<T> context, int maximumConcurrentDownloads = 4) : IAsyncEnumerator<Ordered<T>> {
    // Total number of successful downloads across all batches.
    private int Count = 0;

    /// <summary>The context describing what to download and how.</summary>
    public DownloadContext<T> Context { get; } = context;

    /// <summary>Degree of parallelism used for each batch.</summary>
    public int MaximumConcurrentDownloads { get; } = maximumConcurrentDownloads;

    // The batch currently being downloaded, or null when none is running.
    private Task? CacheFiller { get; set; }

    private ConcurrentBag<Ordered<T>> Cache { get; set; } = new();

    /// <summary>The item produced by the last successful <see cref="MoveNextAsync"/>.</summary>
    public Ordered<T> Current { get; set; }

    private UnitDownloader<T> GetUnitDownloader() {
        return new UnitDownloader<T>(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
    }

    private ParallelOptions GetOptions() {
        return new ParallelOptions {
            CancellationToken = Context.CancellationToken,
            MaxDegreeOfParallelism = MaximumConcurrentDownloads,
        };
    }

    // Downloads one batch of up to 2 * MaximumConcurrentDownloads links into the bag.
    private async Task FillCache() {
        List<Ordered<DocumentSourceLink>> batch = Context.Links
            .Take(MaximumConcurrentDownloads * 2)
            .Select((link, position) => new Ordered<DocumentSourceLink>(link, position))
            .ToList();

        var listing = batch
            .Select(entry => $"{entry.Order}: {entry.Data.Link}")
            .Aggregate((acc, line) => $"{acc}\n{line}");
        Console.WriteLine(listing);

        var downloader = GetUnitDownloader();
        int downloadedCount = 0;

        await Parallel.ForEachAsync(batch, GetOptions(), async (entry, ct) => {
            var request = new Ordered<string>(entry.Data.Link.ToString(), entry.Order);
            var (succeeded, doc) = await downloader.TryDownload([request], ct, tryProgress: Context.RetryReporter);
            if (!succeeded || doc is null) {
                Console.WriteLine($"FAILED to download {entry.Data.Link}");
                return;
            }

            Cache.Add(new Ordered<T>(doc, entry.Order));
            Context.DownloadReporter?.Report(doc);
            Interlocked.Increment(ref downloadedCount);
            Interlocked.Increment(ref Count);
        });

        Console.WriteLine("Downloaded Chunk");
        CacheFiller = null;
    }

    /// <summary>
    /// Polls the bag every 10 ms until an item is available or the context's
    /// timeout has been waited out, starting a new batch when the bag runs low
    /// and no batch is in flight.
    /// </summary>
    /// <returns><c>true</c> when an item was taken; <c>false</c> on timeout.</returns>
    public async ValueTask<bool> MoveNextAsync() {
        TimeSpan step = TimeSpan.FromSeconds(0.01);

        for (TimeSpan elapsed = TimeSpan.Zero; elapsed < Context.TimeOut; elapsed += step) {
            bool runningLow = Cache.Count < MaximumConcurrentDownloads;
            if (runningLow && CacheFiller is null)
                CacheFiller ??= FillCache();

            if (Cache.TryTake(out var next) && next is not null) {
                Current = next;
                return true;
            }

            await Task.Delay(step);
        }

        return false;
    }

    /// <summary>Nothing to release.</summary>
    public ValueTask DisposeAsync() {
        GC.SuppressFinalize(this);
        return ValueTask.CompletedTask;
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Source-generated regular expressions used by the library.
/// </summary>
internal partial class RegexGenerated {
    /// <summary>
    /// Matches a <c>{digits}</c> placeholder (the digits may be absent) that is
    /// not preceded by another <c>{</c> and not followed by another <c>}</c>.
    /// </summary>
    [GeneratedRegex("(?<!{){\\d*}(?!})")]
    public static partial Regex CurlyBracketedParameters();
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Shared library internals: custom exception types and message constants.
/// </summary>
internal class S {
    /// <summary>Message texts used by the library's exceptions.</summary>
    public class M {
        public const string MapDoesNotMatchArgs = "Error; Map contains indicies that exceed the argument list passed.";
        public const string NewFragmentShouldBeFree = "Assertion Error: Could not acquire lock of newly created fragment";
        public const string LinksCannotBeEmpty = "Cannot construct downloader with empty links collection!";
    }

    /// <summary>
    /// Argument error raised when an incrementation map does not fit the
    /// argument list it is applied to.
    /// </summary>
    [Serializable]
    public class MapException : ArgumentException {
        public MapException() { }

        public MapException(string message)
            : base(message) { }

        public MapException(string message, Exception inner)
            : base(message, inner) { }

        protected MapException(
            System.Runtime.Serialization.SerializationInfo info,
            System.Runtime.Serialization.StreamingContext context)
            : base(info, context) { }
    }

    /// <summary>
    /// The kind of exception that should never happen.
    /// </summary>
    [Serializable]
    public class AssertionException : Exception {
        public AssertionException() { }

        public AssertionException(string message)
            : base(message) { }

        public AssertionException(string message, Exception inner)
            : base(message, inner) { }

        protected AssertionException(
            System.Runtime.Serialization.SerializationInfo info,
            System.Runtime.Serialization.StreamingContext context)
            : base(info, context) { }
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A <see cref="SequentialDownloader{TInput, TOutput}"/> whose units are
/// fragments of ordered <typeparamref name="T"/> items.
/// </summary>
public class SequentialFragmentDownloader<T>(
    DownloadContext<T> context,
    Func<DownloadContext<T>, IUnitDownloader<Fragment<Ordered<T>>>> getUnitDownloader,
    ILogger? logger = null)
    : SequentialDownloader<T, Fragment<Ordered<T>>>(context, getUnitDownloader, logger) {
}
|
||||||
|
|
||||||
|
// public class SequentialChunkDownloader<T> : IAsyncEnumerator<Fragment<Ordered<T>>> {
|
||||||
|
// public Fragment<Ordered<T>> Current { get; protected set; }
|
||||||
|
// public DownloadContext<T> Context { get; }
|
||||||
|
// protected IEnumerator<DocumentSourceLink> LinksEnumerator;
|
||||||
|
// protected ConcurrentQueue<Fragment<Ordered<T>>> DownloadQueue { get; set; } = [];
|
||||||
|
// public int ChunkSize { get; }
|
||||||
|
|
||||||
|
// private ILogger? Logger => Context.DownloadLogger;
|
||||||
|
|
||||||
|
// public UnitDownloader<T> GetUnitDownloader()
|
||||||
|
// => new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
|
||||||
|
|
||||||
|
// public SequentialChunkDownloader(DownloadContext<T> context, int chunkSize) {
|
||||||
|
// Context = context;
|
||||||
|
// LinksEnumerator = Context.Links.GetEnumerator();
|
||||||
|
// Current = new Fragment<Ordered<T>>(0);
|
||||||
|
// ChunkSize = chunkSize;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public ValueTask DisposeAsync() {
|
||||||
|
// GC.SuppressFinalize(this);
|
||||||
|
// return ValueTask.CompletedTask;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// protected Task<bool>? DownloadsTask = null;
|
||||||
|
// protected virtual async Task<bool> ProcessDownloads() {
|
||||||
|
// if (DownloadQueue.IsEmpty)
|
||||||
|
// return true;
|
||||||
|
// if (DownloadsTask is null) {
|
||||||
|
// DownloadsTask = Task.Run(async () => {
|
||||||
|
// if (!DownloadQueue.TryDequeue(out var fragment))
|
||||||
|
// return true; // no fragments left, likely race condition but return true as technically all items have been downloaded
|
||||||
|
// var unit = GetUnitDownloader(); // instantiates unit downloader per request (okay)
|
||||||
|
// if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater)) { // gets the add method for the current fragment
|
||||||
|
// Logger?.LogError("Failed to acquire updater for fragment {{{}}}", fragment.GetHashCode());
|
||||||
|
// return false; // fragment is unsafe to modify
|
||||||
|
// }
|
||||||
|
// try {
|
||||||
|
// var links = Enumerable.Range(0, ChunkSize).Select((x) => {
|
||||||
|
// if (!LinksEnumerator.MoveNext())
|
||||||
|
// return new Ordered<DocumentSourceLink>(DocumentSourceLink.InvalidLink, -1); // stops link collection if end-of-links is reached
|
||||||
|
// return new Ordered<DocumentSourceLink>(LinksEnumerator.Current, x);
|
||||||
|
// }).Where((x) => x.Data != DocumentSourceLink.InvalidLink); // filter invalid links
|
||||||
|
// await Parallel.ForEachAsync(links, async (x, ct) => {
|
||||||
|
// Logger?.LogInformation("Started download for {} order={}", x.Data.Link, x.Order);
|
||||||
|
// var (result, downloadedT) = await unit.TryDownload( // download (parallel) objects
|
||||||
|
// x.Data.Link.ToString(), // use link from links collection (exposed as x)
|
||||||
|
// ct, // use ct provided with method call
|
||||||
|
// tryProgress: Context.RetryReporter);
|
||||||
|
// if (!result) { // download failure (soft because it was detected)
|
||||||
|
// Logger?.LogError("Failed to retrieve {} order={}", x.Data.Link, x.Order);
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
// if (downloadedT is null) { // download failure (hard because it was not detected)
|
||||||
|
// Logger?.LogCritical("Failed to retrieve {} order={}", x.Data.Link, x.Order);
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
// Logger?.LogInformation("Retrieved {} order={} successfully", x.Data.Link, x.Order);
|
||||||
|
// updater(new Ordered<T>(downloadedT, x.Order)); // update the fragment
|
||||||
|
// });
|
||||||
|
// Fragment<Ordered<T>>.SetComplete(fragment, true);
|
||||||
|
// } finally {
|
||||||
|
// Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater); // returns updater to allow modification
|
||||||
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
// return fragment.Size == fragment.MaxSize;
|
||||||
|
// });
|
||||||
|
// }
|
||||||
|
// if (DownloadsTask.IsCompleted) {
|
||||||
|
// DownloadsTask = null;
|
||||||
|
// return await ProcessDownloads();
|
||||||
|
// }
|
||||||
|
// return true; // if task is still processing return should be neither true or false...
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public async ValueTask<bool> MoveNextAsync() {
|
||||||
|
// if (Current.IsComplete && Current.Size < Current.MaxSize)
|
||||||
|
// return false; // if a fragment is marked complete despite being unsaturated, we've run out links!
|
||||||
|
// if (DownloadQueue.Count == 0) {
|
||||||
|
// Current = new Fragment<Ordered<T>>(ChunkSize);
|
||||||
|
// DownloadQueue.Enqueue(Current);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// return await ProcessDownloads();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
}
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Async enumerator that walks the context's links in order, hands them to a
/// unit downloader in batches, and exposes each downloaded unit as
/// <see cref="Current"/>.
/// </summary>
public class SequentialDownloader<TInput, TOutput> : IAsyncEnumerator<TOutput> {
    /// <summary>The unit produced by the last successful <see cref="MoveNextAsync"/>.</summary>
    public TOutput Current { get; protected set; }

    /// <summary>The context describing what to download.</summary>
    public DownloadContext<TInput> Context { get; }

    /// <summary>Optional logger.</summary>
    public ILogger? Logger { get; set; }

    /// <summary>Order number assigned to the next link that is submitted.</summary>
    public int LastOrder { get; set; } = 0;

    /// <summary>Enumerator over the context's links; positioned on the next link to submit.</summary>
    protected IEnumerator<DocumentSourceLink> LinksEnumerator;

    // True while LinksEnumerator.Current is a link that has not been submitted yet.
    private bool HasPendingLink;

    /// <summary>Factory for the unit downloader used by each <see cref="MoveNextAsync"/> call.</summary>
    public Func<IUnitDownloader<TOutput>> GetUnitDownloader { get; set; }

    /// <summary>Creates the downloader and positions it on the first link.</summary>
    /// <param name="context">Download context; its links must not be empty.</param>
    /// <param name="getUnitDownloader">Creates a unit downloader for the context.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The context has no links.</exception>
    public SequentialDownloader(DownloadContext<TInput> context, Func<DownloadContext<TInput>, IUnitDownloader<TOutput>> getUnitDownloader, ILogger? logger = null) {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(getUnitDownloader);

        Context = context;
        Logger = logger;
        LinksEnumerator = Context.Links.GetEnumerator();
        // NOTE: requires an enumerator that supports Reset().
        LinksEnumerator.Reset();
        if (!LinksEnumerator.MoveNext()) {
            // The two-argument overload is needed: the single-string overload
            // would treat the message as the parameter name.
            throw new ArgumentOutOfRangeException(nameof(context), S.M.LinksCannotBeEmpty);
        }
        HasPendingLink = true;

        Current = default!;
        GetUnitDownloader = () => getUnitDownloader(Context);
    }

    /// <summary>Nothing to release.</summary>
    public ValueTask DisposeAsync() {
        GC.SuppressFinalize(this);
        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Collects up to <see cref="IUnitDownloader{T}.LinksPerDownload"/> links
    /// (always at least one) and downloads them as a single unit.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a unit was downloaded; <c>false</c> when the links are
    /// exhausted or the download failed.
    /// </returns>
    public async ValueTask<bool> MoveNextAsync() {
        if (!HasPendingLink) {
            Logger?.LogInformation("Out of links!");
            return false;
        }

        var unit = GetUnitDownloader(); // safe to instantiate per request.
        var idealLinkCount = unit.LinksPerDownload;
        List<Ordered<string>> links = [];

        // Consume the pending link first, then keep going while more links
        // exist and the batch is not full. The enumerator is always left on
        // the first link that has NOT been submitted.
        do {
            links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
            HasPendingLink = LinksEnumerator.MoveNext();
        } while (HasPendingLink && links.Count < idealLinkCount);

        var (result, downloadedT) = await unit.TryDownload(
            links.ToArray(),
            Context.CancellationToken,
            tryProgress: Context.RetryReporter).ConfigureAwait(false);

        if (!result || downloadedT is null) {
            Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name);
            return false; // unit download failed
        }

        Current = downloadedT;
        return true;
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Exposes an already-created enumerator of <see cref="DocumentSourceLink"/> as an
/// <see cref="IEnumerable{T}"/>.
/// </summary>
/// <remarks>
/// Both <c>GetEnumerator</c> implementations return the one wrapped enumerator instance;
/// no new enumerator is created per call.
/// </remarks>
public class SourceLinkEnumerable : IEnumerable<DocumentSourceLink> {
    /// <summary>The wrapped enumerator.</summary>
    public IEnumerator<DocumentSourceLink> Enumerator { get; }

    private SourceLinkEnumerable(IEnumerator<DocumentSourceLink> enumerator) => Enumerator = enumerator;

    /// <summary>Wraps <paramref name="generator"/> in a new <see cref="SourceLinkEnumerable"/>.</summary>
    /// <param name="generator">Enumerator to expose.</param>
    /// <returns>An enumerable whose enumerator is <paramref name="generator"/> itself.</returns>
    public static SourceLinkEnumerable FromGenerator(IEnumerator<DocumentSourceLink> generator) {
        return new(generator);
    }

    /// <summary>Returns the wrapped enumerator.</summary>
    public IEnumerator<DocumentSourceLink> GetEnumerator() => Enumerator;

    /// <summary>Non-generic counterpart; returns the same wrapped enumerator.</summary>
    IEnumerator IEnumerable.GetEnumerator() => Enumerator;
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A document whose content is supplied as a <see cref="Stream"/>.
/// </summary>
/// <remarks>
/// The stream is read on first use, starting at its current position, and the bytes are
/// buffered. Assigning a different stream to <see cref="Content"/> causes the next call to
/// read and buffer the new stream.
/// </remarks>
/// <param name="filename">File name passed to the <c>Document</c> base constructor.</param>
/// <param name="content">Stream providing the document bytes.</param>
/// <param name="encoding">Encoding used by <see cref="ToString"/>; UTF-8 when <c>null</c>.</param>
internal class StreamDocument(string filename, Stream content, Encoding? encoding = null) : Document(filename) {
    /// <summary>Stream the document bytes are read from.</summary>
    public Stream Content { get; set; } = content;

    /// <summary>Encoding used to decode the bytes in <see cref="ToString"/>.</summary>
    public Encoding Encoding { get; set; } = encoding ?? Encoding.UTF8;

    // Bytes buffered from the stream referenced by BufferedFrom_.
    byte[] Content_ { get; set; } = [];

    // Stream instance Content_ was read from; null until the first read.
    Stream? BufferedFrom_ { get; set; }

    /// <summary>Returns the bytes of <see cref="Content"/>, reading the stream if it has not been buffered yet.</summary>
    /// <returns>The buffered document bytes.</returns>
    public override byte[] ToBytes() {
        var source = Content;
        if (!ReferenceEquals(source, BufferedFrom_)) {
            // Previously the buffer was never filled, so callers always received an empty array.
            using var buffer = new MemoryStream();
            source.CopyTo(buffer);
            Content_ = buffer.ToArray();
            BufferedFrom_ = source;
        }
        return Content_;
    }

    /// <summary>Decodes the document bytes with <see cref="Encoding"/>.</summary>
    /// <returns>The document content as text.</returns>
    public override string ToString() {
        return Encoding.GetString(ToBytes());
    }
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A document whose content is held as a string.
/// </summary>
/// <param name="filename">File name passed to the <c>Document</c> base constructor.</param>
/// <param name="content">Text of the document.</param>
/// <param name="encoding">Optional encoding passed to the <c>Document</c> base constructor.</param>
public class StringDocument(string filename, string content, Encoding? encoding = null) : Document(filename, encoding) {
    /// <summary>Text of the document.</summary>
    public string Content { get; set; } = content;

    /// <summary>Returns <see cref="Content"/> unchanged.</summary>
    public override string ToString() => Content;

    /// <summary>Encodes <see cref="Content"/> into bytes.</summary>
    // NOTE(review): `Encoding` here presumably resolves to a member of the Document base class — confirm.
    public override byte[] ToBytes() => Encoding.GetBytes(Content);
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Asynchronous check applied to a finished download. <c>UnitDownloader.IsFailure</c> treats
/// the download as failed when any predicate returns <c>true</c>.
/// </summary>
/// <typeparam name="T">Type of the downloaded object being inspected.</typeparam>
/// <param name="download">The downloaded object to inspect.</param>
/// <returns>A task yielding <c>true</c> when the download counts as a failure.</returns>
public delegate Task<bool> AsyncDownloadFailurePredicate<in T>(T download);
|
||||||
|
|
||||||
|
/// <summary>
/// A download managing class that manages a singular download with failure-detection and exponential-backoff retries. This class is safe to instantiate per request.
/// </summary>
/// <typeparam name="T">Type produced by the transformer from a downloaded HTML document.</typeparam>
/// <param name="web">HtmlAgilityPack client used to load pages.</param>
/// <param name="transformer">Converts a loaded <see cref="HtmlDocument"/> into a <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional predicates; a page is rejected when any of them returns <c>true</c>. <c>null</c> entries are skipped.</param>
public class UnitDownloader<T>(HtmlWeb web, AsyncHtmlTransformer<T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
    // Upper bound for the backoff exponent so the delay computation cannot overflow
    // when a caller passes a very large retry count.
    private const int MaxBackoffExponent = 20;

    public HtmlWeb Web { get; } = web;
    public virtual AsyncHtmlTransformer<T> Transformer { get; } = transformer;
    public virtual AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicates { get; } = failurePredicate;

    /// <summary>Number of links consumed per download; this downloader only uses the first link it is given.</summary>
    public int LinksPerDownload { get; } = 1;

    /// <summary>Runs the failure predicates against <paramref name="doc"/>.</summary>
    /// <param name="doc">The loaded page.</param>
    /// <returns><c>true</c> when at least one predicate reported a failure; <c>false</c> when there are no predicates.</returns>
    protected virtual async Task<bool> IsFailure(HtmlDocument doc) {
        if (FailurePredicates is null)
            return false;
        var failed = false;
        await Parallel.ForEachAsync(FailurePredicates, async (x, ct) => {
            // Skip remaining work once any predicate has flagged the page.
            if (failed == true)
                return;
            if (x is null)
                return;
            if (await x(doc))
                failed = true;
        });

        return failed;
    }

    /// <summary>Performs a single download attempt.</summary>
    /// <param name="link">Address of the page to load.</param>
    /// <param name="ct">Token used to cancel the page load.</param>
    /// <returns><c>(true, value)</c> when the page loaded and passed the failure predicates; otherwise <c>(false, default)</c>.</returns>
    /// <exception cref="OperationCanceledException">The load was aborted because <paramref name="ct"/> was cancelled.</exception>
    protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var html = await Web.LoadFromWebAsync(link, ct);
            if (FailurePredicates is null || !(await IsFailure(html)))
                return (true, await Transformer(html));
            else
                return (false, default);
        } catch (OperationCanceledException) when (ct.IsCancellationRequested) {
            // Cancellation requested by the caller is not a download failure and must not be retried.
            throw;
        } catch (Exception) {
            // Any other error (network, parsing, timeouts) counts as a failed attempt.
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads the first entry of <paramref name="link"/>, retrying with exponential backoff
    /// (2, 4, 8, ... seconds) between attempts.
    /// </summary>
    /// <param name="link">Links to download; only the first element is used.</param>
    /// <param name="ct">Token that cancels both the attempts and the waits between them.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts.</param>
    /// <param name="tryProgress">Receives the number of failed attempts after each failure.</param>
    /// <returns><c>(true, value)</c> on success; <c>(false, lastValue)</c> when <paramref name="link"/> is empty or every attempt failed.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<int>? tryProgress = null) {
        if (link.Length == 0)
            return (false, default);

        T? doc = default;
        int tryCount = 0;
        while (tryCount < maximumRetryCount) {
            ct.ThrowIfCancellationRequested();
            (var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct);
            if (success && doc != null)
                return (true, doc);
            tryProgress?.Report(++tryCount);

            // No attempt follows the last failure, so waiting would only postpone the result.
            if (tryCount >= maximumRetryCount)
                break;

            await Task.Delay(BackoffDelay(tryCount), ct);
        }

        return (false, doc);
    }

    // Delay before the next attempt: 2^failedAttempts seconds, with the exponent capped.
    private static TimeSpan BackoffDelay(int failedAttempts)
        => TimeSpan.FromSeconds(Math.Pow(2, Math.Min(failedAttempts, MaxBackoffExponent)));
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// Downloads a group of links in parallel and collects the ordered results in a single
/// <c>Fragment</c>. Each link is downloaded through an internal <see cref="UnitDownloader{T}"/>.
/// </summary>
/// <typeparam name="T">Type produced by the transformer for each downloaded page.</typeparam>
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
    /// <summary>Creates the fragment downloader and its internal per-link downloader.</summary>
    /// <param name="web">HtmlAgilityPack client used to load pages.</param>
    /// <param name="transformer">Converts a loaded page into a <typeparamref name="T"/>.</param>
    /// <param name="failurePredicate">Optional failure predicates forwarded to the per-link downloader.</param>
    /// <param name="fragmentSize">Value exposed as <see cref="LinksPerDownload"/>.</param>
    /// <param name="logger">Optional logger for failed links.</param>
    public UnitFragmentDownloader(HtmlWeb web,
        AsyncHtmlTransformer<T> transformer,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
        int fragmentSize = 4,
        ILogger? logger = null) {
        Web = web;
        Transformer = transformer;
        FailurePredicate = failurePredicate;
        UnitDownloader = new UnitDownloader<T>(Web, Transformer, FailurePredicate);
        LinksPerDownload = fragmentSize;
        Logger = logger;
    }

    public HtmlWeb Web { get; }
    public AsyncHtmlTransformer<T> Transformer { get; }
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }
    public int LinksPerDownload { get; set; }
    public ILogger? Logger { get; set; }

    private readonly UnitDownloader<T> UnitDownloader;

    /// <summary>
    /// Downloads every entry of <paramref name="link"/> in parallel and stores each result,
    /// tagged with the order of its link, in a new fragment.
    /// </summary>
    /// <param name="link">Links to download together.</param>
    /// <param name="ct">Token that cancels the parallel loop and the individual downloads.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts per link.</param>
    /// <param name="tryProgress">Retry reporter forwarded to every per-link download.</param>
    /// <returns>
    /// <c>(true, fragment)</c> with the fragment marked complete when every link succeeded;
    /// <c>(false, fragment)</c> holding the results that did arrive otherwise.
    /// </returns>
    async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<int>? tryProgress) {
        Fragment<Ordered<T>> fragment = new Fragment<Ordered<T>>(link.Length);
        if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater))
            throw new S.AssertionException(S.M.NewFragmentShouldBeFree);

        bool isFailure = false;
        try {
            // Handing ct to the loop stops the scheduling of further links on cancellation;
            // the per-iteration token (pct) is cancelled together with ct.
            await Parallel.ForEachAsync(link, ct, async (x, pct) => {
                pct.ThrowIfCancellationRequested();
                var (result, downloadedT) = await UnitDownloader.TryDownload([x], pct, maximumRetryCount, tryProgress);
                if (!result) {
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogError("Failed to retrieve {} order={}", x.Data, x.Order);
                    return;
                }
                if (downloadedT == null) {
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogCritical("Failed to retrieve {} order={}", x.Data, x.Order);
                    return;
                }
                updater(new Ordered<T>(downloadedT, x.Order));
            });

            if (!isFailure)
                Fragment<Ordered<T>>.SetComplete(fragment, true);
        } finally {
            // Release the updater even when the loop throws (for example on cancellation).
            Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
        }

        return (!isFailure, fragment);
    }
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user