Add project files.

This commit is contained in:
2025-04-19 20:47:58 +03:00
parent 9e14d137ae
commit bfdcdb1f3b
66 changed files with 2394 additions and 0 deletions
+22
View File
@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Class library targeting net9.0 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Beam\Beam.csproj" />
</ItemGroup>
<!-- NOTE(review): the two aeqw89.* assemblies are referenced by HintPath into the build output of sibling checkouts, and the paths use different configurations (bin\Debug for DataKeys, bin\Release for PersistentData). Confirm this mix is intended. -->
<ItemGroup>
<Reference Include="aeqw89.DataKeys">
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
</Reference>
<Reference Include="aeqw89.PersistentData">
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
</Reference>
</ItemGroup>
</Project>
+68
View File
@@ -0,0 +1,68 @@
using aeqw89.DataKeys;
using HtmlAgilityPack;
using System.Text.Json.Serialization;
namespace Beam.Dynamic {
/// <summary>
/// Describes how a single value is located inside an HTML document: by XPath, by a
/// '/'-separated chain of CSS class names, or through a custom <see cref="IDataProvider"/>.
/// </summary>
/// <remarks>
/// When several locators are set, <see cref="XPath"/> is consulted first, then
/// <see cref="CssPath"/>, then <see cref="Provider"/>.
/// </remarks>
public class Binding(DataKey<Binding> key) : IKeyed<Binding> {
    /// <summary>Creates a binding whose key is built from <paramref name="key"/>.</summary>
    public Binding(string key) : this(new DataKey<Binding>(key)) { }

    /// <summary>Creates a binding with an empty key string.</summary>
    public Binding() : this("") { }

    /// <summary>Key identifying this binding.</summary>
    [JsonRequired]
    public DataKey<Binding> Key { get; set; } = key;

    /// <summary>Selects which resolution strategy <see cref="Resolve"/> uses.</summary>
    [JsonRequired]
    public BindingType Type { get; set; }

    /// <summary>Separator handed to <see cref="string.Split(string?, StringSplitOptions)"/> by <see cref="ResolveArray"/>.</summary>
    public string? ArrayDelimiters { get; set; }

    /// <summary>XPath expression evaluated against the document root.</summary>
    public string? XPath { get; set; }

    /// <summary>'/'-separated list of class names, followed one child level at a time.</summary>
    public string? CssPath { get; set; }

    private IDataProvider? Provider_;

    /// <summary>
    /// Custom provider. Assigning <see langword="null"/> is ignored (the current provider is kept).
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The assigned provider's runtime type has no public parameterless constructor.
    /// </exception>
    public IDataProvider? Provider {
        get => Provider_;
        set {
            if (value is null)
                return;
            // A non-null value is statically an IDataProvider, so only the constructor needs checking.
            var parameterlessCtor = value.GetType().GetConstructor([]);
            if (parameterlessCtor is not { IsPublic: true })
                throw new InvalidOperationException();
            Provider_ = value;
        }
    }

    /// <summary>Finds the node this binding points at, or <see langword="null"/> when nothing matches.</summary>
    public HtmlNode? ResolveNode(HtmlDocument doc) =>
        XPath is not null
            ? doc.DocumentNode.SelectSingleNode(XPath)
            : CssPath is not null
                ? doc.DocumentNode.ThenByClasses(CssPath.Split('/'))
                : Provider?.GetNode(doc);

    /// <summary>
    /// Returns the inner text of the located node (XPath / CssPath), or the provider's text,
    /// or an empty string when no locator is configured or no node matches.
    /// </summary>
    public string ResolveString(HtmlDocument doc) {
        if (XPath is null && CssPath is null)
            return Provider is { } provider ? provider.Get(doc) : "";
        // With a path locator set, ResolveNode takes exactly the same branch as this method.
        return ResolveNode(doc)?.InnerText ?? "";
    }

    /// <summary>
    /// Splits the resolved text on <see cref="ArrayDelimiters"/>; returns an empty array unless
    /// <see cref="Type"/> is <see cref="BindingType.Array"/>.
    /// </summary>
    public string[] ResolveArray(HtmlDocument doc) {
        if (Type is BindingType.Array)
            return ResolveString(doc).Split(ArrayDelimiters);
        return [];
    }

    /// <summary>
    /// Resolves according to <see cref="Type"/>: a string for <see cref="BindingType.Single"/>,
    /// a string array for <see cref="BindingType.Array"/>, the provider's text (or null without a
    /// provider) for <see cref="BindingType.UseProvider"/>, and null for any other value.
    /// </summary>
    public dynamic? Resolve(HtmlDocument doc) {
        switch (Type) {
            case BindingType.Single:
                return ResolveString(doc);
            case BindingType.Array:
                return ResolveArray(doc);
            case BindingType.UseProvider:
                return Provider?.Get(doc);
            default:
                return null;
        }
    }
}
}
+7
View File
@@ -0,0 +1,7 @@
namespace Beam.Dynamic {
/// <summary>Selects how <c>Binding.Resolve</c> produces its value.</summary>
public enum BindingType {
/// <summary>Resolve to a single string (<c>Binding.ResolveString</c>).</summary>
Single,
/// <summary>Resolve to a string array (<c>Binding.ResolveArray</c>).</summary>
Array,
/// <summary>Resolve by calling <c>Get</c> on the binding's provider, if one is set.</summary>
UseProvider
}
}
+32
View File
@@ -0,0 +1,32 @@
using HtmlAgilityPack;
namespace Beam.Dynamic {
/// <summary>
/// The set of optional <see cref="Binding"/>s that describe where a document's fields live in a page.
/// </summary>
public class DataBindings {
    public Binding? Title { get; set; }
    public Binding? Authors { get; set; }
    public Binding? Description { get; set; }
    public Binding? Content { get; set; }
    public Binding? Language { get; set; }
    public Binding? Tags { get; set; }

    /// <summary>
    /// Resolves every configured binding against <paramref name="doc"/>.
    /// </summary>
    /// <param name="doc">The downloaded HTML document.</param>
    /// <returns>
    /// Text fields are <see langword="null"/> when their binding is missing or yields no text;
    /// list fields are never null (empty when their binding is missing).
    /// </returns>
    /// <remarks>
    /// <see cref="Binding.Resolve"/> returns <c>dynamic</c>, so its result is coerced explicitly here.
    /// Assigning it straight to a typed property fails at run time whenever the binding's
    /// <see cref="Binding.Type"/> does not match the property's shape (for example a
    /// <see cref="BindingType.Single"/> binding feeding a <c>string[]</c> property).
    /// </remarks>
    public ResolvedBindings Resolve(HtmlDocument doc) {
        return new ResolvedBindings() {
            Title = ResolveText(Title, doc),
            Authors = ResolveList(Authors, doc),
            Language = ResolveList(Language, doc),
            Content = ResolveText(Content, doc),
            Description = ResolveText(Description, doc),
            Tags = ResolveList(Tags, doc)
        };
    }

    // Resolves a binding for a text-valued field, whatever BindingType it declares.
    private static string? ResolveText(Binding? binding, HtmlDocument doc) {
        if (binding is null)
            return null;
        // An Array binding is the delimited form of ResolveString; use the unsplit text.
        if (binding.Type is BindingType.Array)
            return binding.ResolveString(doc);
        object? value = binding.Resolve(doc);
        return value as string;
    }

    // Resolves a binding for a list-valued field; a single non-empty string becomes a one-item list.
    private static string[] ResolveList(Binding? binding, HtmlDocument doc) {
        object? value = binding?.Resolve(doc);
        return value switch {
            string[] items => items,
            string single when single.Length > 0 => new[] { single },
            _ => Array.Empty<string>()
        };
    }
}
/// <summary>
/// The values produced by <see cref="DataBindings.Resolve"/> for one document.
/// </summary>
public class ResolvedBindings {
// Resolved from DataBindings.Title.
public string? Title { get; set; }
// Resolved from DataBindings.Authors.
public string[]? Authors { get; set; }
// Resolved from DataBindings.Description.
public string? Description { get; set; }
// Resolved from DataBindings.Content.
public string? Content { get; set; }
// Resolved from DataBindings.Language.
public string[]? Language { get; set; }
// Resolved from DataBindings.Tags.
public string[]? Tags { get; set; }
}
}
+70
View File
@@ -0,0 +1,70 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Beam.Dynamic {
/// <summary>
/// Helpers for walking an HtmlAgilityPack tree one child level at a time and for extracting text.
/// </summary>
public static partial class HtmlNodeExtensions {
    /// <summary>
    /// Descends through direct children, picking at each level the first child that has the
    /// next class in <paramref name="classes"/>. Returns null as soon as a level has no match.
    /// </summary>
    public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes) {
        return node.DescendCollectionTree(ThenByClass, classes);
    }

    /// <summary>
    /// Returns the first direct child with class <paramref name="class"/>; with
    /// <paramref name="count"/> &gt; 1 the lookup is repeated on each result, one level deeper each time.
    /// </summary>
    public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1) {
        return node.ThenByFunc((x) => x.ChildNodes.FirstOrDefault((x) => x.HasClass(@class)), count);
    }

    /// <summary>
    /// Returns the first direct child whose element name equals <paramref name="name"/>; with
    /// <paramref name="count"/> &gt; 1 the lookup is repeated on each result.
    /// </summary>
    public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1) {
        return node.ThenByFunc((x) => x.ChildNodes.FirstOrDefault((x) => x.Name == name), count);
    }

    /// <summary>Descends through direct children by element name, one name per level.</summary>
    public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name) {
        return node.DescendCollectionTree(ThenByName, name);
    }

    /// <summary>
    /// Overload for step functions that take a repeat count; each step is invoked with a count of 1.
    /// </summary>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, int, HtmlNode?> func, params T[] values) {
        return node.DescendCollectionTree((a, b) => func(a, b, 1), values);
    }

    /// <summary>
    /// Applies <paramref name="func"/> once per value, feeding each result into the next step.
    /// Stops and returns null when a step yields null; returns <paramref name="node"/> itself when
    /// <paramref name="values"/> is empty.
    /// </summary>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, HtmlNode?> func, params T[] values) {
        HtmlNode? result = node;
        foreach (var value in values) {
            if (result is null)
                return result;
            result = func(result, value);
        }
        return result;
    }

    /// <summary>
    /// Applies <paramref name="func"/> to <paramref name="node"/> and then to each successive result,
    /// <paramref name="count"/> times in total (at least once), stopping early on null.
    /// </summary>
    public static HtmlNode? ThenByFunc(this HtmlNode node, Func<HtmlNode, HtmlNode?> func, int count = 1) {
        var ret = func(node);
        if (count <= 1)
            return ret;
        return ret?.ThenByFunc(func, count - 1);
    }

    /// <summary>
    /// Splits the node's inner text, dropping empty entries; null when <paramref name="node"/> is null.
    /// </summary>
    /// <param name="node">The node whose text is split.</param>
    /// <param name="separators">
    /// Passed to <c>string.Split</c> as one string delimiter (the whole string is the separator,
    /// not each of its characters).
    /// </param>
    public static string[]? SplitInnerText(this HtmlNode? node, string separators)
        => node?.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    /// <summary>
    /// Filter for <see cref="InnerLineSeparatedText"/>: true only for <c>p</c> and <c>h</c>+digit elements.
    /// </summary>
    public static bool TextNodesOnly(HtmlNode n) => TextNodesOnlyRegex().IsMatch(n.Name);

    /// <summary>
    /// Joins the inner text of the node's direct children with '\n', skipping children rejected by
    /// <paramref name="filter"/> and children whose text is null or whitespace.
    /// </summary>
    /// <returns>The joined text, or an empty string when there is nothing to join.</returns>
    public static string InnerLineSeparatedText(this HtmlNode? node, Func<HtmlNode, bool>? filter = null) {
        if (node?.ChildNodes is not { Count: > 0 } children)
            return "";
        var lines = children
            .Where(filter ?? ((x) => true))
            .Select((x) => x.InnerText)
            .Where((x) => !string.IsNullOrWhiteSpace(x));
        // string.Join yields "" for an empty sequence, matching the previous Aggregate/?? "" result.
        return string.Join("\n", lines);
    }

    // Anchored on both ends: the unanchored "p|h\d" also accepted any name that merely
    // contains a 'p' (span, input, option, ...).
    [GeneratedRegex("^(?:p|h\\d)$")]
    private static partial Regex TextNodesOnlyRegex();
}
}
+10
View File
@@ -0,0 +1,10 @@
using HtmlAgilityPack;
namespace Beam.Dynamic {
/// <summary>
/// Extracts data from an <see cref="HtmlDocument"/>. The known implementations are registered
/// below as JSON-derived types with the integer discriminators 20 and 21.
/// </summary>
[System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20)]
[System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)]
public interface IDataProvider {
/// <summary>Produces this provider's text for <paramref name="document"/>.</summary>
public string Get(HtmlDocument document);
/// <summary>Returns an HTML node for <paramref name="document"/>, or null when none is available.</summary>
public HtmlNode? GetNode(HtmlDocument document);
}
}
+31
View File
@@ -0,0 +1,31 @@
using HtmlAgilityPack;
using System.Text;
namespace Beam.Dynamic {
/// <summary>
/// Provider that flattens the <c>li</c> children of a located list node into one
/// ';'-separated string.
/// </summary>
public class ListContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the list container node.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Returns the trimmed inner text of every direct <c>li</c> child, joined with ';'.
    /// </summary>
    /// <param name="document">The document to read from.</param>
    /// <returns>
    /// The joined items, or an empty string when <see cref="Content"/> is unset, the node is not
    /// found, or it has no <c>li</c> children.
    /// </returns>
    public string Get(HtmlDocument document) {
        var node = Content?.ResolveNode(document);
        if (node is null)
            return "";
        // Only <li> children count. The last child is often a whitespace text node, so it must
        // go through the same filter as the others: appending it unconditionally produced a
        // trailing ';' and threw on a node with no children.
        var items = node.ChildNodes
            .Where((child) => child.Name == "li")
            .Select((child) => child.InnerText.Trim());
        return string.Join(";", items);
    }

    /// <summary>Returns the list container node, or null when it cannot be located.</summary>
    public HtmlNode? GetNode(HtmlDocument document) {
        return Content?.ResolveNode(document);
    }
}
}
+36
View File
@@ -0,0 +1,36 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
namespace Beam.Dynamic {
/// <summary>
/// Cleans text scraped from web pages by decoding HTML entities and numeric character references.
/// </summary>
public static partial class OnlineCleaner {
    /// <summary>
    /// Matches <c>&amp;#</c>, an optional <c>x</c>, one to four digit/word characters and a
    /// closing <c>;</c>. The character class is broader than valid digits, so callers must
    /// validate what was matched.
    /// </summary>
    [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
    public static partial Regex MochaBlendUnicodeEscapeSequence();

    // Replaces every well-formed numeric character reference with the character it denotes.
    // Matches whose payload is not a valid number are left in the text untouched.
    private static string UnicodeEscapeSequences(string text) {
        return MochaBlendUnicodeEscapeSequence().Replace(text, static (x) => {
            string token = x.Value;
            // A match is at least "&#" + one character + ";", so index 2 always exists.
            bool isHex = token[2] == 'x';
            string digits = isHex ? token[3..^1] : token[2..^1];
            var style = isHex
                ? System.Globalization.NumberStyles.HexNumber
                : System.Globalization.NumberStyles.Integer;
            if (!int.TryParse(digits, style, System.Globalization.CultureInfo.InvariantCulture, out int sequence))
                return token;
            // At most four hex digits, so the value always fits in one UTF-16 code unit.
            char unit = (char)sequence;
            // A lone surrogate is not a character; emit U+FFFD as the decoder fallback would.
            return char.IsSurrogate(unit) ? "\uFFFD" : unit.ToString();
        });
    }

    /// <summary>
    /// Decodes HTML entities in <paramref name="onlineText"/> and then replaces any remaining
    /// numeric character references.
    /// </summary>
    /// <param name="onlineText">Raw text taken from a page; may be null.</param>
    /// <returns>The cleaned text, or an empty string for null, empty or whitespace-only input.</returns>
    public static string Clean(string? onlineText) {
        if (string.IsNullOrWhiteSpace(onlineText))
            return "";
        var decoded = HttpUtility.HtmlDecode(onlineText);
        // The decoded text (not the raw input) feeds the second pass; previously the decode
        // result was computed and then thrown away.
        return UnicodeEscapeSequences(decoded);
    }
}
}
@@ -0,0 +1,35 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Dynamic {
/// <summary>
/// Provider that concatenates the text of the direct <c>p</c> children of a located node,
/// one paragraph per line.
/// </summary>
public class ParagraphedContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the container holding the paragraphs.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Returns the inner text of each direct <c>p</c> child followed by a line terminator, or an
    /// empty string when <see cref="Content"/> is unset or the container is not found.
    /// </summary>
    public string Get(HtmlDocument document) {
        var container = Content?.ResolveNode(document);
        if (container is null)
            return "";

        var text = new StringBuilder();
        foreach (var paragraph in container.ChildNodes.Where((child) => child.Name == "p"))
            text.AppendLine(paragraph.InnerText);
        return text.ToString();
    }

    /// <summary>Returns the container node, or null when it cannot be located.</summary>
    public HtmlNode? GetNode(HtmlDocument document) => Content?.ResolveNode(document);
}
}
+13
View File
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Class library targeting net9.0; its only reference is the Beam project. -->
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Beam\Beam.csproj" />
</ItemGroup>
</Project>
+37
View File
@@ -0,0 +1,37 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Exports {
/// <summary>
/// Writes a document's <c>ToString()</c> text to a file. Subclasses change the produced text by
/// overriding <see cref="Convert"/> / <see cref="ConvertAsync"/>.
/// </summary>
public class PlainTextExporter : IExporter, IAsyncExporter {
    public PlainTextExporter(IDocument document) {
        Document = document;
    }

    /// <summary>The document being exported.</summary>
    public IDocument Document { get; }

    /// <summary>Produces the text to write.</summary>
    protected virtual string Convert() {
        return Document.ToString();
    }

    /// <summary>Asynchronous counterpart of <see cref="Convert"/>.</summary>
    protected virtual Task<string> ConvertAsync() {
        return Task.FromResult(Document.ToString());
    }

    /// <summary>Writes the converted text to <paramref name="path"/> as UTF-16.</summary>
    /// <param name="path">Target file path; its directory must already exist.</param>
    /// <exception cref="ArgumentException">The directory part of <paramref name="path"/> does not exist.</exception>
    public virtual void Write(string path) {
        var text = Convert();
        EnsureTargetDirectoryExists(path);
        File.WriteAllText(path, text, Encoding.Unicode);
    }

    /// <summary>Asynchronously writes the converted text to <paramref name="path"/> as UTF-16.</summary>
    /// <param name="path">Target file path; its directory must already exist.</param>
    /// <returns>A task that completes when the file has been written.</returns>
    /// <exception cref="ArgumentException">The directory part of <paramref name="path"/> does not exist.</exception>
    public virtual async Task WriteAsync(string path) {
        var text = await ConvertAsync();
        // Validate the containing directory, not the file path itself: testing
        // Directory.Exists(path) rejected every ordinary file path.
        EnsureTargetDirectoryExists(path);
        // Same encoding as Write so both entry points produce identical files.
        await File.WriteAllTextAsync(path, text, Encoding.Unicode);
    }

    // Throws when the directory that should contain the file at `path` is missing.
    private static void EnsureTargetDirectoryExists(string path) {
        var directory = Path.GetDirectoryName(path);
        // "" means a bare file name, which resolves against the current directory and is fine.
        if (directory is null || (directory.Length > 0 && !Directory.Exists(directory)))
            throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path));
    }
}
}
+42
View File
@@ -0,0 +1,42 @@
using System.Text;
namespace Beam.Exports {
/// <summary>
/// Exporter that renders the document text as HTML paragraphs, optionally preceded by a title
/// and surrounded by navigation links.
/// </summary>
/// <remarks>
/// Values are inserted into the markup as-is, without HTML encoding. The output is a complete
/// page (doctype, <c>html</c> element, trailing <see cref="EofHtml"/>) only when both
/// <see cref="Meta"/> and at least one link button are present; otherwise a fragment is returned.
/// </remarks>
public class HtmlExporter : PlainTextExporter {
    public HtmlExporter(
        IDocument document,
        ArticleData? meta = null,
        Dictionary<string, string>? linkButtons = null,
        string? eofHtml = null)
        : base(document) {
        Meta = meta;
        LinkButtons = linkButtons;
        EofHtml = eofHtml;
    }

    /// <summary>Article metadata; its name becomes the page heading.</summary>
    public ArticleData? Meta { get; }

    /// <summary>Link buttons, keyed by link text with the target URL as value.</summary>
    public Dictionary<string, string>? LinkButtons { get; }

    /// <summary>Raw HTML appended after the second controls block of a complete page.</summary>
    public string? EofHtml { get; }

    /// <summary>Builds the HTML described in the class remarks.</summary>
    protected override string Convert() {
        // Every newline closes the current paragraph and opens the next one.
        string paragraphs = "<p>" + Document.ToString().Replace("\n", "</p><p>") + "</p>";
        if (Meta is null)
            return paragraphs;

        string article = $"<h1>{Meta.Name}</h1>" + paragraphs;
        if (LinkButtons is not { Count: > 0 })
            return article;

        var anchors = new StringBuilder();
        foreach (var button in LinkButtons)
            anchors.AppendLine($"<a href=\"{button.Value}\">{button.Key}</a>");

        // The same controls block is emitted above and below the article.
        string controls = $"<div class=\"controls\">{anchors}</div>";
        return "<!DOCTYPE html>\n<html>" + controls + article + controls + (EofHtml ?? "") + "</html>";
    }

    /// <summary>Runs <see cref="Convert"/> synchronously and wraps the result in a completed task.</summary>
    protected override Task<string> ConvertAsync() => Task.FromResult(Convert());
}
}
+10
View File
@@ -0,0 +1,10 @@
namespace Beam.Exports {
/// <summary>
/// An <see cref="IExporter"/> that can also write asynchronously.
/// </summary>
public interface IAsyncExporter : IExporter {
/// <summary>
/// Asynchronously writes the object to the desired path, creating it if it does not exist.
/// </summary>
/// <param name="path">The path of the exported object</param>
/// <returns>A task representing the asynchronous write</returns>
public Task WriteAsync(string path);
}
}
+19
View File
@@ -0,0 +1,19 @@
namespace Beam.Exports {
/// <summary>
/// Exports an object to a location on the file system.
/// </summary>
public interface IExporter {
/// <summary>
/// Synchronously writes the object to the desired path, creating it if it does not exist.
/// </summary>
/// <param name="path">The path of the exported object</param>
public void Write(string path);
/// <summary>
/// Deletes the file at <paramref name="path"/> if one exists; otherwise creates a directory at
/// <paramref name="path"/> when no directory exists there.
/// </summary>
/// <remarks>
/// NOTE(review): the directory is created at <paramref name="path"/> itself, not at its parent,
/// so passing a not-yet-existing file path creates a directory with the file's name. Confirm
/// whether the parent directory was intended.
/// </remarks>
/// <param name="path">The file or directory path to prepare</param>
protected void EnsurePathExists(string path) {
if (File.Exists(path)) {
File.Delete(path);
return;
}
else if (!Directory.Exists(path))
Directory.CreateDirectory(path);
}
}
}
+11
View File
@@ -0,0 +1,11 @@
namespace Beam.Exports {
/// <summary>
/// An <see cref="IAsyncExporter"/> that can split its output across several files.
/// </summary>
public interface IStreamExporter : IAsyncExporter {
/// <summary>
/// Asynchronously writes the object to the desired path in many parts, returning the path
/// of each written file as a stream
/// </summary>
/// <param name="path">The path of the exported object</param>
/// <returns>The async enumerator of each written file</returns>
public IAsyncEnumerator<string> WriteAsyncStream(string path);
}
}
+13
View File
@@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Exports {
/// <summary>Holder for string constants used by the exporters.</summary>
internal static class S {
/// <summary>Exception message texts.</summary>
internal static class M {
/// <summary>Message used when the directory part of a target path is missing.</summary>
internal const string FileDirectoryDoesNotExist = "Part of the path supplied does not exist.";
}
}
}
+52
View File
@@ -0,0 +1,52 @@
using aeqw89.DataKeys;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// <para>
/// A collection of specific useful methods and constants that facilitate the use of the application; allows other parts of the application to depend on architecture-specific arbitrary choices without compromising the Single-Responsibility principle or increasing redundant code.
/// </para>
/// </summary>
partial interface IArchitecture {
/// <summary>
/// Gets the <see cref="DownloadContext{T}"/> used to download the metadata associated with a <see cref="TextResource"/>
/// </summary>
/// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
/// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
/// <param name="sdd">The <see cref="SharedDataDictionary"/> to be used to retrieve information</param>
/// <param name="logger">Optional logger for logging debug information</param>
/// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download, or <see langword="null"/> when no context can be built</returns>
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, ILogger? logger = null);
/// <summary>
/// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="TextResource"/>
/// </summary>
/// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
/// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
/// <param name="sdd">The <see cref="SharedDataDictionary"/> to be used to retrieve information</param>
/// <param name="metadata">Optional book metadata to include with the final text record</param>
/// <param name="logger">Optional logger for logging debug information</param>
/// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download, or <see langword="null"/> when no context can be built</returns>
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, IDocumentMetaData? metadata = null, ILogger? logger = null);
/// <summary>
/// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the chapter metadata
/// </summary>
public DataKey<IDocumentMetaData> ChapterKey { get; set; }
/// <summary>
/// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the book metadata
/// </summary>
public DataKey<IDocumentMetaData> BookKey { get; set; }
/// <summary>
/// The default architecture. Every read of this property constructs a new instance, so
/// callers that need a stable object should store the returned value.
/// </summary>
public static IArchitecture Default => new MainArchitecture();
}
}
@@ -0,0 +1,33 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console executable (OutputType Exe) targeting net9.0. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- NOTE(review): Spectre.Console is pinned to a preview version. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj" />
<ProjectReference Include="..\Beam\Beam.csproj" />
</ItemGroup>
<!-- NOTE(review): the two aeqw89.* assemblies are referenced by HintPath into the build output of sibling checkouts, and the paths use different configurations (bin\Debug for DataKeys, bin\Release for PersistentData). Confirm this mix is intended. -->
<ItemGroup>
<Reference Include="aeqw89.DataKeys">
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
</Reference>
<Reference Include="aeqw89.PersistentData">
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
</Reference>
</ItemGroup>
</Project>
+30
View File
@@ -0,0 +1,30 @@
namespace Beam.Temporary.Cli {
/// <summary>
/// Colour and font settings, each stored as a CSS value string with a built-in default.
/// </summary>
public class CssData {
/// <summary>Primary background color (e.g., for the body)</summary>
public string PrimaryColor { get; set; } = "#f5f5f5";
/// <summary>Secondary color (e.g., for header background)</summary>
public string SecondaryColor { get; set; } = "#e0e0e0";
/// <summary>Tertiary color (e.g., for content sections)</summary>
public string TertiaryColor { get; set; } = "#ffffff";
/// <summary>Button background color</summary>
public string ButtonColor { get; set; } = "#007bff";
/// <summary>Foreground text color</summary>
public string ForegroundColor { get; set; } = "#333333";
/// <summary>Font family for main content</summary>
public string ContentFont { get; set; } = "Arial, sans-serif";
/// <summary>Font size for main content</summary>
public string ContentFontSize { get; set; } = "16px";
/// <summary>Font family for titles</summary>
public string TitleFont { get; set; } = "Georgia, serif";
/// <summary>Font size for titles</summary>
public string TitleFontSize { get; set; } = "32px";
}
}
+34
View File
@@ -0,0 +1,34 @@
using aeqw89.DataKeys;
namespace Beam.Temporary.Cli {
/// <summary>
/// Helpers for deriving <see cref="DataKey"/>s whose identifiers are ':'-separated segments.
/// </summary>
internal static class DataKeyExtensions {
    /// <summary>
    /// Returns a key whose identifier lives under <paramref name="namespace"/>.
    /// </summary>
    /// <remarks>
    /// If the identifier already begins with the namespace, or with a leading part of it
    /// (whole segments only, longest first), that leading part is replaced by the full namespace;
    /// otherwise the namespace and a ':' are prepended. Comparison is ordinal and only the
    /// leading occurrence is rewritten.
    /// </remarks>
    /// <param name="dk">The key to re-home.</param>
    /// <param name="namespace">':'-separated namespace; an empty value returns <paramref name="dk"/> unchanged.</param>
    public static DataKey WithNamespace(this DataKey dk, string @namespace) {
        if (string.IsNullOrEmpty(@namespace))
            return dk;

        string identifier = dk.Identifier;
        string[] names = @namespace.Split(':');
        // Try the whole namespace first, then drop one trailing segment at a time.
        for (int segments = names.Length; segments > 0; segments--) {
            string prefix = string.Join(":", names, 0, segments);
            // Require a segment boundary so "aeqw89x:..." is not mistaken for "aeqw89:...".
            bool startsWithPrefix = identifier == prefix
                || identifier.StartsWith(prefix + ":", StringComparison.Ordinal);
            if (startsWithPrefix) {
                // Swap only the leading prefix; later repeats of the same text are kept.
                return new DataKey(@namespace + identifier[prefix.Length..]);
            }
        }
        return new DataKey(@namespace + ":" + identifier);
    }

    /// <summary>Typed overload of <see cref="WithNamespace(DataKey, string)"/>.</summary>
    public static DataKey<T> WithNamespace<T>(this DataKey<T> dk, string @namespace) {
        return ((DataKey)dk).WithNamespace(@namespace).As<T>();
    }

    /// <summary>Returns a key whose identifier is the original identifier followed by <paramref name="suffix"/>.</summary>
    public static DataKey<T> WithSuffix<T>(this DataKey<T> dk, string suffix) {
        return new DataKey<T>(dk.Identifier + suffix);
    }

    /// <summary>Places the key under the <c>aeqw89:document:aggregators</c> namespace.</summary>
    public static DataKey ToAggregator(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:aggregators");

    /// <summary>
    /// Places the key under the <c>aeqw89:document:auxillaries</c> namespace. The spelling is part
    /// of the identifier and must match keys created elsewhere.
    /// </summary>
    public static DataKey ToAuxiliary(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:auxillaries");

    /// <summary>Creates a typed key with the same identifier.</summary>
    public static DataKey<T> As<T>(this DataKey dk) => new DataKey<T>(dk.Identifier);
}
}
+6
View File
@@ -0,0 +1,6 @@
namespace Beam.Temporary.Cli {
/// <summary>A path paired with a set of tags.</summary>
/// <remarks>
/// NOTE(review): inside this namespace the simple name <c>File</c> binds to this class rather
/// than <c>System.IO.File</c>, so file-system calls here need the fully qualified name.
/// </remarks>
internal class File(string path, params string[] tags) {
// Initialised from the primary-constructor argument.
public string Path { get; set; } = path;
// Initialised from the primary-constructor argument.
public string[] Tags { get; set; } = tags;
}
}
+132
View File
@@ -0,0 +1,132 @@
//using aeqw89.DataKeys;
//using System;
//using System.Collections.Generic;
//using System.Linq;
//using System.Text;
//using System.Threading.Tasks;
//namespace Beam.Temporary.Cli {
// internal class HtmlBook : Document {
// public class Keys {
// public static DataKey<File> ContentPage => new DataKey<File>("content_page");
// public static DataKey<File> NoContentPage => new DataKey<File>("no_content_page");
// public static DataKey<File> TitlePage => new DataKey<File>("title_page");
// public static DataKey<File> StylesPage => new DataKey<File>("styles_page");
// }
// public List<Tracked<IDocument>> Documents { get; set; }
// public IReadOnlyList<string> Pages => _Pages;
// private List<string> _Pages { get; set; } = [];
// private const string EMTPY_PAGE = "EMPTY";
// public CssData CssData { get; }
// public ArticleData BookData { get; set; }
// public HtmlBookTemplates Templates { get; set; }
// public HtmlBook(string bookname, CssData cssData, ArticleData bookData, HtmlBookTemplates templates, List<IDocument>? documents = null, Encoding? encoding = null)
// : base(bookname, encoding) {
// Documents = [];
// CssData = cssData;
// BookData = bookData;
// Templates = templates;
// if (documents is not null)
// Documents = documents.Select((x) => new Tracked<IDocument>(x)).ToList();
// }
// public void Update(bool ignoreDirty = false) {
// if (!Directory.Exists(Filename))
// Directory.CreateDirectory(Filename);
// //System.IO.File.WriteAllLines(Path.Combine(Filename, "styles.css"), Format())
// List<string> newpages = [];
// if (Pages.Count < Documents.Count)
// _Pages.AddRange(Enumerable.Repeat(EMTPY_PAGE, Documents.Count - Pages.Count));
// foreach (var (doc, page) in Documents.Zip(Pages)) {
// if (!doc.IsDirty)
// newpages.Add(page);
// else if (doc.TrackedObject.MetaData.Count == 0)
// newpages.Add(PlainPage(doc.TrackedObject));
// else if (doc.TrackedObject.MetaData.TryGetValue(Program.Architecture.ChapterKey, out var meta) && meta is ArticleData articleData)
// newpages.Add(ArticlePage(doc.TrackedObject, articleData));
// else {
// Console.WriteLine("Unhandlable Metadata detected!");
// newpages.Add(PlainPage(doc.TrackedObject));
// }
// System.IO.File.WriteAllText(Path.Combine(Filename, Path.GetRandomFileName() + ".html"), newpages[^1]);
// doc.IsDirty = false;
// }
// _Pages = newpages;
// }
// public void UpdateCss() {
// }
// public void UpateTitle() {
// }
// private string Format(string template, Dictionary<string, string> table) {
// ArgumentNullException.ThrowIfNull(template);
// ArgumentNullException.ThrowIfNull(table);
// foreach (var kvp in table) {
// template = template.Replace(kvp.Key, kvp.Value);
// }
// return template;
// }
// private Dictionary<string, string> GetDocumentTable(IDocument doc, bool keepPlaceholders = false) {
// var table = new Dictionary<string, string>() {
// { "{" + nameof(doc.Filename) + "}", doc.Filename },
// { "{Content}", doc.ToString() }
// };
// return SolvePlaceholders(table, keepPlaceholders);
// }
// private Dictionary<string, string> GetArticleDataTable(IDocument doc, ArticleData ad, bool keepPlaceholders = false) {
// var table = new Dictionary<string, string>() {
// { "{" + nameof(ad.Language) + "}", ad.Language ?? "" },
// { "{" + nameof(ad.Authors) + "}", ad.Authors.Aggregate("; ")},
// { "{" + nameof(ad.Categories) + "}", ad.Categories.Aggregate("; ") },
// { "{" + nameof(ad.Version) + "}", ad.Version ?? "" },
// { "{" + nameof(ad.Description) + "}", ad.Description ?? "" },
// { "{" + nameof(ad.Name) + "}", ad.Name ?? "" },
// { "{" + nameof(doc.Filename) + "}", doc.Filename },
// { "{Content}", doc.ToString() }
// };
// return SolvePlaceholders(table, keepPlaceholders);
// }
// private Dictionary<string, string> SolvePlaceholders(Dictionary<string, string> table, bool keepPlaceholders) {
// if (keepPlaceholders)
// return table.Select(
// (x) => new KeyValuePair<string, string>(x.Key, x.Value == "" ? $"{x.Key}" : x.Value))
// .ToDictionary();
// return table;
// }
// private string PlainPage(IDocument doc, bool keepPlaceholders = false) {
// return Format(Templates.ContentPageTemplate, GetDocumentTable(doc, keepPlaceholders));
// }
// private string ArticlePage(IDocument doc, ArticleData data, bool keepPlaceholders = false) {
// return Format(Templates.ContentPageTemplate, GetArticleDataTable(doc, data, keepPlaceholders));
// }
// public override byte[] ToBytes() {
// throw new NotImplementedException();
// }
// public override string ToString() {
// throw new NotImplementedException();
// }
// }
//}
+8
View File
@@ -0,0 +1,8 @@
namespace Beam.Temporary.Cli {
/// <summary>Groups the four template strings for an HTML book.</summary>
internal struct HtmlBookTemplates {
// Template for the title page.
public string TitlePageTemplate { get; set; }
// Template for a content page.
public string ContentPageTemplate { get; set; }
// Template for the stylesheet.
public string CssTemplate { get; set; }
// Template for the no-content case.
public string NoContentTemplate { get; set; }
}
}
+79
View File
@@ -0,0 +1,79 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Temporary.Cli {
partial interface IArchitecture {
    /// <summary>
    /// Default <see cref="IArchitecture"/>: builds download contexts from the novel, source and
    /// binding records held in a <see cref="SharedDataDictionary"/>.
    /// </summary>
    private class MainArchitecture : IArchitecture {
        public MainArchitecture() { }

        public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");
        public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");

        /// <summary>
        /// Builds the context that downloads a novel's metadata page and turns it into an
        /// <see cref="ArticleData"/>.
        /// </summary>
        /// <returns>
        /// The context, or null when the novel has no metadata template data or no associated
        /// metadata source.
        /// </returns>
        public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, ILogger? logger = null) {
            var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd

            // Validate the novel record before anything dereferences it.
            if (piece?.Resource?.MetaTemplateInitialData is null)
                return null;

            var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
            if (auxiliary is null) // aux is required to get metadata
                return null;

            // gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
            var link = sdd.Templates[auxiliary.Resource.Key].GenerateLink(piece.Resource.MetaTemplateInitialData);
            var binding = auxiliary.Bindings;

            return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
                return new ArticleData() {
                    // ResolveString, like the sibling fields: the dynamic Resolve() result cannot be
                    // passed to Clean(string?) when the binding resolves to an array.
                    Authors = [OnlineCleaner.Clean(binding?.Authors?.ResolveString(x) ?? "")],
                    Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
                    // Drop empty entries so missing tags or a trailing ';' do not yield "" categories.
                    Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';', StringSplitOptions.RemoveEmptyEntries),
                    Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
                };
            });
        }

        /// <summary>
        /// Builds the context that downloads a novel's chapters and turns each page into a
        /// <see cref="StringDocument"/> carrying chapter (and optionally book) metadata.
        /// </summary>
        /// <returns>The context, or null when the novel record or its aggregator cannot be retrieved.</returns>
        public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, SharedDataDictionary sdd, IDocumentMetaData? metaData = null, ILogger? logger = null) {
            var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
            if (res is null) // ensure novel data was retrieved successfully, before it is used
                return null;

            var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
            if (aggregator is null) // ensure aggregator data was retrieved successfully
                return null;

            var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator

            // creates a generative enumerable of type link from 'template'
            var sle = SourceLinkEnumerable.FromGenerator(new DataBackedSourceLinkGenerator(
                template, res.Resource.TemplateInitialData));

            return new DownloadContext<IDocument>(web, sle,
                transformer: (x) => {
                    var resolved = aggregator.Bindings.Resolve(x);
                    var articleData = new ArticleData() {
                        Name = OnlineCleaner.Clean(resolved.Title),
                    };
                    Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
                    meta.Add(ChapterKey, articleData);
                    if (metaData is not null)
                        meta.Add(BookKey, metaData);
                    return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) {
                        MetaData = meta
                    };
                },
                retryReporter: new Progress<int>((x) => Console.WriteLine($"Retrying download ({x})")),
                downloadReporter: new Progress<IDocument>((x) => Console.WriteLine($"Downloaded ({x.Filename})")),
                asyncFailurePredicates: [
                    // A page without the chapter container counts as a failed download.
                    (x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
                ],
                timeOut: TimeSpan.FromSeconds(15),
                downloadLogger: logger
            );
        }
    }
}
}
+144
View File
@@ -0,0 +1,144 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// Hard-coded definitions of web sources, link templates, bindings and novels, registered into
/// a <see cref="SharedDataDictionary"/>.
/// </summary>
internal static class NovelStatics {
    /// <summary>Registers "The Legendary Mechanic" against the Light Novel World source keys.</summary>
    public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) {
        DataKey<WebResource> sourceKey = new("aeqw89:document:aggregators:light_novel_world");
        DataKey<WebResource> metaSourceKey = new("aeqw89:document:auxillaries:light_novel_world");

        RegisterNovel(sdd, sourceKey, new TextResource() {
            Key = new DataKey<TextResource>("novels:the_legendary_mechanic"),
            AssociatedSource = sourceKey,
            AssociatedMetaSource = metaSourceKey,
            TemplateInitialData = ["the-legendary-mechanic-245", "1"],
            MetaTemplateInitialData = ["the-legendary-mechanic"]
        });
    }

    /// <summary>Registers "I Alone Level Up" against the Light Novel World source keys.</summary>
    public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) {
        DataKey<WebResource> sourceKey = new DataKey("light_novel_world").ToAggregator().As<WebResource>();
        DataKey<WebResource> metaSourceKey = new DataKey("light_novel_world").ToAuxiliary().As<WebResource>();

        RegisterNovel(sdd, sourceKey, new TextResource() {
            Key = new DataKey<TextResource>("novels:i_alone_level_up"),
            AssociatedSource = sourceKey,
            AssociatedMetaSource = metaSourceKey,
            TemplateInitialData = ["i-alone-level-up-236", "1"],
            MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"]
        });
    }

    /// <summary>
    /// Registers an empty link template for the Novel Full aggregator key. The two
    /// <see cref="WebResource"/> objects are constructed but not stored anywhere.
    /// </summary>
    public static void Define_NovelFull(SharedDataDictionary sdd) {
        const string documentNamespace = "aeqw89:document";
        var aggregatorKey = new DataKey<WebResource>("aggregators:novel_full").WithNamespace(documentNamespace);
        var auxiliaryKey = new DataKey<WebResource>("auxillaries:novel_full").WithNamespace(documentNamespace);
        var bindingsKey = new DataKey<DataBindings>("aeqw89:bindings:light_novel_world");

        // Constructed for parity with the other definitions; the instances are not registered.
        _ = new WebResource(aggregatorKey) {
            Name = "Novel Full",
            Description = "A novel aggregator site",
            Domain = "https://novelfull.net",
            Bindings = bindingsKey
        };
        _ = new WebResource(auxiliaryKey) {
            Name = "Novel Full",
            Description = "A novel aggregator site",
            Domain = "https://novelfull.net",
            Bindings = bindingsKey.WithSuffix("_aux")
        };

        sdd.Templates.TryAdd(aggregatorKey, new() {
            Template = ""
        });
    }

    /// <summary>
    /// Registers the Light Novel World aggregator and auxiliary sources, their link templates and
    /// the bindings used to read chapter pages and novel overview pages.
    /// </summary>
    public static void Define_LightNovelWorld(SharedDataDictionary sdd) {
        const string chapterBindingsId = "aeqw89:bindings:light_novel_world";
        const string overviewBindingsId = "aeqw89:bindings:light_novel_world_aux";

        // Both resources share everything except their key and bindings key.
        static WebResource Site(DataKey<WebResource> key, string bindingsId) => new WebResource(key) {
            Name = "Light Novel World",
            Description = "A novel aggregator site maintained by NetherClaw",
            Domain = "https://www.lightnovelworld.co",
            Bindings = new DataKey<DataBindings>(bindingsId)
        };

        var aggregatorKey = new DataKey<WebResource>("aeqw89:document:aggregators:light_novel_world");
        var auxiliaryKey = new DataKey<WebResource>("aeqw89:document:auxillaries:light_novel_world");
        var aggregatorSite = Site(aggregatorKey, chapterBindingsId);
        var auxiliarySite = Site(auxiliaryKey, overviewBindingsId);

        sdd.Templates.TryAdd(auxiliaryKey, new() {
            Template = "https://www.lightnovelworld.co/novel/{0}",
            IndexOfChapterIndex = -1
        });
        sdd.Templates.TryAdd(aggregatorKey, new() {
            Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}",
            IndexOfChapterIndex = 1
        });

        sdd.Aggregators.TryAdd(aggregatorSite.Key, aggregatorSite);
        sdd.Auxillaries.TryAdd(auxiliarySite.Key, auxiliarySite);

        // Chapter pages: title plus paragraph content.
        sdd.Bindings.TryAdd(new DataKey<DataBindings>(chapterBindingsId), new DataBindings() {
            Title = new Binding("aeqw89:binding:light_novel_world:title") {
                XPath = "/html/body/main/article/section/div[1]/h1/span[2]",
                Type = BindingType.Single
            },
            Content = new Binding("aeqw89:binding:light_novel_world:content") {
                Provider = new ParagraphedContentDataProvider() {
                    Content = new Binding() {
                        XPath = "//*[@id=\"chapter-container\"]"
                    }
                },
                Type = BindingType.UseProvider
            },
        });

        // Novel overview page: title, author, description paragraphs and tag list.
        sdd.Bindings.TryAdd(new DataKey<DataBindings>(overviewBindingsId), new DataBindings() {
            Title = new Binding("aeqw89:binding:light_novel_world_aux:title") {
                XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1",
                Type = BindingType.Single
            },
            Authors = new Binding("aeqw89:binding:light_novel_world_aux:authors") {
                XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a",
                Type = BindingType.Single
            },
            Description = new Binding("aeqw89:binding:light_novel_world_aux:description") {
                Provider = new ParagraphedContentDataProvider() {
                    Content = new Binding() {
                        XPath = "/html/body/main/article/div/section/div[1]/div"
                    }
                },
                Type = BindingType.UseProvider
            },
            Tags = new Binding("aeqw89:binding:light_novel_world_aux:tags") {
                Provider = new ListContentDataProvider() {
                    Content = new Binding() {
                        XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul"
                    }
                },
                Type = BindingType.UseProvider
            }
        });
    }

    // Adds the novel under its own key and lists it under its aggregator key.
    // NOTE(review): both calls are TryAdd; if TryAdd here leaves an existing entry untouched, a
    // second novel registered for the same aggregator key is not added to that aggregator's
    // list — confirm against SharedDataDictionary.
    private static void RegisterNovel(SharedDataDictionary sdd, DataKey<WebResource> aggregatorKey, TextResource novel) {
        sdd.Novels.TryAdd(novel.Key, novel);
        sdd.AggregatorNovels.TryAdd(aggregatorKey, [novel.Key]);
    }
}
}
+135
View File
@@ -0,0 +1,135 @@
using aeqw89.PersistentData;
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using Beam.Temporary.Cli.Templates.Classic;
using Beam.Exports;
namespace Beam.Temporary.Cli {
/// <summary>
/// Entry point of the temporary command-line harness. It rebuilds the shared data
/// dictionary, downloads the metadata and the first chapters of one hard-coded novel,
/// and writes every downloaded chapter to an HTML file in a fresh directory under "txt".
/// </summary>
internal class Program {
    /// <summary>Serializer options handed to the shared data context; configured at the start of <see cref="Main"/>.</summary>
    public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();

    /// <summary>The shared data dictionary; replaced in <see cref="Main"/> by the instance owned by the data context.</summary>
    public static SharedDataDictionary Shared { get; set; } = [];

    /// <summary>Architecture used to build the metadata and text download contexts.</summary>
    public static IArchitecture Architecture = IArchitecture.Default;

    /// <summary>Path passed to the data context for the shared data.</summary>
    const string SharedDataPath = "data/.dat";

    /// <summary>
    /// Runs the whole harness: configure serialization and logging, seed the shared data,
    /// download metadata, download up to 20 chapters, export them as HTML, then wait for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not read.</param>
    static async Task Main(string[] args) {
        ConversionOptions.Converters.AddPersistentDataRequiredConverters();
        ConversionOptions.WriteIndented = true;
        var web = new HtmlWeb();

        // The factory is IDisposable; a using declaration disposes it when Main exits.
        // It is declared before the data context, so the context (which receives the logger)
        // is disposed first and the factory last.
        using var lf = LoggerFactory.Create((x) => {
            x.AddConsole();
        });
        ILogger logger = lf
            .CreateLogger("Program");

        await using var sharedContext = await DataDictionaryContext<SharedDataDictionary>.Create(
            SharedDataPath,
            DataKind.Shared,
            logger,
            ConversionOptions
        );
        Shared = sharedContext.Data;

        // Start from an empty dictionary on every run and re-register all definitions.
        Shared.Clear();
        NovelStatics.Define_LightNovelWorld(Shared);
        NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared);
        NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared);
        ClassicTemplates.Register(Shared);

        var novel = new DataKey<TextResource>("novels:i_alone_level_up");

        // Step 1: download the novel-level metadata; only the first result is used.
        var context_aux = Architecture.GetMeta(web, novel, Shared);
        var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
            new SequentialFragmentDownloader<IDocumentMetaData>(
                context_aux,
                (c) => new UnitFragmentDownloader<IDocumentMetaData>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                logger)
            .UnwrapFragmented());
        var metadata = (await metaDownloader.FirstAsync());

        // Step 2: download the chapters, printing each file name as it is reported.
        var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data);
        context.DownloadReporter = new Progress<IDocument>((x) => Console.WriteLine(x.Filename));
        var downloader = new DownloadEnumerable<IDocument>(
            new SequentialFragmentDownloader<IDocument>(
                context,
                (c) => new UnitFragmentDownloader<IDocument>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                logger)
            .UnwrapFragmented());

        List<Ordered<IDocument>> documents = [];
        await foreach (var download in downloader.Take(20)) {
            // Documents without chapter metadata of the expected type are skipped.
            if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
                continue;
            if (meta is not ArticleData articleMetaData)
                continue;
            //Console.WriteLine($"Title: {data.Name}");
            //Console.WriteLine($"Description: {data.Description}");
            //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
            //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
            Console.WriteLine($"Chapter title: {articleMetaData.Name}");
            //Console.WriteLine($"Content: {download}");
            documents.Add(download);
        }

        // Step 3: export every kept chapter as "<order>.html" into a randomly named directory.
        string testDir = Path.Combine("txt", Path.GetRandomFileName());
        Directory.CreateDirectory(testDir);
        // Highest order among the kept documents, or -1 when nothing was kept.
        int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
        foreach (var document in documents.OrderBy((x) => x.Order)) {
            document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
            Dictionary<string, string> linkButtons = new();
            // No "Previous" link on order 0 and no "Next" link on the highest order.
            if (document.Order != 0)
                linkButtons.Add("Previous", $"{document.Order - 1}.html");
            if (document.Order != len)
                linkButtons.Add("Next", $"{document.Order + 1}.html");
            new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
        }
        Console.ReadKey();
        //foreach (var download in documents.OrderBy((x) => x.Order)) {
        //    if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
        //        Console.WriteLine($"{download.Order}:{meta.Name}");
        //}
        //string[] templates = new DataKey<File>[] {
        //    HtmlBook.Keys.ContentPage,
        //    HtmlBook.Keys.NoContentPage,
        //    HtmlBook.Keys.TitlePage,
        //    HtmlBook.Keys.StylesPage,
        //}.Select(
        //    (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
        //).ToArray();
        //HtmlBook book = new(
        //    bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
        //    new CssData(),
        //    new ArticleData(),
        //    new HtmlBookTemplates() {
        //        ContentPageTemplate = templates[0],
        //        NoContentTemplate = templates[1],
        //        TitlePageTemplate = templates[2],
        //        CssTemplate = templates[3],
        //    },
        //    documents: documents.Select((x) => x.Data).ToList()
        //);
        //book.Update();
        //Console.WriteLine("One variable!");
    }
}
}
@@ -0,0 +1,48 @@
using aeqw89.PersistentData;
using aeqw89.DataKeys;
using Beam.Dynamic;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// Typed view over the entries of a <see cref="BaseDataDictionary"/>. Every property reads its
/// dictionary through <c>GetOrCreateDictionary</c> under the property's own name and writes it
/// back into <c>Data</c> under that same name.
/// </summary>
public class SharedDataDictionary : BaseDataDictionary {
    /// <summary>Link-generation templates, keyed by web resource.</summary>
    public Dictionary<DataKey<WebResource>, PackagedSourceLinkGenerationData> Templates {
        get {
            return GetOrCreateDictionary<DataKey<WebResource>, PackagedSourceLinkGenerationData>(nameof(Templates));
        }
        set {
            Data[nameof(Templates)] = value;
        }
    }

    /// <summary>Aggregator web resources, keyed by their own key.</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Aggregators {
        get {
            return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Aggregators));
        }
        set {
            Data[nameof(Aggregators)] = value;
        }
    }

    /// <summary>Auxiliary web resources, keyed by their own key.</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Auxillaries {
        get {
            return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Auxillaries));
        }
        set {
            Data[nameof(Auxillaries)] = value;
        }
    }

    /// <summary>Binding sets, keyed by binding-set key.</summary>
    public Dictionary<DataKey<DataBindings>, DataBindings> Bindings {
        get {
            return GetOrCreateDictionary<DataKey<DataBindings>, DataBindings>(nameof(Bindings));
        }
        set {
            Data[nameof(Bindings)] = value;
        }
    }

    /// <summary>Sets of text-resource keys, grouped by web resource key.</summary>
    public Dictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>> AggregatorNovels {
        get {
            return GetOrCreateDictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>>(nameof(AggregatorNovels));
        }
        set {
            Data[nameof(AggregatorNovels)] = value;
        }
    }

    /// <summary>Text resources, keyed by their own key.</summary>
    public Dictionary<DataKey<TextResource>, TextResource> Novels {
        get {
            return GetOrCreateDictionary<DataKey<TextResource>, TextResource>(nameof(Novels));
        }
        set {
            Data[nameof(Novels)] = value;
        }
    }

    /// <summary>Registered files, keyed by file key.</summary>
    internal Dictionary<DataKey<File>, File> Files {
        get {
            return GetOrCreateDictionary<DataKey<File>, File>(nameof(Files));
        }
        set {
            Data[nameof(Files)] = value;
        }
    }
}
}
+15
View File
@@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// String helpers for sequences.
/// </summary>
public static class StringExtensions {
    /// <summary>
    /// Concatenates the strings of <paramref name="str"/>, placing <paramref name="separator"/>
    /// between consecutive elements.
    /// </summary>
    /// <param name="str">The strings to concatenate; enumerated exactly once.</param>
    /// <param name="separator">Text inserted between elements; <c>null</c> is treated as empty.</param>
    /// <returns>
    /// The joined text, or <see cref="string.Empty"/> for an empty sequence. Null elements
    /// contribute nothing, so the result is never <c>null</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    public static string Aggregate(this IEnumerable<string> str, string separator) {
        ArgumentNullException.ThrowIfNull(str);
        // string.Join walks the sequence once and builds the result in linear time,
        // unlike pairwise interpolation which re-copies the accumulated text each step.
        return string.Join(separator, str);
    }
}
}
@@ -0,0 +1,30 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli.Templates.Classic {
/// <summary>
/// Registers the files of the "classic" template set in a shared data dictionary.
/// </summary>
internal class ClassicTemplates {
    /// <summary>
    /// Adds the four classic template files (content page, title page, styles, no-content page)
    /// to <c>sdd.Files</c>. <c>TryAdd</c> is used, so keys that are already present are left alone.
    /// </summary>
    /// <param name="sdd">The dictionary that receives the file entries.</param>
    public static void Register(SharedDataDictionary sdd) {
        // NOTE(review): the paths are absolute and machine-specific; presumably they should be
        // resolved relative to the project or output directory - confirm before running elsewhere.
        (string Key, string Path, string Tag)[] entries = [
            ("aeqw89:files:templates:classic:content_page",
             "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Content.template.html",
             "htmlpage"),
            ("aeqw89:files:templates:classic:title_page",
             "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Title.template.html",
             "htmlpage"),
            ("aeqw89:files:templates:classic:styles_page",
             "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Styles.template.css",
             "styles"),
            ("aeqw89:files:templates:classic:no_content_page",
             "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\NoContent.template.html",
             "htmlpage"),
        ];

        // Entries are registered in the order listed above.
        foreach (var (key, path, tag) in entries)
            sdd.Files.TryAdd(new(key), new(path, tag, "templates"));
    }
}
/// <summary>
/// Helpers for dictionaries whose values are file entries.
/// </summary>
internal static class DictionaryOfFileExtensions {
    /// <summary>
    /// Looks up the entry stored under <paramref name="key"/> and reads the whole file at its path as text.
    /// </summary>
    /// <param name="dict">The dictionary holding the file entries.</param>
    /// <param name="key">Key of the entry to read; the indexer throws when it is missing.</param>
    /// <returns>The file's contents.</returns>
    public static string ReadToString<T>(this Dictionary<T, File> dict, T key) where T : notnull {
        var entry = dict[key];
        // Fully qualified because the bare name File refers to the entry type in this signature.
        return System.IO.File.ReadAllText(entry.Path);
    }
}
}
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>{Name}</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<header>
<h1>{Name}</h1>
<p><em>{Description}</em></p>
<div>
<span><strong>Authors:</strong> {Authors}</span> |
<span><strong>Language:</strong> {Language}</span> |
<span><strong>Categories:</strong> {Categories}</span> |
<span><strong>Version:</strong> {Version}</span>
</div>
</header>
<article>
{Content}
</article>
<div class="navigation">
<button id="prev">Previous</button>
<button id="next">Next</button>
</div>
</body>
</html>
@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>404 - Not Found</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div class="error-container">
<h1>404 - Content Not Found</h1>
<p>The file <strong>{Filename}</strong> was not found.</p>
<p>{Content}</p>
</div>
</body>
</html>
@@ -0,0 +1,60 @@
/* styles.css */
/* Placeholders:
{PrimaryColor}, {SecondaryColor}, {TertiaryColor}, {ButtonColor},
{ForegroundColor}, {ContentFont}, {ContentFontSize}, {TitleFont}, {TitleFontSize}
*/
body {
font-family: {ContentFont};
font-size: {ContentFontSize};
background-color: {PrimaryColor};
color: {ForegroundColor};
margin: 0;
padding: 20px;
}
header {
background-color: {SecondaryColor};
padding: 20px;
text-align: center;
}
header h1 {
font-family: {TitleFont};
font-size: {TitleFontSize};
margin: 0;
}
header p {
font-style: italic;
margin: 5px 0;
}
section, article, nav {
background: {TertiaryColor};
padding: 15px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
margin: 20px auto;
max-width: 800px;
}
.navigation {
display: flex;
justify-content: space-between;
max-width: 800px;
margin: 20px auto;
}
button {
background-color: {ButtonColor};
color: {ForegroundColor};
border: none;
padding: 10px 20px;
cursor: pointer;
font-size: {ContentFontSize};
border-radius: 4px;
}
nav h2 {
margin-top: 0;
}
@@ -0,0 +1,26 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>{Name}</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<header>
<h1>{Name}</h1>
<p><em>{Description}</em></p>
</header>
<section>
<div><strong>Authors:</strong> {Authors}</div>
<div><strong>Language:</strong> {Language}</div>
<div><strong>Categories:</strong> {Categories}</div>
<div><strong>Version:</strong> {Version}</div>
</section>
<nav>
<h2>Table of Contents</h2>
<ul>
{TOC} <!-- Expected to be a list of items (e.g. <li>Chapter 1</li>, etc.) -->
</ul>
</nav>
</body>
</html>
+26
View File
@@ -0,0 +1,26 @@
using aeqw89.DataKeys;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// A text resource together with the keys of the web resources it is associated with
/// and the initial data for its link templates.
/// </summary>
public class TextResource : IKeyed<TextResource> {
    /// <summary>Key identifying this resource.</summary>
    public required DataKey<TextResource> Key { get; set; }
    /// <summary>Key looked up in <c>Aggregators</c> by <see cref="ToRecord"/>; may be absent.</summary>
    public DataKey<WebResource>? AssociatedSource { get; set; }
    /// <summary>Key looked up in <c>Auxillaries</c> by <see cref="ToRecord"/>; may be absent.</summary>
    public DataKey<WebResource>? AssociatedMetaSource { get; set; }
    /// <summary>Initial template data for the resource.</summary>
    public required string[] TemplateInitialData { get; set; }
    /// <summary>Initial template data for the meta source; may be absent.</summary>
    public string?[]? MetaTemplateInitialData { get; set; }

    /// <summary>
    /// Resolves the associated keys against <paramref name="sdd"/> and bundles the results with this resource.
    /// </summary>
    /// <param name="sdd">Dictionary providing the <c>Aggregators</c> and <c>Auxillaries</c> lookups.</param>
    /// <returns>A record holding this resource and the resolved web resources (null where no key is set).</returns>
    public TextResourceRecord ToRecord(SharedDataDictionary sdd) {
        WebResource? source = null;
        if (AssociatedSource is not null)
            source = sdd.Aggregators[AssociatedSource];

        WebResource? metaSource = null;
        if (AssociatedMetaSource is not null)
            metaSource = sdd.Auxillaries[AssociatedMetaSource];

        return new TextResourceRecord(this, source, metaSource);
    }
}
/// <summary>A text resource paired with its resolved source and meta-source web resources.</summary>
public record TextResourceRecord(TextResource Resource, WebResource? AssociatedSource, WebResource? AssociatedMetaSource);
}
+11
View File
@@ -0,0 +1,11 @@
namespace Beam.Temporary.Cli {
/// <summary>
/// Pairs an object with a dirty flag. A new instance starts out dirty.
/// </summary>
internal class Tracked<T> {
    /// <summary>Wraps <paramref name="obj"/>.</summary>
    /// <param name="obj">The object to track.</param>
    public Tracked(T obj) {
        TrackedObject = obj;
    }

    /// <summary>The wrapped object.</summary>
    public T TrackedObject { get; set; }

    /// <summary>Dirty flag; <c>true</c> on construction.</summary>
    public bool IsDirty { get; set; } = true;

    /// <summary>Sets <see cref="IsDirty"/> to <c>true</c>.</summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public Tracked<T> SetDirty() {
        IsDirty = true;
        return this;
    }
}
}
+28
View File
@@ -0,0 +1,28 @@
using aeqw89.PersistentData;
using aeqw89.DataKeys;
using Beam.Dynamic;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// Describes a web resource: its key, the key of its binding set, and optional descriptive fields.
/// </summary>
public class WebResource : IKeyed<WebResource> {
    /// <summary>Creates a resource with the given key.</summary>
    /// <param name="key">Key identifying the resource.</param>
    public WebResource(DataKey<WebResource> key) {
        Key = key;
    }

    /// <summary>Key identifying this resource.</summary>
    public DataKey<WebResource> Key { get; set; }
    /// <summary>Key of the binding set looked up by <see cref="ToRecord"/>.</summary>
    public required DataKey<DataBindings> Bindings { get; set; }
    /// <summary>Optional display name.</summary>
    public string? Name { get; set; }
    /// <summary>Optional domain.</summary>
    public string? Domain { get; set; }
    /// <summary>Optional description.</summary>
    public string? Description { get; set; }

    /// <summary>Creates a resource whose key is built from the empty string.</summary>
    public WebResource() : this(new DataKey<WebResource>(string.Empty)) { }

    /// <summary>
    /// Looks up this resource's binding set in <paramref name="sdd"/> and bundles it with the resource.
    /// </summary>
    /// <param name="sdd">Dictionary providing the <c>Bindings</c> lookup; the indexer throws when the key is missing.</param>
    /// <returns>A record holding this resource and its bindings.</returns>
    public WebResourceRecord ToRecord(SharedDataDictionary sdd) {
        DataBindings resolved = sdd.Bindings[Bindings];
        return new WebResourceRecord(this, resolved);
    }
}
/// <summary>A web resource paired with its resolved binding set.</summary>
public record WebResourceRecord(WebResource Resource, DataBindings Bindings);
}
+40
View File
@@ -0,0 +1,40 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.12.35506.116
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam", "Beam\Beam.csproj", "{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Temporary.Cli", "Beam.Temporary.Cli\Beam.Temporary.Cli.csproj", "{8F650BBA-3800-4B5E-A6FF-9057633601EE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Dynamic", "Beam.Dynamic\Beam.Dynamic.csproj", "{DDEABE82-096C-4799-87F1-56F494D35FAA}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Exports\Beam.Exports.csproj", "{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.Build.0 = Release|Any CPU
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.Build.0 = Release|Any CPU
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.Build.0 = Release|Any CPU
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
+21
View File
@@ -0,0 +1,21 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Descriptive metadata of an article: name, authors, language, categories, version and description.
/// </summary>
public class ArticleData : IDocumentMetaData {
    /// <summary>Name of the article.</summary>
    public string? Name { get; set; }
    /// <summary>Authors; empty by default.</summary>
    public string[] Authors { get; set; } = Array.Empty<string>();
    /// <summary>Language of the article.</summary>
    public string? Language { get; set; }
    /// <summary>Categories; empty by default.</summary>
    public string[] Categories { get; set; } = Array.Empty<string>();
    /// <summary>Version of the article.</summary>
    public string? Version { get; set; }
    /// <summary>Description of the article.</summary>
    public string? Description { get; set; }

    /// <summary>Serializes this instance to JSON.</summary>
    /// <param name="options">Serializer options; the serializer defaults apply when <c>null</c>.</param>
    /// <returns>The JSON text.</returns>
    public string AsJson(JsonSerializerOptions? options = null)
        => JsonSerializer.Serialize<ArticleData>(this, options);
}
}
+25
View File
@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="FluentBuilder" Version="0.10.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
<ItemGroup>
<Reference Include="aeqw89.DataKeys">
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
</Reference>
</ItemGroup>
</Project>
+15
View File
@@ -0,0 +1,15 @@
using System.Text;
namespace Beam {
/// <summary>
/// A document whose content is held as a byte array.
/// </summary>
internal class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) {
    /// <summary>The raw content.</summary>
    public byte[] Content { get; set; } = content;

    /// <summary>Returns <see cref="Content"/> itself (no copy is made).</summary>
    public override byte[] ToBytes() => Content;

    /// <summary>Decodes <see cref="Content"/> with the document's encoding.</summary>
    public override string ToString() => Encoding.GetString(Content);
}
}
+9
View File
@@ -0,0 +1,9 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// A <see cref="DelegateBackedSourceLinkGenerator"/> whose link generator and incrementation
/// behaviour both come from a <see cref="PackagedSourceLinkGenerationData"/> value.
/// </summary>
public class DataBackedSourceLinkGenerator : DelegateBackedSourceLinkGenerator {
    /// <summary>Creates the generator from packaged template data.</summary>
    /// <param name="data">Supplies <c>GenerateLink</c> and <c>GetBehaviour()</c>.</param>
    /// <param name="initialState">Initial template arguments, forwarded to the base class.</param>
    public DataBackedSourceLinkGenerator(PackagedSourceLinkGenerationData data, params object[] initialState)
        : base(data.GenerateLink, data.GetBehaviour(), initialState) {
    }
}
}
+48
View File
@@ -0,0 +1,48 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>Builds a link from a list of template arguments.</summary>
public delegate DocumentSourceLink LinkGenerator(params object[] ps);
/// <summary>Produces the value that results from moving <paramref name="obj"/> by <paramref name="amount"/>.</summary>
public delegate object Incrementor(object obj, int amount);

/// <summary>
/// Enumerates links by repeatedly applying an <see cref="IncrementationBehaviour"/> to a state
/// array and passing that state to a <see cref="LinkGenerator"/>.
/// </summary>
public class DelegateBackedSourceLinkGenerator : IEnumerator<DocumentSourceLink> {
    // Copy of the constructor's state, used by Reset to start over.
    private object[] InitialState;

    /// <summary>Delegate that turns the current state into a link.</summary>
    public LinkGenerator Generator { get; set; }
    /// <summary>Behaviour applied to the state on every step.</summary>
    public IncrementationBehaviour Behaviour { get; }
    /// <summary>The current template arguments.</summary>
    public object[] State { get; set; }
    /// <summary>The link generated from the current state.</summary>
    public DocumentSourceLink Current { get; private set; }
    object IEnumerator.Current => Current;

    /// <summary>Stores copies of <paramref name="initialState"/> and positions the enumerator via <see cref="Reset"/>.</summary>
    /// <param name="generator">Delegate that builds links from the state.</param>
    /// <param name="behaviour">Behaviour applied to the state on every step.</param>
    /// <param name="initialState">Initial template arguments; the array is cloned, not kept.</param>
    public DelegateBackedSourceLinkGenerator(LinkGenerator generator, IncrementationBehaviour behaviour, params object[] initialState) {
        Generator = generator;
        Behaviour = behaviour;
        InitialState = Snapshot(initialState);
        State = Snapshot(initialState);
        Reset();
    }

    /// <summary>Nothing to release.</summary>
    public void Dispose() {
    }

    /// <summary>Applies the behaviour with amount 1 and regenerates <see cref="Current"/>.</summary>
    /// <returns>Whether the new <see cref="Current"/> has a value.</returns>
    public bool MoveNext() {
        Step(1);
        return Current.HasValue;
    }

    /// <summary>
    /// Restores the initial state and applies the behaviour with amount -1, so that the
    /// following <see cref="MoveNext"/> (amount 1) lands on the initial state again.
    /// </summary>
    public void Reset() {
        State = Snapshot(InitialState);
        Step(-1);
    }

    // Applies the behaviour to the state in place, then rebuilds Current from it.
    private void Step(int amount) {
        Behaviour.Apply(State, amount);
        Current = Generator(State);
    }

    // Shallow copy of a state array.
    private static object[] Snapshot(object[] source) => (object[])source.Clone();
}
}
+13
View File
@@ -0,0 +1,13 @@
using aeqw89.DataKeys;
using System.Text;
namespace Beam {
/// <summary>
/// Base class for documents: stores the file name, the text encoding and the metadata dictionary,
/// and leaves the byte and string representations to derived classes.
/// </summary>
public abstract class Document : IDocument {
    /// <summary>Initializes the file name and encoding.</summary>
    /// <param name="filename">File name of the document.</param>
    /// <param name="encoding">Text encoding; UTF-8 is used when <c>null</c>.</param>
    public Document(string filename, Encoding? encoding = null) {
        Filename = filename;
        Encoding = encoding ?? Encoding.UTF8;
    }

    /// <summary>File name of the document.</summary>
    public string Filename { get; set; }
    /// <summary>Encoding associated with the document.</summary>
    public Encoding Encoding { get; set; }
    /// <summary>Metadata entries attached to the document; starts empty.</summary>
    public Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; set; } = new();

    /// <summary>Returns the binary representation of the document.</summary>
    public abstract byte[] ToBytes();
    /// <summary>Returns the string representation of the document.</summary>
    public override abstract string ToString();
}
}
+57
View File
@@ -0,0 +1,57 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Holds a collection of <see cref="IDocument"/> objects in memory to facilitate lazy loading
/// </summary>
public class DocumentCache : Dictionary<object, IDocument>, IDisposable {
    private bool disposedValue;

    /// <summary>
    /// Calculates the estimated memory usage and checks whether it is below a limit.
    /// </summary>
    /// <param name="allocatedSpaceInBytes">The memory limit in bytes</param>
    /// <returns>
    /// <c>true</c> when <see cref="CalculateMemorySpaceUsage"/> is strictly less than
    /// <paramref name="allocatedSpaceInBytes"/>; otherwise <c>false</c>.
    /// </returns>
    public bool IsCapacityLessThan(int allocatedSpaceInBytes) {
        // Compare bytes against bytes; the entry count is not a size in bytes.
        return CalculateMemorySpaceUsage() < allocatedSpaceInBytes;
    }

    /// <summary>
    /// Gets an estimate of the space used by the IDocument objects (disregarding metadata) in bytes.
    /// </summary>
    /// <returns>Estimated memory usage in bytes; 0 for an empty cache.</returns>
    public long CalculateMemorySpaceUsage() {
        // Sum returns 0 for an empty cache, where a seedless Aggregate would throw.
        return this.Sum((x) => x.Value.ToBytes().LongLength);
    }

    /// <summary>Clears the cache the first time it is called with <paramref name="disposing"/> set.</summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (!disposedValue) {
            if (disposing) {
                // TODO: dispose managed state (managed objects)
                this.Clear();
            }
            // TODO: free unmanaged resources (unmanaged objects) and override finalizer
            // TODO: set large fields to null
            disposedValue = true;
        }
    }
    // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources
    // ~DocumentCache()
    // {
    //     // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
    //     Dispose(disposing: false);
    // }

    /// <summary>Releases the cached documents.</summary>
    public void Dispose() {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
}
+31
View File
@@ -0,0 +1,31 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// A link to a document source, stored as text and exposed as a <see cref="Uri"/>.
/// </summary>
/// <remarks>
/// Equality compares the parsed absolute URIs when both links parse, and the raw text otherwise,
/// so <c>default</c> and unparsable values can be compared and hashed without throwing.
/// </remarks>
public readonly struct DocumentSourceLink(string link) : IEquatable<DocumentSourceLink> {
    // Raw link text; null for default(DocumentSourceLink).
    private readonly string Link_ { get; } = link;

    /// <summary>
    /// The link as a <see cref="Uri"/>. A new instance is created on every access; the
    /// <see cref="Uri"/> constructor throws when the text is null or not a valid absolute URI.
    /// </summary>
    public readonly Uri Link => new(Link_);

    /// <summary>Whether the link text is neither null, empty, nor whitespace.</summary>
    public bool HasValue => !string.IsNullOrWhiteSpace(Link_);

    /// <summary>A fixed placeholder link ("https://invalid.link").</summary>
    public static DocumentSourceLink InvalidLink { get; } = new("https://invalid.link");

    /// <summary>Equality operator; see <see cref="Equals(DocumentSourceLink)"/>.</summary>
    public static bool operator ==(DocumentSourceLink lhs, DocumentSourceLink rhs) {
        return lhs.Equals(rhs);
    }

    /// <summary>Inequality operator; see <see cref="Equals(DocumentSourceLink)"/>.</summary>
    public static bool operator !=(DocumentSourceLink lhs, DocumentSourceLink rhs) {
        return !lhs.Equals(rhs);
    }

    /// <summary>Compares two links without throwing on missing or malformed text.</summary>
    /// <param name="other">The link to compare with.</param>
    /// <returns>
    /// <see cref="Uri"/> equality when both links parse as absolute URIs; ordinal text equality
    /// when neither parses; <c>false</c> when only one of them parses.
    /// </returns>
    public bool Equals(DocumentSourceLink other) {
        Uri? mine = ParseOrNull(Link_);
        Uri? theirs = ParseOrNull(other.Link_);
        if (mine is not null && theirs is not null)
            return mine == theirs;
        if (mine is null && theirs is null)
            return string.Equals(Link_, other.Link_, StringComparison.Ordinal);
        return false;
    }

    /// <summary>Returns <c>true</c> only for a <see cref="DocumentSourceLink"/> equal to this one.</summary>
    public override bool Equals(object? obj) {
        // Matching hash codes alone do not prove equality, so compare the actual values.
        return obj is DocumentSourceLink other && Equals(other);
    }

    /// <summary>Hash code consistent with <see cref="Equals(DocumentSourceLink)"/>.</summary>
    public override int GetHashCode() {
        return ParseOrNull(Link_)?.GetHashCode() ?? Link_?.GetHashCode() ?? 0;
    }

    // Parses the text as an absolute URI; null when the text is null or malformed.
    private static Uri? ParseOrNull(string? raw)
        => Uri.TryCreate(raw, UriKind.Absolute, out var parsed) ? parsed : null;
}
}
+89
View File
@@ -0,0 +1,89 @@
using HtmlAgilityPack;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using FluentBuilder;
using Microsoft.Extensions.Logging;
namespace Beam {
/// <summary>Converts a loaded HTML document into a <typeparamref name="T"/>.</summary>
public delegate T HtmlTransformer<out T>(HtmlDocument doc);
/// <summary>Asynchronously converts a loaded HTML document into a <typeparamref name="T"/>.</summary>
public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);

/// <summary>
/// Bundles everything a downloader needs: the web client, the links to fetch, the transformer(s)
/// that turn HTML into <typeparamref name="T"/>, progress reporters, failure predicates,
/// a time-out, a cancellation token, a logger and a document cache.
/// </summary>
public class DownloadContext<T> : IDisposable {
    private bool disposedValue;

    /// <summary>The web client used for downloads.</summary>
    public HtmlWeb Web { get; }
    /// <summary>Synchronous transformer; wraps the asynchronous one when only that was supplied.</summary>
    public HtmlTransformer<T> Transformer { get; }
    /// <summary>Asynchronous transformer; wraps the synchronous one when only that was supplied.</summary>
    public AsyncHtmlTransformer<T> AsyncTranformer { get; }
    /// <summary>Optional reporter for downloaded items.</summary>
    public IProgress<T>? DownloadReporter { get; set; }
    /// <summary>Optional reporter for retry counts.</summary>
    public IProgress<int>? RetryReporter { get; set; }
    /// <summary>Optional predicates passed to the unit downloaders.</summary>
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }
    /// <summary>Time-out; one minute unless specified.</summary>
    public TimeSpan TimeOut { get; set; }
    /// <summary>The links to download.</summary>
    public IEnumerable<DocumentSourceLink> Links { get; }
    /// <summary>Token used to cancel downloads.</summary>
    public CancellationToken CancellationToken { get; }
    /// <summary>Document cache owned by this context; cleared when the context is disposed.</summary>
    public DocumentCache Cache { get; private set; } = [];
    /// <summary>Optional logger for download activity.</summary>
    public ILogger? DownloadLogger { get; set; }

    /// <summary>Creates a context.</summary>
    /// <param name="web">The web client; required.</param>
    /// <param name="links">The links to download; required.</param>
    /// <param name="cancellationToken">Token used to cancel downloads.</param>
    /// <param name="transformer">Synchronous transformer; may be null when <paramref name="asyncTransformer"/> is given.</param>
    /// <param name="asyncTransformer">Asynchronous transformer; may be null when <paramref name="transformer"/> is given.</param>
    /// <param name="downloadReporter">Optional reporter for downloaded items.</param>
    /// <param name="retryReporter">Optional reporter for retry counts.</param>
    /// <param name="asyncFailurePredicates">Optional failure predicates.</param>
    /// <param name="timeOut">Time-out; defaults to one minute.</param>
    /// <param name="downloadLogger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="web"/> or <paramref name="links"/> is null.</exception>
    /// <exception cref="ArgumentException">Both transformers are null.</exception>
    public DownloadContext(HtmlWeb web,
        IEnumerable<DocumentSourceLink> links,
        CancellationToken cancellationToken = default,
        HtmlTransformer<T>? transformer = null,
        AsyncHtmlTransformer<T>? asyncTransformer = null,
        IProgress<T>? downloadReporter = null,
        IProgress<int>? retryReporter = null,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? asyncFailurePredicates = null,
        TimeSpan? timeOut = null,
        ILogger? downloadLogger = null) {
        ArgumentNullException.ThrowIfNull(web, nameof(web));
        ArgumentNullException.ThrowIfNull(links, nameof(links));
        Web = web;
        Links = links;
        CancellationToken = cancellationToken;
        if (transformer is null && asyncTransformer is null)
            throw new ArgumentException($"Either {nameof(transformer)} or {nameof(asyncTransformer)} must be not null.");
        // At least one of the two is non-null here; the missing one is derived from the other below.
        Transformer = transformer!;
        AsyncTranformer = asyncTransformer!;
        if (transformer is null && asyncTransformer is not null)
            // NOTE(review): .Result blocks the calling thread and wraps failures in
            // AggregateException; kept as-is because callers may rely on that exception type.
            Transformer = (x) => asyncTransformer(x).Result;
        if (asyncTransformer is null && transformer is not null)
            AsyncTranformer = (x) => Task.FromResult(transformer(x));
        DownloadReporter = downloadReporter;
        RetryReporter = retryReporter;
        AsyncFailurePredicates = asyncFailurePredicates;
        TimeOut = timeOut ?? TimeSpan.FromMinutes(1);
        DownloadLogger = downloadLogger;
    }

    /// <summary>Disposes the cache the first time it is called with <paramref name="disposing"/> set.</summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (!disposedValue) {
            if (disposing) {
                // The cache is IDisposable and owned by this context: dispose it (which clears
                // it) rather than assigning null to a property declared non-nullable.
                Cache.Dispose();
            }
            // TODO: free unmanaged resources (unmanaged objects) and override finalizer
            // TODO: set large fields to null
            disposedValue = true;
        }
    }
    // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources
    // ~DownloadContext()
    // {
    //     // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
    //     Dispose(disposing: false);
    // }

    /// <summary>Releases the resources owned by the context.</summary>
    public void Dispose() {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
}
+14
View File
@@ -0,0 +1,14 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Adapts an existing asynchronous enumerator so it can be consumed as an
/// <see cref="IAsyncEnumerable{T}"/> (for example with <c>await foreach</c>).
/// </summary>
public class DownloadEnumerable<T> : IAsyncEnumerable<Ordered<T>> {
    /// <summary>Wraps <paramref name="download"/>.</summary>
    /// <param name="download">The enumerator to expose.</param>
    public DownloadEnumerable(IAsyncEnumerator<Ordered<T>> download) {
        Download = download;
    }

    /// <summary>The wrapped enumerator.</summary>
    public IAsyncEnumerator<Ordered<T>> Download { get; }

    /// <summary>
    /// Returns the wrapped enumerator. Every call returns the same instance, and
    /// <paramref name="cancellationToken"/> is not used.
    /// </summary>
    public IAsyncEnumerator<Ordered<T>> GetAsyncEnumerator(CancellationToken cancellationToken = default) {
        return Download;
    }
}
}
+37
View File
@@ -0,0 +1,37 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
namespace Beam {
/// <summary>
/// A bag of items with a declared maximum size, a completion flag, and a single "updater"
/// (the bag's Add delegate) that is handed out to at most one holder at a time.
/// </summary>
public sealed class Fragment<T>(int size) {
/// <summary>Number of items currently in the bag.</summary>
public int Size => FragmentBag.Count;
/// <summary>The size passed to the constructor. It is not enforced when items are added.</summary>
public int MaxSize { get; } = size;
private ConcurrentBag<T> FragmentBag { get; set; } = new();
/// <summary>
/// Removes one item from the bag.
/// </summary>
/// <param name="shard">The removed item when the method returns <c>true</c>.</param>
/// <returns><c>true</c> only when an item was removed and it is not null.</returns>
public bool TryTake([NotNullWhen(true)] out T? shard) {
return FragmentBag.TryTake(out shard) && shard != null;
}
// NOTE(review): Complete starts as false and SetComplete only ever stores a non-null bool,
// so within this class the "Size == MaxSize" fallback below never runs - confirm whether
// the initial value was meant to be null.
private bool? Complete = false;
/// <summary>The explicit completion status when set; otherwise whether the bag holds exactly MaxSize items.</summary>
public bool IsComplete => Complete ?? Size == MaxSize;
// True while the updater is handed out; changed only through Interlocked operations.
private bool UpdaterLocked = false;
/// <summary>
/// Tries to claim the fragment's updater.
/// </summary>
/// <param name="fragment">The fragment whose updater is requested.</param>
/// <param name="updater">The bag's Add delegate when the claim succeeds; otherwise null.</param>
/// <returns><c>false</c> when the updater was already claimed; otherwise <c>true</c>.</returns>
public static bool TryAcquireUpdater(Fragment<T> fragment, [NotNullWhen(true)] out Action<T>? updater) {
updater = null;
// Atomically set the flag to true if it was false; the returned value is the previous flag.
if (Interlocked.CompareExchange(ref fragment.UpdaterLocked, true, false) == true)
// equivalent to : fragment.UpdaterLocked == true, side-effect: sets fragment.UpdaterLocked to true
return false;
updater = fragment.FragmentBag.Add;
return true;
}
/// <summary>
/// Releases the updater claim if <paramref name="updater"/> is this fragment's Add delegate.
/// </summary>
/// <param name="fragment">The fragment the updater belongs to.</param>
/// <param name="updater">The delegate obtained from <see cref="TryAcquireUpdater"/>.</param>
/// <returns><c>true</c> when the claim was released; <c>false</c> when the delegate does not match.</returns>
public static bool TryReleaseUpdater(Fragment<T> fragment, Action<T> updater) {
// Delegate equality compares target and method, so a freshly created delegate for the same bag matches.
if (updater == fragment.FragmentBag.Add) {
Interlocked.Exchange(ref fragment.UpdaterLocked, false);
return true;
}
return false;
}
/// <summary>Sets the explicit completion status of <paramref name="fragment"/>.</summary>
public static void SetComplete(Fragment<T> fragment, bool status) {
fragment.Complete = status;
}
}
}
+23
View File
@@ -0,0 +1,23 @@
namespace Beam {
/// <summary>
/// Extension methods for asynchronous enumerators.
/// </summary>
public static class IAsyncEnumeratorExtensions {
    /// <summary>
    /// Flattens an enumerator of fragments into an enumerator of the fragments' items.
    /// </summary>
    /// <param name="fragmented">The fragment enumerator; it is disposed when enumeration ends.</param>
    /// <returns>
    /// An enumerator that yields the items taken from each fragment in turn and stops at the
    /// first fragment that is null or not complete.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fragmented"/> is null.</exception>
    public static IAsyncEnumerator<T> UnwrapFragmented<T>(this IAsyncEnumerator<Fragment<T>> fragmented) {
        // Validate here, outside the iterator, so the exception is raised at the call site
        // instead of on the first MoveNextAsync.
        ArgumentNullException.ThrowIfNull(fragmented);
        return UnwrapFragmentedCore(fragmented);
    }

    // Iterator part of UnwrapFragmented; assumes the argument has been validated.
    private static async IAsyncEnumerator<T> UnwrapFragmentedCore<T>(IAsyncEnumerator<Fragment<T>> fragmented) {
        try {
            while (await fragmented.MoveNextAsync().ConfigureAwait(false)) {
                var fragment = fragmented.Current;
                if (fragment is null || !fragment.IsComplete)
                    yield break;
                // TryTake returns true only for non-null items, so no further null check is needed.
                while (fragment.TryTake(out var item))
                    yield return item;
            }
        } finally {
            await fragmented.DisposeAsync().ConfigureAwait(false);
        }
    }
}
}
+29
View File
@@ -0,0 +1,29 @@
using aeqw89.DataKeys;
using System.Diagnostics.CodeAnalysis;
namespace Beam {
/// <summary>
/// A named document that carries keyed metadata and can be rendered as bytes or as text.
/// </summary>
public interface IDocument {
/// <summary>
/// The file name of the document. Must be valid on <c>UNIX</c>,
/// <c>WINDOWS</c>, <c>APPLE</c>, and <c>ANDROID</c> file systems.
/// </summary>
string Filename { get; }
/// <summary>
/// Additional descriptive data, keyed by metadata key
/// </summary>
Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; }
/// <summary>
/// Retrieves the binary representation for the <see cref="IDocument"/>
/// </summary>
/// <returns>Binary representation of the <see cref="IDocument"/></returns>
byte[] ToBytes();
/// <summary>
/// Retrieves the string representation for the <see cref="IDocument"/>
/// </summary>
/// <returns>String representation of the <see cref="IDocument"/></returns>
string ToString();
}
}
+7
View File
@@ -0,0 +1,7 @@
using System.Diagnostics.CodeAnalysis;
namespace Beam {
/// <summary>
/// Placeholder for extension methods on <see cref="IDocument"/>; it currently declares no members.
/// </summary>
public static class IDocumentExtensions {
}
}
+7
View File
@@ -0,0 +1,7 @@
using System.Text.Json;
namespace Beam {
/// <summary>
/// Metadata that can be attached to a document and rendered as JSON.
/// </summary>
public interface IDocumentMetaData {
/// <summary>
/// Returns a JSON representation of the metadata.
/// </summary>
/// <param name="options">Optional serializer options; may be <c>null</c>.</param>
/// <returns>The JSON text.</returns>
string AsJson(JsonSerializerOptions? options = null);
}
}
+11
View File
@@ -0,0 +1,11 @@
namespace Beam {
/// <summary>
/// Produces the links adjacent to a given link, and links at an arbitrary offset from it.
/// </summary>
internal interface IDocumentSourceLinkFactory {
    /// <summary>Returns the link that follows <paramref name="current"/>.</summary>
    DocumentSourceLink GetNextLink(DocumentSourceLink current);

    /// <summary>Returns the link that precedes <paramref name="current"/>.</summary>
    DocumentSourceLink GetPrecedingLink(DocumentSourceLink current);

    /// <summary>
    /// Returns the link <paramref name="offset"/> steps away from <paramref name="current"/>:
    /// forwards for a positive offset, backwards for a negative one, and
    /// <paramref name="current"/> itself for zero.
    /// </summary>
    /// <remarks>The default implementation recurses once per step.</remarks>
    DocumentSourceLink GetArbitraryLink(DocumentSourceLink current, int offset) {
        if (offset == 0)
            return current;
        if (offset > 0)
            return GetArbitraryLink(GetNextLink(current), offset - 1);
        return GetArbitraryLink(GetPrecedingLink(current), offset + 1);
    }
}
}
+6
View File
@@ -0,0 +1,6 @@
namespace Beam {
/// <summary>
/// Downloads one unit of work - a batch of ordered links - and produces a <typeparamref name="T"/>.
/// </summary>
public interface IUnitDownloader<T> {
/// <summary>
/// Number of links per download call.
/// NOTE(review): presumably the expected length of the array passed to TryDownload - confirm against implementations.
/// </summary>
public int LinksPerDownload { get; }
/// <summary>
/// Attempts to download the given links.
/// </summary>
/// <param name="link">The links to download, each paired with its order.</param>
/// <param name="ct">Token used to cancel the attempt.</param>
/// <param name="maximumRetryCount">Retry limit; defaults to 7.</param>
/// <param name="tryProgress">Optional reporter that receives an integer (presumably the attempt number - verify against implementations).</param>
/// <returns>
/// A tuple of a flag and a result. NOTE(review): the flag presumably signals success and the
/// result may be null on failure - confirm against implementations.
/// </returns>
public Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<int>? tryProgress = null);
}
}
+17
View File
@@ -0,0 +1,17 @@
namespace Beam {
/// <summary>
/// Defines how a url template should should be updated, in what order, and by how much
/// </summary>
public struct IncrementationBehaviour {
public Dictionary<int, Incrementor> Map { get; set; }
public readonly void Apply(object[] objects, int amount) {
foreach(var (i, inc) in Map) {
if (i < objects.Length)
objects[i] = inc(objects[i], amount)?.ToString();
else
throw new S.MapException(S.M.MapDoesNotMatchArgs);
}
}
}
}
+18
View File
@@ -0,0 +1,18 @@
namespace Beam {
/// <summary>
/// A URL template together with the position of the template argument that is incremented
/// from one link to the next.
/// </summary>
public struct PackagedSourceLinkGenerationData {
    /// <summary>Composite format string (as accepted by <see cref="string.Format(string, object[])"/>) for the link.</summary>
    public string Template { get; set; }

    /// <summary>
    /// Position of the incremented argument. A negative value yields an empty behaviour.
    /// NOTE(review): -1 appears to be used as a "no chapter index" marker by callers - confirm.
    /// </summary>
    public int IndexOfChapterIndex { get; set; }

    /// <summary>Formats <see cref="Template"/> with <paramref name="ps"/> and wraps the result in a link.</summary>
    /// <param name="ps">The template arguments.</param>
    public readonly DocumentSourceLink GenerateLink(params object[] ps)
        => new(string.Format(Template, ps));

    /// <summary>
    /// Builds the behaviour that adds the step amount to the integer found at
    /// <see cref="IndexOfChapterIndex"/>.
    /// </summary>
    /// <returns>
    /// A behaviour with one map entry for a non-negative index, or with an empty map for a
    /// negative index (a negative position can never match an argument array).
    /// </returns>
    public IncrementationBehaviour GetBehaviour() {
        var map = new Dictionary<int, Incrementor>();
        if (IndexOfChapterIndex >= 0) {
            map.Add(
                IndexOfChapterIndex,
                // The argument is parsed from its string form; a null argument or null string
                // form is reported as ArgumentException.
                (x, i) => int.Parse(x?.ToString() ?? throw new ArgumentException("Cannot increment a template argument that has no value.")) + i);
        }
        return new IncrementationBehaviour() {
            Map = map
        };
    }
}
}
+78
View File
@@ -0,0 +1,78 @@
using HtmlAgilityPack;
using System.Collections;
using System.Collections.Concurrent;
namespace Beam {
/// <summary>A value paired with its position in a sequence.</summary>
public record Ordered<T>(T Data, int Order);
/// <summary>
/// Asynchronous enumerator that downloads links in parallel into an internal bag and hands
/// the results out one at a time. Results are taken from an unordered bag; each carries its
/// own <c>Order</c>.
/// </summary>
[Obsolete("Use chunk downloader instead.")]
public class ParallelDownloader<T>(DownloadContext<T> context, int maximumConcurrentDownloads = 4) : IAsyncEnumerator<Ordered<T>> {
/// <summary>The context supplying links, web client, transformer and reporters.</summary>
public DownloadContext<T> Context { get; } = context;
/// <summary>Upper bound on simultaneous downloads; also determines the chunk size (twice this value).</summary>
public int MaximumConcurrentDownloads { get; } = maximumConcurrentDownloads;
// The running fill task, or null when no fill is in progress.
private Task? CacheFiller { get; set; }
// Total number of successful downloads; incremented in FillCache and not read in this class.
private int Count = 0;
// Downloaded results waiting to be handed out by MoveNextAsync.
private ConcurrentBag<Ordered<T>> Cache { get; set; } = [];
/// <summary>The most recently taken result; not assigned until MoveNextAsync first succeeds.</summary>
public Ordered<T> Current { get; set; }
// Creates a unit downloader from the context's web client, async transformer and failure predicates.
private UnitDownloader<T> GetUnitDownloader()
=> new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
// Parallel options carrying the context's cancellation token and the concurrency limit.
private ParallelOptions GetOptions()
=> new() {
CancellationToken = Context.CancellationToken,
MaxDegreeOfParallelism = MaximumConcurrentDownloads
};
// Downloads one chunk of links in parallel and adds the successful results to the cache.
private async Task FillCache() {
List<Ordered<DocumentSourceLink>> chunk = [];
int i = 0;
// NOTE(review): every call takes the FIRST MaximumConcurrentDownloads * 2 links of
// Context.Links and numbers them from 0 - whether later calls see new links depends on
// how Context.Links enumerates; verify.
foreach (var link in Context.Links.Take(MaximumConcurrentDownloads * 2))
chunk.Add(new Ordered<DocumentSourceLink>(link, i++));
// NOTE(review): Aggregate without a seed throws on an empty sequence, so an empty chunk
// fails here before any download starts.
Console.WriteLine(chunk.Select((x) => $"{x.Order}: {x.Data.Link}").Aggregate((x, y) => $"{x}\n{y}"));
var unitDownloader = GetUnitDownloader();
int downloadedCount = 0;
await Parallel.ForEachAsync(chunk, GetOptions(), async (x, ct) => {
var (result, doc) = await unitDownloader.TryDownload([new Ordered<string>(x.Data.Link.ToString(), x.Order)], ct, tryProgress: Context.RetryReporter);
if (!result || doc is null) {
Console.WriteLine($"FAILED to download {x.Data.Link}");
return;
}
Cache.Add(new(doc, x.Order));
Context.DownloadReporter?.Report(doc);
Interlocked.Increment(ref downloadedCount);
Interlocked.Increment(ref Count);
});
Console.WriteLine("Downloaded Chunk");
// Allow MoveNextAsync to start the next fill.
CacheFiller = null;
}
/// <summary>
/// Polls the cache until a result is available or the accumulated wait reaches the context's time-out.
/// </summary>
/// <returns><c>true</c> when a result was taken into <see cref="Current"/>; <c>false</c> on time-out.</returns>
public async ValueTask<bool> MoveNextAsync() {
// The wait is counted in fixed 10 ms steps rather than measured with a clock.
TimeSpan waited = TimeSpan.Zero;
TimeSpan delta = TimeSpan.FromSeconds(0.01);
while(waited < Context.TimeOut) {
// Start a fill when the cache runs low and none is in progress.
if (Cache.Count < MaximumConcurrentDownloads && CacheFiller is null) // strange
CacheFiller ??= FillCache();
Cache.TryTake(out var k);
if (k is not null) {
Current = k;
return true;
}
waited += delta;
await Task.Delay(delta);
}
return false;
}
/// <summary>Suppresses finalization and completes synchronously; nothing else is released here.</summary>
public ValueTask DisposeAsync() {
GC.SuppressFinalize(this);
return ValueTask.CompletedTask;
}
}
}
+8
View File
@@ -0,0 +1,8 @@
using System.Text.RegularExpressions;
namespace Beam {
/// <summary>
/// Holder for source-generated regular expressions.
/// </summary>
internal partial class RegexGenerated {
/// <summary>
/// Matches a curly-bracketed parameter: an opening brace, zero or more digits, and a closing
/// brace, where the opening brace is not preceded by another opening brace and the closing
/// brace is not followed by another closing brace. Because the digits are optional, an empty
/// pair of braces matches as well.
/// </summary>
[GeneratedRegex("(?<!{){\\d*}(?!})")]
public static partial Regex CurlyBracketedParameters();
}
}
+39
View File
@@ -0,0 +1,39 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Container for the project's custom exception types and shared message strings.
/// </summary>
internal class S {
/// <summary>
/// Argument exception with the usual message / inner-exception constructors.
/// </summary>
// NOTE(review): presumably thrown when a map refers to argument indices that were not
// supplied (see M.MapDoesNotMatchArgs) — the throw site is not in this file; confirm.
// NOTE(review): the SerializationInfo-based base constructors are reported as obsolete
// (SYSLIB0051) on .NET 8 and later — confirm whether this constructor is still needed.
[Serializable]
public class MapException : ArgumentException {
public MapException() { }
public MapException(string message) : base(message) { }
public MapException(string message, Exception inner) : base(message, inner) { }
protected MapException(
System.Runtime.Serialization.SerializationInfo info,
System.Runtime.Serialization.StreamingContext context) : base(info, context) { }
}
/// <summary>
/// Signals a violated internal invariant, i.e. a condition that should never occur.
/// </summary>
// NOTE(review): same SYSLIB0051 consideration as MapException for the serialization constructor.
[Serializable]
public class AssertionException : Exception {
public AssertionException() { }
public AssertionException(string message) : base(message) { }
public AssertionException(string message, Exception inner) : base(message, inner) { }
protected AssertionException(
System.Runtime.Serialization.SerializationInfo info,
System.Runtime.Serialization.StreamingContext context) : base(info, context) { }
}
/// <summary>
/// Message constants used when throwing the exceptions above and other argument errors.
/// </summary>
public class M {
// Text for a map whose indices go beyond the supplied argument list.
public const string MapDoesNotMatchArgs = "Error; Map contains indicies that exceed the argument list passed.";
// Text for the assertion that a newly created fragment must be lockable.
public const string NewFragmentShouldBeFree = "Assertion Error: Could not acquire lock of newly created fragment";
// Text for constructing a downloader with no links.
public const string LinksCannotBeEmpty = "Cannot construct downloader with empty links collection!";
}
}
}
+100
View File
@@ -0,0 +1,100 @@
using Microsoft.Extensions.Logging;
using System.Collections.Concurrent;
namespace Beam {
/// <summary>
/// A <see cref="SequentialDownloader{TInput, TOutput}"/> specialised to produce fragments of
/// ordered items. It adds no behaviour of its own; all arguments are forwarded to the base class.
/// </summary>
/// <typeparam name="T">Type of the items held in each fragment.</typeparam>
/// <param name="context">Download context passed to the base downloader.</param>
/// <param name="getUnitDownloader">Factory creating the unit downloader from the context.</param>
/// <param name="logger">Optional logger passed to the base downloader.</param>
public class SequentialFragmentDownloader<T>(
    DownloadContext<T> context,
    Func<DownloadContext<T>, IUnitDownloader<Fragment<Ordered<T>>>> getUnitDownloader,
    ILogger? logger = null)
    : SequentialDownloader<T, Fragment<Ordered<T>>>(context, getUnitDownloader, logger) {
}
// public class SequentialChunkDownloader<T> : IAsyncEnumerator<Fragment<Ordered<T>>> {
// public Fragment<Ordered<T>> Current { get; protected set; }
// public DownloadContext<T> Context { get; }
// protected IEnumerator<DocumentSourceLink> LinksEnumerator;
// protected ConcurrentQueue<Fragment<Ordered<T>>> DownloadQueue { get; set; } = [];
// public int ChunkSize { get; }
// private ILogger? Logger => Context.DownloadLogger;
// public UnitDownloader<T> GetUnitDownloader()
// => new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
// public SequentialChunkDownloader(DownloadContext<T> context, int chunkSize) {
// Context = context;
// LinksEnumerator = Context.Links.GetEnumerator();
// Current = new Fragment<Ordered<T>>(0);
// ChunkSize = chunkSize;
// }
// public ValueTask DisposeAsync() {
// GC.SuppressFinalize(this);
// return ValueTask.CompletedTask;
// }
// protected Task<bool>? DownloadsTask = null;
// protected virtual async Task<bool> ProcessDownloads() {
// if (DownloadQueue.IsEmpty)
// return true;
// if (DownloadsTask is null) {
// DownloadsTask = Task.Run(async () => {
// if (!DownloadQueue.TryDequeue(out var fragment))
// return true; // no fragments left, likely race condition but return true as technically all items have been downloaded
// var unit = GetUnitDownloader(); // instantiates unit downloader per request (okay)
// if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater)) { // gets the add method for the current fragment
// Logger?.LogError("Failed to acquire updater for fragment {{{}}}", fragment.GetHashCode());
// return false; // fragment is unsafe to modify
// }
// try {
// var links = Enumerable.Range(0, ChunkSize).Select((x) => {
// if (!LinksEnumerator.MoveNext())
// return new Ordered<DocumentSourceLink>(DocumentSourceLink.InvalidLink, -1); // stops link collection if end-of-links is reached
// return new Ordered<DocumentSourceLink>(LinksEnumerator.Current, x);
// }).Where((x) => x.Data != DocumentSourceLink.InvalidLink); // filter invalid links
// await Parallel.ForEachAsync(links, async (x, ct) => {
// Logger?.LogInformation("Started download for {} order={}", x.Data.Link, x.Order);
// var (result, downloadedT) = await unit.TryDownload( // download (parallel) objects
// x.Data.Link.ToString(), // use link from links collection (exposed as x)
// ct, // use ct provided with method call
// tryProgress: Context.RetryReporter);
// if (!result) { // download failure (soft because it was detected)
// Logger?.LogError("Failed to retrieve {} order={}", x.Data.Link, x.Order);
// return;
// }
// if (downloadedT is null) { // download failure (hard because it was not detected)
// Logger?.LogCritical("Failed to retrieve {} order={}", x.Data.Link, x.Order);
// return;
// }
// Logger?.LogInformation("Retrieved {} order={} successfully", x.Data.Link, x.Order);
// updater(new Ordered<T>(downloadedT, x.Order)); // update the fragment
// });
// Fragment<Ordered<T>>.SetComplete(fragment, true);
// } finally {
// Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater); // returns updater to allow modification
// }
// return fragment.Size == fragment.MaxSize;
// });
// }
// if (DownloadsTask.IsCompleted) {
// DownloadsTask = null;
// return await ProcessDownloads();
// }
// return true; // if task is still processing return should be neither true or false...
// }
// public async ValueTask<bool> MoveNextAsync() {
// if (Current.IsComplete && Current.Size < Current.MaxSize)
// return false; // if a fragment is marked complete despite being unsaturated, we've run out links!
// if (DownloadQueue.Count == 0) {
// Current = new Fragment<Ordered<T>>(ChunkSize);
// DownloadQueue.Enqueue(Current);
// }
// return await ProcessDownloads();
// }
// }
}
+63
View File
@@ -0,0 +1,63 @@
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam {
/// <summary>
/// Asynchronous enumerator that walks the context's links in order and, on every step,
/// downloads one unit built from the next <c>LinksPerDownload</c> links.
/// </summary>
/// <typeparam name="TInput">Type parameter of the download context.</typeparam>
/// <typeparam name="TOutput">Type of the unit produced by each step.</typeparam>
public class SequentialDownloader<TInput, TOutput> : IAsyncEnumerator<TOutput> {
    /// <summary>The unit produced by the last successful <see cref="MoveNextAsync"/> call.</summary>
    public TOutput Current { get; protected set; }
    public DownloadContext<TInput> Context { get; }
    public ILogger? Logger { get; set; }
    /// <summary>Order number that will be assigned to the next link handed to a unit downloader.</summary>
    public int LastOrder { get; set; } = 0;
    protected IEnumerator<DocumentSourceLink> LinksEnumerator;
    // True while LinksEnumerator is positioned on a link that has not been consumed yet.
    private bool HasPendingLink;
    public Func<IUnitDownloader<TOutput>> GetUnitDownloader { get; set; }

    /// <summary>
    /// Creates the downloader and positions the link enumerator on the first link.
    /// </summary>
    /// <param name="context">Download context supplying links, cancellation token and retry reporter.</param>
    /// <param name="getUnitDownloader">Factory invoked on every step to create the unit downloader.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentOutOfRangeException">The context contains no links.</exception>
    public SequentialDownloader(DownloadContext<TInput> context, Func<DownloadContext<TInput>, IUnitDownloader<TOutput>> getUnitDownloader, ILogger? logger = null) {
        Context = context;
        Logger = logger;
        LinksEnumerator = Context.Links.GetEnumerator();
        // NOTE(review): Reset is optional for enumerators and may throw NotSupportedException
        // (iterator methods do) — confirm every Links implementation supports it.
        LinksEnumerator.Reset();
        if (!LinksEnumerator.MoveNext())
            // The single-string constructor takes the parameter name, not the message,
            // so use the (paramName, message) overload.
            throw new ArgumentOutOfRangeException(nameof(context), S.M.LinksCannotBeEmpty);
        HasPendingLink = true;
        Current = default!;
        GetUnitDownloader = () => getUnitDownloader(Context);
    }

    public ValueTask DisposeAsync() {
        GC.SuppressFinalize(this);
        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Collects the next batch of links and downloads it as one unit.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a unit was downloaded and stored in <see cref="Current"/>;
    /// <c>false</c> when the links are exhausted or the download failed.
    /// </returns>
    public async ValueTask<bool> MoveNextAsync() {
        var unit = GetUnitDownloader(); // safe to instantiate per request.
        // At least one link is always taken per step, as before.
        var idealLinkCount = Math.Max(1, unit.LinksPerDownload);

        // Only read Current while the enumerator is on a valid element; after MoveNext returns
        // false, Current is undefined and the old code kept reading it, so "out of links" was
        // never detected.
        List<Ordered<string>> links = [];
        while (HasPendingLink && links.Count < idealLinkCount) {
            links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
            HasPendingLink = LinksEnumerator.MoveNext();
        }

        if (links.Count == 0) {
            Logger?.LogInformation("Out of links!");
            return false;
        }

        var (result, downloadedT) = await unit.TryDownload(
            links.ToArray(),
            Context.CancellationToken,
            tryProgress: Context.RetryReporter);

        // A reported failure and a missing result are treated alike: unit download failed.
        if (!result || downloadedT is null) {
            Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name);
            return false;
        }

        Current = downloadedT;
        return true;
    }
}
}
+27
View File
@@ -0,0 +1,27 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Adapts an existing link enumerator to <see cref="IEnumerable{T}"/>.
/// Every call to <c>GetEnumerator</c> returns the same wrapped enumerator instance,
/// not a fresh one.
/// </summary>
public class SourceLinkEnumerable : IEnumerable<DocumentSourceLink> {
    /// <summary>The wrapped enumerator.</summary>
    public IEnumerator<DocumentSourceLink> Enumerator { get; }

    private SourceLinkEnumerable(IEnumerator<DocumentSourceLink> enumerator) => Enumerator = enumerator;

    /// <summary>Wraps <paramref name="generator"/> in a new enumerable.</summary>
    /// <param name="generator">The enumerator to expose.</param>
    /// <returns>An enumerable backed by <paramref name="generator"/>.</returns>
    public static SourceLinkEnumerable FromGenerator(IEnumerator<DocumentSourceLink> generator) {
        var wrapper = new SourceLinkEnumerable(generator);
        return wrapper;
    }

    /// <summary>Returns the wrapped enumerator.</summary>
    public IEnumerator<DocumentSourceLink> GetEnumerator() => Enumerator;

    IEnumerator IEnumerable.GetEnumerator() => Enumerator;
}
}
+18
View File
@@ -0,0 +1,18 @@
using System.Text;
namespace Beam {
/// <summary>
/// A document whose content comes from a <see cref="Stream"/>.
/// The stream is read once, on first access, and the bytes are cached.
/// </summary>
/// <param name="filename">File name passed to the base document.</param>
/// <param name="content">Stream holding the document content.</param>
/// <param name="encoding">Encoding used by <see cref="ToString"/>; UTF-8 when omitted.</param>
internal class StreamDocument(string filename, Stream content, Encoding? encoding = null) : Document(filename) {
    private Stream ContentStream = content;

    /// <summary>The source stream. Assigning a new stream discards the cached bytes.</summary>
    public Stream Content {
        get => ContentStream;
        set {
            ContentStream = value;
            Content_ = null;
        }
    }

    public Encoding Encoding { get; set; } = encoding ?? Encoding.UTF8;

    // Bytes read from Content; null until the stream has been read.
    // (Previously this was initialised to an empty array and never filled, so the document
    // always appeared empty.)
    byte[]? Content_ { get; set; }

    /// <summary>Returns the full content of the stream as bytes.</summary>
    public override byte[] ToBytes() {
        return Content_ ??= ReadAll(Content);
    }

    /// <summary>Returns the stream content decoded with <see cref="Encoding"/>.</summary>
    public override string ToString() {
        return Encoding.GetString(ToBytes());
    }

    // Copies the whole stream into a byte array, rewinding first when the stream allows it
    // so that a stream left at its end (e.g. just written) still yields its content.
    private static byte[] ReadAll(Stream source) {
        if (source.CanSeek)
            source.Position = 0;
        using var buffer = new MemoryStream();
        source.CopyTo(buffer);
        return buffer.ToArray();
    }
}
}
+19
View File
@@ -0,0 +1,19 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// A document whose content is held as an in-memory string.
/// </summary>
/// <param name="filename">File name passed to the base document.</param>
/// <param name="content">Initial text of the document.</param>
/// <param name="encoding">Encoding passed to the base document.</param>
public class StringDocument(string filename, string content, Encoding? encoding = null) : Document(filename, encoding) {
    /// <summary>The document text.</summary>
    public string Content { get; set; } = content;

    /// <summary>Encodes <see cref="Content"/> to bytes.</summary>
    // NOTE(review): Encoding here is presumably the instance member inherited from Document — confirm.
    public override byte[] ToBytes() => Encoding.GetBytes(Content);

    /// <summary>Returns <see cref="Content"/> unchanged.</summary>
    public override string ToString() => Content;
}
}
+66
View File
@@ -0,0 +1,66 @@
using HtmlAgilityPack;
namespace Beam {
/// <summary>
/// Asynchronous check run against a finished download; a result of <c>true</c> marks the
/// download as failed.
/// </summary>
/// <typeparam name="T">Type of the downloaded object being inspected.</typeparam>
/// <param name="download">The downloaded object to inspect.</param>
public delegate Task<bool> AsyncDownloadFailurePredicate<in T>(T download);
/// <summary>
/// A download managing class that manages a singular download with failure-detection and exponential-backoff retries. This class is safe to instantiate per request.
/// </summary>
/// <typeparam name="T">Type produced by transforming the downloaded HTML document.</typeparam>
/// <param name="web">Client used to load the HTML document.</param>
/// <param name="transformer">Converts the loaded HTML document into a <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional checks; if any returns <c>true</c> the download counts as failed. Null entries are skipped.</param>
public class UnitDownloader<T>(HtmlWeb web, AsyncHtmlTransformer<T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
    public HtmlWeb Web { get; } = web;
    public virtual AsyncHtmlTransformer<T> Transformer { get; } = transformer;
    public virtual AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicates { get; } = failurePredicate;
    /// <summary>This downloader consumes one link per download.</summary>
    public int LinksPerDownload { get; } = 1;

    /// <summary>
    /// Runs the failure predicates against <paramref name="doc"/>.
    /// </summary>
    /// <returns><c>true</c> if at least one predicate reported a failure.</returns>
    protected virtual async Task<bool> IsFailure(HtmlDocument doc) {
        if (FailurePredicates is null)
            return false;
        var failed = false;
        await Parallel.ForEachAsync(FailurePredicates, async (x, ct) => {
            // Skip remaining work once a failure is known; the flag only ever goes to true.
            if (failed == true)
                return;
            if (x is null)
                return;
            if (await x(doc))
                failed = true;
        });
        return failed;
    }

    /// <summary>
    /// Performs a single download attempt: load, check for failure, transform.
    /// </summary>
    /// <returns>
    /// <c>(true, value)</c> on success; <c>(false, default)</c> when a predicate flags the
    /// document or any exception is thrown.
    /// </returns>
    protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var html = await Web.LoadFromWebAsync(link, ct);
            if (FailurePredicates is null || !(await IsFailure(html)))
                return (true, await Transformer(html));
            else
                return (false, default);
        } catch(Exception) {
            // Any exception is reported as a failed attempt so the caller can retry.
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads the first link of <paramref name="link"/>, retrying with exponential back-off.
    /// </summary>
    /// <param name="link">Links to download; only the first element is used.</param>
    /// <param name="ct">Token checked before every attempt and observed during the back-off wait.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts.</param>
    /// <param name="tryProgress">Receives the number of failed attempts so far after each failure.</param>
    /// <returns><c>(true, value)</c> on success; <c>(false, last value)</c> once all attempts failed or no link was given.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<int>? tryProgress = null) {
        if (link.Length == 0)
            return (false, default);
        T? doc = default;
        int tryCount = 0;
        while (tryCount < maximumRetryCount) {
            ct.ThrowIfCancellationRequested();
            (var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct);
            if (success && doc != null)
                return (true, doc);
            tryProgress?.Report(++tryCount);
            // No attempt follows the last failure, so do not sit out another back-off
            // period (2^7 s with the default count) before reporting failure.
            if (tryCount >= maximumRetryCount)
                break;
            // Wait 2^tryCount seconds; passing ct lets cancellation interrupt the wait.
            await Task.Delay((int)Math.Pow(2, tryCount) * 1000, ct);
        }
        return (false, doc);
    }
}
}
+64
View File
@@ -0,0 +1,64 @@
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Unit downloader that fetches a batch of links in parallel and gathers the results
/// into a single fragment. Each link is downloaded through an inner <see cref="UnitDownloader{T}"/>.
/// </summary>
/// <typeparam name="T">Type of the items stored in the fragment.</typeparam>
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
    // Performs the individual per-link downloads.
    private readonly UnitDownloader<T> Inner;

    public HtmlWeb Web { get; }
    public AsyncHtmlTransformer<T> Transformer { get; }
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }
    /// <summary>Number of links expected per fragment.</summary>
    public int LinksPerDownload { get; set; }
    public ILogger? Logger { get; set; }

    /// <summary>Creates the fragment downloader and its inner per-link downloader.</summary>
    /// <param name="web">Client used to load HTML documents.</param>
    /// <param name="transformer">Converts a loaded document into a <typeparamref name="T"/>.</param>
    /// <param name="failurePredicate">Optional failure checks forwarded to the inner downloader.</param>
    /// <param name="fragmentSize">Value exposed as <see cref="LinksPerDownload"/>.</param>
    /// <param name="logger">Optional logger for per-link failures.</param>
    public UnitFragmentDownloader(HtmlWeb web,
        AsyncHtmlTransformer<T> transformer,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
        int fragmentSize = 4,
        ILogger? logger = null) {
        Web = web;
        Transformer = transformer;
        FailurePredicate = failurePredicate;
        LinksPerDownload = fragmentSize;
        Logger = logger;
        Inner = new UnitDownloader<T>(web, transformer, failurePredicate);
    }

    /// <summary>
    /// Downloads every link in parallel and adds each result to a new fragment sized to the batch.
    /// The fragment is marked complete only if no link failed.
    /// </summary>
    /// <returns>Whether all links succeeded, together with the (possibly partial) fragment.</returns>
    /// <exception cref="S.AssertionException">The updater of the new fragment could not be acquired.</exception>
    async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<int>? tryProgress) {
        var fragment = new Fragment<Ordered<T>>(link.Length);
        if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater))
            throw new S.AssertionException(S.M.NewFragmentShouldBeFree);

        bool anyFailed = false;
        await Parallel.ForEachAsync(link, async (item, loopToken) => {
            loopToken.ThrowIfCancellationRequested();
            ct.ThrowIfCancellationRequested();

            var (ok, payload) = await Inner.TryDownload([item], ct, maximumRetryCount, tryProgress);
            if (ok && payload != null) {
                updater(new Ordered<T>(payload, item.Order));
                return;
            }

            // Record the failure first, then log: a reported failure is an error,
            // a "success" without a value is logged as critical.
            Interlocked.Exchange(ref anyFailed, true);
            if (!ok)
                Logger?.LogError("Failed to retrieve {} order={}", item.Data, item.Order);
            else
                Logger?.LogCritical("Failed to retrieve {} order={}", item.Data, item.Order);
        });

        if (!anyFailed)
            Fragment<Ordered<T>>.SetComplete(fragment, true);
        Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
        return (!anyFailed, fragment);
    }
}
}