From bfdcdb1f3b259cccd64e06b5929146dc216c83c5 Mon Sep 17 00:00:00 2001 From: aeqw89 Date: Sat, 19 Apr 2025 20:47:58 +0300 Subject: [PATCH] Add project files. --- Beam.Dynamic/Beam.Dynamic.csproj | 22 +++ Beam.Dynamic/Binding.cs | 68 +++++++++ Beam.Dynamic/BindingType.cs | 7 + Beam.Dynamic/DataBindings.cs | 32 ++++ Beam.Dynamic/HtmlNodeExtensions.cs | 70 +++++++++ Beam.Dynamic/IDataProvider.cs | 10 ++ Beam.Dynamic/ListContentDataProvider.cs | 31 ++++ Beam.Dynamic/OnlineCleaner.cs | 36 +++++ .../ParagraphedContentDataProvider.cs | 35 +++++ Beam.Exports/Beam.Exports.csproj | 13 ++ Beam.Exports/Exporter.cs | 37 +++++ Beam.Exports/HtmlExporter.cs | 42 +++++ Beam.Exports/IAsyncExporter.cs | 10 ++ Beam.Exports/IExporter.cs | 19 +++ Beam.Exports/IStreamExporter.cs | 11 ++ Beam.Exports/S.cs | 13 ++ Beam.Temporary.Cli/Architecture.cs | 52 +++++++ Beam.Temporary.Cli/Beam.Temporary.Cli.csproj | 33 ++++ Beam.Temporary.Cli/CssData.cs | 30 ++++ Beam.Temporary.Cli/DataKeyExtensions.cs | 34 +++++ Beam.Temporary.Cli/File.cs | 6 + Beam.Temporary.Cli/HtmlBook.cs | 132 ++++++++++++++++ Beam.Temporary.Cli/HtmlBookTemplates.cs | 8 + Beam.Temporary.Cli/MainArchitecture.cs | 79 ++++++++++ Beam.Temporary.Cli/NovelStatics.cs | 144 ++++++++++++++++++ Beam.Temporary.Cli/Program.cs | 135 ++++++++++++++++ Beam.Temporary.Cli/SharedDataDictionary.cs | 48 ++++++ Beam.Temporary.Cli/StringExtensions.cs | 15 ++ .../Templates/Classic/ClassicTemplates.cs | 30 ++++ .../Templates/Classic/Content.template.html | 27 ++++ .../Templates/Classic/NoContent.template.html | 15 ++ .../Templates/Classic/Styles.template.css | 60 ++++++++ .../Templates/Classic/Title.template.html | 26 ++++ Beam.Temporary.Cli/TextResource.cs | 26 ++++ Beam.Temporary.Cli/Tracked.cs | 11 ++ Beam.Temporary.Cli/WebResource.cs | 28 ++++ Beam.sln | 40 +++++ Beam/ArticleData.cs | 21 +++ Beam/Beam.csproj | 25 +++ Beam/ByteDocument.cs | 15 ++ Beam/DataBackedSourceLinkGenerator.cs | 9 ++ Beam/DelegateBackedSourceLinkGenerator.cs | 48 ++++++ 
Beam/Document.cs | 13 ++ Beam/DocumentCache.cs | 57 +++++++ Beam/DocumentSourceLink.cs | 31 ++++ Beam/DownloadContext.cs | 89 +++++++++++ Beam/DownloadEnumerable.cs | 14 ++ Beam/Fragment.cs | 37 +++++ Beam/IAsyncEnumeratorExtensions.cs | 23 +++ Beam/IDocument.cs | 29 ++++ Beam/IDocumentExtensions.cs | 7 + Beam/IDocumentMetaData.cs | 7 + Beam/IDocumentSourceLinkFactory.cs | 11 ++ Beam/IUnitDownloader.cs | 6 + Beam/IncrementationBehaviour.cs | 17 +++ Beam/PackagedSourceLinkGenerationData.cs | 18 +++ Beam/ParallelDownloader.cs | 78 ++++++++++ Beam/RegexGenerated.cs | 8 + Beam/S.cs | 39 +++++ Beam/SequentialChunkDownloader.cs | 100 ++++++++++++ Beam/SequentialDownloader.cs | 63 ++++++++ Beam/SourceLinkEnumerable.cs | 27 ++++ Beam/StreamDocument.cs | 18 +++ Beam/StringDocument.cs | 19 +++ Beam/UnitDownloader.cs | 66 ++++++++ Beam/UnitFragmentDownloader.cs | 64 ++++++++ 66 files changed, 2394 insertions(+) create mode 100644 Beam.Dynamic/Beam.Dynamic.csproj create mode 100644 Beam.Dynamic/Binding.cs create mode 100644 Beam.Dynamic/BindingType.cs create mode 100644 Beam.Dynamic/DataBindings.cs create mode 100644 Beam.Dynamic/HtmlNodeExtensions.cs create mode 100644 Beam.Dynamic/IDataProvider.cs create mode 100644 Beam.Dynamic/ListContentDataProvider.cs create mode 100644 Beam.Dynamic/OnlineCleaner.cs create mode 100644 Beam.Dynamic/ParagraphedContentDataProvider.cs create mode 100644 Beam.Exports/Beam.Exports.csproj create mode 100644 Beam.Exports/Exporter.cs create mode 100644 Beam.Exports/HtmlExporter.cs create mode 100644 Beam.Exports/IAsyncExporter.cs create mode 100644 Beam.Exports/IExporter.cs create mode 100644 Beam.Exports/IStreamExporter.cs create mode 100644 Beam.Exports/S.cs create mode 100644 Beam.Temporary.Cli/Architecture.cs create mode 100644 Beam.Temporary.Cli/Beam.Temporary.Cli.csproj create mode 100644 Beam.Temporary.Cli/CssData.cs create mode 100644 Beam.Temporary.Cli/DataKeyExtensions.cs create mode 100644 Beam.Temporary.Cli/File.cs create mode 100644 
Beam.Temporary.Cli/HtmlBook.cs create mode 100644 Beam.Temporary.Cli/HtmlBookTemplates.cs create mode 100644 Beam.Temporary.Cli/MainArchitecture.cs create mode 100644 Beam.Temporary.Cli/NovelStatics.cs create mode 100644 Beam.Temporary.Cli/Program.cs create mode 100644 Beam.Temporary.Cli/SharedDataDictionary.cs create mode 100644 Beam.Temporary.Cli/StringExtensions.cs create mode 100644 Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs create mode 100644 Beam.Temporary.Cli/Templates/Classic/Content.template.html create mode 100644 Beam.Temporary.Cli/Templates/Classic/NoContent.template.html create mode 100644 Beam.Temporary.Cli/Templates/Classic/Styles.template.css create mode 100644 Beam.Temporary.Cli/Templates/Classic/Title.template.html create mode 100644 Beam.Temporary.Cli/TextResource.cs create mode 100644 Beam.Temporary.Cli/Tracked.cs create mode 100644 Beam.Temporary.Cli/WebResource.cs create mode 100644 Beam.sln create mode 100644 Beam/ArticleData.cs create mode 100644 Beam/Beam.csproj create mode 100644 Beam/ByteDocument.cs create mode 100644 Beam/DataBackedSourceLinkGenerator.cs create mode 100644 Beam/DelegateBackedSourceLinkGenerator.cs create mode 100644 Beam/Document.cs create mode 100644 Beam/DocumentCache.cs create mode 100644 Beam/DocumentSourceLink.cs create mode 100644 Beam/DownloadContext.cs create mode 100644 Beam/DownloadEnumerable.cs create mode 100644 Beam/Fragment.cs create mode 100644 Beam/IAsyncEnumeratorExtensions.cs create mode 100644 Beam/IDocument.cs create mode 100644 Beam/IDocumentExtensions.cs create mode 100644 Beam/IDocumentMetaData.cs create mode 100644 Beam/IDocumentSourceLinkFactory.cs create mode 100644 Beam/IUnitDownloader.cs create mode 100644 Beam/IncrementationBehaviour.cs create mode 100644 Beam/PackagedSourceLinkGenerationData.cs create mode 100644 Beam/ParallelDownloader.cs create mode 100644 Beam/RegexGenerated.cs create mode 100644 Beam/S.cs create mode 100644 Beam/SequentialChunkDownloader.cs create mode 
100644 Beam/SequentialDownloader.cs create mode 100644 Beam/SourceLinkEnumerable.cs create mode 100644 Beam/StreamDocument.cs create mode 100644 Beam/StringDocument.cs create mode 100644 Beam/UnitDownloader.cs create mode 100644 Beam/UnitFragmentDownloader.cs diff --git a/Beam.Dynamic/Beam.Dynamic.csproj b/Beam.Dynamic/Beam.Dynamic.csproj new file mode 100644 index 0000000..7dc4e64 --- /dev/null +++ b/Beam.Dynamic/Beam.Dynamic.csproj @@ -0,0 +1,22 @@ + + + + net9.0 + enable + enable + + + + + + + + + ..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll + + + ..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll + + + + diff --git a/Beam.Dynamic/Binding.cs b/Beam.Dynamic/Binding.cs new file mode 100644 index 0000000..c4897d6 --- /dev/null +++ b/Beam.Dynamic/Binding.cs @@ -0,0 +1,68 @@ + +using aeqw89.DataKeys; +using HtmlAgilityPack; +using System.Text.Json.Serialization; + +namespace Beam.Dynamic { + public class Binding(DataKey key) : IKeyed { + public Binding(string key) : this(new DataKey(key)) { } + public Binding() : this("") { } + + [JsonRequired] + public DataKey Key { get; set; } = key; + [JsonRequired] + public BindingType Type { get; set; } + + public string? ArrayDelimiters { get; set; } + public string? XPath { get; set; } + public string? CssPath { get; set; } + private IDataProvider? Provider_; + public IDataProvider? Provider { + get => Provider_; + set { + if (value is null) + return; + if (value is not IDataProvider) + throw new InvalidOperationException(); + var constructor = value.GetType().GetConstructor([]); + if (!constructor?.IsPublic ?? true) + throw new InvalidOperationException(); + Provider_ = value; + } + } + + public HtmlNode? 
ResolveNode(HtmlDocument doc) { + if (XPath is not null) + return doc.DocumentNode.SelectSingleNode(XPath); + if (CssPath is not null) + return doc.DocumentNode.ThenByClasses(CssPath.Split('/')); + if (Provider is not null) + return Provider.GetNode(doc); + return null; + } + + public string ResolveString(HtmlDocument doc) { + if (XPath is not null) + return doc.DocumentNode.SelectSingleNode(XPath)?.InnerText ?? ""; + if (CssPath is not null) + return doc.DocumentNode.ThenByClasses(CssPath.Split('/'))?.InnerText ?? ""; + if (Provider is not null) + return Provider.Get(doc); + return ""; + } + + public string[] ResolveArray(HtmlDocument doc) { + if (Type is not BindingType.Array) + return []; + var str = ResolveString(doc); + return str.Split(ArrayDelimiters); + } + + public dynamic? Resolve(HtmlDocument doc) => Type switch { + BindingType.Single => ResolveString(doc), + BindingType.Array => ResolveArray(doc), + BindingType.UseProvider => Provider?.Get(doc), + _ => null + }; + } +} diff --git a/Beam.Dynamic/BindingType.cs b/Beam.Dynamic/BindingType.cs new file mode 100644 index 0000000..d24edd5 --- /dev/null +++ b/Beam.Dynamic/BindingType.cs @@ -0,0 +1,7 @@ +namespace Beam.Dynamic { + public enum BindingType { + Single, + Array, + UseProvider + } +} diff --git a/Beam.Dynamic/DataBindings.cs b/Beam.Dynamic/DataBindings.cs new file mode 100644 index 0000000..d0c89c9 --- /dev/null +++ b/Beam.Dynamic/DataBindings.cs @@ -0,0 +1,32 @@ +using HtmlAgilityPack; + +namespace Beam.Dynamic { + public class DataBindings { + public Binding? Title { get; set; } + public Binding? Authors { get; set; } + public Binding? Description { get; set; } + public Binding? Content { get; set; } + public Binding? Language { get; set; } + public Binding? Tags { get; set; } + + public ResolvedBindings Resolve(HtmlDocument doc) { + return new ResolvedBindings() { + Title = Title?.Resolve(doc), + Authors = Authors?.Resolve(doc) ?? Array.Empty(), + Language = Language?.Resolve(doc) ?? 
Array.Empty(), + Content = Content?.Resolve(doc), + Description = Description?.Resolve(doc), + Tags = Tags?.Resolve(doc) ?? Array.Empty() + }; + } + } + + public class ResolvedBindings { + public string? Title { get; set; } + public string[]? Authors { get; set; } + public string? Description { get; set; } + public string? Content { get; set; } + public string[]? Language { get; set; } + public string[]? Tags { get; set; } + } +} diff --git a/Beam.Dynamic/HtmlNodeExtensions.cs b/Beam.Dynamic/HtmlNodeExtensions.cs new file mode 100644 index 0000000..2e836e6 --- /dev/null +++ b/Beam.Dynamic/HtmlNodeExtensions.cs @@ -0,0 +1,70 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public static partial class HtmlNodeExtensions { + public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes) { + return node.DescendCollectionTree(ThenByClass, classes); + } + + public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1) { + return node.ThenByFunc((x) => x.ChildNodes.FirstOrDefault((x) => x.HasClass(@class)), count); + } + + public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1) { + return node.ThenByFunc((x) => x.ChildNodes.FirstOrDefault((x) => x.Name == name), count); + } + + public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name) { + return node.DescendCollectionTree(ThenByName, name); + } + + public static HtmlNode? DescendCollectionTree(this HtmlNode node, Func func, params T[] values) { + return node.DescendCollectionTree((a, b) => func(a, b, 1), values); + } + + public static HtmlNode? DescendCollectionTree(this HtmlNode node, Func func, params T[] values) { + HtmlNode? 
result = node; + foreach (var value in values) { + if (result is null) + return result; + result = func(result, value); + } + + return result; + } + + public static HtmlNode? ThenByFunc(this HtmlNode node, Func func, int count = 1) { + var ret = func(node); + if (count <= 1) + return ret; + return ret?.ThenByFunc(func, count - 1); + } + + public static string[]? SplitInnerText(this HtmlNode? node, string separators) + => node?.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries); + + public static bool TextNodesOnly(HtmlNode n) => TextNodesOnlyRegex().Match(n.Name).Success; + + public static string InnerLineSeparatedText(this HtmlNode? node, Func? filter = null) { + if (node?.ChildNodes is null || node?.ChildNodes.Count == 0) + return ""; + return node?.ChildNodes + .Where(filter ?? ((x) => true)) + .DefaultIfEmpty() + .Select((x) => x?.InnerText) + .Where((x) => !string.IsNullOrWhiteSpace(x)) + .DefaultIfEmpty() + .Aggregate((x, y) => $"{x}\n{y}") + ?? ""; + } + + [GeneratedRegex("p|h\\d")] + private static partial Regex TextNodesOnlyRegex(); + } +} diff --git a/Beam.Dynamic/IDataProvider.cs b/Beam.Dynamic/IDataProvider.cs new file mode 100644 index 0000000..e2e43b8 --- /dev/null +++ b/Beam.Dynamic/IDataProvider.cs @@ -0,0 +1,10 @@ +using HtmlAgilityPack; + +namespace Beam.Dynamic { + [System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20)] + [System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)] + public interface IDataProvider { + public string Get(HtmlDocument document); + public HtmlNode? 
GetNode(HtmlDocument document); + } +} \ No newline at end of file diff --git a/Beam.Dynamic/ListContentDataProvider.cs b/Beam.Dynamic/ListContentDataProvider.cs new file mode 100644 index 0000000..9ecea32 --- /dev/null +++ b/Beam.Dynamic/ListContentDataProvider.cs @@ -0,0 +1,31 @@ +using HtmlAgilityPack; +using System.Text; + +namespace Beam.Dynamic { + public class ListContentDataProvider : IDataProvider { + public Binding? Content { get; set; } + + public string Get(HtmlDocument document) { + if (Content is null) + return ""; + + var node = Content.ResolveNode(document); + if (node is null) + return ""; + + StringBuilder content = new(); + foreach(var childNode in node.ChildNodes.SkipLast(1)) { + if (childNode.Name != "li") + continue; + content.Append(childNode.InnerText.Trim() + ";"); + } + + content.Append(node.ChildNodes.Last().InnerText.Trim()); + return content.ToString(); + } + + public HtmlNode? GetNode(HtmlDocument document) { + return Content?.ResolveNode(document); + } + } +} diff --git a/Beam.Dynamic/OnlineCleaner.cs b/Beam.Dynamic/OnlineCleaner.cs new file mode 100644 index 0000000..6ccd781 --- /dev/null +++ b/Beam.Dynamic/OnlineCleaner.cs @@ -0,0 +1,36 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using System.Web; + +namespace Beam.Dynamic { + public static partial class OnlineCleaner { + [GeneratedRegex("&#x?[\\d\\w]{1,4};")] + public static partial Regex MochaBlendUnicodeEscapeSequence(); + + private static string UnicodeEscapeSequences(string text) { + return MochaBlendUnicodeEscapeSequence().Replace(text, (x) => { + int numOfDigits = x.Value.Length - 3; + int sequence = 0; + if (x.Value[2] == 'x') + sequence = int.Parse(x.Value[3..(3 + (numOfDigits - 1))], System.Globalization.NumberStyles.HexNumber); + else + sequence = int.Parse(x.Value[2..(2 + numOfDigits)]); + var uni = 
Encoding.Unicode.GetString(BitConverter.GetBytes(sequence)); + return uni.Length == 1 ? uni : uni[0].ToString(); + }); + } + + public static string Clean(string? onlineText) { + if (string.IsNullOrWhiteSpace(onlineText)) + return ""; + var decoded = HttpUtility.HtmlDecode(onlineText); + var escaped = UnicodeEscapeSequences(onlineText); + return escaped; + } + } +} diff --git a/Beam.Dynamic/ParagraphedContentDataProvider.cs b/Beam.Dynamic/ParagraphedContentDataProvider.cs new file mode 100644 index 0000000..2262475 --- /dev/null +++ b/Beam.Dynamic/ParagraphedContentDataProvider.cs @@ -0,0 +1,35 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class ParagraphedContentDataProvider : IDataProvider { + public Binding? Content { get; set; } + + public string Get(HtmlDocument document) { + if (Content is null) + return ""; + + var node = Content.ResolveNode(document); + if (node is null) + return ""; + + StringBuilder content = new(); + foreach(var childNode in node.ChildNodes) { + if (childNode.Name != "p") + continue; + content.AppendLine(childNode.InnerText); + } + + return content.ToString(); + } + + public HtmlNode? 
GetNode(HtmlDocument document) { + return Content?.ResolveNode(document); + } + + } +} diff --git a/Beam.Exports/Beam.Exports.csproj b/Beam.Exports/Beam.Exports.csproj new file mode 100644 index 0000000..9d971be --- /dev/null +++ b/Beam.Exports/Beam.Exports.csproj @@ -0,0 +1,13 @@ + + + + net9.0 + enable + enable + + + + + + + diff --git a/Beam.Exports/Exporter.cs b/Beam.Exports/Exporter.cs new file mode 100644 index 0000000..8a6dab1 --- /dev/null +++ b/Beam.Exports/Exporter.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Exports { + public class PlainTextExporter : IExporter, IAsyncExporter { + public PlainTextExporter(IDocument document) { + Document = document; + } + + public IDocument Document { get; } + + protected virtual string Convert() { + return Document.ToString(); + } + + protected virtual Task ConvertAsync() { + return Task.FromResult(Document.ToString()); + } + + public virtual void Write(string path) { + var text = Convert(); + if (!Directory.Exists(Path.GetDirectoryName(path))) + throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path)); + File.WriteAllText(path, text, Encoding.Unicode); + } + + public virtual async Task WriteAsync(string path) { + var text = await ConvertAsync(); + if (!Directory.Exists(path)) + throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path)); + await File.WriteAllTextAsync(path, text); + } + } +} diff --git a/Beam.Exports/HtmlExporter.cs b/Beam.Exports/HtmlExporter.cs new file mode 100644 index 0000000..8bcd87c --- /dev/null +++ b/Beam.Exports/HtmlExporter.cs @@ -0,0 +1,42 @@ +using System.Text; + +namespace Beam.Exports { + public class HtmlExporter : PlainTextExporter { + public HtmlExporter(IDocument document, + ArticleData? meta = null, + Dictionary? linkButtons = null, + string? 
eofHtml = null) : base(document) { + Meta = meta; + LinkButtons = linkButtons; + EofHtml = eofHtml; + } + + public ArticleData? Meta { get; } + public Dictionary? LinkButtons { get; } + public string? EofHtml { get; } + + protected override string Convert() { + var text = Document.ToString(); + // Convert newlines to

tags + text = "

" + text.Replace("\n", "

") + "

"; + if (Meta is null) + return text; + text = $"

{Meta.Name}

" + text; + if (LinkButtons is null || LinkButtons.Count == 0) + return text; + StringBuilder buttons = new(); + foreach(var (btnText, btnLink) in LinkButtons.Select((x) => (x.Key, x.Value))) { + buttons.AppendLine($"{btnText}"); + } + var buttonsDiv = $"
{buttons}
"; + text = buttonsDiv + text + buttonsDiv; + text += EofHtml ?? ""; + text = "\n" + text + ""; + return text; + } + + protected override Task ConvertAsync() { + return Task.FromResult(Convert()); + } + } +} diff --git a/Beam.Exports/IAsyncExporter.cs b/Beam.Exports/IAsyncExporter.cs new file mode 100644 index 0000000..de0e4ef --- /dev/null +++ b/Beam.Exports/IAsyncExporter.cs @@ -0,0 +1,10 @@ +namespace Beam.Exports { + public interface IAsyncExporter : IExporter { + /// + /// Asynchronously writes the object to the desired path, creating it if it does not exist. + /// + /// The path of the exported object + /// + public Task WriteAsync(string path); + } +} diff --git a/Beam.Exports/IExporter.cs b/Beam.Exports/IExporter.cs new file mode 100644 index 0000000..34e713a --- /dev/null +++ b/Beam.Exports/IExporter.cs @@ -0,0 +1,19 @@ +namespace Beam.Exports { + public interface IExporter { + /// + /// Synchronously writes the object to the desired path, creating it if it does not exist. + /// + /// The path of the exported object + public void Write(string path); + + protected void EnsurePathExists(string path) { + if (File.Exists(path)) { + File.Delete(path); + return; + } + else if (!Directory.Exists(path)) + Directory.CreateDirectory(path); + } + + } +} diff --git a/Beam.Exports/IStreamExporter.cs b/Beam.Exports/IStreamExporter.cs new file mode 100644 index 0000000..8ca934c --- /dev/null +++ b/Beam.Exports/IStreamExporter.cs @@ -0,0 +1,11 @@ +namespace Beam.Exports { + public interface IStreamExporter : IAsyncExporter { + /// + /// Asynchronously writes the object to the desired path in many parts, returning the path + /// of each written file as a stream + /// + /// The path of the exported object + /// The async enumerator of each written file + public IAsyncEnumerator WriteAsyncStream(string path); + } +} diff --git a/Beam.Exports/S.cs b/Beam.Exports/S.cs new file mode 100644 index 0000000..9a4ab9b --- /dev/null +++ b/Beam.Exports/S.cs @@ -0,0 +1,13 @@ +using 
System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Exports { + internal static class S { + internal static class M { + internal const string FileDirectoryDoesNotExist = "Part of the path supplied does not exist."; + } + } +} diff --git a/Beam.Temporary.Cli/Architecture.cs b/Beam.Temporary.Cli/Architecture.cs new file mode 100644 index 0000000..3c8d175 --- /dev/null +++ b/Beam.Temporary.Cli/Architecture.cs @@ -0,0 +1,52 @@ +using aeqw89.DataKeys; +using HtmlAgilityPack; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + /// + /// + /// A collection of specific useful methods and constants that facilitate the use of the application; allows other parts of the application to depend on architecture-specific arbitrary choices without compromising the Single-Responsibility principle or increasing redundant code. + /// + /// + partial interface IArchitecture { + /// + /// Gets the metadata associated with a + /// + /// The web client to use when downloading s + /// The key of the stored in the + /// The to be used to retrieve information + /// Optional logger for logging debug information + /// A object with the required information to perform the download + public DownloadContext? GetMeta(HtmlWeb web, DataKey pieceKey, SharedDataDictionary sdd, ILogger? logger = null); + /// + /// Gets the of the text record associated with + /// + /// The web client to use when downloading s + /// The key of the stored in the + /// The to be used to retrieve information + /// Optional book metadata to include with the final text record + /// Optional logger for logging debug information + /// A object with the required information to perform the download + public DownloadContext? GetTextRecord(HtmlWeb web, DataKey pieceKey, SharedDataDictionary sdd, IDocumentMetaData? 
metadata = null, ILogger? logger = null); + + /// + /// The to use when looking for the chapter metadata + /// + public DataKey ChapterKey { get; set; } + + /// + /// The to use when looking for the book metadata + /// + public DataKey BookKey { get; set; } + + /// + /// The default architecture + /// + public static IArchitecture Default => new MainArchitecture(); + } +} diff --git a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj new file mode 100644 index 0000000..fc4622c --- /dev/null +++ b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj @@ -0,0 +1,33 @@ + + + + Exe + net9.0 + enable + enable + + + + + + + + + + + + + + + + + + + ..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll + + + ..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll + + + + diff --git a/Beam.Temporary.Cli/CssData.cs b/Beam.Temporary.Cli/CssData.cs new file mode 100644 index 0000000..aa1c5f1 --- /dev/null +++ b/Beam.Temporary.Cli/CssData.cs @@ -0,0 +1,30 @@ +namespace Beam.Temporary.Cli { + public class CssData { + // Primary background color (e.g., for the body) + public string PrimaryColor { get; set; } = "#f5f5f5"; + + // Secondary color (e.g., for header background) + public string SecondaryColor { get; set; } = "#e0e0e0"; + + // Tertiary color (e.g., for content sections) + public string TertiaryColor { get; set; } = "#ffffff"; + + // Button background color + public string ButtonColor { get; set; } = "#007bff"; + + // Foreground text color + public string ForegroundColor { get; set; } = "#333333"; + + // Font family for main content + public string ContentFont { get; set; } = "Arial, sans-serif"; + + // Font size for main content + public string ContentFontSize { get; set; } = "16px"; + + // Font family for titles + public string TitleFont { get; set; } = "Georgia, serif"; + + // Font size for titles + public string TitleFontSize { get; set; } = "32px"; + } +} diff --git 
a/Beam.Temporary.Cli/DataKeyExtensions.cs b/Beam.Temporary.Cli/DataKeyExtensions.cs new file mode 100644 index 0000000..91f9d37 --- /dev/null +++ b/Beam.Temporary.Cli/DataKeyExtensions.cs @@ -0,0 +1,34 @@ + + +using aeqw89.DataKeys; + +namespace Beam.Temporary.Cli { + internal static class DataKeyExtensions { + public static DataKey WithNamespace(this DataKey dk, string @namespace) { + string[] names = @namespace.Split(':'); + var agg = (string x, string y) => $"{x}:{y}"; + for (int i = 0; i < names.Length; i++) { + string test = names.SkipLast(i).Aggregate(agg); + if (dk.Identifier.StartsWith(test)) { + return new DataKey(dk.Identifier.Replace(test, @namespace)); + } + } + + return new DataKey(@namespace + ":" + dk.Identifier); + } + + public static DataKey WithNamespace(this DataKey dk, string @namespace) { + return ((DataKey)dk).WithNamespace(@namespace).As(); + } + + public static DataKey WithSuffix(this DataKey dk, string suffix) { + return new DataKey(dk.Identifier + suffix); + } + + public static DataKey ToAggregator(this DataKey dk) + => dk.WithNamespace("aeqw89:document:aggregators"); + public static DataKey ToAuxiliary(this DataKey dk) + => dk.WithNamespace("aeqw89:document:auxillaries"); + public static DataKey As(this DataKey dk) => new DataKey(dk.Identifier); + } +} diff --git a/Beam.Temporary.Cli/File.cs b/Beam.Temporary.Cli/File.cs new file mode 100644 index 0000000..6b3b95f --- /dev/null +++ b/Beam.Temporary.Cli/File.cs @@ -0,0 +1,6 @@ +namespace Beam.Temporary.Cli { + internal class File(string path, params string[] tags) { + public string Path { get; set; } = path; + public string[] Tags { get; set; } = tags; + } +} diff --git a/Beam.Temporary.Cli/HtmlBook.cs b/Beam.Temporary.Cli/HtmlBook.cs new file mode 100644 index 0000000..19e8897 --- /dev/null +++ b/Beam.Temporary.Cli/HtmlBook.cs @@ -0,0 +1,132 @@ +//using aeqw89.DataKeys; +//using System; +//using System.Collections.Generic; +//using System.Linq; +//using System.Text; +//using 
System.Threading.Tasks; + +//namespace Beam.Temporary.Cli { +// internal class HtmlBook : Document { +// public class Keys { +// public static DataKey ContentPage => new DataKey("content_page"); +// public static DataKey NoContentPage => new DataKey("no_content_page"); +// public static DataKey TitlePage => new DataKey("title_page"); +// public static DataKey StylesPage => new DataKey("styles_page"); +// } + +// public List> Documents { get; set; } +// public IReadOnlyList Pages => _Pages; +// private List _Pages { get; set; } = []; + +// private const string EMTPY_PAGE = "EMPTY"; + +// public CssData CssData { get; } +// public ArticleData BookData { get; set; } +// public HtmlBookTemplates Templates { get; set; } + +// public HtmlBook(string bookname, CssData cssData, ArticleData bookData, HtmlBookTemplates templates, List? documents = null, Encoding? encoding = null) +// : base(bookname, encoding) { +// Documents = []; +// CssData = cssData; +// BookData = bookData; +// Templates = templates; +// if (documents is not null) +// Documents = documents.Select((x) => new Tracked(x)).ToList(); +// } + +// public void Update(bool ignoreDirty = false) { +// if (!Directory.Exists(Filename)) +// Directory.CreateDirectory(Filename); + +// //System.IO.File.WriteAllLines(Path.Combine(Filename, "styles.css"), Format()) + +// List newpages = []; +// if (Pages.Count < Documents.Count) +// _Pages.AddRange(Enumerable.Repeat(EMTPY_PAGE, Documents.Count - Pages.Count)); +// foreach (var (doc, page) in Documents.Zip(Pages)) { +// if (!doc.IsDirty) +// newpages.Add(page); +// else if (doc.TrackedObject.MetaData.Count == 0) +// newpages.Add(PlainPage(doc.TrackedObject)); +// else if (doc.TrackedObject.MetaData.TryGetValue(Program.Architecture.ChapterKey, out var meta) && meta is ArticleData articleData) +// newpages.Add(ArticlePage(doc.TrackedObject, articleData)); +// else { +// Console.WriteLine("Unhandlable Metadata detected!"); +// newpages.Add(PlainPage(doc.TrackedObject)); +// } 
+ +// System.IO.File.WriteAllText(Path.Combine(Filename, Path.GetRandomFileName() + ".html"), newpages[^1]); +// doc.IsDirty = false; +// } + +// _Pages = newpages; +// } + +// public void UpdateCss() { + +// } + +// public void UpateTitle() { + +// } + +// private string Format(string template, Dictionary table) { +// ArgumentNullException.ThrowIfNull(template); +// ArgumentNullException.ThrowIfNull(table); + +// foreach (var kvp in table) { +// template = template.Replace(kvp.Key, kvp.Value); +// } +// return template; +// } + +// private Dictionary GetDocumentTable(IDocument doc, bool keepPlaceholders = false) { +// var table = new Dictionary() { +// { "{" + nameof(doc.Filename) + "}", doc.Filename }, +// { "{Content}", doc.ToString() } +// }; + +// return SolvePlaceholders(table, keepPlaceholders); +// } + +// private Dictionary GetArticleDataTable(IDocument doc, ArticleData ad, bool keepPlaceholders = false) { +// var table = new Dictionary() { +// { "{" + nameof(ad.Language) + "}", ad.Language ?? "" }, +// { "{" + nameof(ad.Authors) + "}", ad.Authors.Aggregate("; ")}, +// { "{" + nameof(ad.Categories) + "}", ad.Categories.Aggregate("; ") }, +// { "{" + nameof(ad.Version) + "}", ad.Version ?? "" }, +// { "{" + nameof(ad.Description) + "}", ad.Description ?? "" }, +// { "{" + nameof(ad.Name) + "}", ad.Name ?? "" }, +// { "{" + nameof(doc.Filename) + "}", doc.Filename }, +// { "{Content}", doc.ToString() } +// }; + +// return SolvePlaceholders(table, keepPlaceholders); +// } + +// private Dictionary SolvePlaceholders(Dictionary table, bool keepPlaceholders) { +// if (keepPlaceholders) +// return table.Select( +// (x) => new KeyValuePair(x.Key, x.Value == "" ? 
$"{x.Key}" : x.Value)) +// .ToDictionary(); +// return table; +// } + +// private string PlainPage(IDocument doc, bool keepPlaceholders = false) { +// return Format(Templates.ContentPageTemplate, GetDocumentTable(doc, keepPlaceholders)); +// } + +// private string ArticlePage(IDocument doc, ArticleData data, bool keepPlaceholders = false) { +// return Format(Templates.ContentPageTemplate, GetArticleDataTable(doc, data, keepPlaceholders)); +// } + +// public override byte[] ToBytes() { +// throw new NotImplementedException(); +// } + +// public override string ToString() { +// throw new NotImplementedException(); +// } + +// } +//} diff --git a/Beam.Temporary.Cli/HtmlBookTemplates.cs b/Beam.Temporary.Cli/HtmlBookTemplates.cs new file mode 100644 index 0000000..932e901 --- /dev/null +++ b/Beam.Temporary.Cli/HtmlBookTemplates.cs @@ -0,0 +1,8 @@ +namespace Beam.Temporary.Cli { + internal struct HtmlBookTemplates { + public string TitlePageTemplate { get; set; } + public string ContentPageTemplate { get; set; } + public string CssTemplate { get; set; } + public string NoContentTemplate { get; set; } + } +} diff --git a/Beam.Temporary.Cli/MainArchitecture.cs b/Beam.Temporary.Cli/MainArchitecture.cs new file mode 100644 index 0000000..2a13610 --- /dev/null +++ b/Beam.Temporary.Cli/MainArchitecture.cs @@ -0,0 +1,79 @@ +using aeqw89.DataKeys; +using Beam.Dynamic; +using HtmlAgilityPack; +using Microsoft.Extensions.Logging; + +namespace Beam.Temporary.Cli { + partial interface IArchitecture { + private class MainArchitecture : IArchitecture { + public MainArchitecture() { } + + public DataKey ChapterKey { get; set; } = new("ma:chapter"); + public DataKey BookKey { get; set; } = new("ma:book"); + + public DownloadContext? GetMeta(HtmlWeb web, DataKey pieceKey, SharedDataDictionary sdd, ILogger? 
logger = null) { + var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd + var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd + + // null checks + if (auxiliary is null) // aux is required to get metadata + return null; + if (piece?.Resource?.MetaTemplateInitialData is null) // sanity check to avoid null warnings + return null; + + // gets the link for the novel's metadata using the auxillary data retrieved from the sdd + var link = sdd.Templates[auxiliary.Resource.Key].GenerateLink(piece?.Resource?.MetaTemplateInitialData!); + var binding = auxiliary.Bindings; + + return new DownloadContext(web, [link], downloadLogger: logger, transformer: (x) => { + return new ArticleData() { + Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], + Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), + Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [], + Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") + }; + }); + } + + public DownloadContext? GetTextRecord(HtmlWeb web, DataKey resKey, SharedDataDictionary sdd, IDocumentMetaData? metaData = null, ILogger? 
logger = null) { + var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd + var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd + + if (aggregator is null) // ensure aggergator data was retrieved successfully + return null; + if (res is null) // ensure novel data was retrieved successfully + return null; + + var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator + + // creates a generative enumerable of type link from 'template' + var sle = SourceLinkEnumerable.FromGenerator(new DataBackedSourceLinkGenerator( + template, res.Resource.TemplateInitialData)); + + return new DownloadContext(web, sle, + transformer: (x) => { + var resolved = aggregator.Bindings.Resolve(x); + var articleData = new ArticleData() { + Name = OnlineCleaner.Clean(resolved.Title), + }; + Dictionary, IDocumentMetaData> meta = []; + meta.Add(ChapterKey, articleData); + if (metaData is not null) + meta.Add(BookKey, metaData); + return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) { + MetaData = meta + }; + }, + retryReporter: new Progress((x) => Console.WriteLine($"Retrying download ({x})")), + downloadReporter: new Progress((x) => Console.WriteLine($"Downloaded ({x.Filename})")), + asyncFailurePredicates: [ + (x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("
"))
+                    ],
+                    timeOut: TimeSpan.FromSeconds(15),
+                    downloadLogger: logger
+                );
+            }
+
+        }
+    }
+}
diff --git a/Beam.Temporary.Cli/NovelStatics.cs b/Beam.Temporary.Cli/NovelStatics.cs
new file mode 100644
index 0000000..7183ee7
--- /dev/null
+++ b/Beam.Temporary.Cli/NovelStatics.cs
@@ -0,0 +1,168 @@
+
+
+using aeqw89.DataKeys;
+using Beam.Dynamic;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam.Temporary.Cli {
+
+    internal static class NovelStatics {
+        /// <summary>
+        /// Registers the novel "The Legendary Mechanic" and records it under the
+        /// Light Novel World aggregator key.
+        /// </summary>
+        /// <param name="sdd">The shared data dictionary that receives the definitions.</param>
+        public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) {
+            var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world");
+            var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world");
+            var novel = new TextResource() {
+                Key = new DataKey("novels:the_legendary_mechanic"),
+                AssociatedSource = lnwAggregator,
+                AssociatedMetaSource = lnwAuxiliary,
+                TemplateInitialData = ["the-legendary-mechanic-245", "1"],
+                MetaTemplateInitialData = ["the-legendary-mechanic"]
+            };
+            sdd.Novels.TryAdd(novel.Key, novel);
+
+            // TryAdd is a no-op when the aggregator already has an entry, which would
+            // silently drop this novel; add to the existing set instead.
+            var novelsByAggregator = sdd.AggregatorNovels;
+            if (!novelsByAggregator.TryGetValue(lnwAggregator, out var novels)) {
+                novels = [];
+                novelsByAggregator[lnwAggregator] = novels;
+            }
+            novels.Add(novel.Key);
+        }
+
+        /// <summary>
+        /// Registers the novel "I Alone Level Up" and records it under the
+        /// Light Novel World aggregator key.
+        /// </summary>
+        /// <param name="sdd">The shared data dictionary that receives the definitions.</param>
+        public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) {
+            var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As();
+            var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As();
+            var novel = new TextResource() {
+                Key = new DataKey("novels:i_alone_level_up"),
+                AssociatedSource = lnwAggregator,
+                AssociatedMetaSource = lnwAuxiliary,
+                TemplateInitialData = ["i-alone-level-up-236", "1"],
+                MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"]
+            };
+
+            sdd.Novels.TryAdd(novel.Key, novel);
+
+            // TryAdd is a no-op when the aggregator already has an entry, which would
+            // silently drop this novel; add to the existing set instead.
+            var novelsByAggregator = sdd.AggregatorNovels;
+            if (!novelsByAggregator.TryGetValue(lnwAggregator, out var novels)) {
+                novels = [];
+                novelsByAggregator[lnwAggregator] = novels;
+            }
+            novels.Add(novel.Key);
+        }
+
+        public static void Define_NovelFull(SharedDataDictionary sdd) {
+            var docNamespace = "aeqw89:document";
+            var nfAgg = new 
DataKey("aggregators:novel_full").WithNamespace(docNamespace); + var nfAux = new DataKey("auxillaries:novel_full").WithNamespace(docNamespace); + var nfBindings = new DataKey("aeqw89:bindings:light_novel_world"); + var aggregator = new WebResource(nfAgg) { + Name = "Novel Full", + Description = "A novel aggregator site", + Domain = "https://novelfull.net", + Bindings = nfBindings + }; + var auxiliary = new WebResource(nfAux) { + Name = "Novel Full", + Description = "A novel aggregator site", + Domain = "https://novelfull.net", + Bindings = nfBindings.WithSuffix("_aux") + }; + + sdd.Templates.TryAdd(nfAgg, new() { + Template = "" + }); + } + + public static void Define_LightNovelWorld(SharedDataDictionary sdd) { + var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world"); + var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world"); + const string lnwBindingsA = "aeqw89:bindings:light_novel_world"; + var aggregator = new WebResource(lnwAggregator) { + Name = "Light Novel World", + Description = "A novel aggregator site maintained by NetherClaw", + Domain = "https://www.lightnovelworld.co", + Bindings = new DataKey(lnwBindingsA) + }; + const string lnwBindingsB = "aeqw89:bindings:light_novel_world_aux"; + var auxiliary = new WebResource(lnwAuxiliary) { + Name = "Light Novel World", + Description = "A novel aggregator site maintained by NetherClaw", + Domain = "https://www.lightnovelworld.co", + Bindings = new DataKey(lnwBindingsB) + }; + + sdd.Templates.TryAdd(lnwAuxiliary, new() { + Template = "https://www.lightnovelworld.co/novel/{0}", + IndexOfChapterIndex = -1 + }); + sdd.Templates.TryAdd(lnwAggregator, new() { + Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}", + IndexOfChapterIndex = 1 + }); + + sdd.Aggregators.TryAdd(aggregator.Key, aggregator); + sdd.Auxillaries.TryAdd(auxiliary.Key, auxiliary); + + var lnwBindings = new DataKey(lnwBindingsA); + var lnwBindingsAux = new DataKey(lnwBindingsB); + 
sdd.Bindings.TryAdd(lnwBindings, new DataBindings() { + Title = new Binding("aeqw89:binding:light_novel_world:title") { + XPath = "/html/body/main/article/section/div[1]/h1/span[2]", + Type = BindingType.Single + }, + Content = new("aeqw89:binding:light_novel_world:content") { + Provider = new ParagraphedContentDataProvider() { + Content = new Binding() { + XPath = "//*[@id=\"chapter-container\"]" + } + }, + Type = BindingType.UseProvider + }, + }); + sdd.Bindings.TryAdd(lnwBindingsAux, new DataBindings() { + Title = new("aeqw89:binding:light_novel_world_aux:title") { + XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1", + Type = BindingType.Single + }, + Authors = new("aeqw89:binding:light_novel_world_aux:authors") { + XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a", + Type = BindingType.Single + }, + Description = new("aeqw89:binding:light_novel_world_aux:description") { + Provider = new ParagraphedContentDataProvider() { + Content = new() { + XPath = "/html/body/main/article/div/section/div[1]/div" + } + }, + Type = BindingType.UseProvider + }, + Tags = new("aeqw89:binding:light_novel_world_aux:tags") { + Provider = new ListContentDataProvider() { + Content = new() { + XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul" + } + }, + Type = BindingType.UseProvider + } + }); + } + + + } +} diff --git a/Beam.Temporary.Cli/Program.cs b/Beam.Temporary.Cli/Program.cs new file mode 100644 index 0000000..5958b63 --- /dev/null +++ b/Beam.Temporary.Cli/Program.cs @@ -0,0 +1,135 @@ +using aeqw89.PersistentData; +using aeqw89.DataKeys; +using Beam.Dynamic; +using HtmlAgilityPack; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Text.Json.Serialization.Metadata; +using Beam.Temporary.Cli.Templates.Classic; +using Beam.Exports; + +namespace Beam.Temporary.Cli { + internal class Program { + + public static 
JsonSerializerOptions ConversionOptions { get; internal set; } = new(); + + public static SharedDataDictionary Shared { get; set; } = []; + + public static IArchitecture Architecture = IArchitecture.Default; + + const string SharedDataPath = "data/.dat"; + + static async Task Main(string[] args) { + ConversionOptions.Converters.AddPersistentDataRequiredConverters(); + ConversionOptions.WriteIndented = true; + + var web = new HtmlWeb(); + + var lf = LoggerFactory.Create((x) => { + x.AddConsole(); + }); + + ILogger logger = lf + .CreateLogger("Program"); + + await using var sharedContext = await DataDictionaryContext.Create( + SharedDataPath, + DataKind.Shared, + logger, + ConversionOptions + ); + + Shared = sharedContext.Data; + + Shared.Clear(); + NovelStatics.Define_LightNovelWorld(Shared); + NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared); + NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared); + ClassicTemplates.Register(Shared); + + var novel = new DataKey("novels:i_alone_level_up"); + var context_aux = Architecture.GetMeta(web, novel, Shared); + var metaDownloader = new DownloadEnumerable( + new SequentialFragmentDownloader( + context_aux, + (c) => new UnitFragmentDownloader(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger), + logger) + .UnwrapFragmented()); + var metadata = (await metaDownloader.FirstAsync()); + + var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data); + context.DownloadReporter = new Progress((x) => Console.WriteLine(x.Filename)); + var downloader = new DownloadEnumerable( + new SequentialFragmentDownloader( + context, + (c) => new UnitFragmentDownloader(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger), + logger) + .UnwrapFragmented()); + + List> documents = []; + + await foreach (var download in downloader.Take(20)) { + if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta)) + continue; + if (meta is not ArticleData articleMetaData) + 
continue; + //Console.WriteLine($"Title: {data.Name}"); + //Console.WriteLine($"Description: {data.Description}"); + //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}"); + //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}"); + Console.WriteLine($"Chapter title: {articleMetaData.Name}"); + //Console.WriteLine($"Content: {download}"); + + documents.Add(download); + } + + string testDir = Path.Combine("txt", Path.GetRandomFileName()); + Directory.CreateDirectory(testDir); + + int len = documents.MaxBy((x) => x.Order)?.Order ?? -1; + foreach (var document in documents.OrderBy((x) => x.Order)) { + document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData); + Dictionary linkButtons = new(); + if (document.Order != 0) + linkButtons.Add("Previous", $"{document.Order - 1}.html"); + if (document.Order != len) + linkButtons.Add("Next", $"{document.Order + 1}.html"); + new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html")); + } + + Console.ReadKey(); + + //foreach (var download in documents.OrderBy((x) => x.Order)) { + // if (download.Data.TryGetTaggedMetaData(Architecture.ChapterKey, out var meta)) + // Console.WriteLine($"{download.Order}:{meta.Name}"); + //} + + //string[] templates = new DataKey[] { + // HtmlBook.Keys.ContentPage, + // HtmlBook.Keys.NoContentPage, + // HtmlBook.Keys.TitlePage, + // HtmlBook.Keys.StylesPage, + //}.Select( + // (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic")) + //).ToArray(); + + //HtmlBook book = new( + // bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"), + // new CssData(), + // new ArticleData(), + // new HtmlBookTemplates() { + // ContentPageTemplate = templates[0], + // NoContentTemplate = templates[1], + // TitlePageTemplate = templates[2], + // CssTemplate = templates[3], + // }, + // documents: documents.Select((x) 
=> x.Data).ToList() + //); + + //book.Update(); + //Console.WriteLine("One variable!"); + } + } +} diff --git a/Beam.Temporary.Cli/SharedDataDictionary.cs b/Beam.Temporary.Cli/SharedDataDictionary.cs new file mode 100644 index 0000000..c39edda --- /dev/null +++ b/Beam.Temporary.Cli/SharedDataDictionary.cs @@ -0,0 +1,48 @@ +using aeqw89.PersistentData; +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + public class SharedDataDictionary : BaseDataDictionary { + public Dictionary, PackagedSourceLinkGenerationData> Templates { + get => GetOrCreateDictionary, PackagedSourceLinkGenerationData>(nameof(Templates)); + set => Data[nameof(Templates)] = value; + } + + public Dictionary, WebResource> Aggregators { + get => GetOrCreateDictionary, WebResource>(nameof(Aggregators)); + set => Data[nameof(Aggregators)] = value; + } + + public Dictionary, WebResource> Auxillaries { + get => GetOrCreateDictionary, WebResource>(nameof(Auxillaries)); + set => Data[nameof(Auxillaries)] = value; + } + + public Dictionary, DataBindings> Bindings { + get => GetOrCreateDictionary, DataBindings>(nameof(Bindings)); + set => Data[nameof(Bindings)] = value; + } + + public Dictionary, HashSet>> AggregatorNovels { + get => GetOrCreateDictionary, HashSet>>(nameof(AggregatorNovels)); + set => Data[nameof(AggregatorNovels)] = value; + } + + public Dictionary, TextResource> Novels { + get => GetOrCreateDictionary, TextResource>(nameof(Novels)); + set => Data[nameof(Novels)] = value; + } + + internal Dictionary, File> Files { + get => GetOrCreateDictionary, File>(nameof(Files)); + set => Data[nameof(Files)] = value; + } + } +} diff --git a/Beam.Temporary.Cli/StringExtensions.cs b/Beam.Temporary.Cli/StringExtensions.cs new file mode 100644 index 0000000..8e95d2b --- /dev/null +++ b/Beam.Temporary.Cli/StringExtensions.cs @@ -0,0 
+1,22 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam.Temporary.Cli {
+    public static class StringExtensions {
+        /// <summary>
+        /// Concatenates the strings in <paramref name="str"/>, placing
+        /// <paramref name="separator"/> between consecutive elements.
+        /// </summary>
+        /// <param name="str">The strings to join.</param>
+        /// <param name="separator">The text inserted between elements.</param>
+        /// <returns>The joined text, or <see cref="string.Empty"/> when the sequence is empty.</returns>
+        public static string Aggregate(this IEnumerable<string> str, string separator) {
+            // string.Join enumerates the sequence once and already yields an empty
+            // string for an empty sequence, so no separate Any() check is needed.
+            return string.Join(separator, str);
+        }
+    }
+}
diff --git a/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs b/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs
new file mode 100644
index 0000000..6b2ac8a
--- /dev/null
+++ b/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs
@@ -0,0 +1,46 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam.Temporary.Cli.Templates.Classic {
+    /// <summary>
+    /// Registers the file entries of the "classic" HTML/CSS template set.
+    /// </summary>
+    internal class ClassicTemplates {
+        /// <summary>
+        /// Adds the four classic template files to the file table of <paramref name="sdd"/>.
+        /// Keys that are already present are left unchanged.
+        /// </summary>
+        /// <param name="sdd">The shared data dictionary that receives the entries.</param>
+        public static void Register(SharedDataDictionary sdd) {
+            // NOTE(review): these are absolute paths into one user's profile directory and
+            // will not resolve on other machines; consider paths relative to the application.
+            sdd.Files.TryAdd(
+                new("aeqw89:files:templates:classic:content_page"),
+                new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Content.template.html", "htmlpage", "templates"));
+            sdd.Files.TryAdd(
+                new("aeqw89:files:templates:classic:title_page"),
+                new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Title.template.html", "htmlpage", "templates"));
+            sdd.Files.TryAdd(
+                new("aeqw89:files:templates:classic:styles_page"),
+                new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Styles.template.css", "styles", "templates"));
+            sdd.Files.TryAdd(
+                new("aeqw89:files:templates:classic:no_content_page"),
+                new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\NoContent.template.html", "htmlpage", "templates"));
+        }
+    }
+
+    internal static class DictionaryOfFileExtensions {
+        /// <summary>
+        /// Reads the entire file registered under <paramref name="key"/> as text.
+        /// </summary>
+        /// <param name="dict">The table mapping keys to file entries.</param>
+        /// <param name="key">The key of the file entry to read; must be present in <paramref name="dict"/>.</param>
+        /// <returns>The file's full text content.</returns>
+        public static string ReadToString<T>(this Dictionary<T, File> dict, T key) where T: notnull {
+            return System.IO.File.ReadAllText(dict[key].Path);
+        }
+    }
+}
diff --git 
a/Beam.Temporary.Cli/Templates/Classic/Content.template.html b/Beam.Temporary.Cli/Templates/Classic/Content.template.html new file mode 100644 index 0000000..6ee9342 --- /dev/null +++ b/Beam.Temporary.Cli/Templates/Classic/Content.template.html @@ -0,0 +1,27 @@ + + + + + {Name} + + + +
+

{Name}

+

{Description}

+
+ Authors: {Authors} | + Language: {Language} | + Categories: {Categories} | + Version: {Version} +
+
+
+ {Content} +
+ + + diff --git a/Beam.Temporary.Cli/Templates/Classic/NoContent.template.html b/Beam.Temporary.Cli/Templates/Classic/NoContent.template.html new file mode 100644 index 0000000..f41ef6a --- /dev/null +++ b/Beam.Temporary.Cli/Templates/Classic/NoContent.template.html @@ -0,0 +1,15 @@ + + + + + 404 - Not Found + + + +
+

404 - Content Not Found

+

The file {Filename} was not found.

+

{Content}

+
+ + diff --git a/Beam.Temporary.Cli/Templates/Classic/Styles.template.css b/Beam.Temporary.Cli/Templates/Classic/Styles.template.css new file mode 100644 index 0000000..b9154d9 --- /dev/null +++ b/Beam.Temporary.Cli/Templates/Classic/Styles.template.css @@ -0,0 +1,60 @@ +/* styles.css */ +/* Placeholders: + {PrimaryColor}, {SecondaryColor}, {TertiaryColor}, {ButtonColor}, + {ForegroundColor}, {ContentFont}, {ContentFontSize}, {TitleFont}, {TitleFontSize} +*/ +body { + font-family: {ContentFont}; + font-size: {ContentFontSize}; + background-color: {PrimaryColor}; + color: {ForegroundColor}; + margin: 0; + padding: 20px; +} + +header { + background-color: {SecondaryColor}; + padding: 20px; + text-align: center; +} + +header h1 { + font-family: {TitleFont}; + font-size: {TitleFontSize}; + margin: 0; +} + +header p { + font-style: italic; + margin: 5px 0; +} + +section, article, nav { + background: {TertiaryColor}; + padding: 15px; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + margin: 20px auto; + max-width: 800px; +} + +.navigation { + display: flex; + justify-content: space-between; + max-width: 800px; + margin: 20px auto; +} + +button { + background-color: {ButtonColor}; + color: {ForegroundColor}; + border: none; + padding: 10px 20px; + cursor: pointer; + font-size: {ContentFontSize}; + border-radius: 4px; +} + +nav h2 { + margin-top: 0; +} diff --git a/Beam.Temporary.Cli/Templates/Classic/Title.template.html b/Beam.Temporary.Cli/Templates/Classic/Title.template.html new file mode 100644 index 0000000..19153dd --- /dev/null +++ b/Beam.Temporary.Cli/Templates/Classic/Title.template.html @@ -0,0 +1,26 @@ + + + + + {Name} + + + +
+

{Name}

+

{Description}

+
+
+
Authors: {Authors}
+
Language: {Language}
+
Categories: {Categories}
+
Version: {Version}
+
+ + + diff --git a/Beam.Temporary.Cli/TextResource.cs b/Beam.Temporary.Cli/TextResource.cs new file mode 100644 index 0000000..ff27c57 --- /dev/null +++ b/Beam.Temporary.Cli/TextResource.cs @@ -0,0 +1,26 @@ + + +using aeqw89.DataKeys; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + public class TextResource : IKeyed { + public required DataKey Key { get; set; } + public DataKey? AssociatedSource { get; set; } + public DataKey? AssociatedMetaSource { get; set; } + public required string[] TemplateInitialData { get; set; } + public string?[]? MetaTemplateInitialData { get; set; } + + public TextResourceRecord ToRecord(SharedDataDictionary sdd) { + return new(this, + AssociatedSource is null ? null : sdd.Aggregators[AssociatedSource], + AssociatedMetaSource is null ? null : sdd.Auxillaries[AssociatedMetaSource]); + } + } + + public record TextResourceRecord(TextResource Resource, WebResource? AssociatedSource, WebResource? 
AssociatedMetaSource); +} diff --git a/Beam.Temporary.Cli/Tracked.cs b/Beam.Temporary.Cli/Tracked.cs new file mode 100644 index 0000000..61321ff --- /dev/null +++ b/Beam.Temporary.Cli/Tracked.cs @@ -0,0 +1,11 @@ +namespace Beam.Temporary.Cli { + internal class Tracked(T obj) { + public T TrackedObject { get; set; } = obj; + public bool IsDirty { get; set; } = true; + + public Tracked SetDirty() { + IsDirty = true; + return this; + } + } +} diff --git a/Beam.Temporary.Cli/WebResource.cs b/Beam.Temporary.Cli/WebResource.cs new file mode 100644 index 0000000..f7fcc71 --- /dev/null +++ b/Beam.Temporary.Cli/WebResource.cs @@ -0,0 +1,28 @@ +using aeqw89.PersistentData; +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + public class WebResource(DataKey key) : IKeyed { + public DataKey Key { get; set; } = key; + + public required DataKey Bindings { get; set; } + public string? Name { get; set; } + public string? Domain { get; set; } + public string? 
Description { get; set; } + + + public WebResource() : this(new(string.Empty)) { } + + public WebResourceRecord ToRecord(SharedDataDictionary sdd) { + return new WebResourceRecord(this, sdd.Bindings[Bindings]); + } + } + + public record WebResourceRecord(WebResource Resource, DataBindings Bindings); +} diff --git a/Beam.sln b/Beam.sln new file mode 100644 index 0000000..f0c9e82 --- /dev/null +++ b/Beam.sln @@ -0,0 +1,40 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.12.35506.116 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam", "Beam\Beam.csproj", "{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Temporary.Cli", "Beam.Temporary.Cli\Beam.Temporary.Cli.csproj", "{8F650BBA-3800-4B5E-A6FF-9057633601EE}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Dynamic", "Beam.Dynamic\Beam.Dynamic.csproj", "{DDEABE82-096C-4799-87F1-56F494D35FAA}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Exports\Beam.Exports.csproj", "{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.Build.0 = Release|Any CPU + {8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.ActiveCfg 
= Release|Any CPU + {8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.Build.0 = Release|Any CPU + {DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.Build.0 = Release|Any CPU + {7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Beam/ArticleData.cs b/Beam/ArticleData.cs new file mode 100644 index 0000000..f016b26 --- /dev/null +++ b/Beam/ArticleData.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Beam { + public class ArticleData : IDocumentMetaData { + public string? Name { get; set; } + public string[] Authors { get; set; } = []; + public string? Language { get; set; } + public string[] Categories { get; set; } = []; + public string? Version { get; set; } + public string? Description { get; set; } + + public string AsJson(JsonSerializerOptions? 
options = null) { + return JsonSerializer.Serialize(this, options); + } + } +} diff --git a/Beam/Beam.csproj b/Beam/Beam.csproj new file mode 100644 index 0000000..8d25a7b --- /dev/null +++ b/Beam/Beam.csproj @@ -0,0 +1,25 @@ + + + + net9.0 + enable + enable + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + ..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll + + + + diff --git a/Beam/ByteDocument.cs b/Beam/ByteDocument.cs new file mode 100644 index 0000000..7a6cdfe --- /dev/null +++ b/Beam/ByteDocument.cs @@ -0,0 +1,15 @@ +using System.Text; + +namespace Beam { + internal class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) { + public byte[] Content { get; set; } = content; + + public override byte[] ToBytes() { + return Content; + } + + public override string ToString() { + return Encoding.GetString(Content); + } + } +} diff --git a/Beam/DataBackedSourceLinkGenerator.cs b/Beam/DataBackedSourceLinkGenerator.cs new file mode 100644 index 0000000..bf178ce --- /dev/null +++ b/Beam/DataBackedSourceLinkGenerator.cs @@ -0,0 +1,9 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam { + public class DataBackedSourceLinkGenerator(PackagedSourceLinkGenerationData data, params object[] initialState) : DelegateBackedSourceLinkGenerator(data.GenerateLink, data.GetBehaviour(), initialState) {} +} diff --git a/Beam/DelegateBackedSourceLinkGenerator.cs b/Beam/DelegateBackedSourceLinkGenerator.cs new file mode 100644 index 0000000..78023fc --- /dev/null +++ b/Beam/DelegateBackedSourceLinkGenerator.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam { + public delegate DocumentSourceLink LinkGenerator(params 
object[] ps); + public delegate object Incrementor(object obj, int amount); + + public class DelegateBackedSourceLinkGenerator : IEnumerator { + public LinkGenerator Generator { get; set; } + public IncrementationBehaviour Behaviour { get; } + private object[] InitialState; + + public DelegateBackedSourceLinkGenerator(LinkGenerator generator, IncrementationBehaviour behaviour, params object[] initialState) { + Generator = generator; + Behaviour = behaviour; + InitialState = (object[])initialState.Clone(); + State = (object[])initialState.Clone(); + + Reset(); + } + + public object[] State { get; set; } + public DocumentSourceLink Current { get; private set; } + + object IEnumerator.Current => Current; + + public void Dispose() { + return; + } + + public bool MoveNext() { + Behaviour.Apply(State, 1); + Current = Generator(State); + return Current.HasValue; + } + + public void Reset() { + State = (object[])InitialState.Clone(); + Behaviour.Apply(State, -1); + Current = Generator(State); + } + } +} diff --git a/Beam/Document.cs b/Beam/Document.cs new file mode 100644 index 0000000..cacb842 --- /dev/null +++ b/Beam/Document.cs @@ -0,0 +1,13 @@ +using aeqw89.DataKeys; +using System.Text; + +namespace Beam { + public abstract class Document(string filename, Encoding? encoding = null) : IDocument { + public string Filename { get; set; } = filename; + public Encoding Encoding { get; set; } = encoding ?? 
Encoding.UTF8;
+        public Dictionary, IDocumentMetaData> MetaData { get; set; } = [];
+
+        public abstract byte[] ToBytes();
+        public override abstract string ToString();
+    }
+}
diff --git a/Beam/DocumentCache.cs b/Beam/DocumentCache.cs
new file mode 100644
index 0000000..a0b1b91
--- /dev/null
+++ b/Beam/DocumentCache.cs
@@ -0,0 +1,59 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+    /// <summary>
+    /// Holds a collection of <see cref="IDocument"/> objects in memory to facilitate lazy loading
+    /// </summary>
+    public class DocumentCache : Dictionary, IDisposable {
+        private bool disposedValue;
+
+        /// <summary>
+        /// Checks whether the estimated memory usage of the cached documents is below a limit.
+        /// </summary>
+        /// <param name="allocatedSpaceInBytes">The memory limit, in bytes.</param>
+        /// <returns><c>true</c> when <see cref="CalculateMemorySpaceUsage"/> is strictly less than the limit.</returns>
+        public bool IsCapacityLessThan(int allocatedSpaceInBytes) {
+            // Compare bytes against bytes; the number of entries is not a size.
+            return CalculateMemorySpaceUsage() < allocatedSpaceInBytes;
+        }
+
+        /// <summary>
+        /// Gets an estimate of the space used by the IDocument objects (disregarding metadata) in bytes.
+        /// </summary>
+        /// <returns>Estimated memory usage in bytes; 0 when the cache is empty.</returns>
+        public long CalculateMemorySpaceUsage() {
+            // Sum, unlike a seedless Aggregate, is defined for an empty cache.
+            return Values.Sum((x) => x.ToBytes().LongLength);
+        }
+
+        protected virtual void Dispose(bool disposing) {
+            if (!disposedValue) {
+                if (disposing) {
+                    // TODO: dispose managed state (managed objects)
+                    this.Clear();
+                }
+
+                // TODO: free unmanaged resources (unmanaged objects) and override finalizer
+                // TODO: set large fields to null
+                disposedValue = true;
+            }
+        }
+
+        // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources
+        // ~DocumentCache()
+        // {
+        //     // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+        //     Dispose(disposing: false);
+        // }
+
+        public void Dispose() {
+            // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/Beam/DocumentSourceLink.cs b/Beam/DocumentSourceLink.cs
new file mode 100644
index 0000000..d972e27
--- /dev/null
+++ b/Beam/DocumentSourceLink.cs
@@ -0,0 +1,35 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+    /// <summary>
+    /// Wraps a link string and exposes it as a <see cref="Uri"/>.
+    /// </summary>
+    public readonly struct DocumentSourceLink(string link) {
+        private readonly string Link_ { get; } = link;
+        public readonly Uri Link => new(Link_);
+
+        public bool HasValue => !string.IsNullOrWhiteSpace(Link_);
+
+        public static DocumentSourceLink InvalidLink { get; } = new("https://invalid.link");
+
+        public static bool operator ==(DocumentSourceLink lhs, DocumentSourceLink rhs) {
+            return lhs.Link == rhs.Link;
+        }
+        public static bool operator !=(DocumentSourceLink lhs, DocumentSourceLink rhs) {
+            return lhs.Link != rhs.Link;
+        }
+
+        public override bool Equals(object? obj) {
+            // Equal hash codes do not imply equal values; compare the links themselves.
+            return obj is DocumentSourceLink other && this == other;
+        }
+
+        public override int GetHashCode() {
+            return Link.GetHashCode();
+        }
+    }
+}
diff --git a/Beam/DownloadContext.cs b/Beam/DownloadContext.cs
new file mode 100644
index 0000000..4892d6a
--- /dev/null
+++ b/Beam/DownloadContext.cs
@@ -0,0 +1,89 @@
+using HtmlAgilityPack;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using FluentBuilder;
+using Microsoft.Extensions.Logging;
+
+namespace Beam {
+    public delegate T HtmlTransformer(HtmlDocument doc);
+    public delegate Task AsyncHtmlTransformer(HtmlDocument doc);
+
+    public class DownloadContext : IDisposable {
+        private bool disposedValue;
+
+        public HtmlWeb Web { get; }
+        public HtmlTransformer Transformer { get; }
+        public AsyncHtmlTransformer AsyncTranformer { get; }
+        public IProgress? DownloadReporter { get; set; }
+        public IProgress? 
RetryReporter { get; set; } + public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; } + public TimeSpan TimeOut { get; set; } + public IEnumerable Links { get; } + public CancellationToken CancellationToken { get; } + public DocumentCache Cache { get; private set; } = []; + public ILogger? DownloadLogger { get; set; } + + public DownloadContext(HtmlWeb web, + IEnumerable links, + CancellationToken cancellationToken = default, + HtmlTransformer? transformer = null, + AsyncHtmlTransformer? asyncTransformer = null, + IProgress? downloadReporter = null, + IProgress? retryReporter = null, + AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null, + TimeSpan? timeOut = null, + ILogger? downloadLogger = null) { + ArgumentNullException.ThrowIfNull(web, nameof(web)); + ArgumentNullException.ThrowIfNull(links, nameof(links)); + + Web = web; + Links = links; + CancellationToken = cancellationToken; + if (transformer is null && asyncTransformer is null) + throw new ArgumentException($"Either {nameof(transformer)} or {nameof(asyncTransformer)} must be not null."); + + Transformer = transformer!; + AsyncTranformer = asyncTransformer!; + if (transformer is null && asyncTransformer is not null) + Transformer = (x) => asyncTransformer(x).Result; + if (asyncTransformer is null && transformer is not null) + AsyncTranformer = (x) => Task.FromResult(transformer(x)); + + DownloadReporter = downloadReporter; + RetryReporter = retryReporter; + AsyncFailurePredicates = asyncFailurePredicates; + TimeOut = timeOut ?? 
TimeSpan.FromMinutes(1);
+            DownloadLogger = downloadLogger;
+        }
+
+        protected virtual void Dispose(bool disposing) {
+            if (!disposedValue) {
+                if (disposing) {
+                    // Dispose the owned cache (which clears it) rather than nulling a non-nullable property.
+                    Cache.Dispose();
+                }
+
+                // TODO: free unmanaged resources (unmanaged objects) and override finalizer
+                // TODO: set large fields to null
+                disposedValue = true;
+            }
+        }
+
+        // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources
+        // ~DownloadContext()
+        // {
+        //     // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+        //     Dispose(disposing: false);
+        // }
+
+        public void Dispose() {
+            // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/Beam/DownloadEnumerable.cs b/Beam/DownloadEnumerable.cs
new file mode 100644
index 0000000..6f46a65
--- /dev/null
+++ b/Beam/DownloadEnumerable.cs
@@ -0,0 +1,18 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+    /// <summary>
+    /// Exposes an already-created async enumerator as an async enumerable.
+    /// Every call to GetAsyncEnumerator returns that same instance and ignores the cancellation token.
+    /// </summary>
+    public class DownloadEnumerable(IAsyncEnumerator> download) : IAsyncEnumerable> {
+        public IAsyncEnumerator> Download { get; } = download;
+
+        public IAsyncEnumerator> GetAsyncEnumerator(CancellationToken cancellationToken = default)
+            => Download;
+    }
+}
diff --git a/Beam/Fragment.cs b/Beam/Fragment.cs
new file mode 100644
index 0000000..3549aa9
--- /dev/null
+++ b/Beam/Fragment.cs
@@ -0,0 +1,37 @@
+using System.Collections.Concurrent;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Beam {
+    public sealed class Fragment(int size) {
+        public int Size => FragmentBag.Count;
+        public int MaxSize { get; } = size;
+        private ConcurrentBag FragmentBag { get; set; } = new();
+        public bool TryTake([NotNullWhen(true)] out T? shard) {
+            return FragmentBag.TryTake(out shard) && shard != null;
+        }
+
+        private bool?
Complete = false;
+        public bool IsComplete => Complete ?? Size == MaxSize;
+
+        private bool UpdaterLocked = false;
+
+        public static bool TryAcquireUpdater(Fragment fragment, [NotNullWhen(true)] out Action? updater) {
+            updater = null;
+            if (Interlocked.CompareExchange(ref fragment.UpdaterLocked, true, false) == true)
+                // equivalent to : fragment.UpdaterLocked == true, side-effect: sets fragment.UpdaterLocked to true
+                return false;
+            updater = fragment.FragmentBag.Add;
+            return true;
+        }
+        public static bool TryReleaseUpdater(Fragment fragment, Action updater) {
+            if (updater == fragment.FragmentBag.Add) {
+                Interlocked.Exchange(ref fragment.UpdaterLocked, false);
+                return true;
+            }
+            return false;
+        }
+        public static void SetComplete(Fragment fragment, bool status) {
+            fragment.Complete = status;
+        }
+    }
+}
diff --git a/Beam/IAsyncEnumeratorExtensions.cs b/Beam/IAsyncEnumeratorExtensions.cs
new file mode 100644
index 0000000..4446557
--- /dev/null
+++ b/Beam/IAsyncEnumeratorExtensions.cs
@@ -0,0 +1,28 @@
+namespace Beam {
+    public static class IAsyncEnumeratorExtensions {
+        /// <summary>
+        /// Flattens an enumerator of fragments into an enumerator of the items those fragments hold.
+        /// Enumeration stops at the first null or incomplete fragment, and the source enumerator
+        /// is disposed when this enumerator finishes or is disposed.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="fragmented"/> is null (raised on the first MoveNextAsync call).</exception>
+        public static async IAsyncEnumerator UnwrapFragmented(this IAsyncEnumerator> fragmented) {
+            if (fragmented is null)
+                throw new ArgumentNullException(nameof(fragmented));
+            try {
+                while (await fragmented.MoveNextAsync().ConfigureAwait(false)) {
+                    var current = fragmented.Current;
+                    if (current is null || !current.IsComplete)
+                        yield break;
+                    while (current.TryTake(out var item)) {
+                        if (item is null)
+                            yield break;
+                        yield return item;
+                    }
+                }
+            } finally {
+                await fragmented.DisposeAsync().ConfigureAwait(false);
+            }
+        }
+    }
+}
diff --git a/Beam/IDocument.cs b/Beam/IDocument.cs
new file mode 100644
index 0000000..1f2f626
--- /dev/null
+++ b/Beam/IDocument.cs
@@ -0,0 +1,29 @@
+using aeqw89.DataKeys;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Beam {
+    public interface IDocument {
+        ///
+        /// The file name of the document.
Must be valid in both UNIX, + /// WINDOWS, APPLE, and ANDROID file systems. + /// + string Filename { get; } + + /// + /// Additional descriptive data + /// + Dictionary, IDocumentMetaData> MetaData { get; } + + /// + /// Retrieves the binary representation for the + /// + /// Binary representation of the + byte[] ToBytes(); + + /// + /// Retrieves the string representation for the + /// + /// String representation of the + string ToString(); + } +} diff --git a/Beam/IDocumentExtensions.cs b/Beam/IDocumentExtensions.cs new file mode 100644 index 0000000..4d4bed4 --- /dev/null +++ b/Beam/IDocumentExtensions.cs @@ -0,0 +1,7 @@ +using System.Diagnostics.CodeAnalysis; + +namespace Beam { + public static class IDocumentExtensions { + + } +} diff --git a/Beam/IDocumentMetaData.cs b/Beam/IDocumentMetaData.cs new file mode 100644 index 0000000..8242348 --- /dev/null +++ b/Beam/IDocumentMetaData.cs @@ -0,0 +1,7 @@ +using System.Text.Json; + +namespace Beam { + public interface IDocumentMetaData { + string AsJson(JsonSerializerOptions? 
options = null); + } +} \ No newline at end of file diff --git a/Beam/IDocumentSourceLinkFactory.cs b/Beam/IDocumentSourceLinkFactory.cs new file mode 100644 index 0000000..5e9c9dd --- /dev/null +++ b/Beam/IDocumentSourceLinkFactory.cs @@ -0,0 +1,11 @@ +namespace Beam { + internal interface IDocumentSourceLinkFactory { + DocumentSourceLink GetNextLink(DocumentSourceLink current); + DocumentSourceLink GetPrecedingLink(DocumentSourceLink current); + DocumentSourceLink GetArbitraryLink(DocumentSourceLink current, int offset) => offset switch { + 0 => current, + > 0 => GetArbitraryLink(GetNextLink(current), offset - 1), + < 0 => GetArbitraryLink(GetPrecedingLink(current), offset + 1) + }; + } +} diff --git a/Beam/IUnitDownloader.cs b/Beam/IUnitDownloader.cs new file mode 100644 index 0000000..a66c202 --- /dev/null +++ b/Beam/IUnitDownloader.cs @@ -0,0 +1,6 @@ +namespace Beam { + public interface IUnitDownloader { + public int LinksPerDownload { get; } + public Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? 
tryProgress = null);
+    }
+}
\ No newline at end of file
diff --git a/Beam/IncrementationBehaviour.cs b/Beam/IncrementationBehaviour.cs
new file mode 100644
index 0000000..6023ca4
--- /dev/null
+++ b/Beam/IncrementationBehaviour.cs
@@ -0,0 +1,28 @@
+namespace Beam {
+    /// <summary>
+    /// Defines how a url template should be updated, in what order, and by how much
+    /// </summary>
+    public struct IncrementationBehaviour {
+        /// <summary>
+        /// Maps an argument index to the function producing that argument's incremented value.
+        /// </summary>
+        public Dictionary Map { get; set; }
+
+        /// <summary>
+        /// Replaces every mapped entry of <paramref name="objects"/> with the string form of its incremented value.
+        /// </summary>
+        /// <param name="objects">The template arguments, updated in place</param>
+        /// <param name="amount">The amount handed to each increment function</param>
+        /// <exception cref="S.MapException">A mapped index lies outside <paramref name="objects"/></exception>
+        public readonly void Apply(object[] objects, int amount) {
+            ArgumentNullException.ThrowIfNull(objects);
+            if (Map is null)
+                return; // default(IncrementationBehaviour) carries no map, so there is nothing to update
+            foreach (var (i, inc) in Map) {
+                if (i < 0 || i >= objects.Length)
+                    throw new S.MapException(S.M.MapDoesNotMatchArgs);
+                objects[i] = inc(objects[i], amount)?.ToString();
+            }
+        }
+    }
+}
diff --git a/Beam/PackagedSourceLinkGenerationData.cs b/Beam/PackagedSourceLinkGenerationData.cs
new file mode 100644
index 0000000..50e3cd9
--- /dev/null
+++ b/Beam/PackagedSourceLinkGenerationData.cs
@@ -0,0 +1,18 @@
+namespace Beam {
+    public struct PackagedSourceLinkGenerationData {
+        public string Template { get; set; }
+        public int IndexOfChapterIndex { get; set; }
+
+        public readonly DocumentSourceLink GenerateLink(params object[] ps)
+            => new(string.Format(Template, ps));
+        public IncrementationBehaviour GetBehaviour() {
+            return new IncrementationBehaviour() {
+                Map = new Dictionary() { {
+                    IndexOfChapterIndex,
+                    (x, i) => int.Parse(x.ToString() ??
throw new ArgumentException()) + i + } + } + }; + } + } +} diff --git a/Beam/ParallelDownloader.cs b/Beam/ParallelDownloader.cs new file mode 100644 index 0000000..ebbfd2a --- /dev/null +++ b/Beam/ParallelDownloader.cs @@ -0,0 +1,78 @@ +using HtmlAgilityPack; +using System.Collections; +using System.Collections.Concurrent; + +namespace Beam { + public record Ordered(T Data, int Order); + [Obsolete("Use chunk downloader instead.")] + public class ParallelDownloader(DownloadContext context, int maximumConcurrentDownloads = 4) : IAsyncEnumerator> { + + public DownloadContext Context { get; } = context; + public int MaximumConcurrentDownloads { get; } = maximumConcurrentDownloads; + + private Task? CacheFiller { get; set; } + private int Count = 0; + private ConcurrentBag> Cache { get; set; } = []; + public Ordered Current { get; set; } + + private UnitDownloader GetUnitDownloader() + => new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates); + private ParallelOptions GetOptions() + => new() { + CancellationToken = Context.CancellationToken, + MaxDegreeOfParallelism = MaximumConcurrentDownloads + }; + + private async Task FillCache() { + List> chunk = []; + int i = 0; + foreach (var link in Context.Links.Take(MaximumConcurrentDownloads * 2)) + chunk.Add(new Ordered(link, i++)); + Console.WriteLine(chunk.Select((x) => $"{x.Order}: {x.Data.Link}").Aggregate((x, y) => $"{x}\n{y}")); + var unitDownloader = GetUnitDownloader(); + int downloadedCount = 0; + + await Parallel.ForEachAsync(chunk, GetOptions(), async (x, ct) => { + var (result, doc) = await unitDownloader.TryDownload([new Ordered(x.Data.Link.ToString(), x.Order)], ct, tryProgress: Context.RetryReporter); + if (!result || doc is null) { + Console.WriteLine($"FAILED to download {x.Data.Link}"); + return; + } + Cache.Add(new(doc, x.Order)); + Context.DownloadReporter?.Report(doc); + Interlocked.Increment(ref downloadedCount); + Interlocked.Increment(ref Count); + }); + + 
Console.WriteLine("Downloaded Chunk"); + CacheFiller = null; + } + + public async ValueTask MoveNextAsync() { + TimeSpan waited = TimeSpan.Zero; + TimeSpan delta = TimeSpan.FromSeconds(0.01); + while(waited < Context.TimeOut) { + if (Cache.Count < MaximumConcurrentDownloads && CacheFiller is null) // strange + CacheFiller ??= FillCache(); + + Cache.TryTake(out var k); + if (k is not null) { + Current = k; + return true; + } + + + + waited += delta; + await Task.Delay(delta); + } + + return false; + } + + public ValueTask DisposeAsync() { + GC.SuppressFinalize(this); + return ValueTask.CompletedTask; + } + } +} diff --git a/Beam/RegexGenerated.cs b/Beam/RegexGenerated.cs new file mode 100644 index 0000000..87509d4 --- /dev/null +++ b/Beam/RegexGenerated.cs @@ -0,0 +1,8 @@ +using System.Text.RegularExpressions; + +namespace Beam { + internal partial class RegexGenerated { + [GeneratedRegex("(? + /// The kind of exception that should never happen + /// + [Serializable] + public class AssertionException : Exception { + public AssertionException() { } + public AssertionException(string message) : base(message) { } + public AssertionException(string message, Exception inner) : base(message, inner) { } + protected AssertionException( + System.Runtime.Serialization.SerializationInfo info, + System.Runtime.Serialization.StreamingContext context) : base(info, context) { } + } + + public class M { + public const string MapDoesNotMatchArgs = "Error; Map contains indicies that exceed the argument list passed."; + public const string NewFragmentShouldBeFree = "Assertion Error: Could not acquire lock of newly created fragment"; + public const string LinksCannotBeEmpty = "Cannot construct downloader with empty links collection!"; + } + } +} diff --git a/Beam/SequentialChunkDownloader.cs b/Beam/SequentialChunkDownloader.cs new file mode 100644 index 0000000..c2fdd28 --- /dev/null +++ b/Beam/SequentialChunkDownloader.cs @@ -0,0 +1,100 @@ +using Microsoft.Extensions.Logging; +using 
System.Collections.Concurrent; + +namespace Beam { + public class SequentialFragmentDownloader : SequentialDownloader>> { + public SequentialFragmentDownloader( + DownloadContext context, + Func, IUnitDownloader>>> getUnitDownloader, + ILogger? logger = null) + : base(context, getUnitDownloader, logger) {} + } + + // public class SequentialChunkDownloader : IAsyncEnumerator>> { + // public Fragment> Current { get; protected set; } + // public DownloadContext Context { get; } + // protected IEnumerator LinksEnumerator; + // protected ConcurrentQueue>> DownloadQueue { get; set; } = []; + // public int ChunkSize { get; } + + // private ILogger? Logger => Context.DownloadLogger; + + // public UnitDownloader GetUnitDownloader() + // => new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates); + + // public SequentialChunkDownloader(DownloadContext context, int chunkSize) { + // Context = context; + // LinksEnumerator = Context.Links.GetEnumerator(); + // Current = new Fragment>(0); + // ChunkSize = chunkSize; + // } + + // public ValueTask DisposeAsync() { + // GC.SuppressFinalize(this); + // return ValueTask.CompletedTask; + // } + + // protected Task? 
DownloadsTask = null; + // protected virtual async Task ProcessDownloads() { + // if (DownloadQueue.IsEmpty) + // return true; + // if (DownloadsTask is null) { + // DownloadsTask = Task.Run(async () => { + // if (!DownloadQueue.TryDequeue(out var fragment)) + // return true; // no fragments left, likely race condition but return true as technically all items have been downloaded + // var unit = GetUnitDownloader(); // instantiates unit downloader per request (okay) + // if (!Fragment>.TryAcquireUpdater(fragment, out var updater)) { // gets the add method for the current fragment + // Logger?.LogError("Failed to acquire updater for fragment {{{}}}", fragment.GetHashCode()); + // return false; // fragment is unsafe to modify + // } + // try { + // var links = Enumerable.Range(0, ChunkSize).Select((x) => { + // if (!LinksEnumerator.MoveNext()) + // return new Ordered(DocumentSourceLink.InvalidLink, -1); // stops link collection if end-of-links is reached + // return new Ordered(LinksEnumerator.Current, x); + // }).Where((x) => x.Data != DocumentSourceLink.InvalidLink); // filter invalid links + // await Parallel.ForEachAsync(links, async (x, ct) => { + // Logger?.LogInformation("Started download for {} order={}", x.Data.Link, x.Order); + // var (result, downloadedT) = await unit.TryDownload( // download (parallel) objects + // x.Data.Link.ToString(), // use link from links collection (exposed as x) + // ct, // use ct provided with method call + // tryProgress: Context.RetryReporter); + // if (!result) { // download failure (soft because it was detected) + // Logger?.LogError("Failed to retrieve {} order={}", x.Data.Link, x.Order); + // return; + // } + // if (downloadedT is null) { // download failure (hard because it was not detected) + // Logger?.LogCritical("Failed to retrieve {} order={}", x.Data.Link, x.Order); + // return; + // } + // Logger?.LogInformation("Retrieved {} order={} successfully", x.Data.Link, x.Order); + // updater(new Ordered(downloadedT, 
x.Order)); // update the fragment + // }); + // Fragment>.SetComplete(fragment, true); + // } finally { + // Fragment>.TryReleaseUpdater(fragment, updater); // returns updater to allow modification + // } + + + // return fragment.Size == fragment.MaxSize; + // }); + // } + // if (DownloadsTask.IsCompleted) { + // DownloadsTask = null; + // return await ProcessDownloads(); + // } + // return true; // if task is still processing return should be neither true or false... + // } + + // public async ValueTask MoveNextAsync() { + // if (Current.IsComplete && Current.Size < Current.MaxSize) + // return false; // if a fragment is marked complete despite being unsaturated, we've run out links! + // if (DownloadQueue.Count == 0) { + // Current = new Fragment>(ChunkSize); + // DownloadQueue.Enqueue(Current); + // } + + // return await ProcessDownloads(); + // } + // } +} diff --git a/Beam/SequentialDownloader.cs b/Beam/SequentialDownloader.cs new file mode 100644 index 0000000..402ad14 --- /dev/null +++ b/Beam/SequentialDownloader.cs @@ -0,0 +1,63 @@ +using HtmlAgilityPack; +using Microsoft.Extensions.Logging; + +namespace Beam { + public class SequentialDownloader : IAsyncEnumerator { + public TOutput Current { get; protected set; } + public DownloadContext Context { get; } + public ILogger? Logger { get; set; } + public int LastOrder { get; set; } = 0; + + protected IEnumerator LinksEnumerator; + + public Func> GetUnitDownloader { get; set; } + + public SequentialDownloader(DownloadContext context, Func, IUnitDownloader> getUnitDownloader, ILogger? 
logger = null) {
+            Context = context;
+            Logger = logger;
+            LinksEnumerator = Context.Links.GetEnumerator();
+            // Rewind when the enumerator supports it; iterator-generated enumerators throw from Reset().
+            try { LinksEnumerator.Reset(); } catch (NotSupportedException) { }
+            if (!LinksEnumerator.MoveNext())
+                throw new ArgumentOutOfRangeException(S.M.LinksCannotBeEmpty);
+            Current = default(TOutput);
+            GetUnitDownloader = () => getUnitDownloader(Context);
+        }
+
+        // True while LinksEnumerator.Current is a link that has not been queued for download yet.
+        private bool HasPendingLink = true;
+
+        public ValueTask DisposeAsync() {
+            GC.SuppressFinalize(this);
+            return ValueTask.CompletedTask;
+        }
+
+        /// <summary>Downloads the next batch of at most LinksPerDownload links into Current.</summary>
+        /// <returns>false once the links are exhausted or the batch could not be downloaded</returns>
+        public async ValueTask MoveNextAsync() {
+            if (!HasPendingLink) {
+                Logger?.LogInformation("Out of links!");
+                return false;
+            }
+            var unit = GetUnitDownloader(); // safe to instantiate per request.
+            var idealLinkCount = unit.LinksPerDownload;
+            List> links = [];
+            // Queue the pending link, then advance; the enumerator is left on the next batch's first link.
+            do {
+                links.Add(new Ordered(LinksEnumerator.Current.Link.ToString(), LastOrder++));
+                HasPendingLink = LinksEnumerator.MoveNext();
+            } while (HasPendingLink && links.Count < idealLinkCount);
+            var (result, downloadedT) = await unit.TryDownload(
+                links.ToArray(),
+                Context.CancellationToken,
+                tryProgress: Context.RetryReporter);
+            if (!result || downloadedT is null) {
+                Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name);
+                return false; // unit download failed
+            }
+
+            Current = downloadedT;
+            return true;
+        }
+    }
+}
diff --git a/Beam/SourceLinkEnumerable.cs b/Beam/SourceLinkEnumerable.cs
new file mode 100644
index 0000000..3bc2b2f
--- /dev/null
+++ b/Beam/SourceLinkEnumerable.cs
@@ -0,0 +1,27 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+    public class SourceLinkEnumerable : IEnumerable {
+        private SourceLinkEnumerable(IEnumerator enumerator) {
+            Enumerator = enumerator;
+        }
+
+        public IEnumerator Enumerator { get; }
+
+        public static SourceLinkEnumerable
FromGenerator(IEnumerator generator)
+            => new SourceLinkEnumerable(generator);
+
+        public IEnumerator GetEnumerator() {
+            return Enumerator;
+        }
+
+        IEnumerator IEnumerable.GetEnumerator() {
+            return Enumerator;
+        }
+    }
+}
diff --git a/Beam/StreamDocument.cs b/Beam/StreamDocument.cs
new file mode 100644
index 0000000..afcbd49
--- /dev/null
+++ b/Beam/StreamDocument.cs
@@ -0,0 +1,36 @@
+using System.Text;
+
+namespace Beam {
+    /// <summary>
+    /// A document whose bytes come from a <see cref="Stream"/>.
+    /// </summary>
+    internal class StreamDocument(string filename, Stream content, Encoding? encoding = null) : Document(filename) {
+        public Stream Content { get; set; } = content;
+        public Encoding Encoding { get; set; } = encoding ?? Encoding.UTF8;
+
+        byte[] Content_ { get; set; } = [];
+        // The stream Content_ was read from; assigning a different Content triggers a fresh read.
+        Stream? BufferedFrom { get; set; }
+
+        /// <summary>
+        /// Reads <see cref="Content"/> from its current position to the end, once per stream instance.
+        /// </summary>
+        private byte[] ReadContent() {
+            if (!ReferenceEquals(BufferedFrom, Content)) {
+                using var buffer = new MemoryStream();
+                Content.CopyTo(buffer);
+                Content_ = buffer.ToArray();
+                BufferedFrom = Content;
+            }
+            return Content_;
+        }
+
+        public override byte[] ToBytes() {
+            return ReadContent();
+        }
+
+        public override string ToString() {
+            return Encoding.GetString(ReadContent());
+        }
+    }
+}
diff --git a/Beam/StringDocument.cs b/Beam/StringDocument.cs
new file mode 100644
index 0000000..a7fcf10
--- /dev/null
+++ b/Beam/StringDocument.cs
@@ -0,0 +1,19 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+    public class StringDocument(string filename, string content, Encoding? encoding = null) : Document(filename, encoding) {
+        public string Content { get; set; } = content;
+
+        public override byte[] ToBytes() {
+            return Encoding.GetBytes(Content);
+        }
+
+        public override string ToString() {
+            return Content;
+        }
+    }
+}
diff --git a/Beam/UnitDownloader.cs b/Beam/UnitDownloader.cs
new file mode 100644
index 0000000..7370ae3
--- /dev/null
+++ b/Beam/UnitDownloader.cs
@@ -0,0 +1,66 @@
+using HtmlAgilityPack;
+
+namespace Beam {
+    public delegate Task AsyncDownloadFailurePredicate(T download);
+
+    ///
+    /// A download managing class that manages a singular download with failure-detection and exponential-backoff retries. This class is safe to instantiate per request.
+    ///
+    ///
+    ///
+    ///
+    ///
+    public class UnitDownloader(HtmlWeb web, AsyncHtmlTransformer transformer, AsyncDownloadFailurePredicate?[]? failurePredicate = null) : IUnitDownloader {
+        public HtmlWeb Web { get; } = web;
+        public virtual AsyncHtmlTransformer Transformer { get; } = transformer;
+        public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicate;
+
+        public int LinksPerDownload { get; } = 1;
+
+        protected virtual async Task IsFailure(HtmlDocument doc) {
+            if (FailurePredicates is null)
+                return false;
+            var failed = false;
+            await Parallel.ForEachAsync(FailurePredicates, async (x, ct) => {
+                if (failed == true)
+                    return;
+                if (x is null)
+                    return;
+                if (await x(doc))
+                    failed = true;
+            });
+
+            return failed;
+        }
+
+        protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
+            try {
+                var html = await Web.LoadFromWebAsync(link, ct);
+                if (FailurePredicates is null || !(await IsFailure(html)))
+                    return (true, await Transformer(html));
+                else
+                    return (false, default);
+            } catch (Exception) when (!ct.IsCancellationRequested) { // let cancellation propagate instead of counting it as a failed attempt
+                return (false, default);
+            }
+        }
+
+        public async Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? tryProgress = null) {
+            if (link.Length == 0)
+                return (false, default);
+
+            T?
doc = default; + int tryCount = 0; + while (tryCount < maximumRetryCount) { + ct.ThrowIfCancellationRequested(); + (var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct); + if (success && doc != null) + return (true, doc); + tryProgress?.Report(++tryCount); + await Task.Delay((int)Math.Pow(2, tryCount) * 1000); + } + + return (false, doc); + } + } +} diff --git a/Beam/UnitFragmentDownloader.cs b/Beam/UnitFragmentDownloader.cs new file mode 100644 index 0000000..380a910 --- /dev/null +++ b/Beam/UnitFragmentDownloader.cs @@ -0,0 +1,64 @@ +using HtmlAgilityPack; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam { + public class UnitFragmentDownloader : IUnitDownloader>> { + public UnitFragmentDownloader(HtmlWeb web, + AsyncHtmlTransformer transformer, + AsyncDownloadFailurePredicate?[]? failurePredicate = null, + int fragmentSize = 4, + ILogger? logger = null) { + Web = web; + Transformer = transformer; + FailurePredicate = failurePredicate; + UnitDownloader = new UnitDownloader(Web, Transformer, FailurePredicate); + LinksPerDownload = fragmentSize; + Logger = logger; + } + + public HtmlWeb Web { get; } + public AsyncHtmlTransformer Transformer { get; } + public AsyncDownloadFailurePredicate?[]? FailurePredicate { get; } + public int LinksPerDownload { get; set; } + public ILogger? Logger { get; set; } + + private readonly UnitDownloader UnitDownloader; + + async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount, IProgress? 
tryProgress) { + Fragment> fragment = new Fragment>(link.Length); + if (!Fragment>.TryAcquireUpdater(fragment, out var updater)) + throw new S.AssertionException(S.M.NewFragmentShouldBeFree); + bool isFailure = false; + await Parallel.ForEachAsync(link, async (x, pct) => { + pct.ThrowIfCancellationRequested(); + ct.ThrowIfCancellationRequested(); + var (result, downloadedT) = await UnitDownloader.TryDownload([x], ct, maximumRetryCount, tryProgress); + if (!result) { + Interlocked.Exchange(ref isFailure, true); + Logger?.LogError("Failed to retrieve {} order={}", x.Data, x.Order); + return; + } + if (downloadedT == null) { + Interlocked.Exchange(ref isFailure, true); + Logger?.LogCritical("Failed to retrieve {} order={}", x.Data, x.Order); + return; + } + updater(new Ordered(downloadedT, x.Order)); + }); + + if (!isFailure) + Fragment>.SetComplete(fragment, true); + + Fragment>.TryReleaseUpdater(fragment, updater); + + return (!isFailure, fragment); + + } + } +}