Add project files.
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Reference Include="aeqw89.DataKeys">
|
||||
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="aeqw89.PersistentData">
|
||||
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
|
||||
</Reference>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,68 @@
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using HtmlAgilityPack;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Describes where a value lives inside an <see cref="HtmlDocument"/>. The location is taken,
/// in order of precedence, from <see cref="XPath"/>, from <see cref="CssPath"/> (class names
/// separated by '/'), or from a custom <see cref="Provider"/>.
/// </summary>
public class Binding(DataKey<Binding> key) : IKeyed<Binding> {
    /// <summary>Creates a binding whose key is built from <paramref name="key"/>.</summary>
    public Binding(string key) : this(new DataKey<Binding>(key)) { }

    /// <summary>Creates a binding with an empty key.</summary>
    public Binding() : this("") { }

    /// <summary>Key identifying this binding.</summary>
    [JsonRequired]
    public DataKey<Binding> Key { get; set; } = key;

    /// <summary>Selects which shape <see cref="Resolve"/> produces.</summary>
    [JsonRequired]
    public BindingType Type { get; set; }

    /// <summary>Separator handed to <see cref="string.Split(string?, StringSplitOptions)"/> by <see cref="ResolveArray"/>.</summary>
    public string? ArrayDelimiters { get; set; }

    /// <summary>XPath expression evaluated against the document root; takes precedence over the other locators.</summary>
    public string? XPath { get; set; }

    /// <summary>'/'-separated chain of class names followed from the document root via <c>ThenByClasses</c>.</summary>
    public string? CssPath { get; set; }

    private IDataProvider? _provider;

    /// <summary>
    /// Custom provider used when neither <see cref="XPath"/> nor <see cref="CssPath"/> is set.
    /// Assigning <c>null</c> is ignored, so a provider that has been set cannot be cleared.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The provider's runtime type has no public parameterless constructor.
    /// </exception>
    public IDataProvider? Provider {
        get => _provider;
        set {
            if (value is null)
                return;

            // GetConstructor(Type[]) only finds public instance constructors, so a null
            // result already means "no public parameterless constructor".
            // NOTE(review): presumably required so providers can be re-created during
            // deserialization - confirm.
            if (value.GetType().GetConstructor([]) is null)
                throw new InvalidOperationException(
                    $"Data provider type '{value.GetType()}' must expose a public parameterless constructor.");

            _provider = value;
        }
    }

    // Looks the node up through XPath or CssPath; returns false when neither is configured.
    private bool TrySelectNode(HtmlDocument doc, out HtmlNode? node) {
        if (XPath is not null) {
            node = doc.DocumentNode.SelectSingleNode(XPath);
            return true;
        }

        if (CssPath is not null) {
            node = doc.DocumentNode.ThenByClasses(CssPath.Split('/'));
            return true;
        }

        node = null;
        return false;
    }

    /// <summary>Finds the node this binding points at, or <c>null</c> when nothing matches or no locator is configured.</summary>
    /// <param name="doc">Document to search.</param>
    public HtmlNode? ResolveNode(HtmlDocument doc) {
        if (TrySelectNode(doc, out var node))
            return node;

        return Provider?.GetNode(doc);
    }

    /// <summary>Returns the text this binding points at, or an empty string when nothing matches.</summary>
    /// <param name="doc">Document to search.</param>
    public string ResolveString(HtmlDocument doc) {
        if (TrySelectNode(doc, out var node))
            return node?.InnerText ?? "";

        return Provider?.Get(doc) ?? "";
    }

    /// <summary>
    /// Splits the resolved text on <see cref="ArrayDelimiters"/>. Returns an empty array unless
    /// <see cref="Type"/> is <see cref="BindingType.Array"/>.
    /// </summary>
    /// <param name="doc">Document to search.</param>
    public string[] ResolveArray(HtmlDocument doc) {
        if (Type is not BindingType.Array)
            return [];

        return ResolveString(doc).Split(ArrayDelimiters);
    }

    /// <summary>
    /// Resolves the binding according to <see cref="Type"/>: a string for
    /// <see cref="BindingType.Single"/>, a string array for <see cref="BindingType.Array"/>,
    /// the provider's text (or <c>null</c> without a provider) for
    /// <see cref="BindingType.UseProvider"/>, and <c>null</c> for any other value.
    /// </summary>
    /// <param name="doc">Document to search.</param>
    public dynamic? Resolve(HtmlDocument doc) => Type switch {
        BindingType.Single => ResolveString(doc),
        BindingType.Array => ResolveArray(doc),
        BindingType.UseProvider => Provider?.Get(doc),
        _ => null
    };
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>Selects the shape of value a binding resolves to.</summary>
public enum BindingType {
    /// <summary>The binding resolves to one string.</summary>
    Single = 0,

    /// <summary>The binding resolves to an array of strings.</summary>
    Array = 1,

    /// <summary>The binding defers to its data provider.</summary>
    UseProvider = 2
}
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// The set of bindings that describe where each piece of article data is found in a page.
/// Any binding may be left unset.
/// </summary>
public class DataBindings {
    /// <summary>Binding for the title.</summary>
    public Binding? Title { get; set; }

    /// <summary>Binding for the authors.</summary>
    public Binding? Authors { get; set; }

    /// <summary>Binding for the description.</summary>
    public Binding? Description { get; set; }

    /// <summary>Binding for the main content.</summary>
    public Binding? Content { get; set; }

    /// <summary>Binding for the language.</summary>
    public Binding? Language { get; set; }

    /// <summary>Binding for the tags.</summary>
    public Binding? Tags { get; set; }

    /// <summary>
    /// Resolves every binding against <paramref name="doc"/>. The array-valued members fall
    /// back to an empty array when their binding is unset or resolves to <c>null</c>.
    /// </summary>
    /// <remarks>
    /// <c>Binding.Resolve</c> returns a <c>dynamic</c> value, so each assignment below is
    /// converted to the target property type at run time rather than checked at compile time.
    /// </remarks>
    /// <param name="doc">Document to resolve the bindings against.</param>
    /// <returns>The resolved values.</returns>
    public ResolvedBindings Resolve(HtmlDocument doc) => new ResolvedBindings {
        Title = Title?.Resolve(doc),
        Authors = Authors?.Resolve(doc) ?? Array.Empty<string>(),
        Language = Language?.Resolve(doc) ?? Array.Empty<string>(),
        Content = Content?.Resolve(doc),
        Description = Description?.Resolve(doc),
        Tags = Tags?.Resolve(doc) ?? Array.Empty<string>()
    };
}
|
||||
|
||||
/// <summary>Plain container for the values produced by resolving a set of bindings.</summary>
public class ResolvedBindings {
    /// <summary>Resolved title, if any.</summary>
    public string? Title { get; set; }

    /// <summary>Resolved authors, if any.</summary>
    public string[]? Authors { get; set; }

    /// <summary>Resolved description, if any.</summary>
    public string? Description { get; set; }

    /// <summary>Resolved main content, if any.</summary>
    public string? Content { get; set; }

    /// <summary>Resolved language entries, if any.</summary>
    public string[]? Language { get; set; }

    /// <summary>Resolved tags, if any.</summary>
    public string[]? Tags { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>Helpers for walking down an <see cref="HtmlNode"/> tree and extracting text.</summary>
public static partial class HtmlNodeExtensions {
    /// <summary>
    /// Descends from <paramref name="node"/> one level per entry of <paramref name="classes"/>,
    /// each time taking the first child carrying that class.
    /// </summary>
    /// <returns>The node reached, or <c>null</c> as soon as a step finds no child.</returns>
    public static HtmlNode? ThenByClasses(this HtmlNode node, params string[] classes) {
        return node.DescendCollectionTree(ThenByClass, classes);
    }

    /// <summary>
    /// Takes the first child with class <paramref name="class"/>, repeating the step
    /// <paramref name="count"/> times from the node found.
    /// </summary>
    public static HtmlNode? ThenByClass(this HtmlNode node, string @class, int count = 1) {
        return node.ThenByFunc(
            (current) => current.ChildNodes.FirstOrDefault((child) => child.HasClass(@class)),
            count);
    }

    /// <summary>
    /// Takes the first child whose tag name equals <paramref name="name"/>, repeating the step
    /// <paramref name="count"/> times from the node found.
    /// </summary>
    public static HtmlNode? ThenByName(this HtmlNode node, string name, int count = 1) {
        return node.ThenByFunc(
            (current) => current.ChildNodes.FirstOrDefault((child) => child.Name == name),
            count);
    }

    /// <summary>
    /// Descends from <paramref name="node"/> one level per entry of <paramref name="name"/>,
    /// each time taking the first child with that tag name.
    /// </summary>
    public static HtmlNode? ThenByNames(this HtmlNode node, params string[] name) {
        return node.DescendCollectionTree(ThenByName, name);
    }

    /// <summary>
    /// Overload for step functions that take a repeat count; every step is run with a count of 1.
    /// </summary>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, int, HtmlNode?> func, params T[] values) {
        return node.DescendCollectionTree((current, value) => func(current, value, 1), values);
    }

    /// <summary>
    /// Applies <paramref name="func"/> once per entry of <paramref name="values"/>, feeding each
    /// result into the next step, and stops early when a step yields <c>null</c>.
    /// </summary>
    /// <returns>The last node reached; <paramref name="node"/> itself when there are no values.</returns>
    public static HtmlNode? DescendCollectionTree<T>(this HtmlNode node, Func<HtmlNode, T, HtmlNode?> func, params T[] values) {
        HtmlNode? result = node;
        foreach (var value in values) {
            if (result is null)
                return result;
            result = func(result, value);
        }

        return result;
    }

    /// <summary>
    /// Applies <paramref name="func"/> to <paramref name="node"/> and then to each result in
    /// turn, <paramref name="count"/> times in total (at least once), stopping on <c>null</c>.
    /// </summary>
    public static HtmlNode? ThenByFunc(this HtmlNode node, Func<HtmlNode, HtmlNode?> func, int count = 1) {
        HtmlNode? current = func(node);
        for (int remaining = count; remaining > 1 && current is not null; remaining--)
            current = func(current);

        return current;
    }

    /// <summary>
    /// Splits the node's inner text on <paramref name="separators"/> (used as one separator
    /// string), dropping empty entries. Returns <c>null</c> for a <c>null</c> node.
    /// </summary>
    public static string[]? SplitInnerText(this HtmlNode? node, string separators)
        => node?.InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    /// <summary>
    /// Filter for <see cref="InnerLineSeparatedText"/>: true when the node's tag name is
    /// exactly <c>p</c> or <c>h</c> followed by one digit.
    /// </summary>
    public static bool TextNodesOnly(HtmlNode n) => TextNodesOnlyRegex().IsMatch(n.Name);

    /// <summary>
    /// Joins the inner text of the node's children with "\n", skipping children rejected by
    /// <paramref name="filter"/> and children whose text is null or whitespace.
    /// </summary>
    /// <returns>The joined text, or an empty string when nothing qualifies.</returns>
    public static string InnerLineSeparatedText(this HtmlNode? node, Func<HtmlNode, bool>? filter = null) {
        if (node?.ChildNodes is not { Count: > 0 } children)
            return "";

        IEnumerable<HtmlNode> selected = filter is null ? children : children.Where(filter);
        var lines = selected
            .Select((child) => child.InnerText)
            .Where((text) => !string.IsNullOrWhiteSpace(text));

        return string.Join("\n", lines);
    }

    // Anchored so that names merely containing "p" (span, input, pre, ...) no longer match.
    [GeneratedRegex("^(?:p|h\\d)\\z")]
    private static partial Regex TextNodesOnlyRegex();
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Custom extraction strategy a binding can delegate to. The known implementations are
/// registered below for polymorphic JSON serialization.
/// </summary>
[System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20)]
[System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)]
public interface IDataProvider {
    /// <summary>Extracts this provider's text from <paramref name="document"/>.</summary>
    /// <param name="document">Document to read from.</param>
    string Get(HtmlDocument document);

    /// <summary>Returns the node this provider reads from, or <c>null</c> when none is found.</summary>
    /// <param name="document">Document to read from.</param>
    HtmlNode? GetNode(HtmlDocument document);
}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Text;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Data provider that flattens the <c>li</c> children of a node into one ';'-separated string.
/// </summary>
public class ListContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the list node; nothing is produced while it is unset.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Returns the trimmed text of every direct <c>li</c> child of the bound node, joined
    /// with ';'. Non-<c>li</c> children (such as whitespace text nodes) are skipped, so the
    /// result never carries a dangling separator.
    /// </summary>
    /// <param name="document">Document to read from.</param>
    /// <returns>The joined items, or an empty string when the node is missing or has no items.</returns>
    public string Get(HtmlDocument document) {
        var node = Content?.ResolveNode(document);
        if (node is null)
            return "";

        var items = node.ChildNodes
            .Where((child) => child.Name == "li")
            .Select((child) => child.InnerText.Trim());

        // Join copes with an empty sequence, where the previous Last() call threw.
        return string.Join(";", items);
    }

    /// <summary>Returns the bound list node, or <c>null</c> when it cannot be found.</summary>
    /// <param name="document">Document to read from.</param>
    public HtmlNode? GetNode(HtmlDocument document) {
        return Content?.ResolveNode(document);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using System.Web;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>Cleans text scraped from web pages by decoding HTML entities and character references.</summary>
public static partial class OnlineCleaner {
    /// <summary>
    /// Matches numeric character references of up to four characters, such as
    /// <c>&amp;#8217;</c> or <c>&amp;#x2019;</c>. The character class is wider than digits, so
    /// some matches are not valid numbers.
    /// </summary>
    [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
    public static partial Regex MochaBlendUnicodeEscapeSequence();

    // Replaces every numeric character reference with the character it denotes. Matches whose
    // payload is not a valid number are left untouched instead of throwing.
    private static string UnicodeEscapeSequences(string text) {
        return MochaBlendUnicodeEscapeSequence().Replace(text, static (match) => {
            // Strip the leading "&#" and the trailing ";". The pattern guarantees at least
            // one character remains.
            string reference = match.Value[2..^1];
            bool isHex = reference[0] == 'x';
            string digits = isHex ? reference[1..] : reference;
            var style = isHex
                ? System.Globalization.NumberStyles.AllowHexSpecifier
                : System.Globalization.NumberStyles.None;

            if (!int.TryParse(digits, style, System.Globalization.CultureInfo.InvariantCulture, out int codeUnit))
                return match.Value;

            // At most four digits, so the value always fits one UTF-16 code unit. A lone
            // surrogate is not valid text and becomes U+FFFD, as the previous byte-decoding did.
            char decoded = (char)codeUnit;
            return char.IsSurrogate(decoded) ? "\uFFFD" : decoded.ToString();
        });
    }

    /// <summary>
    /// HTML-decodes <paramref name="onlineText"/> and then resolves any numeric character
    /// references still present.
    /// </summary>
    /// <param name="onlineText">Raw text; may be <c>null</c>.</param>
    /// <returns>The cleaned text, or an empty string for null or whitespace-only input.</returns>
    public static string Clean(string? onlineText) {
        if (string.IsNullOrWhiteSpace(onlineText))
            return "";

        // The decoded text, not the raw input, is fed to the second pass.
        var decoded = HttpUtility.HtmlDecode(onlineText);
        return UnicodeEscapeSequences(decoded);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
/// <summary>
/// Data provider that concatenates the text of the <c>p</c> children of a node, one per line.
/// </summary>
public class ParagraphedContentDataProvider : IDataProvider {
    /// <summary>Binding that locates the container node; nothing is produced while it is unset.</summary>
    public Binding? Content { get; set; }

    /// <summary>
    /// Returns the inner text of every direct <c>p</c> child of the bound node, each followed
    /// by the platform line terminator.
    /// </summary>
    /// <param name="document">Document to read from.</param>
    /// <returns>The paragraph text, or an empty string when the node is missing.</returns>
    public string Get(HtmlDocument document) {
        HtmlNode? container = Content?.ResolveNode(document);
        if (container is null)
            return "";

        var paragraphLines = container.ChildNodes
            .Where((child) => child.Name == "p")
            .Select((paragraph) => paragraph.InnerText + Environment.NewLine);

        return string.Concat(paragraphLines);
    }

    /// <summary>Returns the bound container node, or <c>null</c> when it cannot be found.</summary>
    /// <param name="document">Document to read from.</param>
    public HtmlNode? GetNode(HtmlDocument document) => Content?.ResolveNode(document);
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,37 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Exports {
|
||||
/// <summary>Exports a document's text to a file, synchronously or asynchronously.</summary>
public class PlainTextExporter : IExporter, IAsyncExporter {
    /// <summary>Creates an exporter for <paramref name="document"/>.</summary>
    public PlainTextExporter(IDocument document) {
        Document = document;
    }

    /// <summary>The document being exported.</summary>
    public IDocument Document { get; }

    /// <summary>Produces the text to write; the base implementation uses the document's <c>ToString()</c>.</summary>
    protected virtual string Convert() {
        return Document.ToString();
    }

    /// <summary>Asynchronous counterpart of <see cref="Convert"/>.</summary>
    protected virtual Task<string> ConvertAsync() {
        return Task.FromResult(Document.ToString());
    }

    /// <summary>Writes the converted text to <paramref name="path"/> as UTF-16.</summary>
    /// <param name="path">Destination file path.</param>
    /// <exception cref="ArgumentException">The directory part of <paramref name="path"/> does not exist.</exception>
    public virtual void Write(string path) {
        var text = Convert();
        ThrowIfDirectoryMissing(path);
        File.WriteAllText(path, text, Encoding.Unicode);
    }

    /// <summary>Asynchronously writes the converted text to <paramref name="path"/> as UTF-16.</summary>
    /// <param name="path">Destination file path.</param>
    /// <exception cref="ArgumentException">The directory part of <paramref name="path"/> does not exist.</exception>
    public virtual async Task WriteAsync(string path) {
        var text = await ConvertAsync();
        // Same validation and encoding as Write. The previous check tested the file path
        // itself as a directory, so this method could never succeed.
        ThrowIfDirectoryMissing(path);
        await File.WriteAllTextAsync(path, text, Encoding.Unicode);
    }

    // Rejects paths whose containing directory is absent. A bare file name has an empty
    // directory part and refers to the current directory, so it is accepted.
    private static void ThrowIfDirectoryMissing(string path) {
        var directory = Path.GetDirectoryName(path);
        if (directory is null || (directory.Length > 0 && !Directory.Exists(directory)))
            throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path));
    }
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Beam.Exports {
|
||||
/// <summary>
/// Exports a document as HTML: every line becomes a paragraph, optionally preceded by a
/// title and framed by link buttons.
/// </summary>
public class HtmlExporter : PlainTextExporter {
    /// <summary>Creates an HTML exporter.</summary>
    /// <param name="document">Document to export.</param>
    /// <param name="meta">Optional article data; its name becomes the heading.</param>
    /// <param name="linkButtons">Optional buttons, keyed by label with the link target as value.</param>
    /// <param name="eofHtml">Optional markup appended after the trailing buttons.</param>
    public HtmlExporter(IDocument document,
        ArticleData? meta = null,
        Dictionary<string, string>? linkButtons = null,
        string? eofHtml = null) : base(document) {
        Meta = meta;
        LinkButtons = linkButtons;
        EofHtml = eofHtml;
    }

    /// <summary>Article data supplying the heading, if any.</summary>
    public ArticleData? Meta { get; }

    /// <summary>Buttons rendered above and below the content (label to link target), if any.</summary>
    public Dictionary<string, string>? LinkButtons { get; }

    /// <summary>Markup appended after the trailing buttons, if any.</summary>
    public string? EofHtml { get; }

    /// <summary>
    /// Builds the HTML. Without <see cref="Meta"/> only the paragraphs are returned; without
    /// link buttons the heading plus paragraphs are returned. Only when buttons exist is the
    /// result given <see cref="EofHtml"/> and wrapped in a doctype and <c>html</c> element.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the two early returns drop <see cref="EofHtml"/> and the document wrapper;
    /// confirm that is intended. Values are inserted without HTML escaping.
    /// </remarks>
    protected override string Convert() {
        // Convert newlines to <p></p> tags
        string paragraphs = "<p>" + Document.ToString().Replace("\n", "</p><p>") + "</p>";
        if (Meta is null)
            return paragraphs;

        string titled = $"<h1>{Meta.Name}</h1>" + paragraphs;
        if (LinkButtons is not { Count: > 0 })
            return titled;

        var anchors = new StringBuilder();
        foreach (var (label, target) in LinkButtons)
            anchors.AppendLine($"<a href=\"{target}\">{label}</a>");

        string controls = $"<div class=\"controls\">{anchors}</div>";
        string page = controls + titled + controls + (EofHtml ?? "");

        return "<!DOCTYPE html>\n<html>" + page + "</html>";
    }

    /// <summary>Runs <see cref="Convert"/> synchronously and wraps the result in a completed task.</summary>
    protected override Task<string> ConvertAsync() => Task.FromResult(Convert());
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace Beam.Exports {
|
||||
/// <summary>An exporter that can also write asynchronously.</summary>
public interface IAsyncExporter : IExporter {
    /// <summary>
    /// Asynchronously writes the object to the desired path, creating it if it does not exist.
    /// </summary>
    /// <param name="path">The path of the exported object</param>
    /// <returns>A task that completes once the write has finished.</returns>
    Task WriteAsync(string path);
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
namespace Beam.Exports {
|
||||
/// <summary>Writes an object to a location on disk.</summary>
public interface IExporter {
    /// <summary>
    /// Synchronously writes the object to the desired path, creating it if it does not exist.
    /// </summary>
    /// <param name="path">The path of the exported object</param>
    void Write(string path);

    /// <summary>
    /// Prepares <paramref name="path"/> for writing: an existing file at that path is deleted;
    /// otherwise a directory is created at that path unless one already exists.
    /// </summary>
    /// <remarks>
    /// NOTE(review): when <paramref name="path"/> names a file that does not exist yet, this
    /// creates a directory with the file's name; confirm callers only pass directory paths
    /// in that case.
    /// </remarks>
    /// <param name="path">The file or directory path to prepare.</param>
    protected void EnsurePathExists(string path) {
        if (File.Exists(path)) {
            File.Delete(path);
        }
        else if (!Directory.Exists(path)) {
            Directory.CreateDirectory(path);
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace Beam.Exports {
|
||||
/// <summary>An exporter that writes its output as several files and reports each one as it is written.</summary>
public interface IStreamExporter : IAsyncExporter {
    /// <summary>
    /// Asynchronously writes the object to the desired path in many parts, returning the path
    /// of each written file as a stream
    /// </summary>
    /// <param name="path">The path of the exported object</param>
    /// <returns>The async enumerator of each written file</returns>
    IAsyncEnumerator<string> WriteAsyncStream(string path);
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Exports {
|
||||
/// <summary>Shared string constants for the exporters.</summary>
internal static class S {
    /// <summary>Message texts used in exceptions.</summary>
    internal static class M {
        /// <summary>Used when the directory part of a target path is missing.</summary>
        internal const string FileDirectoryDoesNotExist = "Part of the path supplied does not exist.";
    }
}
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
using aeqw89.DataKeys;
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// <para>
/// Groups the methods and constants that encode architecture-specific choices, so other parts
/// of the application can rely on them without duplicating code or taking on extra responsibilities.
/// </para>
/// </summary>
partial interface IArchitecture {
    /// <summary>
    /// Gets the metadata associated with a <see cref="TextResource"/>
    /// </summary>
    /// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
    /// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
    /// <param name="sdd">The <see cref="SharedDataDictionary"/> to be used to retrieve information</param>
    /// <param name="logger">Optional logger for logging debug information</param>
    /// <returns>
    /// A <see cref="DownloadContext{T}"/> object with the required information to perform the
    /// download, or <c>null</c> when one cannot be built
    /// </returns>
    DownloadContext<IDocumentMetaData>? GetMeta(
        HtmlWeb web,
        DataKey<TextResource> pieceKey,
        SharedDataDictionary sdd,
        ILogger? logger = null);

    /// <summary>
    /// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="TextResource"/>
    /// </summary>
    /// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
    /// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
    /// <param name="sdd">The <see cref="SharedDataDictionary"/> to be used to retrieve information</param>
    /// <param name="metadata">Optional book metadata to include with the final text record</param>
    /// <param name="logger">Optional logger for logging debug information</param>
    /// <returns>
    /// A <see cref="DownloadContext{T}"/> object with the required information to perform the
    /// download, or <c>null</c> when one cannot be built
    /// </returns>
    DownloadContext<IDocument>? GetTextRecord(
        HtmlWeb web,
        DataKey<TextResource> pieceKey,
        SharedDataDictionary sdd,
        IDocumentMetaData? metadata = null,
        ILogger? logger = null);

    /// <summary>
    /// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the chapter metadata
    /// </summary>
    DataKey<IDocumentMetaData> ChapterKey { get; set; }

    /// <summary>
    /// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the book metadata
    /// </summary>
    DataKey<IDocumentMetaData> BookKey { get; set; }

    /// <summary>
    /// The default architecture; a new instance is created on every access
    /// </summary>
    public static IArchitecture Default => new MainArchitecture();
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
|
||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj" />
|
||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Reference Include="aeqw89.DataKeys">
|
||||
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="aeqw89.PersistentData">
|
||||
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
|
||||
</Reference>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,30 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>Colour and typography settings for generated pages, each with a default value.</summary>
public class CssData {
    /// <summary>Primary background color (e.g., for the body).</summary>
    public string PrimaryColor { get; set; } = "#f5f5f5";

    /// <summary>Secondary color (e.g., for header background).</summary>
    public string SecondaryColor { get; set; } = "#e0e0e0";

    /// <summary>Tertiary color (e.g., for content sections).</summary>
    public string TertiaryColor { get; set; } = "#ffffff";

    /// <summary>Button background color.</summary>
    public string ButtonColor { get; set; } = "#007bff";

    /// <summary>Foreground text color.</summary>
    public string ForegroundColor { get; set; } = "#333333";

    /// <summary>Font family for main content.</summary>
    public string ContentFont { get; set; } = "Arial, sans-serif";

    /// <summary>Font size for main content.</summary>
    public string ContentFontSize { get; set; } = "16px";

    /// <summary>Font family for titles.</summary>
    public string TitleFont { get; set; } = "Georgia, serif";

    /// <summary>Font size for titles.</summary>
    public string TitleFontSize { get; set; } = "32px";
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>Helpers for deriving new data keys from existing ones.</summary>
internal static class DataKeyExtensions {
    /// <summary>
    /// Returns a key placed under <paramref name="namespace"/>. If the identifier already
    /// starts with the namespace, or with the namespace minus some of its trailing
    /// ':'-separated segments, that leading part is swapped for the full namespace; otherwise
    /// the namespace and a ':' are prepended.
    /// </summary>
    /// <param name="dk">Key to re-home.</param>
    /// <param name="namespace">Target namespace, with segments separated by ':'.</param>
    public static DataKey WithNamespace(this DataKey dk, string @namespace) {
        string[] segments = @namespace.Split(':');

        // Try the longest candidate prefix first, dropping one trailing segment per round.
        for (int dropped = 0; dropped < segments.Length; dropped++) {
            string prefix = string.Join(":", segments.SkipLast(dropped));
            if (dk.Identifier.StartsWith(prefix, StringComparison.Ordinal)) {
                // Swap only the leading prefix; later occurrences of the same text in the
                // identifier must stay as they are.
                return new DataKey(@namespace + dk.Identifier[prefix.Length..]);
            }
        }

        return new DataKey(@namespace + ":" + dk.Identifier);
    }

    /// <summary>Typed overload of <see cref="WithNamespace(DataKey, string)"/>; the key type is preserved.</summary>
    public static DataKey<T> WithNamespace<T>(this DataKey<T> dk, string @namespace) {
        return ((DataKey)dk).WithNamespace(@namespace).As<T>();
    }

    /// <summary>Returns a key whose identifier is the original one followed by <paramref name="suffix"/>.</summary>
    public static DataKey<T> WithSuffix<T>(this DataKey<T> dk, string suffix) {
        return new DataKey<T>(dk.Identifier + suffix);
    }

    /// <summary>Re-homes the key under the aggregators namespace.</summary>
    public static DataKey ToAggregator(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:aggregators");

    /// <summary>Re-homes the key under the auxiliaries namespace (spelled "auxillaries" in the stored keys).</summary>
    public static DataKey ToAuxiliary(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:auxillaries");

    /// <summary>Creates a typed key with the same identifier.</summary>
    public static DataKey<T> As<T>(this DataKey dk) => new DataKey<T>(dk.Identifier);
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>A file path together with the tags attached to it.</summary>
internal class File {
    /// <summary>Creates an entry for <paramref name="path"/> carrying <paramref name="tags"/>.</summary>
    public File(string path, params string[] tags) {
        Path = path;
        Tags = tags;
    }

    /// <summary>Path of the file.</summary>
    public string Path { get; set; }

    /// <summary>Tags attached to the file.</summary>
    public string[] Tags { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,132 @@
|
||||
//using aeqw89.DataKeys;
|
||||
//using System;
|
||||
//using System.Collections.Generic;
|
||||
//using System.Linq;
|
||||
//using System.Text;
|
||||
//using System.Threading.Tasks;
|
||||
|
||||
//namespace Beam.Temporary.Cli {
|
||||
// internal class HtmlBook : Document {
|
||||
// public class Keys {
|
||||
// public static DataKey<File> ContentPage => new DataKey<File>("content_page");
|
||||
// public static DataKey<File> NoContentPage => new DataKey<File>("no_content_page");
|
||||
// public static DataKey<File> TitlePage => new DataKey<File>("title_page");
|
||||
// public static DataKey<File> StylesPage => new DataKey<File>("styles_page");
|
||||
// }
|
||||
|
||||
// public List<Tracked<IDocument>> Documents { get; set; }
|
||||
// public IReadOnlyList<string> Pages => _Pages;
|
||||
// private List<string> _Pages { get; set; } = [];
|
||||
|
||||
// private const string EMTPY_PAGE = "EMPTY";
|
||||
|
||||
// public CssData CssData { get; }
|
||||
// public ArticleData BookData { get; set; }
|
||||
// public HtmlBookTemplates Templates { get; set; }
|
||||
|
||||
// public HtmlBook(string bookname, CssData cssData, ArticleData bookData, HtmlBookTemplates templates, List<IDocument>? documents = null, Encoding? encoding = null)
|
||||
// : base(bookname, encoding) {
|
||||
// Documents = [];
|
||||
// CssData = cssData;
|
||||
// BookData = bookData;
|
||||
// Templates = templates;
|
||||
// if (documents is not null)
|
||||
// Documents = documents.Select((x) => new Tracked<IDocument>(x)).ToList();
|
||||
// }
|
||||
|
||||
// public void Update(bool ignoreDirty = false) {
|
||||
// if (!Directory.Exists(Filename))
|
||||
// Directory.CreateDirectory(Filename);
|
||||
|
||||
// //System.IO.File.WriteAllLines(Path.Combine(Filename, "styles.css"), Format())
|
||||
|
||||
// List<string> newpages = [];
|
||||
// if (Pages.Count < Documents.Count)
|
||||
// _Pages.AddRange(Enumerable.Repeat(EMTPY_PAGE, Documents.Count - Pages.Count));
|
||||
// foreach (var (doc, page) in Documents.Zip(Pages)) {
|
||||
// if (!doc.IsDirty)
|
||||
// newpages.Add(page);
|
||||
// else if (doc.TrackedObject.MetaData.Count == 0)
|
||||
// newpages.Add(PlainPage(doc.TrackedObject));
|
||||
// else if (doc.TrackedObject.MetaData.TryGetValue(Program.Architecture.ChapterKey, out var meta) && meta is ArticleData articleData)
|
||||
// newpages.Add(ArticlePage(doc.TrackedObject, articleData));
|
||||
// else {
|
||||
// Console.WriteLine("Unhandlable Metadata detected!");
|
||||
// newpages.Add(PlainPage(doc.TrackedObject));
|
||||
// }
|
||||
|
||||
// System.IO.File.WriteAllText(Path.Combine(Filename, Path.GetRandomFileName() + ".html"), newpages[^1]);
|
||||
// doc.IsDirty = false;
|
||||
// }
|
||||
|
||||
// _Pages = newpages;
|
||||
// }
|
||||
|
||||
// public void UpdateCss() {
|
||||
|
||||
// }
|
||||
|
||||
// public void UpateTitle() {
|
||||
|
||||
// }
|
||||
|
||||
// private string Format(string template, Dictionary<string, string> table) {
|
||||
// ArgumentNullException.ThrowIfNull(template);
|
||||
// ArgumentNullException.ThrowIfNull(table);
|
||||
|
||||
// foreach (var kvp in table) {
|
||||
// template = template.Replace(kvp.Key, kvp.Value);
|
||||
// }
|
||||
// return template;
|
||||
// }
|
||||
|
||||
// private Dictionary<string, string> GetDocumentTable(IDocument doc, bool keepPlaceholders = false) {
|
||||
// var table = new Dictionary<string, string>() {
|
||||
// { "{" + nameof(doc.Filename) + "}", doc.Filename },
|
||||
// { "{Content}", doc.ToString() }
|
||||
// };
|
||||
|
||||
// return SolvePlaceholders(table, keepPlaceholders);
|
||||
// }
|
||||
|
||||
// private Dictionary<string, string> GetArticleDataTable(IDocument doc, ArticleData ad, bool keepPlaceholders = false) {
|
||||
// var table = new Dictionary<string, string>() {
|
||||
// { "{" + nameof(ad.Language) + "}", ad.Language ?? "" },
|
||||
// { "{" + nameof(ad.Authors) + "}", ad.Authors.Aggregate("; ")},
|
||||
// { "{" + nameof(ad.Categories) + "}", ad.Categories.Aggregate("; ") },
|
||||
// { "{" + nameof(ad.Version) + "}", ad.Version ?? "" },
|
||||
// { "{" + nameof(ad.Description) + "}", ad.Description ?? "" },
|
||||
// { "{" + nameof(ad.Name) + "}", ad.Name ?? "" },
|
||||
// { "{" + nameof(doc.Filename) + "}", doc.Filename },
|
||||
// { "{Content}", doc.ToString() }
|
||||
// };
|
||||
|
||||
// return SolvePlaceholders(table, keepPlaceholders);
|
||||
// }
|
||||
|
||||
// private Dictionary<string, string> SolvePlaceholders(Dictionary<string, string> table, bool keepPlaceholders) {
|
||||
// if (keepPlaceholders)
|
||||
// return table.Select(
|
||||
// (x) => new KeyValuePair<string, string>(x.Key, x.Value == "" ? $"{x.Key}" : x.Value))
|
||||
// .ToDictionary();
|
||||
// return table;
|
||||
// }
|
||||
|
||||
// private string PlainPage(IDocument doc, bool keepPlaceholders = false) {
|
||||
// return Format(Templates.ContentPageTemplate, GetDocumentTable(doc, keepPlaceholders));
|
||||
// }
|
||||
|
||||
// private string ArticlePage(IDocument doc, ArticleData data, bool keepPlaceholders = false) {
|
||||
// return Format(Templates.ContentPageTemplate, GetArticleDataTable(doc, data, keepPlaceholders));
|
||||
// }
|
||||
|
||||
// public override byte[] ToBytes() {
|
||||
// throw new NotImplementedException();
|
||||
// }
|
||||
|
||||
// public override string ToString() {
|
||||
// throw new NotImplementedException();
|
||||
// }
|
||||
|
||||
// }
|
||||
//}
|
||||
@@ -0,0 +1,8 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>The template strings used to render an HTML book.</summary>
internal struct HtmlBookTemplates {
    /// <summary>Template for the title page.</summary>
    public string TitlePageTemplate { get; set; }

    /// <summary>Template for a page with content.</summary>
    public string ContentPageTemplate { get; set; }

    /// <summary>Template for the stylesheet.</summary>
    public string CssTemplate { get; set; }

    /// <summary>Template for a page without content.</summary>
    public string NoContentTemplate { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
partial interface IArchitecture {
    /// <summary>
    /// The architecture returned by <c>IArchitecture.Default</c>. It looks novels, web sources
    /// and link templates up in the shared data dictionary and builds download contexts from them.
    /// </summary>
    private class MainArchitecture : IArchitecture {
        public MainArchitecture() { }

        /// <summary>Key under which chapter metadata is attached to downloaded documents.</summary>
        public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");

        /// <summary>Key under which book metadata is attached to downloaded documents.</summary>
        public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");

        /// <summary>
        /// Builds the download context that fetches a novel's metadata page and turns it into
        /// article data.
        /// </summary>
        /// <returns>
        /// The context, or <c>null</c> when the novel has no metadata template data or no
        /// associated metadata source.
        /// </returns>
        public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, ILogger? logger = null) {
            var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd

            // Checked before anything is read from 'piece', so a missing record yields null
            // rather than a NullReferenceException.
            if (piece?.Resource?.MetaTemplateInitialData is null)
                return null;

            var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
            if (auxiliary is null) // aux is required to get metadata
                return null;

            // gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
            var link = sdd.Templates[auxiliary.Resource.Key].GenerateLink(piece?.Resource?.MetaTemplateInitialData!);
            var binding = auxiliary.Bindings;

            return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
                return new ArticleData() {
                    // ResolveString always yields a string; the dynamic Resolve would hand an
                    // array to Clean for array bindings and fail at run time.
                    Authors = [OnlineCleaner.Clean(binding?.Authors?.ResolveString(x) ?? "")],
                    Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
                    Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';'),
                    Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
                };
            });
        }

        /// <summary>
        /// Builds the download context that fetches a novel's pages and turns each one into a
        /// document carrying chapter metadata (and the book metadata, when supplied).
        /// </summary>
        /// <returns>The context, or <c>null</c> when the novel or its source cannot be retrieved.</returns>
        public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, SharedDataDictionary sdd, IDocumentMetaData? metaData = null, ILogger? logger = null) {
            var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
            if (res is null) // ensure novel data was retrieved successfully (before it is used)
                return null;

            var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
            if (aggregator is null) // ensure aggregator data was retrieved successfully
                return null;

            var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator

            // creates a generative enumerable of type link from 'template'
            var sle = SourceLinkEnumerable.FromGenerator(new DataBackedSourceLinkGenerator(
                template, res.Resource.TemplateInitialData));

            return new DownloadContext<IDocument>(web, sle,
                transformer: (x) => {
                    var resolved = aggregator.Bindings.Resolve(x);
                    var articleData = new ArticleData() {
                        Name = OnlineCleaner.Clean(resolved.Title),
                    };
                    Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
                    meta.Add(ChapterKey, articleData);
                    if (metaData is not null)
                        meta.Add(BookKey, metaData);
                    return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) {
                        MetaData = meta
                    };
                },
                retryReporter: new Progress<int>((x) => Console.WriteLine($"Retrying download ({x})")),
                downloadReporter: new Progress<IDocument>((x) => Console.WriteLine($"Downloaded ({x.Filename})")),
                asyncFailurePredicates: [
                    // A page without the chapter container counts as a failed download.
                    (x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
                ],
                timeOut: TimeSpan.FromSeconds(15),
                downloadLogger: logger
            );
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,144 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
|
||||
internal static class NovelStatics {
|
||||
/// <summary>
/// Registers the novel "The Legendary Mechanic" in <paramref name="sdd"/> and links it to the
/// Light Novel World aggregator and auxiliary resource keys.
/// </summary>
/// <param name="sdd">Shared data dictionary that receives the novel.</param>
public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) {
    var lnwAggregator = new DataKey<WebResource>("aeqw89:document:aggregators:light_novel_world");
    var lnwAuxiliary = new DataKey<WebResource>("aeqw89:document:auxillaries:light_novel_world");
    var novel = new TextResource() {
        Key = new DataKey<TextResource>("novels:the_legendary_mechanic"),
        AssociatedSource = lnwAggregator,
        AssociatedMetaSource = lnwAuxiliary,
        TemplateInitialData = ["the-legendary-mechanic-245", "1"],
        MetaTemplateInitialData = ["the-legendary-mechanic"]
    };
    sdd.Novels.TryAdd(novel.Key, novel);

    // Add to the aggregator's existing set. A plain TryAdd with a fresh set is a no-op once
    // the aggregator already has an entry, which would silently drop this novel.
    var aggregatorNovels = sdd.AggregatorNovels;
    if (!aggregatorNovels.TryGetValue(lnwAggregator, out var novels)) {
        novels = [];
        aggregatorNovels.Add(lnwAggregator, novels);
    }
    novels.Add(novel.Key);
}
|
||||
|
||||
/// <summary>
/// Registers the novel "I Alone Level Up" in <paramref name="sdd"/> and links it to the
/// Light Novel World aggregator and auxiliary resource keys.
/// </summary>
/// <remarks>The method name keeps its historical spelling ("Worl") because callers reference it.</remarks>
/// <param name="sdd">Shared data dictionary that receives the novel.</param>
public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) {
    var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As<WebResource>();
    var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As<WebResource>();
    var novel = new TextResource() {
        Key = new DataKey<TextResource>("novels:i_alone_level_up"),
        AssociatedSource = lnwAggregator,
        AssociatedMetaSource = lnwAuxiliary,
        TemplateInitialData = ["i-alone-level-up-236", "1"],
        MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"]
    };

    sdd.Novels.TryAdd(novel.Key, novel);

    // Add to the aggregator's existing set. A plain TryAdd with a fresh set is a no-op once
    // the aggregator already has an entry, which would silently drop this novel.
    var aggregatorNovels = sdd.AggregatorNovels;
    if (!aggregatorNovels.TryGetValue(lnwAggregator, out var novels)) {
        novels = [];
        aggregatorNovels.Add(lnwAggregator, novels);
    }
    novels.Add(novel.Key);
}
|
||||
|
||||
/// <summary>
/// Builds the Novel Full aggregator and auxiliary resources and registers an empty link
/// template for the aggregator key.
/// </summary>
/// <param name="sdd">Shared data dictionary that receives the template.</param>
public static void Define_NovelFull(SharedDataDictionary sdd) {
    var documentNamespace = "aeqw89:document";
    var aggregatorKey = new DataKey<WebResource>("aggregators:novel_full").WithNamespace(documentNamespace);
    var auxiliaryKey = new DataKey<WebResource>("auxillaries:novel_full").WithNamespace(documentNamespace);

    // NOTE(review): this key names the light_novel_world bindings — confirm that sharing them is intended.
    var bindingsKey = new DataKey<DataBindings>("aeqw89:bindings:light_novel_world");

    // NOTE(review): neither resource below is added to sdd in this method — presumably unfinished; verify.
    var aggregator = new WebResource(aggregatorKey) {
        Name = "Novel Full",
        Description = "A novel aggregator site",
        Domain = "https://novelfull.net",
        Bindings = bindingsKey
    };
    var auxiliary = new WebResource(auxiliaryKey) {
        Name = "Novel Full",
        Description = "A novel aggregator site",
        Domain = "https://novelfull.net",
        Bindings = bindingsKey.WithSuffix("_aux")
    };

    sdd.Templates.TryAdd(aggregatorKey, new() {
        Template = ""
    });
}
|
||||
|
||||
/// <summary>
/// Registers everything needed to read from Light Novel World: the aggregator and auxiliary
/// web resources, their link templates, and the bindings used for chapter pages and the
/// novel's metadata page.
/// </summary>
/// <param name="sdd">Shared data dictionary that receives the definitions.</param>
public static void Define_LightNovelWorld(SharedDataDictionary sdd) {
    const string lnwBindingsA = "aeqw89:bindings:light_novel_world";
    const string lnwBindingsB = "aeqw89:bindings:light_novel_world_aux";

    var aggregatorKey = new DataKey<WebResource>("aeqw89:document:aggregators:light_novel_world");
    var auxiliaryKey = new DataKey<WebResource>("aeqw89:document:auxillaries:light_novel_world");

    // Web resources: one for chapter pages, one for the novel's metadata page.
    var aggregator = new WebResource(aggregatorKey) {
        Name = "Light Novel World",
        Description = "A novel aggregator site maintained by NetherClaw",
        Domain = "https://www.lightnovelworld.co",
        Bindings = new DataKey<DataBindings>(lnwBindingsA)
    };
    var auxiliary = new WebResource(auxiliaryKey) {
        Name = "Light Novel World",
        Description = "A novel aggregator site maintained by NetherClaw",
        Domain = "https://www.lightnovelworld.co",
        Bindings = new DataKey<DataBindings>(lnwBindingsB)
    };

    // Link templates: the metadata page takes only the slug, chapter pages also take an index.
    sdd.Templates.TryAdd(auxiliaryKey, new() {
        Template = "https://www.lightnovelworld.co/novel/{0}",
        IndexOfChapterIndex = -1
    });
    sdd.Templates.TryAdd(aggregatorKey, new() {
        Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}",
        IndexOfChapterIndex = 1
    });

    sdd.Aggregators.TryAdd(aggregator.Key, aggregator);
    sdd.Auxillaries.TryAdd(auxiliary.Key, auxiliary);

    var chapterBindingsKey = new DataKey<DataBindings>(lnwBindingsA);
    var metaBindingsKey = new DataKey<DataBindings>(lnwBindingsB);

    // Bindings applied to chapter pages.
    sdd.Bindings.TryAdd(chapterBindingsKey, new DataBindings() {
        Title = new Binding("aeqw89:binding:light_novel_world:title") {
            XPath = "/html/body/main/article/section/div[1]/h1/span[2]",
            Type = BindingType.Single
        },
        Content = new("aeqw89:binding:light_novel_world:content") {
            Provider = new ParagraphedContentDataProvider() {
                Content = new Binding() {
                    XPath = "//*[@id=\"chapter-container\"]"
                }
            },
            Type = BindingType.UseProvider
        },
    });

    // Bindings applied to the novel's metadata page.
    sdd.Bindings.TryAdd(metaBindingsKey, new DataBindings() {
        Title = new("aeqw89:binding:light_novel_world_aux:title") {
            XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1",
            Type = BindingType.Single
        },
        Authors = new("aeqw89:binding:light_novel_world_aux:authors") {
            XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a",
            Type = BindingType.Single
        },
        Description = new("aeqw89:binding:light_novel_world_aux:description") {
            Provider = new ParagraphedContentDataProvider() {
                Content = new() {
                    XPath = "/html/body/main/article/div/section/div[1]/div"
                }
            },
            Type = BindingType.UseProvider
        },
        Tags = new("aeqw89:binding:light_novel_world_aux:tags") {
            Provider = new ListContentDataProvider() {
                Content = new() {
                    XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul"
                }
            },
            Type = BindingType.UseProvider
        }
    });
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Text.Json.Serialization.Metadata;
|
||||
using Beam.Temporary.Cli.Templates.Classic;
|
||||
using Beam.Exports;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Entry point of the temporary CLI. It rebuilds the shared definitions, downloads the
/// metadata and the first chapters of one novel, and exports each chapter as an HTML file.
/// </summary>
internal class Program {

    /// <summary>JSON options handed to the shared data context; configured at the start of <see cref="Main"/>.</summary>
    public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();

    /// <summary>Shared data dictionary; replaced by the persisted instance once <see cref="Main"/> has loaded it.</summary>
    public static SharedDataDictionary Shared { get; set; } = [];

    /// <summary>Architecture used to build the download contexts.</summary>
    public static IArchitecture Architecture = IArchitecture.Default;

    const string SharedDataPath = "data/.dat";

    static async Task Main(string[] args) {
        ConversionOptions.Converters.AddPersistentDataRequiredConverters();
        ConversionOptions.WriteIndented = true;

        var web = new HtmlWeb();

        // Declared before the data context so the factory is disposed after it when Main exits.
        using var lf = LoggerFactory.Create((x) => {
            x.AddConsole();
        });

        ILogger logger = lf
            .CreateLogger("Program");

        await using var sharedContext = await DataDictionaryContext<SharedDataDictionary>.Create(
            SharedDataPath,
            DataKind.Shared,
            logger,
            ConversionOptions
        );

        Shared = sharedContext.Data;

        // Start from a clean dictionary and re-register the built-in definitions.
        Shared.Clear();
        NovelStatics.Define_LightNovelWorld(Shared);
        NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared);
        NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared);
        ClassicTemplates.Register(Shared);

        var novel = new DataKey<TextResource>("novels:i_alone_level_up");

        // GetMeta can return null; stop with a message instead of failing on a null dereference.
        var context_aux = Architecture.GetMeta(web, novel, Shared);
        if (context_aux is null) {
            logger.LogError("Could not create a metadata download context for {Novel}", novel);
            return;
        }

        var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
            new SequentialFragmentDownloader<IDocumentMetaData>(
                context_aux,
                (c) => new UnitFragmentDownloader<IDocumentMetaData>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                logger)
            .UnwrapFragmented());
        var metadata = (await metaDownloader.FirstAsync());

        // GetTextRecord is declared nullable as well.
        var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data);
        if (context is null) {
            logger.LogError("Could not create a chapter download context for {Novel}", novel);
            return;
        }

        context.DownloadReporter = new Progress<IDocument>((x) => Console.WriteLine(x.Filename));
        var downloader = new DownloadEnumerable<IDocument>(
            new SequentialFragmentDownloader<IDocument>(
                context,
                (c) => new UnitFragmentDownloader<IDocument>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                logger)
            .UnwrapFragmented());

        List<Ordered<IDocument>> documents = [];

        // Keep only downloads that carry chapter metadata of the expected type.
        await foreach (var download in downloader.Take(20)) {
            if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
                continue;
            if (meta is not ArticleData articleMetaData)
                continue;
            Console.WriteLine($"Chapter title: {articleMetaData.Name}");

            documents.Add(download);
        }

        string testDir = Path.Combine("txt", Path.GetRandomFileName());
        Directory.CreateDirectory(testDir);

        // Highest order present; decides whether a chapter gets a "Next" link.
        int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
        foreach (var document in documents.OrderBy((x) => x.Order)) {
            document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
            Dictionary<string, string> linkButtons = new();
            if (document.Order != 0)
                linkButtons.Add("Previous", $"{document.Order - 1}.html");
            if (document.Order != len)
                linkButtons.Add("Next", $"{document.Order + 1}.html");
            new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
        }

        Console.ReadKey();

        // TODO: HtmlBook export built from the classic templates in Shared.Files is not wired up yet.
    }
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Typed view over <see cref="BaseDataDictionary"/>: every property reads its dictionary via
/// <c>GetOrCreateDictionary</c> and writes it to <c>Data</c>, both keyed by the property's own name.
/// </summary>
public class SharedDataDictionary : BaseDataDictionary {

    /// <summary>Link-generation data per web resource key.</summary>
    public Dictionary<DataKey<WebResource>, PackagedSourceLinkGenerationData> Templates {
        get => GetOrCreateDictionary<DataKey<WebResource>, PackagedSourceLinkGenerationData>(nameof(Templates));
        set => Data[nameof(Templates)] = value;
    }

    /// <summary>Aggregator web resources by key.</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Aggregators {
        get => GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Aggregators));
        set => Data[nameof(Aggregators)] = value;
    }

    /// <summary>Auxiliary web resources by key (the property keeps its historical spelling).</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Auxillaries {
        get => GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Auxillaries));
        set => Data[nameof(Auxillaries)] = value;
    }

    /// <summary>Data bindings by key.</summary>
    public Dictionary<DataKey<DataBindings>, DataBindings> Bindings {
        get => GetOrCreateDictionary<DataKey<DataBindings>, DataBindings>(nameof(Bindings));
        set => Data[nameof(Bindings)] = value;
    }

    /// <summary>Novel keys grouped by the web resource key they are registered under.</summary>
    public Dictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>> AggregatorNovels {
        get => GetOrCreateDictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>>(nameof(AggregatorNovels));
        set => Data[nameof(AggregatorNovels)] = value;
    }

    /// <summary>Novels by key.</summary>
    public Dictionary<DataKey<TextResource>, TextResource> Novels {
        get => GetOrCreateDictionary<DataKey<TextResource>, TextResource>(nameof(Novels));
        set => Data[nameof(Novels)] = value;
    }

    /// <summary>Registered files by key.</summary>
    internal Dictionary<DataKey<File>, File> Files {
        get => GetOrCreateDictionary<DataKey<File>, File>(nameof(Files));
        set => Data[nameof(Files)] = value;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>String helpers for sequences.</summary>
public static class StringExtensions {
    /// <summary>
    /// Concatenates the elements of <paramref name="str"/>, placing <paramref name="separator"/>
    /// between consecutive elements.
    /// </summary>
    /// <param name="str">The strings to join; enumerated exactly once.</param>
    /// <param name="separator">Text inserted between elements.</param>
    /// <returns>The joined text, or <see cref="string.Empty"/> for an empty sequence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    public static string Aggregate(this IEnumerable<string> str, string separator) {
        // string.Join walks the source once and builds the result in linear time, unlike the
        // previous Any() + pairwise concatenation, which enumerated twice.
        return string.Join(separator, str);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli.Templates.Classic {
|
||||
/// <summary>Registers the files of the "classic" HTML/CSS template set.</summary>
internal class ClassicTemplates {
    // Directory used when the caller does not supply one. It is a developer-machine path,
    // kept only so existing callers behave as before.
    private const string DefaultTemplateDirectory =
        "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic";

    /// <summary>
    /// Adds the four classic template files to <paramref name="sdd"/>; keys that are already
    /// present are left untouched.
    /// </summary>
    /// <param name="sdd">Shared data dictionary whose <c>Files</c> receive the entries.</param>
    /// <param name="templateDirectory">
    /// Directory containing the template files. When <c>null</c>, the original hard-coded
    /// directory is used.
    /// </param>
    public static void Register(SharedDataDictionary sdd, string? templateDirectory = null) {
        string directory = templateDirectory ?? DefaultTemplateDirectory;

        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:content_page"),
            new(System.IO.Path.Combine(directory, "Content.template.html"), "htmlpage", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:title_page"),
            new(System.IO.Path.Combine(directory, "Title.template.html"), "htmlpage", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:styles_page"),
            new(System.IO.Path.Combine(directory, "Styles.template.css"), "styles", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:no_content_page"),
            new(System.IO.Path.Combine(directory, "NoContent.template.html"), "htmlpage", "templates"));
    }
}
|
||||
|
||||
/// <summary>Helpers for dictionaries whose values are <see cref="File"/> entries.</summary>
internal static class DictionaryOfFileExtensions {
    /// <summary>Reads the whole text of the file registered under <paramref name="key"/>.</summary>
    /// <param name="dict">Dictionary of file entries.</param>
    /// <param name="key">Key of the entry whose <c>Path</c> is read.</param>
    /// <returns>The file's content as a string.</returns>
    public static string ReadToString<T>(this Dictionary<T, File> dict, T key) where T : notnull {
        var entry = dict[key];
        return System.IO.File.ReadAllText(entry.Path);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{Name}</title>
|
||||
<link rel="stylesheet" href="styles.css">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>{Name}</h1>
|
||||
<p><em>{Description}</em></p>
|
||||
<div>
|
||||
<span><strong>Authors:</strong> {Authors}</span> |
|
||||
<span><strong>Language:</strong> {Language}</span> |
|
||||
<span><strong>Categories:</strong> {Categories}</span> |
|
||||
<span><strong>Version:</strong> {Version}</span>
|
||||
</div>
|
||||
</header>
|
||||
<article>
|
||||
{Content}
|
||||
</article>
|
||||
<div class="navigation">
|
||||
<button id="prev">Previous</button>
|
||||
<button id="next">Next</button>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,15 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>404 - Not Found</title>
|
||||
<link rel="stylesheet" href="styles.css">
|
||||
</head>
|
||||
<body>
|
||||
<div class="error-container">
|
||||
<h1>404 - Content Not Found</h1>
|
||||
<p>The file <strong>{Filename}</strong> was not found.</p>
|
||||
<p>{Content}</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,60 @@
|
||||
/* styles.css */
|
||||
/* Placeholders:
|
||||
{PrimaryColor}, {SecondaryColor}, {TertiaryColor}, {ButtonColor},
|
||||
{ForegroundColor}, {ContentFont}, {ContentFontSize}, {TitleFont}, {TitleFontSize}
|
||||
*/
|
||||
body {
|
||||
font-family: {ContentFont};
|
||||
font-size: {ContentFontSize};
|
||||
background-color: {PrimaryColor};
|
||||
color: {ForegroundColor};
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
header {
|
||||
background-color: {SecondaryColor};
|
||||
padding: 20px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
header h1 {
|
||||
font-family: {TitleFont};
|
||||
font-size: {TitleFontSize};
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
header p {
|
||||
font-style: italic;
|
||||
margin: 5px 0;
|
||||
}
|
||||
|
||||
section, article, nav {
|
||||
background: {TertiaryColor};
|
||||
padding: 15px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||
margin: 20px auto;
|
||||
max-width: 800px;
|
||||
}
|
||||
|
||||
.navigation {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
max-width: 800px;
|
||||
margin: 20px auto;
|
||||
}
|
||||
|
||||
button {
|
||||
background-color: {ButtonColor};
|
||||
color: {ForegroundColor};
|
||||
border: none;
|
||||
padding: 10px 20px;
|
||||
cursor: pointer;
|
||||
font-size: {ContentFontSize};
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
nav h2 {
|
||||
margin-top: 0;
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{Name}</title>
|
||||
<link rel="stylesheet" href="styles.css">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>{Name}</h1>
|
||||
<p><em>{Description}</em></p>
|
||||
</header>
|
||||
<section>
|
||||
<div><strong>Authors:</strong> {Authors}</div>
|
||||
<div><strong>Language:</strong> {Language}</div>
|
||||
<div><strong>Categories:</strong> {Categories}</div>
|
||||
<div><strong>Version:</strong> {Version}</div>
|
||||
</section>
|
||||
<nav>
|
||||
<h2>Table of Contents</h2>
|
||||
<ul>
|
||||
{TOC} <!-- Expected to be a list of items (e.g. <li>Chapter 1</li>, etc.) -->
|
||||
</ul>
|
||||
</nav>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,26 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// A text (novel) definition: its key, the optional web resources it is read from, and the
/// initial data fed to the link templates.
/// </summary>
public class TextResource : IKeyed<TextResource> {
    /// <summary>Key of this resource.</summary>
    public required DataKey<TextResource> Key { get; set; }

    /// <summary>Key of the associated source, looked up in <c>Aggregators</c> by <see cref="ToRecord"/>.</summary>
    public DataKey<WebResource>? AssociatedSource { get; set; }

    /// <summary>Key of the associated metadata source, looked up in <c>Auxillaries</c> by <see cref="ToRecord"/>.</summary>
    public DataKey<WebResource>? AssociatedMetaSource { get; set; }

    /// <summary>Initial template data for the source.</summary>
    public required string[] TemplateInitialData { get; set; }

    /// <summary>Initial template data for the metadata source, if any.</summary>
    public string?[]? MetaTemplateInitialData { get; set; }

    /// <summary>
    /// Resolves the associated keys against <paramref name="sdd"/> and bundles the results
    /// with this resource. A key that is <c>null</c> resolves to <c>null</c>.
    /// </summary>
    /// <param name="sdd">Shared data used for the lookups.</param>
    /// <returns>A record holding this resource and the resolved web resources.</returns>
    public TextResourceRecord ToRecord(SharedDataDictionary sdd) {
        WebResource? source = null;
        if (AssociatedSource is not null)
            source = sdd.Aggregators[AssociatedSource];

        WebResource? metaSource = null;
        if (AssociatedMetaSource is not null)
            metaSource = sdd.Auxillaries[AssociatedMetaSource];

        return new TextResourceRecord(this, source, metaSource);
    }
}
|
||||
|
||||
/// <summary>A <see cref="TextResource"/> together with its resolved source and metadata source (either may be <c>null</c>).</summary>
public record TextResourceRecord(TextResource Resource, WebResource? AssociatedSource, WebResource? AssociatedMetaSource);
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>Pairs an object with a dirty flag; new instances start out dirty.</summary>
internal class Tracked<T> {
    /// <summary>Wraps <paramref name="obj"/> and marks it dirty.</summary>
    public Tracked(T obj) {
        TrackedObject = obj;
        IsDirty = true;
    }

    /// <summary>The wrapped object.</summary>
    public T TrackedObject { get; set; }

    /// <summary>Whether the wrapped object is flagged as dirty.</summary>
    public bool IsDirty { get; set; }

    /// <summary>Sets <see cref="IsDirty"/> to <c>true</c>.</summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public Tracked<T> SetDirty() {
        IsDirty = true;
        return this;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>A web source definition: its key, the key of its data bindings, and descriptive fields.</summary>
public class WebResource(DataKey<WebResource> key) : IKeyed<WebResource> {
    /// <summary>Creates a resource with an empty key.</summary>
    public WebResource() : this(new(string.Empty)) { }

    /// <summary>Key of this resource.</summary>
    public DataKey<WebResource> Key { get; set; } = key;

    /// <summary>Key of the bindings resolved by <see cref="ToRecord"/>.</summary>
    public required DataKey<DataBindings> Bindings { get; set; }

    /// <summary>Display name.</summary>
    public string? Name { get; set; }

    /// <summary>Domain of the site.</summary>
    public string? Domain { get; set; }

    /// <summary>Free-text description.</summary>
    public string? Description { get; set; }

    /// <summary>Looks up <see cref="Bindings"/> in <paramref name="sdd"/> and bundles the result with this resource.</summary>
    /// <param name="sdd">Shared data used for the lookup.</param>
    /// <returns>A record holding this resource and its bindings.</returns>
    public WebResourceRecord ToRecord(SharedDataDictionary sdd) {
        var bindings = sdd.Bindings[Bindings];
        return new WebResourceRecord(this, bindings);
    }
}
|
||||
|
||||
/// <summary>A <see cref="WebResource"/> together with its resolved <see cref="DataBindings"/>.</summary>
public record WebResourceRecord(WebResource Resource, DataBindings Bindings);
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.12.35506.116
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam", "Beam\Beam.csproj", "{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Temporary.Cli", "Beam.Temporary.Cli\Beam.Temporary.Cli.csproj", "{8F650BBA-3800-4B5E-A6FF-9057633601EE}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Dynamic", "Beam.Dynamic\Beam.Dynamic.csproj", "{DDEABE82-096C-4799-87F1-56F494D35FAA}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Exports\Beam.Exports.csproj", "{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3BC9A070-85B0-405D-A6F8-D0AEEE625B81}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{8F650BBA-3800-4B5E-A6FF-9057633601EE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{DDEABE82-096C-4799-87F1-56F494D35FAA}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
@@ -0,0 +1,21 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>Descriptive metadata of an article; all text fields are optional and the lists default to empty.</summary>
public class ArticleData : IDocumentMetaData {
    /// <summary>Title of the article.</summary>
    public string? Name { get; set; }

    /// <summary>Author names.</summary>
    public string[] Authors { get; set; } = [];

    /// <summary>Language of the article.</summary>
    public string? Language { get; set; }

    /// <summary>Category names.</summary>
    public string[] Categories { get; set; } = [];

    /// <summary>Version label.</summary>
    public string? Version { get; set; }

    /// <summary>Description text.</summary>
    public string? Description { get; set; }

    /// <summary>Serializes this instance to JSON.</summary>
    /// <param name="options">Serializer options; <c>null</c> is passed through to the serializer.</param>
    /// <returns>The JSON text.</returns>
    public string AsJson(JsonSerializerOptions? options = null)
        => JsonSerializer.Serialize(this, options);
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="FluentBuilder" Version="0.10.0">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Reference Include="aeqw89.DataKeys">
|
||||
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
||||
</Reference>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,15 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>A <see cref="Document"/> whose content is held as raw bytes.</summary>
internal class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) {
    /// <summary>The raw content.</summary>
    public byte[] Content { get; set; } = content;

    /// <summary>Returns the <see cref="Content"/> array itself (no copy is made).</summary>
    public override byte[] ToBytes() => Content;

    /// <summary>Decodes <see cref="Content"/> using the document's encoding.</summary>
    public override string ToString() => Encoding.GetString(Content);
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A <see cref="DelegateBackedSourceLinkGenerator"/> whose link generator and incrementation
/// behaviour both come from a <see cref="PackagedSourceLinkGenerationData"/> instance.
/// </summary>
public class DataBackedSourceLinkGenerator(PackagedSourceLinkGenerationData data, params object[] initialState) : DelegateBackedSourceLinkGenerator(data.GenerateLink, data.GetBehaviour(), initialState) {}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>Produces a <see cref="DocumentSourceLink"/> from a variable-length list of arguments.</summary>
public delegate DocumentSourceLink LinkGenerator(params object[] ps);
|
||||
/// <summary>Maps a value and an integer amount to a new value — presumably the value advanced by that amount; confirm against implementations.</summary>
public delegate object Incrementor(object obj, int amount);
|
||||
|
||||
/// <summary>
/// Enumerator that produces links by repeatedly applying an incrementation behaviour to a
/// state array and passing that state to a <see cref="LinkGenerator"/>.
/// </summary>
public class DelegateBackedSourceLinkGenerator : IEnumerator<DocumentSourceLink> {
    // Snapshot of the state passed to the constructor; Reset restarts from a copy of it.
    private object[] InitialState;

    /// <summary>Delegate that turns the current state into a link.</summary>
    public LinkGenerator Generator { get; set; }

    /// <summary>Behaviour applied to the state on every step.</summary>
    public IncrementationBehaviour Behaviour { get; }

    /// <summary>Current state handed to <see cref="Generator"/>.</summary>
    public object[] State { get; set; }

    /// <summary>Link generated from the current state.</summary>
    public DocumentSourceLink Current { get; private set; }

    object IEnumerator.Current => Current;

    /// <summary>Creates the generator and positions it via <see cref="Reset"/>.</summary>
    /// <param name="generator">Delegate that builds a link from the state.</param>
    /// <param name="behaviour">Behaviour applied to the state on each step.</param>
    /// <param name="initialState">Starting state; it is copied, not stored.</param>
    public DelegateBackedSourceLinkGenerator(LinkGenerator generator, IncrementationBehaviour behaviour, params object[] initialState) {
        Generator = generator;
        Behaviour = behaviour;
        InitialState = (object[])initialState.Clone();
        State = (object[])initialState.Clone();

        Reset();
    }

    /// <summary>Nothing to release.</summary>
    public void Dispose() {
    }

    /// <summary>Applies the behaviour with amount 1 and regenerates <see cref="Current"/>.</summary>
    /// <returns><c>true</c> when the generated link has a value.</returns>
    public bool MoveNext() {
        Behaviour.Apply(State, 1);
        var next = Generator(State);
        Current = next;
        return next.HasValue;
    }

    /// <summary>
    /// Restores a copy of the initial state, applies the behaviour with amount -1, and
    /// regenerates <see cref="Current"/>.
    /// </summary>
    public void Reset() {
        var restored = (object[])InitialState.Clone();
        State = restored;
        Behaviour.Apply(restored, -1);
        Current = Generator(restored);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using aeqw89.DataKeys;
|
||||
using System.Text;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>Base class for documents: a file name, a text encoding and keyed metadata.</summary>
public abstract class Document : IDocument {
    /// <summary>Creates a document.</summary>
    /// <param name="filename">File name of the document.</param>
    /// <param name="encoding">Text encoding; UTF-8 is used when <c>null</c>.</param>
    public Document(string filename, Encoding? encoding = null) {
        Filename = filename;
        Encoding = encoding ?? Encoding.UTF8;
    }

    /// <summary>File name of the document.</summary>
    public string Filename { get; set; }

    /// <summary>Encoding associated with the document's text.</summary>
    public Encoding Encoding { get; set; }

    /// <summary>Metadata entries attached to the document; empty by default.</summary>
    public Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; set; } = [];

    /// <summary>Returns the document content as bytes.</summary>
    public abstract byte[] ToBytes();

    /// <summary>Returns the document content as text.</summary>
    public abstract override string ToString();
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
|
||||
/// Holds a collection of <see cref="IDocument"/> objects in memory to facilitate lazy loading
|
||||
/// </summary>
|
||||
public class DocumentCache : Dictionary<object, IDocument>, IDisposable {
    private bool disposedValue;

    /// <summary>
    /// Checks whether the estimated memory used by the cached documents is below a limit.
    /// </summary>
    /// <param name="allocatedSpaceInBytes">The memory limit in bytes.</param>
    /// <returns><c>true</c> when the estimated usage is strictly less than the limit.</returns>
    public bool IsCapacityLessThan(int allocatedSpaceInBytes) {
        // Compare the byte usage with the limit (previously the entry count was compared
        // with the byte usage and the parameter was ignored).
        return CalculateMemorySpaceUsage() < allocatedSpaceInBytes;
    }

    /// <summary>
    /// Gets an estimate of the space used by the IDocument objects (disregarding metadata) in bytes.
    /// </summary>
    /// <returns>Estimated memory usage in bytes; 0 for an empty cache.</returns>
    public long CalculateMemorySpaceUsage() {
        // Sum returns 0 for an empty cache, where a seedless Aggregate would throw.
        return Values.Sum((document) => document.ToBytes().LongLength);
    }

    /// <summary>Clears the cache the first time it is disposed.</summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (!disposedValue) {
            if (disposing) {
                // Drop the references to the cached documents.
                this.Clear();
            }

            disposedValue = true;
        }
    }

    /// <summary>Disposes the cache, removing all cached documents.</summary>
    public void Dispose() {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A value wrapping the textual address of a document source.
/// </summary>
/// <remarks>
/// Equality uses <see cref="Uri"/> comparison when both sides parse as absolute URIs and falls
/// back to an ordinal comparison of the raw text otherwise, so <c>default</c> instances can be
/// compared and hashed without throwing.
/// </remarks>
public readonly struct DocumentSourceLink(string link) : IEquatable<DocumentSourceLink> {
    private readonly string Link_ { get; } = link;

    /// <summary>
    /// The link as a <see cref="Uri"/>. A new instance is built on every access; this throws
    /// when the underlying text is null or not a valid URI.
    /// </summary>
    public readonly Uri Link => new(Link_);

    /// <summary><c>true</c> when the underlying text is neither null, empty nor whitespace.</summary>
    public bool HasValue => !string.IsNullOrWhiteSpace(Link_);

    /// <summary>A sentinel link pointing at <c>https://invalid.link</c>.</summary>
    public static DocumentSourceLink InvalidLink { get; } = new("https://invalid.link");

    public static bool operator ==(DocumentSourceLink lhs, DocumentSourceLink rhs) {
        return lhs.Equals(rhs);
    }

    public static bool operator !=(DocumentSourceLink lhs, DocumentSourceLink rhs) {
        return !lhs.Equals(rhs);
    }

    /// <summary>Determines whether two links refer to the same address.</summary>
    /// <param name="other">The link to compare with.</param>
    /// <returns><c>true</c> when both links are equal.</returns>
    public bool Equals(DocumentSourceLink other) {
        if (Uri.TryCreate(Link_, UriKind.Absolute, out var mine)
            && Uri.TryCreate(other.Link_, UriKind.Absolute, out var theirs))
            return mine == theirs;

        // At least one side is not a usable URI (for example a default instance).
        return string.Equals(Link_, other.Link_, StringComparison.Ordinal);
    }

    /// <summary>Type-checked equality; a matching hash code alone is not treated as equality.</summary>
    public override bool Equals(object? obj) {
        return obj is DocumentSourceLink other && Equals(other);
    }

    /// <summary>Hash code consistent with <see cref="Equals(DocumentSourceLink)"/>.</summary>
    public override int GetHashCode() {
        if (Uri.TryCreate(Link_, UriKind.Absolute, out var uri))
            return uri.GetHashCode();
        return Link_?.GetHashCode() ?? 0;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using FluentBuilder;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Synchronously converts a loaded <see cref="HtmlDocument"/> into a <typeparamref name="T"/>.
/// </summary>
public delegate T HtmlTransformer<out T>(HtmlDocument doc);

/// <summary>
/// Asynchronously converts a loaded <see cref="HtmlDocument"/> into a <typeparamref name="T"/>.
/// </summary>
public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);

/// <summary>
/// Bundles the web client, links, transformers, reporters and options used by a download run.
/// </summary>
/// <typeparam name="T">The type produced by the transformers.</typeparam>
public class DownloadContext<T> : IDisposable {
    private bool disposedValue;

    public HtmlWeb Web { get; }
    /// <summary>Synchronous transformer; wraps the asynchronous one when only that was supplied.</summary>
    public HtmlTransformer<T> Transformer { get; }
    /// <summary>Asynchronous transformer; wraps the synchronous one when only that was supplied.</summary>
    public AsyncHtmlTransformer<T> AsyncTranformer { get; }
    public IProgress<T>? DownloadReporter { get; set; }
    public IProgress<int>? RetryReporter { get; set; }
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }
    /// <summary>Defaults to one minute when no value is passed to the constructor.</summary>
    public TimeSpan TimeOut { get; set; }
    public IEnumerable<DocumentSourceLink> Links { get; }
    public CancellationToken CancellationToken { get; }
    public DocumentCache Cache { get; private set; } = [];
    public ILogger? DownloadLogger { get; set; }

    /// <summary>
    /// Creates a context. At least one of <paramref name="transformer"/> and
    /// <paramref name="asyncTransformer"/> must be supplied; the missing one is derived from the other.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="web"/> or <paramref name="links"/> is null.</exception>
    /// <exception cref="ArgumentException">Both transformers are null.</exception>
    public DownloadContext(HtmlWeb web,
        IEnumerable<DocumentSourceLink> links,
        CancellationToken cancellationToken = default,
        HtmlTransformer<T>? transformer = null,
        AsyncHtmlTransformer<T>? asyncTransformer = null,
        IProgress<T>? downloadReporter = null,
        IProgress<int>? retryReporter = null,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? asyncFailurePredicates = null,
        TimeSpan? timeOut = null,
        ILogger? downloadLogger = null) {
        ArgumentNullException.ThrowIfNull(web, nameof(web));
        ArgumentNullException.ThrowIfNull(links, nameof(links));
        if (transformer is null && asyncTransformer is null)
            throw new ArgumentException($"Either {nameof(transformer)} or {nameof(asyncTransformer)} must be not null.");

        Web = web;
        Links = links;
        CancellationToken = cancellationToken;

        // Whichever transformer is missing is adapted from the one that was supplied.
        // NOTE: the synchronous adapter blocks the calling thread on the asynchronous transformer.
        Transformer = transformer ?? ((x) => asyncTransformer!(x).Result);
        AsyncTranformer = asyncTransformer ?? ((x) => Task.FromResult(transformer!(x)));

        DownloadReporter = downloadReporter;
        RetryReporter = retryReporter;
        AsyncFailurePredicates = asyncFailurePredicates;
        TimeOut = timeOut ?? TimeSpan.FromMinutes(1);
        DownloadLogger = downloadLogger;
    }

    /// <summary>
    /// Disposes the owned <see cref="Cache"/> the first time it is called with
    /// <paramref name="disposing"/> set.
    /// </summary>
    protected virtual void Dispose(bool disposing) {
        if (disposedValue)
            return;

        if (disposing) {
            // DocumentCache is IDisposable: dispose it instead of only dropping the reference.
            Cache?.Dispose();
            // The property still reads as null after disposal, as it did before this change.
            Cache = null!;
        }

        disposedValue = true;
    }

    /// <summary>Disposes the context and suppresses finalization.</summary>
    public void Dispose() {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Exposes an already-created asynchronous enumerator of ordered downloads as an
/// <see cref="IAsyncEnumerable{T}"/>.
/// </summary>
/// <typeparam name="T">The downloaded payload type.</typeparam>
public class DownloadEnumerable<T>(IAsyncEnumerator<Ordered<T>> download) : IAsyncEnumerable<Ordered<T>> {
    /// <summary>The enumerator supplied at construction.</summary>
    public IAsyncEnumerator<Ordered<T>> Download { get; } = download;

    /// <summary>
    /// Returns the wrapped enumerator. The same instance is handed out on every call and the
    /// cancellation token is not observed.
    /// </summary>
    public IAsyncEnumerator<Ordered<T>> GetAsyncEnumerator(CancellationToken cancellationToken = default) {
        return Download;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A bag of items with a declared maximum size, a completion flag, and a single-holder
/// "updater" delegate used to add items.
/// </summary>
/// <typeparam name="T">The item type.</typeparam>
public sealed class Fragment<T>(int size) {
    // Starts as false and is only ever assigned a bool by SetComplete.
    private bool? Complete = false;
    private bool UpdaterLocked = false;

    private ConcurrentBag<T> FragmentBag { get; set; } = new();

    /// <summary>The size passed to the constructor.</summary>
    public int MaxSize { get; } = size;

    /// <summary>The number of items currently held.</summary>
    public int Size {
        get { return FragmentBag.Count; }
    }

    /// <summary>
    /// The completion flag when one is set; otherwise whether <see cref="Size"/> equals
    /// <see cref="MaxSize"/>.
    /// </summary>
    public bool IsComplete {
        get {
            if (Complete.HasValue)
                return Complete.Value;
            return Size == MaxSize;
        }
    }

    /// <summary>Removes one item from the fragment.</summary>
    /// <param name="shard">The removed item when the call succeeds.</param>
    /// <returns><c>true</c> when a non-null item was removed.</returns>
    public bool TryTake([NotNullWhen(true)] out T? shard) {
        if (!FragmentBag.TryTake(out shard))
            return false;
        return shard != null;
    }

    /// <summary>
    /// Hands out the add delegate of <paramref name="fragment"/> unless it is already held.
    /// </summary>
    /// <returns><c>false</c> when the updater was already acquired and not yet released.</returns>
    public static bool TryAcquireUpdater(Fragment<T> fragment, [NotNullWhen(true)] out Action<T>? updater) {
        // Atomically flips the flag to true and reports the value it had before.
        bool wasLocked = Interlocked.CompareExchange(ref fragment.UpdaterLocked, true, false);
        if (wasLocked) {
            updater = null;
            return false;
        }

        updater = fragment.FragmentBag.Add;
        return true;
    }

    /// <summary>
    /// Releases the updater when <paramref name="updater"/> is the add delegate of
    /// <paramref name="fragment"/>.
    /// </summary>
    /// <returns><c>true</c> when the updater matched and the lock flag was cleared.</returns>
    public static bool TryReleaseUpdater(Fragment<T> fragment, Action<T> updater) {
        Action<T> expected = fragment.FragmentBag.Add;
        if (updater != expected)
            return false;

        Interlocked.Exchange(ref fragment.UpdaterLocked, false);
        return true;
    }

    /// <summary>Sets the completion flag of <paramref name="fragment"/>.</summary>
    public static void SetComplete(Fragment<T> fragment, bool status) {
        fragment.Complete = status;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
namespace Beam {
|
||||
/// <summary>
/// Extension methods for asynchronous enumerators.
/// </summary>
public static class IAsyncEnumeratorExtensions {
    /// <summary>
    /// Flattens an enumerator of fragments into an enumerator of the items they contain.
    /// Enumeration stops at the first fragment that is null or not complete. The source
    /// enumerator is disposed when iteration of the result ends.
    /// </summary>
    /// <typeparam name="T">The item type.</typeparam>
    /// <param name="fragmented">The enumerator of fragments to unwrap.</param>
    /// <returns>An enumerator over the items taken out of each fragment.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fragmented"/> is null.</exception>
    public static IAsyncEnumerator<T> UnwrapFragmented<T>(this IAsyncEnumerator<Fragment<T>> fragmented) {
        // Validate eagerly: inside an async iterator the check would only run on the first MoveNextAsync.
        ArgumentNullException.ThrowIfNull(fragmented);
        return Iterate(fragmented);

        static async IAsyncEnumerator<T> Iterate(IAsyncEnumerator<Fragment<T>> source) {
            try {
                while (await source.MoveNextAsync().ConfigureAwait(false)) {
                    var fragment = source.Current;
                    if (fragment is null || !fragment.IsComplete)
                        yield break;

                    // TryTake only reports success for non-null items, so no extra null check is needed.
                    while (fragment.TryTake(out var item))
                        yield return item;
                }
            } finally {
                await source.DisposeAsync().ConfigureAwait(false);
            }
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
using aeqw89.DataKeys;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A document that has a file name, keyed metadata, and binary and textual representations.
/// </summary>
public interface IDocument {
    /// <summary>
    /// The document's file name. It must be a valid name on <c>UNIX</c>, <c>WINDOWS</c>,
    /// <c>APPLE</c> and <c>ANDROID</c> file systems alike.
    /// </summary>
    string Filename { get; }

    /// <summary>
    /// Extra descriptive data attached to the document, looked up by key.
    /// </summary>
    Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; }

    /// <summary>
    /// Produces the binary form of this <see cref="IDocument"/>.
    /// </summary>
    /// <returns>The document as a byte array.</returns>
    byte[] ToBytes();

    /// <summary>
    /// Produces the textual form of this <see cref="IDocument"/>.
    /// </summary>
    /// <returns>The document as a string.</returns>
    string ToString();
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Holder for extension methods; it currently declares no members.
/// </summary>
public static class IDocumentExtensions { }
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
using System.Text.Json;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A piece of document metadata that can render itself as JSON.
/// </summary>
public interface IDocumentMetaData {
    /// <summary>
    /// Renders this metadata as a JSON string.
    /// </summary>
    /// <param name="options">Optional serializer options; may be omitted.</param>
    /// <returns>The JSON text.</returns>
    string AsJson(JsonSerializerOptions? options = null);
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace Beam {
|
||||
/// <summary>
/// Produces links relative to a given link: the next one, the preceding one, or one at an
/// arbitrary offset.
/// </summary>
internal interface IDocumentSourceLinkFactory {
    /// <summary>Returns the link that follows <paramref name="current"/>.</summary>
    DocumentSourceLink GetNextLink(DocumentSourceLink current);

    /// <summary>Returns the link that precedes <paramref name="current"/>.</summary>
    DocumentSourceLink GetPrecedingLink(DocumentSourceLink current);

    /// <summary>
    /// Walks <paramref name="offset"/> steps from <paramref name="current"/>: forwards with
    /// <see cref="GetNextLink"/> for positive offsets, backwards with
    /// <see cref="GetPrecedingLink"/> for negative ones.
    /// </summary>
    /// <param name="current">The starting link.</param>
    /// <param name="offset">Number of steps; <c>0</c> returns <paramref name="current"/>.</param>
    /// <returns>The link reached after the requested number of steps.</returns>
    DocumentSourceLink GetArbitraryLink(DocumentSourceLink current, int offset) {
        // Iterative on purpose: one stack frame per step would overflow the stack for large offsets.
        var link = current;
        for (; offset > 0; offset--)
            link = GetNextLink(link);
        for (; offset < 0; offset++)
            link = GetPrecedingLink(link);
        return link;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace Beam {
|
||||
/// <summary>
/// Downloads a single unit of type <typeparamref name="T"/> from one or more ordered links.
/// </summary>
/// <typeparam name="T">The type produced by a download.</typeparam>
public interface IUnitDownloader<T> {
    /// <summary>The number of links this downloader takes per download.</summary>
    int LinksPerDownload { get; }

    /// <summary>
    /// Attempts a download for the given links.
    /// </summary>
    /// <param name="link">The ordered links to download from.</param>
    /// <param name="ct">Token used to cancel the download.</param>
    /// <param name="maximumRetryCount">Upper bound on attempts; defaults to 7.</param>
    /// <param name="tryProgress">Optional reporter for attempt counts.</param>
    /// <returns>A flag paired with the downloaded value, which may be null.</returns>
    Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<int>? tryProgress = null);
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
namespace Beam {
|
||||
/// <summary>
/// Defines how a url template should be updated, in what order, and by how much.
/// </summary>
public struct IncrementationBehaviour {
    /// <summary>Maps an argument index to the incrementor applied to the argument at that index.</summary>
    public Dictionary<int, Incrementor> Map { get; set; }

    /// <summary>
    /// Replaces each mapped entry of <paramref name="objects"/> with the string form of its
    /// incrementor's result. Every index is validated before any entry is changed, so a failed
    /// call leaves the array untouched. A null <see cref="Map"/> is treated as "nothing to apply".
    /// </summary>
    /// <param name="objects">The argument list to update in place.</param>
    /// <param name="amount">The amount passed to every incrementor.</param>
    /// <exception cref="ArgumentNullException"><paramref name="objects"/> is null.</exception>
    /// <exception cref="S.MapException">The map contains an index outside <paramref name="objects"/>.</exception>
    public readonly void Apply(object[] objects, int amount) {
        ArgumentNullException.ThrowIfNull(objects);
        if (Map is null)
            return;

        // Validate first (negative indices included) so the array is never half-updated.
        foreach (var index in Map.Keys) {
            if (index < 0 || index >= objects.Length)
                throw new S.MapException(S.M.MapDoesNotMatchArgs);
        }

        foreach (var (i, inc) in Map)
            objects[i] = inc(objects[i], amount)?.ToString();
    }
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
namespace Beam {
|
||||
/// <summary>
/// A link template together with the index of the template argument that holds the chapter index.
/// </summary>
public struct PackagedSourceLinkGenerationData {
    /// <summary>Composite format string passed to <see cref="string.Format(IFormatProvider, string, object[])"/>.</summary>
    public string Template { get; set; }

    /// <summary>Position, within the template arguments, of the chapter index.</summary>
    public int IndexOfChapterIndex { get; set; }

    /// <summary>
    /// Formats <see cref="Template"/> with <paramref name="ps"/> and wraps the result in a link.
    /// </summary>
    /// <param name="ps">The template arguments.</param>
    /// <returns>The generated link.</returns>
    public readonly DocumentSourceLink GenerateLink(params object[] ps)
        // Links are machine-readable: format with the invariant culture, not the user's.
        => new(string.Format(System.Globalization.CultureInfo.InvariantCulture, Template, ps));

    /// <summary>
    /// Builds a behaviour whose map has one entry: at <see cref="IndexOfChapterIndex"/>, an
    /// incrementor that parses the argument's string form as an integer and adds the amount.
    /// </summary>
    /// <returns>The incrementation behaviour for this template.</returns>
    public readonly IncrementationBehaviour GetBehaviour() {
        return new IncrementationBehaviour() {
            Map = new Dictionary<int, Incrementor>() {
                {
                    IndexOfChapterIndex,
                    (x, i) => int.Parse(
                        x?.ToString() ?? throw new ArgumentException("The chapter index argument has no string representation."),
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture) + i
                }
            }
        };
    }
}
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Collections;
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Pairs a value with an integer order.
/// </summary>
public record Ordered<T>(T Data, int Order);

/// <summary>
/// Asynchronous enumerator that downloads batches of links in parallel into an internal bag
/// and hands the results out one at a time.
/// </summary>
/// <typeparam name="T">The downloaded payload type.</typeparam>
[Obsolete("Use chunk downloader instead.")]
public class ParallelDownloader<T>(DownloadContext<T> context, int maximumConcurrentDownloads = 4) : IAsyncEnumerator<Ordered<T>> {
    private int Count = 0;

    public DownloadContext<T> Context { get; } = context;
    public int MaximumConcurrentDownloads { get; } = maximumConcurrentDownloads;
    public Ordered<T> Current { get; set; }

    // Non-null while a batch is being scheduled or downloaded.
    private Task? CacheFiller { get; set; }
    private ConcurrentBag<Ordered<T>> Cache { get; set; } = [];

    /// <summary>
    /// Takes up to twice <see cref="MaximumConcurrentDownloads"/> links from the context,
    /// downloads them in parallel and adds every successful result to the bag.
    /// </summary>
    private async Task FillCache() {
        var batch = Context.Links
            .Take(MaximumConcurrentDownloads * 2)
            .Select((link, position) => new Ordered<DocumentSourceLink>(link, position))
            .ToList();

        var listing = batch
            .Select((entry) => $"{entry.Order}: {entry.Data.Link}")
            .Aggregate((soFar, line) => $"{soFar}\n{line}");
        Console.WriteLine(listing);

        UnitDownloader<T> downloader = new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
        int completed = 0;
        ParallelOptions options = new() {
            CancellationToken = Context.CancellationToken,
            MaxDegreeOfParallelism = MaximumConcurrentDownloads
        };

        await Parallel.ForEachAsync(batch, options, async (entry, ct) => {
            var target = entry.Data.Link;
            var (succeeded, document) = await downloader.TryDownload(
                [new Ordered<string>(target.ToString(), entry.Order)],
                ct,
                tryProgress: Context.RetryReporter);

            if (succeeded && document is not null) {
                Cache.Add(new(document, entry.Order));
                Context.DownloadReporter?.Report(document);
                Interlocked.Increment(ref completed);
                Interlocked.Increment(ref Count);
                return;
            }

            Console.WriteLine($"FAILED to download {target}");
        });

        Console.WriteLine("Downloaded Chunk");
        // Clearing the task lets the next MoveNextAsync start another batch.
        CacheFiller = null;
    }

    /// <summary>
    /// Polls the bag every 10 ms until an item is available or the context's time-out has been
    /// waited out, starting a fill when the bag runs low and none is in progress.
    /// </summary>
    /// <returns><c>true</c> when <see cref="Current"/> was set; <c>false</c> on time-out.</returns>
    public async ValueTask<bool> MoveNextAsync() {
        TimeSpan pollInterval = TimeSpan.FromSeconds(0.01);

        for (TimeSpan elapsed = TimeSpan.Zero; elapsed < Context.TimeOut; elapsed += pollInterval) {
            if (Cache.Count < MaximumConcurrentDownloads && CacheFiller is null) // strange
                CacheFiller ??= FillCache();

            if (Cache.TryTake(out var next) && next is not null) {
                Current = next;
                return true;
            }

            await Task.Delay(pollInterval);
        }

        return false;
    }

    /// <summary>Suppresses finalization; no other cleanup is performed.</summary>
    public ValueTask DisposeAsync() {
        GC.SuppressFinalize(this);
        return ValueTask.CompletedTask;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Source-generated regular expressions used by the library.
/// </summary>
internal partial class RegexGenerated {
    /// <summary>
    /// Matches a single-braced placeholder with optional digits, such as <c>{0}</c> or
    /// <c>{}</c>, that is not preceded by <c>{</c> and not followed by <c>}</c>.
    /// </summary>
    /// <returns>The generated <see cref="Regex"/> instance.</returns>
    [GeneratedRegex("(?<!{){\\d*}(?!})")]
    public static partial Regex CurlyBracketedParameters();
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Container for the library's exception types (<see cref="MapException"/>,
/// <see cref="AssertionException"/>) and message constants (<see cref="M"/>).
/// </summary>
internal class S {

    /// <summary>
    /// Thrown when an incrementation map does not fit the argument list it is applied to.
    /// </summary>
    [Serializable]
    public class MapException : ArgumentException {
        public MapException() { }
        public MapException(string message) : base(message) { }
        public MapException(string message, Exception inner) : base(message, inner) { }

        // The base serialization constructor is obsolete (SYSLIB0051); mirror that marking here.
        [Obsolete("Formatter-based serialization is obsolete.", DiagnosticId = "SYSLIB0051")]
        protected MapException(
            System.Runtime.Serialization.SerializationInfo info,
            System.Runtime.Serialization.StreamingContext context) : base(info, context) { }
    }

    /// <summary>
    /// The kind of exception that should never happen
    /// </summary>
    [Serializable]
    public class AssertionException : Exception {
        public AssertionException() { }
        public AssertionException(string message) : base(message) { }
        public AssertionException(string message, Exception inner) : base(message, inner) { }

        // The base serialization constructor is obsolete (SYSLIB0051); mirror that marking here.
        [Obsolete("Formatter-based serialization is obsolete.", DiagnosticId = "SYSLIB0051")]
        protected AssertionException(
            System.Runtime.Serialization.SerializationInfo info,
            System.Runtime.Serialization.StreamingContext context) : base(info, context) { }
    }

    /// <summary>
    /// Message constants used when throwing the exceptions above.
    /// </summary>
    public class M {
        public const string MapDoesNotMatchArgs = "Error; Map contains indices that exceed the argument list passed.";
        public const string NewFragmentShouldBeFree = "Assertion Error: Could not acquire lock of newly created fragment";
        public const string LinksCannotBeEmpty = "Cannot construct downloader with empty links collection!";
    }
}
|
||||
}
|
||||
@@ -0,0 +1,100 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A <see cref="SequentialDownloader{TInput, TOutput}"/> whose output is a
/// <see cref="Fragment{T}"/> of ordered downloads. It adds no members of its own and forwards
/// all constructor arguments to the base class.
/// </summary>
/// <typeparam name="T">The downloaded payload type.</typeparam>
public class SequentialFragmentDownloader<T>(
    DownloadContext<T> context,
    Func<DownloadContext<T>, IUnitDownloader<Fragment<Ordered<T>>>> getUnitDownloader,
    ILogger? logger = null)
    : SequentialDownloader<T, Fragment<Ordered<T>>>(context, getUnitDownloader, logger) {
}
|
||||
|
||||
// public class SequentialChunkDownloader<T> : IAsyncEnumerator<Fragment<Ordered<T>>> {
|
||||
// public Fragment<Ordered<T>> Current { get; protected set; }
|
||||
// public DownloadContext<T> Context { get; }
|
||||
// protected IEnumerator<DocumentSourceLink> LinksEnumerator;
|
||||
// protected ConcurrentQueue<Fragment<Ordered<T>>> DownloadQueue { get; set; } = [];
|
||||
// public int ChunkSize { get; }
|
||||
|
||||
// private ILogger? Logger => Context.DownloadLogger;
|
||||
|
||||
// public UnitDownloader<T> GetUnitDownloader()
|
||||
// => new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
|
||||
|
||||
// public SequentialChunkDownloader(DownloadContext<T> context, int chunkSize) {
|
||||
// Context = context;
|
||||
// LinksEnumerator = Context.Links.GetEnumerator();
|
||||
// Current = new Fragment<Ordered<T>>(0);
|
||||
// ChunkSize = chunkSize;
|
||||
// }
|
||||
|
||||
// public ValueTask DisposeAsync() {
|
||||
// GC.SuppressFinalize(this);
|
||||
// return ValueTask.CompletedTask;
|
||||
// }
|
||||
|
||||
// protected Task<bool>? DownloadsTask = null;
|
||||
// protected virtual async Task<bool> ProcessDownloads() {
|
||||
// if (DownloadQueue.IsEmpty)
|
||||
// return true;
|
||||
// if (DownloadsTask is null) {
|
||||
// DownloadsTask = Task.Run(async () => {
|
||||
// if (!DownloadQueue.TryDequeue(out var fragment))
|
||||
// return true; // no fragments left, likely race condition but return true as technically all items have been downloaded
|
||||
// var unit = GetUnitDownloader(); // instantiates unit downloader per request (okay)
|
||||
// if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater)) { // gets the add method for the current fragment
|
||||
// Logger?.LogError("Failed to acquire updater for fragment {{{}}}", fragment.GetHashCode());
|
||||
// return false; // fragment is unsafe to modify
|
||||
// }
|
||||
// try {
|
||||
// var links = Enumerable.Range(0, ChunkSize).Select((x) => {
|
||||
// if (!LinksEnumerator.MoveNext())
|
||||
// return new Ordered<DocumentSourceLink>(DocumentSourceLink.InvalidLink, -1); // stops link collection if end-of-links is reached
|
||||
// return new Ordered<DocumentSourceLink>(LinksEnumerator.Current, x);
|
||||
// }).Where((x) => x.Data != DocumentSourceLink.InvalidLink); // filter invalid links
|
||||
// await Parallel.ForEachAsync(links, async (x, ct) => {
|
||||
// Logger?.LogInformation("Started download for {} order={}", x.Data.Link, x.Order);
|
||||
// var (result, downloadedT) = await unit.TryDownload( // download (parallel) objects
|
||||
// x.Data.Link.ToString(), // use link from links collection (exposed as x)
|
||||
// ct, // use ct provided with method call
|
||||
// tryProgress: Context.RetryReporter);
|
||||
// if (!result) { // download failure (soft because it was detected)
|
||||
// Logger?.LogError("Failed to retrieve {} order={}", x.Data.Link, x.Order);
|
||||
// return;
|
||||
// }
|
||||
// if (downloadedT is null) { // download failure (hard because it was not detected)
|
||||
// Logger?.LogCritical("Failed to retrieve {} order={}", x.Data.Link, x.Order);
|
||||
// return;
|
||||
// }
|
||||
// Logger?.LogInformation("Retrieved {} order={} successfully", x.Data.Link, x.Order);
|
||||
// updater(new Ordered<T>(downloadedT, x.Order)); // update the fragment
|
||||
// });
|
||||
// Fragment<Ordered<T>>.SetComplete(fragment, true);
|
||||
// } finally {
|
||||
// Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater); // returns updater to allow modification
|
||||
// }
|
||||
|
||||
|
||||
// return fragment.Size == fragment.MaxSize;
|
||||
// });
|
||||
// }
|
||||
// if (DownloadsTask.IsCompleted) {
|
||||
// DownloadsTask = null;
|
||||
// return await ProcessDownloads();
|
||||
// }
|
||||
// return true; // if task is still processing return should be neither true or false...
|
||||
// }
|
||||
|
||||
// public async ValueTask<bool> MoveNextAsync() {
|
||||
// if (Current.IsComplete && Current.Size < Current.MaxSize)
|
||||
// return false; // if a fragment is marked complete despite being unsaturated, we've run out links!
|
||||
// if (DownloadQueue.Count == 0) {
|
||||
// Current = new Fragment<Ordered<T>>(ChunkSize);
|
||||
// DownloadQueue.Enqueue(Current);
|
||||
// }
|
||||
|
||||
// return await ProcessDownloads();
|
||||
// }
|
||||
// }
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Asynchronous enumerator that walks the context's links in order and, on every step, hands
/// the next group of links to a unit downloader.
/// </summary>
/// <typeparam name="TInput">The payload type of the download context.</typeparam>
/// <typeparam name="TOutput">The type produced by each unit download.</typeparam>
public class SequentialDownloader<TInput, TOutput> : IAsyncEnumerator<TOutput> {
    public TOutput Current { get; protected set; }
    public DownloadContext<TInput> Context { get; }
    public ILogger? Logger { get; set; }
    /// <summary>The order number assigned to the next link that is consumed.</summary>
    public int LastOrder { get; set; } = 0;

    /// <summary>Enumerator over the context's links; positioned on the next link not yet consumed.</summary>
    protected IEnumerator<DocumentSourceLink> LinksEnumerator;

    // True while LinksEnumerator.Current refers to a link that has not been consumed yet.
    private bool HasPendingLink;

    public Func<IUnitDownloader<TOutput>> GetUnitDownloader { get; set; }

    /// <summary>
    /// Creates the downloader and positions the link enumerator on the first link.
    /// </summary>
    /// <param name="context">The download context supplying links, token and reporters.</param>
    /// <param name="getUnitDownloader">Factory invoked with the context on every step.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The context has no links.</exception>
    public SequentialDownloader(DownloadContext<TInput> context, Func<DownloadContext<TInput>, IUnitDownloader<TOutput>> getUnitDownloader, ILogger? logger = null) {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(getUnitDownloader);

        Context = context;
        Logger = logger;
        LinksEnumerator = Context.Links.GetEnumerator();
        try {
            LinksEnumerator.Reset();
        } catch (NotSupportedException) {
            // Iterator-based enumerators do not support Reset; they already start at the beginning.
        }

        HasPendingLink = LinksEnumerator.MoveNext();
        if (!HasPendingLink)
            // The single-string constructor would treat the message as the parameter name.
            throw new ArgumentOutOfRangeException(nameof(context), S.M.LinksCannotBeEmpty);

        Current = default!;
        GetUnitDownloader = () => getUnitDownloader(Context);
    }

    /// <summary>Suppresses finalization; no other cleanup is performed.</summary>
    public ValueTask DisposeAsync() {
        GC.SuppressFinalize(this);
        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Collects up to <c>LinksPerDownload</c> links (always at least one), downloads them as
    /// one unit and stores the result in <see cref="Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a unit was downloaded; <c>false</c> when the links are exhausted or the
    /// download failed.
    /// </returns>
    public async ValueTask<bool> MoveNextAsync() {
        if (!HasPendingLink) {
            // Reading Current after the enumerator has ended is undefined, so stop here.
            Logger?.LogInformation("Out of links!");
            return false;
        }

        var unit = GetUnitDownloader(); // safe to instantiate per request.
        var idealLinkCount = unit.LinksPerDownload;
        List<Ordered<string>> links = [];

        // Consume the pending link, then keep going while more links exist and the batch has room.
        // When the batch fills up first, the enumerator stays on the next unconsumed link.
        do {
            links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
            HasPendingLink = LinksEnumerator.MoveNext();
        } while (HasPendingLink && links.Count < idealLinkCount);

        var (result, downloadedT) = await unit.TryDownload(
            links.ToArray(),
            Context.CancellationToken,
            tryProgress: Context.RetryReporter).ConfigureAwait(false);

        if (!result || downloadedT is null) {
            Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name);
            return false; // unit download failed
        }

        Current = downloadedT;
        return true;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Wraps a single link enumerator so it can be consumed as an
/// <see cref="IEnumerable{T}"/>. Every call to <c>GetEnumerator</c> returns that same instance.
/// </summary>
public class SourceLinkEnumerable : IEnumerable<DocumentSourceLink> {
    /// <summary>The enumerator this instance was created from.</summary>
    public IEnumerator<DocumentSourceLink> Enumerator { get; }

    private SourceLinkEnumerable(IEnumerator<DocumentSourceLink> enumerator) => Enumerator = enumerator;

    /// <summary>Creates an enumerable around <paramref name="generator"/>.</summary>
    /// <param name="generator">The enumerator to wrap.</param>
    /// <returns>A new <see cref="SourceLinkEnumerable"/>.</returns>
    public static SourceLinkEnumerable FromGenerator(IEnumerator<DocumentSourceLink> generator) {
        return new SourceLinkEnumerable(generator);
    }

    /// <summary>Returns the wrapped enumerator.</summary>
    public IEnumerator<DocumentSourceLink> GetEnumerator() => Enumerator;

    /// <summary>Returns the wrapped enumerator through the non-generic interface.</summary>
    IEnumerator IEnumerable.GetEnumerator() => Enumerator;
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A document whose content comes from a <see cref="Stream"/>. The stream is read into memory
/// the first time the bytes or text are requested; it is not disposed by this class.
/// </summary>
internal class StreamDocument(string filename, Stream content, Encoding? encoding = null) : Document(filename) {
    /// <summary>The source stream. Assigning a different stream causes it to be read on next use.</summary>
    public Stream Content { get; set; } = content;

    /// <summary>Encoding used by <see cref="ToString"/>; UTF-8 when none was supplied.</summary>
    public Encoding Encoding { get; set; } = encoding ?? Encoding.UTF8;

    byte[] Content_ { get; set; } = [];

    // The stream instance that Content_ was read from, or null when nothing has been read yet.
    private Stream? BufferedSource;

    /// <summary>Returns the bytes read from <see cref="Content"/>.</summary>
    /// <returns>The buffered content; empty when <see cref="Content"/> is null.</returns>
    public override byte[] ToBytes() {
        EnsureBuffered();
        return Content_;
    }

    /// <summary>Decodes the buffered bytes using <see cref="Encoding"/>.</summary>
    /// <returns>The content as text.</returns>
    public override string ToString() {
        return Encoding.GetString(ToBytes());
    }

    // Reads Content into Content_ once per stream instance.
    private void EnsureBuffered() {
        if (ReferenceEquals(BufferedSource, Content))
            return;

        if (Content is null) {
            Content_ = [];
            BufferedSource = null;
            return;
        }

        // Read from the start when possible so the whole stream is captured.
        if (Content.CanSeek)
            Content.Position = 0;

        using var buffer = new MemoryStream();
        Content.CopyTo(buffer);
        Content_ = buffer.ToArray();
        BufferedSource = Content;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// A document whose content is held as a string.
/// </summary>
public class StringDocument(string filename, string content, Encoding? encoding = null) : Document(filename, encoding) {
    /// <summary>The document text.</summary>
    public string Content { get; set; } = content;

    /// <summary>Encodes <see cref="Content"/> with the document's encoding.</summary>
    /// <returns>The encoded bytes.</returns>
    public override byte[] ToBytes() => Encoding.GetBytes(Content);

    /// <summary>Returns <see cref="Content"/> unchanged.</summary>
    public override string ToString() => Content;
}
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Asynchronously decides whether a download should be treated as failed.
/// </summary>
public delegate Task<bool> AsyncDownloadFailurePredicate<in T>(T download);

/// <summary>
/// A download managing class that manages a singular download with failure-detection and exponential-backoff retries. This class is safe to instantiate per request.
/// </summary>
/// <typeparam name="T">The type produced by the transformer.</typeparam>
/// <param name="web">The web client used to load pages.</param>
/// <param name="transformer">Converts a loaded page into a <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional predicates; a page counts as failed when any returns <c>true</c>.</param>
public class UnitDownloader<T>(HtmlWeb web, AsyncHtmlTransformer<T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
    // 2^21 seconds is the largest delay the previous int-millisecond arithmetic could represent.
    private const int MaxBackoffExponent = 21;

    public HtmlWeb Web { get; } = web;
    public virtual AsyncHtmlTransformer<T> Transformer { get; } = transformer;
    public virtual AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicates { get; } = failurePredicate;

    /// <summary>This downloader uses one link per download.</summary>
    public int LinksPerDownload { get; } = 1;

    /// <summary>
    /// Runs the failure predicates over <paramref name="doc"/>.
    /// </summary>
    /// <returns><c>true</c> when any predicate reported failure; <c>false</c> when there are none.</returns>
    protected virtual async Task<bool> IsFailure(HtmlDocument doc) {
        if (FailurePredicates is null)
            return false;

        var failed = false;
        await Parallel.ForEachAsync(FailurePredicates, async (predicate, ct) => {
            // Skip missing predicates and stop evaluating once a failure is known.
            if (failed || predicate is null)
                return;
            if (await predicate(doc))
                failed = true;
        });

        return failed;
    }

    /// <summary>
    /// Makes one attempt to load and transform <paramref name="link"/>.
    /// </summary>
    /// <returns><c>(true, value)</c> on success; <c>(false, default)</c> when the page failed a predicate or an error occurred.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var html = await Web.LoadFromWebAsync(link, ct);
            if (FailurePredicates is null || !(await IsFailure(html)))
                return (true, await Transformer(html));
            return (false, default);
        } catch (OperationCanceledException) when (ct.IsCancellationRequested) {
            // Cancellation requested by the caller is not a retryable failure.
            throw;
        } catch (Exception) {
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads the first link of <paramref name="link"/>, retrying with exponential back-off
    /// (2, 4, 8, ... seconds) between attempts.
    /// </summary>
    /// <param name="link">The links; only the first entry is used.</param>
    /// <param name="ct">Token that cancels both the attempts and the waits between them.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts.</param>
    /// <param name="tryProgress">Receives the number of failed attempts so far.</param>
    /// <returns><c>(true, value)</c> on success; otherwise <c>(false, lastValue)</c>.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<int>? tryProgress = null) {
        ArgumentNullException.ThrowIfNull(link);
        if (link.Length == 0)
            return (false, default);

        T? doc = default;
        for (int attempt = 1; attempt <= maximumRetryCount; attempt++) {
            ct.ThrowIfCancellationRequested();
            (var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct);
            if (success && doc != null)
                return (true, doc);

            tryProgress?.Report(attempt);

            // No point waiting after the final attempt; the wait itself honours cancellation.
            if (attempt < maximumRetryCount) {
                var seconds = Math.Pow(2, Math.Min(attempt, MaxBackoffExponent));
                await Task.Delay(TimeSpan.FromSeconds(seconds), ct);
            }
        }

        return (false, doc);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Downloads a group of links in parallel and collects the results into one
/// <see cref="Fragment{T}"/> of ordered items.
/// </summary>
/// <typeparam name="T">The downloaded payload type.</typeparam>
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
    /// <summary>
    /// Creates the downloader and the inner single-link downloader it delegates to.
    /// </summary>
    /// <param name="web">The web client used to load pages.</param>
    /// <param name="transformer">Converts a loaded page into a <typeparamref name="T"/>.</param>
    /// <param name="failurePredicate">Optional failure predicates passed to the inner downloader.</param>
    /// <param name="fragmentSize">Value exposed as <see cref="LinksPerDownload"/>.</param>
    /// <param name="logger">Optional logger.</param>
    public UnitFragmentDownloader(HtmlWeb web,
        AsyncHtmlTransformer<T> transformer,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
        int fragmentSize = 4,
        ILogger? logger = null) {
        Web = web;
        Transformer = transformer;
        FailurePredicate = failurePredicate;
        UnitDownloader = new UnitDownloader<T>(Web, Transformer, FailurePredicate);
        LinksPerDownload = fragmentSize;
        Logger = logger;
    }

    public HtmlWeb Web { get; }
    public AsyncHtmlTransformer<T> Transformer { get; }
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }
    public int LinksPerDownload { get; set; }
    public ILogger? Logger { get; set; }

    private readonly UnitDownloader<T> UnitDownloader;

    /// <summary>
    /// Downloads every link in parallel into a new fragment sized to the number of links.
    /// The fragment is marked complete only when every link succeeded.
    /// </summary>
    /// <returns>
    /// <c>(true, fragment)</c> when all links were downloaded; <c>(false, fragment)</c> with the
    /// partial results otherwise.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<int>? tryProgress) {
        ArgumentNullException.ThrowIfNull(link);

        Fragment<Ordered<T>> fragment = new Fragment<Ordered<T>>(link.Length);
        if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater))
            throw new S.AssertionException(S.M.NewFragmentShouldBeFree);

        bool isFailure = false;
        try {
            // Passing ct makes the loop token (pct) follow the caller's cancellation.
            await Parallel.ForEachAsync(link, ct, async (x, pct) => {
                pct.ThrowIfCancellationRequested();
                var (result, downloadedT) = await UnitDownloader.TryDownload([x], pct, maximumRetryCount, tryProgress);
                if (!result) {
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogError("Failed to retrieve {} order={}", x.Data, x.Order);
                    return;
                }
                if (downloadedT == null) {
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogCritical("Failed to retrieve {} order={}", x.Data, x.Order);
                    return;
                }
                updater(new Ordered<T>(downloadedT, x.Order));
            });

            if (!isFailure)
                Fragment<Ordered<T>>.SetComplete(fragment, true);
        } finally {
            // Release even when a download throws or is cancelled.
            Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
        }

        return (!isFailure, fragment);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user