Introduce Beam.Fluent and Beam.Models projects
Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
This commit is contained in:
@@ -14,25 +14,25 @@ namespace Beam.Temporary.Cli {
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public partial interface IArchitecture {
|
||||
/// <summary>
|
||||
/// Gets the metadata associated with a <see cref="TextResource"/>
|
||||
/// </summary>
|
||||
/// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
/// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
|
||||
/// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
/// <param name="ct">Optional cancellation token for the operation</param>
/// <param name="logger">Optional logger for logging debug information</param>
|
||||
/// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null);
|
||||
/// <summary>
|
||||
/// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="TextResource"/>
|
||||
/// </summary>
|
||||
/// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
/// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
|
||||
/// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
/// <param name="metadata">Optional book metadata to include with the final text record</param>
|
||||
/// <param name="ct">Optional cancellation token for the operation</param>
/// <param name="logger">Optional logger for logging debug information</param>
|
||||
/// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, IDocumentMetaData? metadata = null, CancellationToken ct = default, ILogger? logger = null);
|
||||
///// <summary>
|
||||
///// Gets the metadata associated with a <see cref="ResourceDictionary"/>
|
||||
///// </summary>
|
||||
///// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
///// <param name="pieceKey">The key of the <see cref="ResourceDictionary"/> stored in the <paramref name="sdd"/></param>
|
||||
///// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
///// <param name="logger">Optional logger for logging debug information</param>
|
||||
///// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
//public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null);
|
||||
///// <summary>
|
||||
///// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="ResourceDictionary"/>
|
||||
///// </summary>
|
||||
///// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
///// <param name="pieceKey">The key of the <see cref="ResourceDictionary"/> stored in the <paramref name="sdd"/></param>
|
||||
///// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
///// <param name="metadata">Optional book metadata to include with the final text record</param>
|
||||
///// <param name="logger">Optional logger for logging debug information</param>
|
||||
///// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
//public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, IDocumentMetaData? metadata = null, CancellationToken ct = default, ILogger? logger = null);
|
||||
|
||||
/// <summary>
|
||||
/// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the chapter metadata
|
||||
|
||||
@@ -7,11 +7,15 @@
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
||||
<Compile Include="HtmlBook.cs.obsolete" />
|
||||
<Compile Include="HtmlBookTemplates.cs.obsolete" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7" />
|
||||
<PackageReference Include="OpenAI" Version="2.1.0" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
@@ -23,7 +27,12 @@
|
||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Puppeteer\Beam.Puppeteer.csproj">
|
||||
<ProjectReference Include="..\Beam.Fluent\Beam.Fluent.csproj" />
|
||||
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
|
||||
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
public class BeamDataDictionary : BaseDataDictionary {
|
||||
public Dictionary<DataKey<WebResource>, Template> Templates {
|
||||
get => GetOrCreateDictionary<DataKey<WebResource>, Template>(nameof(Templates));
|
||||
set => Data[nameof(Templates)] = value;
|
||||
}
|
||||
|
||||
public Dictionary<DataKey<WebResource>, WebResource> Aggregators {
|
||||
get => GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Aggregators));
|
||||
set => Data[nameof(Aggregators)] = value;
|
||||
}
|
||||
|
||||
public Dictionary<DataKey<WebResource>, WebResource> Auxillaries {
|
||||
get => GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Auxillaries));
|
||||
set => Data[nameof(Auxillaries)] = value;
|
||||
}
|
||||
|
||||
public Dictionary<DataKey<DataBindings>, DataBindings> Bindings {
|
||||
get => GetOrCreateDictionary<DataKey<DataBindings>, DataBindings>(nameof(Bindings));
|
||||
set => Data[nameof(Bindings)] = value;
|
||||
}
|
||||
|
||||
public Dictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>> AggregatorNovels {
|
||||
get => GetOrCreateDictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>>(nameof(AggregatorNovels));
|
||||
set => Data[nameof(AggregatorNovels)] = value;
|
||||
}
|
||||
|
||||
public Dictionary<DataKey<TextResource>, TextResource> Novels {
|
||||
get => GetOrCreateDictionary<DataKey<TextResource>, TextResource>(nameof(Novels));
|
||||
set => Data[nameof(Novels)] = value;
|
||||
}
|
||||
|
||||
internal Dictionary<DataKey<File>, File> Files {
|
||||
get => GetOrCreateDictionary<DataKey<File>, File>(nameof(Files));
|
||||
set => Data[nameof(Files)] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,8 +9,8 @@ using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
public static class CommonTransformers {
|
||||
public static AsyncTransformer<HtmlDocument, IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult<IDocumentMetaData>(new ArticleData() {
|
||||
public static AsyncTransformer<HtmlDocument, ArticleData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult(new ArticleData() {
|
||||
Authors = binding?.Authors?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.Get(x) ?? ""),
|
||||
Categories = binding?.Tags?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
@@ -18,7 +18,18 @@ namespace Beam.Temporary.Cli {
|
||||
});
|
||||
};
|
||||
|
||||
public static AsyncTransformer<HtmlDocument, IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
public static AsyncTransformer<HtmlDocument, TableOfContentsData> TableOfContentsTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult(new TableOfContentsData() {
|
||||
Authors = binding?.Authors?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.Get(x) ?? ""),
|
||||
Categories = binding?.Tags?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.Get(x) ?? ""),
|
||||
ContentLinks = binding?.TableOfContents?.Get(x) ?? [],
|
||||
PagesLinks = binding?.PagesDropDown?.Get(x) ?? []
|
||||
});
|
||||
};
|
||||
|
||||
public static AsyncTransformer<HtmlDocument, StringDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
var resolved = binding?.Resolve(x);
|
||||
var articleData = new ArticleData() {
|
||||
Name = OnlineCleaner.Clean(resolved?.Title),
|
||||
@@ -27,7 +38,7 @@ namespace Beam.Temporary.Cli {
|
||||
meta.Add(IArchitecture.Default.ChapterKey, articleData);
|
||||
if (metaData is not null)
|
||||
meta.Add(IArchitecture.Default.BookKey, metaData);
|
||||
return Task.FromResult<IDocument>(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
return Task.FromResult(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
MetaData = meta
|
||||
});
|
||||
};
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
public class CssData {
|
||||
// Primary background color (e.g., for the body)
|
||||
public string PrimaryColor { get; set; } = "#f5f5f5";
|
||||
|
||||
// Secondary color (e.g., for header background)
|
||||
public string SecondaryColor { get; set; } = "#e0e0e0";
|
||||
|
||||
// Tertiary color (e.g., for content sections)
|
||||
public string TertiaryColor { get; set; } = "#ffffff";
|
||||
|
||||
// Button background color
|
||||
public string ButtonColor { get; set; } = "#007bff";
|
||||
|
||||
// Foreground text color
|
||||
public string ForegroundColor { get; set; } = "#333333";
|
||||
|
||||
// Font family for main content
|
||||
public string ContentFont { get; set; } = "Arial, sans-serif";
|
||||
|
||||
// Font size for main content
|
||||
public string ContentFontSize { get; set; } = "16px";
|
||||
|
||||
// Font family for titles
|
||||
public string TitleFont { get; set; } = "Georgia, serif";
|
||||
|
||||
// Font size for titles
|
||||
public string TitleFontSize { get; set; } = "32px";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
//namespace Beam.Temporary.Cli {
|
||||
// public class CssData {
|
||||
// // Primary background color (e.g., for the body)
|
||||
// public string PrimaryColor { get; set; } = "#f5f5f5";
|
||||
|
||||
// // Secondary color (e.g., for header background)
|
||||
// public string SecondaryColor { get; set; } = "#e0e0e0";
|
||||
|
||||
// // Tertiary color (e.g., for content sections)
|
||||
// public string TertiaryColor { get; set; } = "#ffffff";
|
||||
|
||||
// // Button background color
|
||||
// public string ButtonColor { get; set; } = "#007bff";
|
||||
|
||||
// // Foreground text color
|
||||
// public string ForegroundColor { get; set; } = "#333333";
|
||||
|
||||
// // Font family for main content
|
||||
// public string ContentFont { get; set; } = "Arial, sans-serif";
|
||||
|
||||
// // Font size for main content
|
||||
// public string ContentFontSize { get; set; } = "16px";
|
||||
|
||||
// // Font family for titles
|
||||
// public string TitleFont { get; set; } = "Georgia, serif";
|
||||
|
||||
// // Font size for titles
|
||||
// public string TitleFontSize { get; set; } = "32px";
|
||||
// }
|
||||
//}
|
||||
@@ -1,34 +0,0 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
internal static class DataKeyExtensions {
|
||||
public static DataKey WithNamespace(this DataKey dk, string @namespace) {
|
||||
string[] names = @namespace.Split(':');
|
||||
var agg = (string x, string y) => $"{x}:{y}";
|
||||
for (int i = 0; i < names.Length; i++) {
|
||||
string test = names.SkipLast(i).Aggregate(agg);
|
||||
if (dk.Identifier.StartsWith(test)) {
|
||||
return new DataKey(dk.Identifier.Replace(test, @namespace));
|
||||
}
|
||||
}
|
||||
|
||||
return new DataKey(@namespace + ":" + dk.Identifier);
|
||||
}
|
||||
|
||||
public static DataKey<T> WithNamespace<T>(this DataKey<T> dk, string @namespace) {
|
||||
return ((DataKey)dk).WithNamespace(@namespace).As<T>();
|
||||
}
|
||||
|
||||
public static DataKey<T> WithSuffix<T>(this DataKey<T> dk, string suffix) {
|
||||
return new DataKey<T>(dk.Identifier + suffix);
|
||||
}
|
||||
|
||||
public static DataKey ToAggregator(this DataKey dk)
|
||||
=> dk.WithNamespace("aeqw89:document:aggregators");
|
||||
public static DataKey ToAuxiliary(this DataKey dk)
|
||||
=> dk.WithNamespace("aeqw89:document:auxillaries");
|
||||
public static DataKey<T> As<T>(this DataKey dk) => new DataKey<T>(dk.Identifier);
|
||||
}
|
||||
}
|
||||
@@ -1,284 +0,0 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using Beam;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using HtmlAgilityPack;
|
||||
using Beam.Puppeteer;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
|
||||
/// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps
|
||||
/// (source → link selection → transformer) and surfaces operational knobs as first‑class
|
||||
/// methods instead of magic parameters.
|
||||
/// </summary>
|
||||
public static class DownloadBuilder<RawType, OutType> {
|
||||
/* ──────────────────────────── Entry points ─────────────────────────── */
|
||||
|
||||
public static ILinkStage FromMeta(DataKey<TextResource> novelKey, BeamDataDictionary data) =>
|
||||
Create(novelKey, data, SourceKind.Meta);
|
||||
|
||||
public static ILinkStage FromText(DataKey<TextResource> novelKey, BeamDataDictionary data) =>
|
||||
Create(novelKey, data, SourceKind.Text);
|
||||
|
||||
public static IAlternativeLinkStage FromScratch()
|
||||
=> new LinkStage(null!, null!, null!, new());
|
||||
|
||||
/* ────────────────────────────── Stages ─────────────────────────────── */
|
||||
|
||||
public interface ILinkStage {
|
||||
ITransformStage WithLink();
|
||||
ITransformStage WithLinkGenerator();
|
||||
ILinkStage WithRange(Range range);
|
||||
}
|
||||
|
||||
public interface IAlternativeLinkStage {
|
||||
IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
|
||||
}
|
||||
|
||||
public interface ITransformStage {
|
||||
IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
|
||||
}
|
||||
|
||||
public interface IAlternativeTransformStage {
|
||||
IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);
|
||||
IContextStage WithTransformer(Func<RawType, OutType> transformer) {
|
||||
return WithTransformer(rt => Task.FromResult(transformer(rt)));
|
||||
}
|
||||
}
|
||||
|
||||
public interface IContextStage {
|
||||
IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);
|
||||
IContextStage WithParallelism(int degree);
|
||||
IContextStage WithTimeout(TimeSpan timeout);
|
||||
IContextStage WithRetryReporter(IProgress<RetryReport> reporter);
|
||||
DownloadEnumerable<OutType> Build();
|
||||
IContextStage UseFragments();
|
||||
IContextStage UsePuppet(AsyncManipulator manipulator);
|
||||
}
|
||||
|
||||
/* ────────────────────────── Implementation ────────────────────────── */
|
||||
|
||||
private enum SourceKind { Meta, Text }
|
||||
|
||||
private static ILinkStage Create(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
|
||||
var (source, initial) = Resolve(novelKey, data, kind);
|
||||
var ctxBuilder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>()); // placeholder, filled later.
|
||||
return new LinkStage(source, initial, data, ctxBuilder);
|
||||
}
|
||||
|
||||
private static (WebResource Source, State Initial) Resolve(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
|
||||
if (!data.Novels.TryGetValue(novelKey, out var tr))
|
||||
throw new KeyNotFoundException($"Novel '{novelKey}' not found in BeamDataDictionary.");
|
||||
|
||||
var textRecord = tr.ToRecord(data);
|
||||
WebResource? source;
|
||||
State? initial;
|
||||
|
||||
if (kind == SourceKind.Meta) {
|
||||
source = textRecord.AssociatedMetaSource ?? throw new InvalidOperationException($"Meta source missing for '{novelKey}'.");
|
||||
initial = textRecord.Resource.MetaTemplateInitialData ?? throw new InvalidOperationException("Meta template data missing.");
|
||||
} else {
|
||||
source = textRecord.AssociatedSource ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");
|
||||
initial = textRecord.Resource.TemplateInitialData;
|
||||
}
|
||||
|
||||
return (source, initial);
|
||||
}
|
||||
|
||||
/* ──────────────────────────── Stage types ─────────────────────────── */
|
||||
|
||||
|
||||
private sealed record LinkStage(
|
||||
WebResource Source,
|
||||
State Initial,
|
||||
BeamDataDictionary Data,
|
||||
DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {
|
||||
|
||||
private State? endState;
|
||||
private bool linksFrozen = false;
|
||||
|
||||
public ITransformStage WithLink() {
|
||||
var link = Data.Templates[Source.Key].Builder.Build(Initial);
|
||||
CtxBuilder.WithLinks(new[] { link });
|
||||
return new TransformStage(Source, Data, CtxBuilder);
|
||||
}
|
||||
|
||||
public ITransformStage WithLinkGenerator() {
|
||||
var template = Data.Templates[Source.Key];
|
||||
var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||
template.Builder,
|
||||
new NumberedStateChanger(template.Factory.Behavior),
|
||||
Initial, endState));
|
||||
CtxBuilder.WithLinks(generator);
|
||||
linksFrozen = true;
|
||||
return new TransformStage(Source, Data, CtxBuilder);
|
||||
}
|
||||
|
||||
public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
|
||||
CtxBuilder.WithLinks(links);
|
||||
return new TransformStage(Source, Data, CtxBuilder);
|
||||
}
|
||||
|
||||
public ILinkStage WithRange(Range range) {
|
||||
if (linksFrozen)
|
||||
throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator");
|
||||
if (range.End.Value < range.Start.Value)
|
||||
throw new ArgumentOutOfRangeException(nameof(range), $" start must be < end");
|
||||
var template = Data.Templates[Source.Key];
|
||||
var stateChanger = new NumberedStateChanger(template.Factory.Behavior);
|
||||
endState = Initial.Copy();
|
||||
stateChanger.Apply(Initial, range.Start.Value - 1);
|
||||
stateChanger.Apply(endState, range.End.Value - 1);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
private sealed record TransformStage(
|
||||
WebResource Source,
|
||||
BeamDataDictionary Data,
|
||||
DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {
|
||||
public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
|
||||
var transformer = factory(Data.Bindings[Source.Bindings]);
|
||||
return new ContextStage(CtxBuilder, transformer);
|
||||
}
|
||||
|
||||
public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer) {
|
||||
return new ContextStage(CtxBuilder, transformer);
|
||||
}
|
||||
}
|
||||
|
||||
private sealed class ContextStage : IContextStage {
|
||||
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
|
||||
private readonly AsyncTransformer<RawType, OutType> _transformer;
|
||||
private int _parallelism = 4;
|
||||
private bool _useFragments = false;
|
||||
private AsyncManipulator? _useManipulator = null;
|
||||
|
||||
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
|
||||
_ctxBuilder = ctxBuilder;
|
||||
_transformer = transformer;
|
||||
}
|
||||
|
||||
public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
|
||||
configure(_ctxBuilder);
|
||||
return this;
|
||||
}
|
||||
|
||||
public IContextStage WithParallelism(int degree) {
|
||||
_parallelism = Math.Max(1, degree);
|
||||
return this;
|
||||
}
|
||||
|
||||
public IContextStage WithTimeout(TimeSpan timeout) {
|
||||
_ctxBuilder.WithTimeOut(timeout);
|
||||
return this;
|
||||
}
|
||||
|
||||
public IContextStage WithRetryReporter(IProgress<RetryReport> reporter) {
|
||||
_ctxBuilder.WithRetryReporter(reporter);
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePuppet(AsyncManipulator)"/>
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
public IContextStage UseFragments() {
|
||||
if (_useManipulator is not null)
|
||||
_useManipulator = null;
|
||||
|
||||
_useFragments = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Use a puppet browser to download the links. This strategy is mutually exclusive with <see cref="UseFragments"/>
|
||||
/// </summary>
|
||||
/// <param name="manipulator">The page manipulator</param>
|
||||
/// <returns></returns>
|
||||
public IContextStage UsePuppet(AsyncManipulator manipulator) {
|
||||
if (_useFragments)
|
||||
_useFragments = false;
|
||||
|
||||
_useManipulator = manipulator;
|
||||
return this;
|
||||
}
|
||||
|
||||
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
|
||||
return (_useFragments, _useManipulator, _transformer, context.AsyncFailurePredicates) switch {
|
||||
// ──────────────── fragmented HTML ────────────────
|
||||
(true, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
|
||||
=> new UnitFragmentDownloader<OutType>(
|
||||
context.Web,
|
||||
asyncHtmlTransformer,
|
||||
documentFailurePredicates,
|
||||
_parallelism,
|
||||
context.DownloadLogger),
|
||||
// ──────────────── fragmented binary ────────────────
|
||||
(true, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
|
||||
AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
|
||||
=> new UnitFragmentDownloaderBinary<OutType>(
|
||||
context.Client,
|
||||
asyncBinaryTransformer,
|
||||
responseFailurePredicates,
|
||||
_parallelism,
|
||||
context.DownloadLogger),
|
||||
// ──────────────── single HTML ────────────────
|
||||
(false, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
|
||||
=> new UnitDownloader<OutType>(
|
||||
context.Web,
|
||||
asyncHtmlTransformer,
|
||||
documentFailurePredicates),
|
||||
// ──────────────── single binary ────────────────
|
||||
(false, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
|
||||
AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
|
||||
=> new UnitDownloaderBinary<OutType>(
|
||||
context.Client,
|
||||
asyncBinaryTransformer,
|
||||
responseFailurePredicates),
|
||||
// ──────────────── single puppet binary ────────────────
|
||||
(false, AsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
|
||||
AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
|
||||
=> new PuppetUnitDownloader<OutType>(
|
||||
context.Client,
|
||||
manipulator,
|
||||
asyncBinaryTransformer,
|
||||
responseFailurePredicates),
|
||||
// ──────────────── single puppet HTML ────────────────
|
||||
(false, AsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
|
||||
=> new PuppetUnitPageDownloader<OutType>(
|
||||
context.Web,
|
||||
manipulator,
|
||||
asyncHtmlTransformer,
|
||||
documentFailurePredicates),
|
||||
_ => throw new Exception($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
|
||||
};
|
||||
}
|
||||
|
||||
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
|
||||
var copyOfContext = context.CreateBuilder().Build();
|
||||
return _useFragments switch {
|
||||
true => new SequentialFragmentDownloader<RawType, OutType>(
|
||||
copyOfContext,
|
||||
ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
|
||||
context.DownloadLogger).UnwrapFragmented(),
|
||||
false => new SequentialDownloader<RawType, OutType>(
|
||||
copyOfContext,
|
||||
ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
|
||||
context.DownloadLogger).WrapOrdered()
|
||||
};
|
||||
}
|
||||
|
||||
public DownloadEnumerable<OutType> Build() {
|
||||
var context = _ctxBuilder.Build();
|
||||
var enumerable = new DownloadEnumerable<OutType>(ConstructDownloader(context));
|
||||
return enumerable;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
internal class File(string path, params string[] tags) {
|
||||
public string Path { get; set; } = path;
|
||||
public string[] Tags { get; set; } = tags;
|
||||
}
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
using aeqw89.DataKeys;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
internal class LinkCollection(DataKey<string> key, List<SourceLink> links) {
|
||||
public DataKey<string> Key { get; set; } = key;
|
||||
public List<SourceLink> Links { get; set; } = links;
|
||||
}
|
||||
}
|
||||
@@ -11,49 +11,49 @@ namespace Beam.Temporary.Cli {
|
||||
public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");
|
||||
public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");
|
||||
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
|
||||
var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
|
||||
//public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
|
||||
// var piece = sdd.ResourceDictionaries[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
|
||||
// var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
|
||||
|
||||
// null checks
|
||||
if (auxiliary is null) // aux is required to get metadata
|
||||
return null;
|
||||
if (piece?.Resource?.MetaTemplateInitialData is null) // sanity check to avoid null warnings
|
||||
return null;
|
||||
// // null checks
|
||||
// if (auxiliary is null) // aux is required to get metadata
|
||||
// return null;
|
||||
// if (piece?.Resource?.MetaTemplateInitialData is null) // sanity check to avoid null warnings
|
||||
// return null;
|
||||
|
||||
// gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
|
||||
var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
|
||||
var binding = auxiliary.Bindings;
|
||||
// // gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
|
||||
// var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
|
||||
// var binding = auxiliary.Bindings;
|
||||
|
||||
return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
|
||||
}
|
||||
// return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
|
||||
//}
|
||||
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
|
||||
var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
|
||||
//public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<ResourceDictionary> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
|
||||
// var res = sdd.ResourceDictionaries[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
|
||||
// var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
|
||||
|
||||
if (aggregator is null) // ensure aggergator data was retrieved successfully
|
||||
return null;
|
||||
if (res is null) // ensure novel data was retrieved successfully
|
||||
return null;
|
||||
// if (aggregator is null) // ensure aggregator data was retrieved successfully
|
||||
// return null;
|
||||
// if (res is null) // ensure novel data was retrieved successfully
|
||||
// return null;
|
||||
|
||||
var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator
|
||||
// var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator
|
||||
|
||||
// creates a generative enumerable of type link from 'template'
|
||||
var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||
template.Builder, new NumberedStateChanger(template.Factory.Behavior),
|
||||
res.Resource.TemplateInitialData));
|
||||
// // creates a generative enumerable of type link from 'template'
|
||||
// var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||
// template.Builder, new NumberedStateChanger(template.Factory.Behavior),
|
||||
// res.Resource.TemplateInitialData));
|
||||
|
||||
return new DownloadContext<IDocument>(web, new(), sle,
|
||||
retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
|
||||
//downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
|
||||
asyncFailurePredicates: [
|
||||
//(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
|
||||
],
|
||||
timeOut: TimeSpan.FromSeconds(15),
|
||||
downloadLogger: logger
|
||||
);
|
||||
}
|
||||
// return new DownloadContext<IDocument>(web, new(), sle,
|
||||
// retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
|
||||
// //downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
|
||||
// asyncFailurePredicates: [
|
||||
// //(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
|
||||
// ],
|
||||
// timeOut: TimeSpan.FromSeconds(15),
|
||||
// downloadLogger: logger
|
||||
// );
|
||||
//}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Models;
|
||||
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
|
||||
/// <summary>
/// Work-in-progress builder for the resource dictionary of one site.
/// </summary>
/// <param name="SiteKey">Key of the site this builder describes.</param>
public record class ResourceDictionaryBuilder(string SiteKey) {
    // Deferred resource builders; initialised eagerly so later additions cannot hit a null list.
    // Nothing visible in this type reads the list yet.
    private readonly List<Func<WebResourceBuilder>> _builders = new();

    /// <summary>
    /// Holds the pieces of a single web resource under construction.
    /// The fields are declared but not yet read or written by any visible member.
    /// </summary>
    private record class WebResourceBuilder(string ResourceKey) {
        private Func<Template> _template;
        private Func<IReadOnlyDictionary<DataKey<DataBindings>, DataBindings>> _bindings;
        private string _name;
        private string _description;
        private Uri _domain;
    }

    /// <summary>
    /// Collects initial states and writes resources, templates, bindings, states and the
    /// resulting <see cref="ResourceDictionary"/> into a <see cref="BeamDataContext"/>.
    /// </summary>
    private record class ResourceDictionaryRegistrar(
        string SiteKey,
        string FriendlyName,
        IEnumerable<WebResource> Resources,
        IReadOnlyDictionary<string, Template> Templates,
        IReadOnlyDictionary<string, DataBindings> Bindings) : IResourceDictionaryRegistrar {

        // Must be initialised: AddInitialState indexes into it and previously threw NullReferenceException.
        private readonly Dictionary<string, ImmutableState> _states = new();

        /// <summary>Stores <paramref name="state"/> under <paramref name="key"/>, replacing any earlier entry.</summary>
        /// <returns>This registrar, for chaining.</returns>
        public IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state) {
            _states[key] = state;
            return this;
        }

        /// <summary>
        /// Adds every collected item to <paramref name="sdd"/> using <c>TryAdd</c>, so entries that
        /// already exist are left untouched.
        /// </summary>
        public void Register(BeamDataContext sdd) {
            foreach (var resource in Resources)
                sdd.Resources.TryAdd(resource.Key, resource);

            foreach (var template in Templates)
                sdd.Templates.TryAdd(new DataKey<WebResource>(template.Key), template.Value);

            foreach (var binding in Bindings)
                sdd.Bindings.TryAdd(new DataKey<DataBindings>(binding.Key), binding.Value);

            foreach (var state in _states)
                sdd.InitialStates.TryAdd(new DataKey<ImmutableState>(state.Key), state.Value);

            var dictionaryKey = new DataKey<ResourceDictionary>(SiteKey);
            sdd.ResourceDictionaries.TryAdd(dictionaryKey, new ResourceDictionary() {
                Key = dictionaryKey,
                FriendlyName = FriendlyName,
                // NOTE(review): the original initializer was left empty ("InitialStates =").
                // The Dictionary<DataKey<WebResource>, ImmutableState> shape mirrors the object
                // initializer used for ResourceDictionary.InitialStates elsewhere in this project;
                // confirm the intended key type.
                InitialStates = _states.ToDictionary(
                    entry => new DataKey<WebResource>(entry.Key),
                    entry => entry.Value)
            });
        }
    }
}
|
||||
|
||||
/// <summary>
/// Stage returned by <see cref="IResourceDictionaryBuilder.Then"/>: accepts initial states and
/// registers the result into a <see cref="BeamDataContext"/>.
/// </summary>
public interface IResourceDictionaryRegistrar {
    /// <summary>Associates <paramref name="state"/> with <paramref name="key"/>.</summary>
    /// <returns>A registrar on which further calls can be chained.</returns>
    IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state);

    /// <summary>Registers the accumulated definitions into <paramref name="sdd"/>.</summary>
    void Register(BeamDataContext sdd);
}
|
||||
|
||||
/// <summary>
/// Fluent builder that accumulates <see cref="DataBindings"/> and produces them as a keyed, read-only map.
/// </summary>
public interface IBindingsBuilder {
    /// <summary>Adds an already constructed <paramref name="bindings"/> instance.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    IBindingsBuilder AddBinding(DataBindings bindings);

    /// <summary>Adds a binding described by the <paramref name="configure"/> callback.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    IBindingsBuilder AddBinding(Action<DataBindings> configure);

    /// <summary>Produces the bindings keyed by <see cref="DataKey{T}"/>.</summary>
    IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> Build();
}
|
||||
|
||||
/// <summary>
/// Entry stage of the resource-dictionary fluent API: gathers resources and a friendly name,
/// then hands over to an <see cref="IResourceDictionaryRegistrar"/>.
/// </summary>
public interface IResourceDictionaryBuilder {
    /// <summary>Adds one resource described by the staged <paramref name="configure"/> callback.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    IResourceDictionaryBuilder AddResource(Func<ITemplateBuilderStage, IWebResourceBuilderStage> configure);

    /// <summary>Supplies several staged resource callbacks at once.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    IResourceDictionaryBuilder WithResources(Func<ITemplateBuilderStage, IWebResourceBuilderStage>[] configure);

    /// <summary>Supplies the human-readable name.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    IResourceDictionaryBuilder WithFriendlyName(string friendlyName);

    /// <summary>Moves on to the registrar stage.</summary>
    IResourceDictionaryRegistrar Then();
}
|
||||
|
||||
/// <summary>
/// Stage 3 of the staged web-resource builder: optional descriptive data followed by <see cref="Build"/>.
/// </summary>
public interface IWebResourceBuilderStage {
    /// <summary>Supplies the resource name.</summary>
    /// <returns>The same stage type, so calls can be chained.</returns>
    IWebResourceBuilderStage WithName(string name); // Stage 3

    /// <summary>Supplies the resource description.</summary>
    /// <returns>The same stage type, so calls can be chained.</returns>
    IWebResourceBuilderStage WithDescription(string description); // Stage 3

    /// <summary>Supplies the resource domain.</summary>
    /// <returns>The same stage type, so calls can be chained.</returns>
    IWebResourceBuilderStage WithDomain(Uri domain); // Stage 3

    /// <summary>Produces the configured <see cref="WebResource"/>.</summary>
    WebResource Build();
}
|
||||
|
||||
/// <summary>
/// Builder stage that accepts bindings and then advances to <see cref="IWebResourceBuilderStage"/>.
/// </summary>
public interface IBindingBuilderStage {
    /// <summary>Supplies bindings through an <see cref="IBindingsBuilder"/> callback.</summary>
    /// <returns>The next stage of the builder.</returns>
    IWebResourceBuilderStage WithBindings(Action<IBindingsBuilder> configure);

    /// <summary>Supplies an already built bindings map.</summary>
    /// <returns>The next stage of the builder.</returns>
    IWebResourceBuilderStage WithBindings(IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> bindings);
}
|
||||
|
||||
/// <summary>
/// Builder stage that accepts a <see cref="Template"/> and then advances to <see cref="IBindingBuilderStage"/>.
/// </summary>
public interface ITemplateBuilderStage {
    /// <summary>Supplies the template through an <see cref="ITemplateBuilder"/> callback.</summary>
    /// <returns>The next stage of the builder.</returns>
    IBindingBuilderStage WithTemplate(Action<ITemplateBuilder> configure);

    /// <summary>Supplies an already constructed <paramref name="template"/>.</summary>
    /// <returns>The next stage of the builder.</returns>
    IBindingBuilderStage WithTemplate(Template template);
}
|
||||
|
||||
/// <summary>
/// Fluent builder for a <see cref="Template"/>: a state-changer factory plus a URL builder.
/// </summary>
public interface ITemplateBuilder {
    /// <summary>Supplies the <see cref="StateChangerFactory"/>.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    ITemplateBuilder WithFactory(StateChangerFactory factory);

    /// <summary>Supplies an already constructed <see cref="SourceLinkBuilder"/>.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    ITemplateBuilder WithUrlBuilder(SourceLinkBuilder builder);

    /// <summary>Supplies the URL builder through a <paramref name="configure"/> callback.</summary>
    /// <returns>A builder on which further calls can be chained.</returns>
    ITemplateBuilder WithUrlBuilder(Action<SourceLinkBuilder> configure);

    /// <summary>Produces the configured <see cref="Template"/>.</summary>
    Template Build();
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -8,6 +8,7 @@ using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
|
||||
@@ -43,98 +44,373 @@ namespace Beam.Temporary.Cli {
|
||||
// sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]);
|
||||
//}
|
||||
|
||||
public static void Define_WoDuShu_HouseOfHorrors(BeamDataDictionary sdd) {
|
||||
var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
var novel = new TextResource() {
|
||||
Key = new DataKey<TextResource>("novels:house_of_horrors"),
|
||||
FriendlyName = "My House Of Horrors",
|
||||
AssociatedSource = wdsAgg,
|
||||
AssociatedMetaSource = wdsAux,
|
||||
TemplateInitialData = new ImmutableState(["24349", "2896325"]),
|
||||
MetaTemplateInitialData = new ImmutableState(["24349"])
|
||||
};
|
||||
//// -----------------------------------------------------------------------------
|
||||
//// Helper: same as in the WoDuShu file
|
||||
//private static (DataKey<T>, DataKey<T>) CreateKeyPair<T>(
|
||||
// string pref1, string pref2, string common, string @namespace) {
|
||||
// return (
|
||||
// new DataKey<T>($"{pref1}:{common}").WithNamespace(@namespace),
|
||||
// new DataKey<T>($"{pref2}:{common}").WithNamespace(@namespace)
|
||||
// );
|
||||
//}
|
||||
|
||||
sdd.Novels.TryAdd(novel.Key, novel);
|
||||
// -----------------------------------------------------------------------------
|
||||
// 1) Site-wide definition – YeBiQuge (m.yebiquge.com)
|
||||
public static void Define_YeBiQuge(BeamDataContext sdd) {
|
||||
// ---------- keys ----------
|
||||
var yb = new DataKey<WebResource>("aeqw89:yebiquge");
|
||||
|
||||
sdd.AggregatorNovels.TryAdd(wdsAgg, [novel.Key]);
|
||||
}
|
||||
var bindings = new DataKey<DataBindings>($"aeqw89:yebiquge:{nameof(IDocument).ToLower()}:bindings");
|
||||
var bindings_info = new DataKey<DataBindings>($"aeqw89:yebiquge:{nameof(ArticleData).ToLower()}:bindings");
|
||||
var bindings_toc = new DataKey<DataBindings>($"aeqw89:yebiquge:{nameof(TableOfContentsData).ToLower()}:bindings");
|
||||
|
||||
private static (DataKey<T>, DataKey<T>) CreateKeyPair<T>(string pref1, string pref2, string common, string @namespace) {
|
||||
return (
|
||||
new DataKey<T>(pref1 + ":" + common).WithNamespace(@namespace),
|
||||
new DataKey<T>(pref2 + ":" + common).WithNamespace(@namespace)
|
||||
);
|
||||
}
|
||||
|
||||
public static void Define_WoDuShu(BeamDataDictionary sdd) {
|
||||
var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
var bindings = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
var aggregator = new WebResource(wdsAgg) {
|
||||
Name = "WoDuShu.com",
|
||||
Description = "A Chinese novel aggregator site",
|
||||
Domain = "https://wodushu.com",
|
||||
// ---------- web resources ----------
|
||||
var aggregator = new WebResource(yb.InsertEnd(nameof(IDocument).ToLower())) {
|
||||
Name = "YeBiQuge – Chapters",
|
||||
Description = "Chapter pages (mobile)",
|
||||
Domain = "https://m.yebiquge.com",
|
||||
Bindings = bindings
|
||||
};
|
||||
var auxiliary = new WebResource(wdsAux) {
|
||||
Name = "WoDuShu.com",
|
||||
Description = "A Chinese novel aggregator site",
|
||||
Domain = "https://wodushu.com",
|
||||
Bindings = bindings.WithSuffix("_aux")
|
||||
|
||||
var bookInfo = new WebResource(yb.InsertEnd(nameof(ArticleData).ToLower())) {
|
||||
Name = "YeBiQuge – Book Info",
|
||||
Description = "Book information / latest updates page",
|
||||
Domain = "https://m.yebiquge.com",
|
||||
Bindings = bindings_info
|
||||
};
|
||||
|
||||
sdd.Templates.TryAdd(wdsAgg, new() {
|
||||
var tocPage = new WebResource(yb.InsertEnd(nameof(TableOfContentsData).ToLower())) {
|
||||
Name = "YeBiQuge – TOC",
|
||||
Description = "Full chapter list (index*.html)",
|
||||
Domain = "https://m.yebiquge.com",
|
||||
Bindings = bindings_toc
|
||||
};
|
||||
|
||||
sdd.Resources.TryAdd(aggregator.Key, aggregator);
|
||||
sdd.Resources.TryAdd(bookInfo.Key, bookInfo);
|
||||
sdd.Resources.TryAdd(tocPage.Key, tocPage);
|
||||
|
||||
// ---------- URL templates ----------
|
||||
// 1-a) Chapter page /{catId}/{bookId}/{chapterId}.html
|
||||
sdd.Templates.TryAdd(aggregator.Key, new() {
|
||||
Factory = new(StateChangerFactory.LastAsNumber),
|
||||
Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
.WithSegments("read", "", "")
|
||||
.WithParameters(1, "")
|
||||
.WithParameters(2, (".html", Position.After))
|
||||
Builder = new SourceLinkBuilder("m.yebiquge.com")
|
||||
.WithSegments("", "", "") // /<cat>/<book>/<chap>
|
||||
.WithParameters(0, "")
|
||||
.WithParameters(1, "")
|
||||
.WithParameters(2, (".html", Position.After)) // chapId.html
|
||||
});
|
||||
sdd.Templates.TryAdd(wdsAux, new() {
|
||||
|
||||
// 1-b) Book-info page /{catId}/{bookId}/
|
||||
sdd.Templates.TryAdd(bookInfo.Key, new() {
|
||||
Factory = new(StateChangerFactory.Constant),
|
||||
Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
.WithSegments("book", "")
|
||||
.WithParameters(1, "")
|
||||
Builder = new SourceLinkBuilder("m.yebiquge.com")
|
||||
.WithSegments("", "") // /<cat>/<book>/
|
||||
.WithParameters(0, "")
|
||||
.WithParameters(1, "")
|
||||
});
|
||||
|
||||
sdd.Aggregators.TryAdd(wdsAgg, aggregator);
|
||||
sdd.Auxillaries.TryAdd(wdsAux, auxiliary);
|
||||
// 1-c) TOC page /{catId}/{bookId}/index.html (first page)
|
||||
sdd.Templates.TryAdd(tocPage.Key, new() {
|
||||
Factory = new(StateChangerFactory.Constant),
|
||||
Builder = new SourceLinkBuilder("m.yebiquge.com")
|
||||
.WithSegments("", "", "index.html") // /<cat>/<book>/index.html
|
||||
.WithParameters(0, "")
|
||||
.WithParameters(1, "")
|
||||
});
|
||||
|
||||
var binding_agg = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
var binding_aux = new DataKey<DataBindings>("aeqw89:bindings:wodushu_aux");
|
||||
|
||||
sdd.Bindings.Add(binding_agg, new() {
|
||||
Title = new ContentsDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[4]/div/div/div[2]/h1"
|
||||
}
|
||||
// ---------- bindings ----------
|
||||
// ── chapter page ────────────────────────────────────────────────
|
||||
sdd.Bindings.Add(bindings, new() {
|
||||
Title = new ContentsDataProvider {
|
||||
Content = new Binding { XPath = "//*[@id='nr_title']" }
|
||||
},
|
||||
|
||||
Content = new ParagraphedContentDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "//*[@id=\"content\"]"
|
||||
}
|
||||
Content = new ParagraphedContentDataProvider {
|
||||
Content = new Binding { XPath = "//*[@id='chaptercontent']" }
|
||||
},
|
||||
});
|
||||
|
||||
sdd.Bindings.Add(binding_aux, new() {
|
||||
Title = new ContentsDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/h1"
|
||||
// ── book-info page ──────────────────────────────────────────────
|
||||
sdd.Bindings.Add(bindings_info, new() {
|
||||
Title = new ContentsDataProvider {
|
||||
Content = new Binding { XPath = "//div[@class='book_info']//dt[@class='name']" }
|
||||
},
|
||||
Authors = new ContentsArrayDataProvider {
|
||||
Content = new Binding {
|
||||
XPath = "//div[@class='book_info']//span[contains(text(),'作者')]"
|
||||
}
|
||||
},
|
||||
Authors = new ContentsArrayDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/div/p[1]/a"
|
||||
}
|
||||
Description = new ParagraphedContentDataProvider {
|
||||
Content = new Binding { XPath = "//div[@class='book_about']/dl/dd" }
|
||||
}
|
||||
});
|
||||
|
||||
// ── TOC page ────────────────────────────────────────────────────
|
||||
sdd.Bindings.Add(bindings_toc, new() {
|
||||
PagesDropDown = new DropDownDataProvider {
|
||||
Content = new Binding { XPath = "//div[@class='fenye']//select" },
|
||||
RelativeTo = tocPage.Domain
|
||||
},
|
||||
Description = new ParagraphedContentDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[2]"
|
||||
}
|
||||
TableOfContents = new AnchorCollectionDataProvider {
|
||||
Content = new Binding { XPath = "//div[@class='book_last']/dl" },
|
||||
RelativeTo = tocPage.Domain
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 2) Concrete novel – 《诡秘之主》 / Lord of the Mysteries
|
||||
/// <summary>
/// Registers the novel "Lord of the Mysteries" as hosted on YeBiQuge into <paramref name="sdd"/>.
/// </summary>
/// <param name="sdd">Data context that receives the resource dictionary and the aggregator-to-novel link.</param>
public static void Define_YeBiQuge_LordOfMysteries(BeamDataContext sdd) {
    // The resource keys must be the ones Define_YeBiQuge registers in sdd.Resources,
    // i.e. derived from the SITE key. Deriving them from the novel key (as before) produced
    // keys under which no WebResource, template or binding exists.
    var site = new DataKey<WebResource>("aeqw89:yebiquge");
    var ybAgg = site.InsertEnd(nameof(IDocument).ToLower());
    var ybInfo = site.InsertEnd(nameof(ArticleData).ToLower());
    var ybToc = site.InsertEnd(nameof(TableOfContentsData).ToLower());

    // The novel itself keeps its own, novel-specific key.
    var novelKey = new DataKey<WebResource>("aeqw89:yebiquge:novels:lord_of_the_mysteries");

    var novel = new ResourceDictionary {
        Key = novelKey.To<ResourceDictionary>(),
        FriendlyName = "Lord of the Mysteries",
        Resources = {
            { nameof(IDocument)          , ybAgg  }, // chapters
            { nameof(ArticleData)        , ybInfo }, // book info
            { nameof(TableOfContentsData), ybToc  }  // full TOC
        },

        // catId = 2 , bookId = 2958 , sample chapterId = 8699808
        InitialStates = new Dictionary<DataKey<WebResource>, ImmutableState> {
            { ybAgg,  new ImmutableState(["2","2958","8699808"]) },
            { ybInfo, new ImmutableState(["2","2958"]) },
            { ybToc,  new ImmutableState(["2","2958"]) },
        }
    };

    sdd.ResourceDictionaries.TryAdd(novel.Key, novel);
    sdd.AggregatorNovels.TryAdd(ybAgg, [novel.Key]);
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 1) Site-wide definition – KuaiShu5 (www.kuaishu5.com)
|
||||
/// <summary>
/// Registers the site-wide KuaiShu5 definitions into <paramref name="sdd"/>: three web resources
/// (chapters, book info, table of contents), their URL templates and their XPath bindings.
/// </summary>
/// <remarks>
/// Every registration uses <c>TryAdd</c>, so calling this method when the entries already exist
/// leaves the existing entries untouched instead of failing half-way through.
/// </remarks>
/// <param name="sdd">Data context that receives the definitions.</param>
public static void Define_KuaiShu5(BeamDataContext sdd) {
    const string host = "www.kuaishu5.com";
    const string domain = "https://www.kuaishu5.com";

    // ---------- keys ----------
    // NOTE(review): ToLower() is culture-sensitive; if switched to ToLowerInvariant(), change every
    // Define_* method together so the derived keys keep matching.
    var ks = new DataKey<WebResource>("aeqw89:kuaishu5");

    var bindings_chapter = new DataKey<DataBindings>($"aeqw89:kuaishu5:{nameof(IDocument).ToLower()}:bindings");
    var bindings_info = new DataKey<DataBindings>($"aeqw89:kuaishu5:{nameof(ArticleData).ToLower()}:bindings");
    var bindings_toc = new DataKey<DataBindings>($"aeqw89:kuaishu5:{nameof(TableOfContentsData).ToLower()}:bindings");

    // ---------- web resources ----------
    var chapters = new WebResource(ks.InsertEnd(nameof(IDocument).ToLower())) {
        Name = "KuaiShu5 – Chapters",
        Description = "Chapter pages",
        Domain = domain,
        Bindings = bindings_chapter
    };

    var bookInfo = new WebResource(ks.InsertEnd(nameof(ArticleData).ToLower())) {
        Name = "KuaiShu5 – Book Info",
        Description = "Book information / landing page",
        Domain = domain,
        Bindings = bindings_info
    };

    var tocPage = new WebResource(ks.InsertEnd(nameof(TableOfContentsData).ToLower())) {
        Name = "KuaiShu5 – TOC",
        Description = "Full chapter list (index page)",
        Domain = domain,
        Bindings = bindings_toc
    };

    sdd.Resources.TryAdd(chapters.Key, chapters);
    sdd.Resources.TryAdd(bookInfo.Key, bookInfo);
    sdd.Resources.TryAdd(tocPage.Key, tocPage);

    // ---------- URL templates ----------
    // 1-a) Chapter page  /b{bookId}/{chapterId}.html
    sdd.Templates.TryAdd(chapters.Key, new() {
        Factory = new(StateChangerFactory.LastAsNumber),
        Builder = new SourceLinkBuilder(host)
            .WithSegments("", "")                           // /<seg0>/<seg1>
            .WithParameters(0, ("b", Position.Before))      // seg0: b{bookId}
            .WithParameters(1, (".html", Position.After))   // seg1: {chapterId}.html
    });

    // 1-b) Book-info page  /b{bookId}/
    sdd.Templates.TryAdd(bookInfo.Key, new() {
        Factory = new(StateChangerFactory.Constant),
        Builder = new SourceLinkBuilder(host)
            .WithSegments("")                               // /<seg0>
            .WithParameters(0, ("b", Position.Before))      // seg0: b{bookId}
    });

    // 1-c) TOC page  /b{bookId}/  (same as book-info)
    sdd.Templates.TryAdd(tocPage.Key, new() {
        Factory = new(StateChangerFactory.Constant),
        Builder = new SourceLinkBuilder(host)
            .WithSegments("")                               // /<seg0>
            .WithParameters(0, ("b", Position.Before))      // seg0: b{bookId}
    });

    // ---------- bindings ----------
    // TryAdd (not Add) keeps this section consistent with the resource/template registration above.
    // ── chapter page ────────────────────────────────────────────────
    sdd.Bindings.TryAdd(bindings_chapter, new() {
        Title = new ContentsDataProvider {
            Content = new Binding { XPath = "//h1[@class='bookname']" }
        },
        Content = new ParagraphedContentDataProvider {
            Content = new Binding { XPath = "//*[@id='booktxt']" }
        }
    });

    // ── book-info page ──────────────────────────────────────────────
    sdd.Bindings.TryAdd(bindings_info, new() {
        Title = new ContentsDataProvider {
            Content = new Binding { XPath = "//*[@id='info']/h1" }
        },
        Authors = new ContentsArrayDataProvider {
            Content = new Binding {
                XPath = "//*[@id='info']//p[contains(text(),'作者')]/a"
            }
        },
        Description = new ParagraphedContentDataProvider {
            Content = new Binding { XPath = "//*[@id='intro']" }
        }
    });

    // ── TOC page ────────────────────────────────────────────────────
    sdd.Bindings.TryAdd(bindings_toc, new() {
        PagesDropDown = new DropDownDataProvider {
            Content = new Binding { XPath = "//*[@id='indexselect']" },
            RelativeTo = tocPage.Domain
        },
        TableOfContents = new AnchorCollectionDataProvider {
            Content = new Binding { XPath = "//*[@id='content_1']" },
            RelativeTo = tocPage.Domain
        }
    });
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 2) Concrete novel – 《诡秘之主》 / Lord of the Mysteries
|
||||
/// <summary>
/// Registers the novel "Lord of the Mysteries" as hosted on KuaiShu5 into <paramref name="sdd"/>.
/// </summary>
/// <param name="sdd">Data context that receives the resource dictionary, its initial states and the aggregator-to-novel link.</param>
public static void Define_KuaiShu5_LordOfMysteries(BeamDataContext sdd) {
    // Same site-level resource keys that Define_KuaiShu5 registers.
    var ks = new DataKey<WebResource>("aeqw89:kuaishu5");
    var ksChapters = ks.InsertEnd(nameof(IDocument).ToLower());
    var ksInfo = ks.InsertEnd(nameof(ArticleData).ToLower());
    var ksToc = ks.InsertEnd(nameof(TableOfContentsData).ToLower());

    var novel = new ResourceDictionary {
        Key = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries"),
        FriendlyName = "Lord of the Mysteries",
        Resources =
        {
            { nameof(IDocument)          , ksChapters },
            { nameof(ArticleData)        , ksInfo },
            { nameof(TableOfContentsData), ksToc }
        }
    };

    // bookId = 122722 , sample chapterId = 288372
    // Entries are added one by one: assigning a fresh dictionary to sdd.InitialStates would
    // throw away every initial state registered by other definitions.
    sdd.InitialStates.TryAdd(ksChapters.To<ImmutableState>(), new ImmutableState(["122722", "288372"]));
    sdd.InitialStates.TryAdd(ksInfo.To<ImmutableState>(), new ImmutableState(["122722"]));
    sdd.InitialStates.TryAdd(ksToc.To<ImmutableState>(), new ImmutableState(["122722"]));

    sdd.ResourceDictionaries.TryAdd(novel.Key, novel);
    sdd.AggregatorNovels.TryAdd(ksChapters, [novel.Key]);
}
|
||||
//public static void Define_WoDuShu_HouseOfHorrors(BeamDataDictionary sdd) {
|
||||
// var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
// var novel = new ResourceDictionary() {
|
||||
// Key = new DataKey<ResourceDictionary>("novels:house_of_horrors"),
|
||||
// FriendlyName = "My House Of Horrors",
|
||||
// AssociatedSource = wdsAgg,
|
||||
// AssociatedMetaSource = wdsAux,
|
||||
// TemplateInitialData = new ImmutableState(["24349", "2896325"]),
|
||||
// MetaTemplateInitialData = new ImmutableState(["24349"])
|
||||
// };
|
||||
|
||||
// sdd.ResourceDictionaries.TryAdd(novel.Key, novel);
|
||||
|
||||
// sdd.AggregatorNovels.TryAdd(wdsAgg, [novel.Key]);
|
||||
//}
|
||||
|
||||
/// <summary>
/// Builds two keys of the form <c>{prefix}:{common}</c>, one per prefix, and applies
/// <c>InsertStart(@namespace)</c> to each.
/// </summary>
/// <returns>The key built from <paramref name="pref1"/> followed by the key built from <paramref name="pref2"/>.</returns>
private static (DataKey<T>, DataKey<T>) CreateKeyPair<T>(string pref1, string pref2, string common, string @namespace) {
    DataKey<T> Compose(string prefix) {
        var raw = new DataKey<T>($"{prefix}:{common}");
        return raw.InsertStart(@namespace);
    }

    var first = Compose(pref1);
    var second = Compose(pref2);
    return (first, second);
}
|
||||
|
||||
//public static void Define_WoDuShu(BeamDataDictionary sdd) {
|
||||
// var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
// var bindings = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
// var aggregator = new WebResource(wdsAgg) {
|
||||
// Name = "WoDuShu.com",
|
||||
// Description = "A Chinese novel aggregator site",
|
||||
// Domain = "https://wodushu.com",
|
||||
// Bindings = bindings
|
||||
// };
|
||||
// var auxiliary = new WebResource(wdsAux) {
|
||||
// Name = "WoDuShu.com",
|
||||
// Description = "A Chinese novel aggregator site",
|
||||
// Domain = "https://wodushu.com",
|
||||
// Bindings = bindings.WithSuffix("_aux")
|
||||
// };
|
||||
|
||||
// sdd.Templates.TryAdd(wdsAgg, new() {
|
||||
// Factory = new(StateChangerFactory.LastAsNumber),
|
||||
// Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
// .WithSegments("read", "", "")
|
||||
// .WithParameters(1, "")
|
||||
// .WithParameters(2, (".html", Position.After))
|
||||
// });
|
||||
// sdd.Templates.TryAdd(wdsAux, new() {
|
||||
// Factory = new(StateChangerFactory.Constant),
|
||||
// Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
// .WithSegments("book", "")
|
||||
// .WithParameters(1, "")
|
||||
// });
|
||||
|
||||
// sdd.Resources.TryAdd(wdsAgg, aggregator);
|
||||
// sdd.Auxillaries.TryAdd(wdsAux, auxiliary);
|
||||
|
||||
// var binding_agg = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
// var binding_aux = new DataKey<DataBindings>("aeqw89:bindings:wodushu_aux");
|
||||
|
||||
// sdd.Bindings.Add(binding_agg, new() {
|
||||
// Title = new ContentsDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[4]/div/div/div[2]/h1"
|
||||
// }
|
||||
// },
|
||||
|
||||
// Content = new ParagraphedContentDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "//*[@id=\"content\"]"
|
||||
// }
|
||||
// },
|
||||
// });
|
||||
|
||||
// sdd.Bindings.Add(binding_aux, new() {
|
||||
// Title = new ContentsDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/h1"
|
||||
// }
|
||||
// },
|
||||
// Authors = new ContentsArrayDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/div/p[1]/a"
|
||||
// }
|
||||
// },
|
||||
// Description = new ParagraphedContentDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[2]"
|
||||
// }
|
||||
// },
|
||||
// });
|
||||
//}
|
||||
|
||||
//public static void Define_NovelFull(SharedDataDictionary sdd) {
|
||||
// var docNamespace = "aeqw89:document";
|
||||
// var nfAgg = new DataKey<WebResource>("aggregators:novel_full").WithNamespace(docNamespace);
|
||||
|
||||
+137
-66
@@ -9,13 +9,16 @@ using System.Text.Json.Serialization;
|
||||
using System.Text.Json.Serialization.Metadata;
|
||||
using Beam.Temporary.Cli.Templates.Classic;
|
||||
using Beam.Exports;
|
||||
using System.Diagnostics;
|
||||
using Beam.Models;
|
||||
using Beam.Stealth;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
internal class Program {
|
||||
|
||||
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
|
||||
|
||||
public static BeamDataDictionary BeamData { get; set; } = [];
|
||||
public static BeamDataContext BeamData { get; set; } = [];
|
||||
|
||||
public static IArchitecture Architecture = IArchitecture.Default;
|
||||
|
||||
@@ -27,14 +30,15 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
var web = new HtmlWeb();
|
||||
|
||||
var lf = LoggerFactory.Create((x) => {
|
||||
x.AddConsole();
|
||||
});
|
||||
var lf = LoggerFactory.Create((x) => x
|
||||
.AddConsole()
|
||||
.SetMinimumLevel(LogLevel.Trace)
|
||||
);
|
||||
|
||||
ILogger logger = lf
|
||||
.CreateLogger("Program");
|
||||
|
||||
await using var sharedContext = await DataDictionaryContext<BeamDataDictionary>.Create(
|
||||
await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
|
||||
BeamDataPath,
|
||||
false,
|
||||
DataKind.Shared,
|
||||
@@ -45,82 +49,149 @@ namespace Beam.Temporary.Cli {
|
||||
BeamData = sharedContext.Data;
|
||||
|
||||
BeamData.Clear();
|
||||
NovelStatics.Define_WoDuShu(BeamData);
|
||||
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
|
||||
NovelStatics.Define_YeBiQuge(BeamData);
|
||||
NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
|
||||
NovelStatics.Define_KuaiShu5(BeamData);
|
||||
NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
|
||||
ClassicTemplates.Register(BeamData);
|
||||
|
||||
await sharedContext.ForceSave();
|
||||
BeamData = sharedContext.Data; // need to refresh instance after forced save!
|
||||
|
||||
CancellationTokenSource cts = new();
|
||||
|
||||
var novel = new DataKey<TextResource>("novels:house_of_horrors");
|
||||
using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
|
||||
var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
|
||||
return Task.CompletedTask;
|
||||
}, x => Task.FromResult(x));
|
||||
|
||||
var metadata2 = await DownloadBuilder<HtmlDocument, IDocumentMetaData>.FromMeta(novel, BeamData)
|
||||
.WithLink()
|
||||
.WithTransformer(CommonTransformers.ArticleDataTransformer)
|
||||
.Configure((x) => x
|
||||
.WithDownloadLogger(logger)
|
||||
.WithRetryReporter(new Progress<RetryReport>())
|
||||
.WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
.Build()
|
||||
.FirstAsync();
|
||||
var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
|
||||
if (success)
|
||||
logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
|
||||
else
|
||||
logger?.LogError("Failed to download!");
|
||||
|
||||
var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
|
||||
.WithRange(1..5)
|
||||
.WithLinkGenerator()
|
||||
.WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
|
||||
.Configure((x) => x
|
||||
.WithDownloadLogger(logger)
|
||||
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||
.WithTimeOut(TimeSpan.FromSeconds(15))
|
||||
)
|
||||
.Build();
|
||||
Console.WriteLine(result?.DocumentNode.OuterHtml);
|
||||
|
||||
|
||||
//var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
|
||||
|
||||
List<Task<Ordered<IDocument>>> translationTasks = [];
|
||||
List<Ordered<IDocument>> documents = [];
|
||||
//var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
|
||||
// .WithLink()
|
||||
// .WithTransformer(CommonTransformers.TableOfContentsTransformer)
|
||||
// .Configure((x) => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithRetryReporter(new Progress<RetryReport>())
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
// .Build()
|
||||
// .FirstAsync();
|
||||
|
||||
await foreach (var download in downloader2.Take(10)) {
|
||||
if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
|
||||
continue;
|
||||
if (meta is not ArticleData articleMetaData)
|
||||
continue;
|
||||
if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
|
||||
continue;
|
||||
if (meta is not ArticleData bookMetaData)
|
||||
continue;
|
||||
//Console.WriteLine($"Title: {data.Name}");
|
||||
//Console.WriteLine($"Description: {data.Description}");
|
||||
//Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
|
||||
//Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
|
||||
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
||||
Console.WriteLine($"Book title: {bookMetaData.Name}");
|
||||
//Console.WriteLine($"Content: {download}");
|
||||
//if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
|
||||
// Debugger.Break();
|
||||
|
||||
//translationTasks.Add(Task.Run(async () => {
|
||||
// logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
|
||||
// logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// return ret;
|
||||
//}));
|
||||
}
|
||||
//var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
|
||||
// .WithLinks(metadata2.Data.PagesLinks)
|
||||
// .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
|
||||
// .Configure(x => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithRetryReporter(new Progress<RetryReport>())
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
// .Build();
|
||||
|
||||
documents = (await Task.WhenAll(translationTasks)).ToList();
|
||||
//var links = (await pageLinks
|
||||
// .ToListAsync())
|
||||
// .Where(x => x?.Data?.ContentLinks is not null)
|
||||
// .SelectMany(x => x.Data.ContentLinks!)
|
||||
// .DistinctBy(x => x.Link.AbsoluteUri);
|
||||
|
||||
string testDir = Path.Combine("txt", Path.GetRandomFileName());
|
||||
Directory.CreateDirectory(testDir);
|
||||
//var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
|
||||
// .WithLinks(links)
|
||||
// .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
|
||||
// .Configure(x => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
// .WithParallelism(4)
|
||||
// .UseFragments()
|
||||
// .Build();
|
||||
|
||||
int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
|
||||
foreach (var document in documents.OrderBy((x) => x.Order)) {
|
||||
document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
|
||||
Dictionary<string, string> linkButtons = new();
|
||||
if (document.Order != 0)
|
||||
linkButtons.Add("Previous", $"{document.Order - 1}.html");
|
||||
if (document.Order != len)
|
||||
linkButtons.Add("Next", $"{document.Order + 1}.html");
|
||||
new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
|
||||
}
|
||||
//HashSet<Ordered<StringDocument>> downloaded = [];
|
||||
//try {
|
||||
// await foreach (var download in downloader) {
|
||||
// logger?.LogInformation("Downloaded chapter with order={}", download.Order);
|
||||
// try {
|
||||
// downloaded.Add(download);
|
||||
// } catch (Exception e) {
|
||||
// logger?.LogError(e, "Unknown error occurred");
|
||||
// }
|
||||
// }
|
||||
//} catch (Exception e) {
|
||||
// logger?.LogError(e, "Uncaught error detected!");
|
||||
//} finally {
|
||||
// logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
|
||||
// try {
|
||||
// string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
|
||||
// System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
|
||||
// } catch (Exception e) {
|
||||
// logger?.LogInformation(e, "Failed to serialize chapters");
|
||||
// }
|
||||
//}
|
||||
|
||||
//var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
|
||||
// .WithRange(1..5)
|
||||
// .WithLinkGenerator()
|
||||
// .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
|
||||
// .Configure((x) => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15))
|
||||
// )
|
||||
// .Build();
|
||||
|
||||
|
||||
|
||||
//List<Task<Ordered<IDocument>>> translationTasks = [];
|
||||
//List<Ordered<IDocument>> documents = [];
|
||||
|
||||
//await foreach (var download in downloader2.Take(10)) {
|
||||
// if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
|
||||
// continue;
|
||||
// if (meta is not ArticleData articleMetaData)
|
||||
// continue;
|
||||
// if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
|
||||
// continue;
|
||||
// if (meta is not ArticleData bookMetaData)
|
||||
// continue;
|
||||
// //Console.WriteLine($"Title: {data.Name}");
|
||||
// //Console.WriteLine($"Description: {data.Description}");
|
||||
// //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
|
||||
// //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
|
||||
// Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
||||
// Console.WriteLine($"Book title: {bookMetaData.Name}");
|
||||
// //Console.WriteLine($"Content: {download}");
|
||||
|
||||
// //translationTasks.Add(Task.Run(async () => {
|
||||
// // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
|
||||
// // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// // return ret;
|
||||
// //}));
|
||||
//}
|
||||
|
||||
//documents = (await Task.WhenAll(translationTasks)).ToList();
|
||||
|
||||
//string testDir = Path.Combine("txt", Path.GetRandomFileName());
|
||||
//Directory.CreateDirectory(testDir);
|
||||
|
||||
//int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
|
||||
//foreach (var document in documents.OrderBy((x) => x.Order)) {
|
||||
// document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
|
||||
// Dictionary<string, string> linkButtons = new();
|
||||
// if (document.Order != 0)
|
||||
// linkButtons.Add("Previous", $"{document.Order - 1}.html");
|
||||
// if (document.Order != len)
|
||||
// linkButtons.Add("Next", $"{document.Order + 1}.html");
|
||||
// new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
|
||||
//}
|
||||
|
||||
Console.ReadKey();
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// The key this factory was created with; the constructor rejects values that are not contained in <c>Keys</c>.
/// </summary>
public string StateChangerKey { get; set; }
|
||||
|
||||
/// <summary>
/// Creates a factory bound to the given state changer key.
/// </summary>
/// <param name="stateChangerKey">
/// The key to store in <see cref="StateChangerKey"/>; it must be contained in <c>Keys</c>.
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="stateChangerKey"/> is not contained in <c>Keys</c>.
/// </exception>
// NOTE(review): the parameter is presumably named after the StateChangerKey property so that
// JSON constructor binding can match it by name - confirm against the serializer in use.
[JsonConstructor]
public StateChangerFactory(string stateChangerKey) {
    if (!Keys.Contains(stateChangerKey))
        throw new ArgumentException($"{stateChangerKey} not in keys list", nameof(stateChangerKey));

    StateChangerKey = stateChangerKey;
}
|
||||
|
||||
public static Dictionary<string, Func<IStateChangeBehaviour>> FactoryTable = new() {
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// An <see cref="ArticleData"/> record that additionally carries the link collections of a table of contents.
/// </summary>
public record TableOfContentsData : ArticleData {
    /// <summary>
    /// Links to the actual content.
    /// </summary>
    public SourceLink[]? ContentLinks { get; set; }

    /// <summary>
    /// Links to every Table Of Contents page that belongs to this specific resource.
    /// </summary>
    public SourceLink[]? PagesLinks { get; set; }
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Groups a <see cref="StateChangerFactory"/> with a <see cref="SourceLinkBuilder"/>.
/// </summary>
public class Template {
    /// <summary>
    /// The state changer factory of this template.
    /// </summary>
    public StateChangerFactory Factory { get; set; }

    /// <summary>
    /// The source link builder of this template.
    /// </summary>
    public SourceLinkBuilder Builder { get; set; }
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// A keyed text resource that refers to its associated <see cref="WebResource"/>s by key.
/// </summary>
public class TextResource : IKeyed<TextResource> {
    /// <summary>
    /// The key identifying this resource.
    /// </summary>
    public required DataKey<TextResource> Key { get; set; }

    /// <summary>
    /// Optional friendly name of the resource.
    /// </summary>
    public string? FriendlyName { get; set; }

    /// <summary>
    /// Key of the associated source; <see cref="ToRecord"/> looks it up in <c>Aggregators</c>.
    /// </summary>
    public DataKey<WebResource>? AssociatedSource { get; set; }

    /// <summary>
    /// Key of the associated metadata source; <see cref="ToRecord"/> looks it up in <c>Auxillaries</c>.
    /// </summary>
    public DataKey<WebResource>? AssociatedMetaSource { get; set; }

    /// <summary>
    /// Initial template data; must be supplied on construction.
    /// </summary>
    public required ImmutableState TemplateInitialData { get; set; }

    /// <summary>
    /// Optional initial data for the metadata template.
    /// </summary>
    public ImmutableState? MetaTemplateInitialData { get; set; }

    /// <summary>
    /// Builds a <see cref="TextResourceRecord"/> by resolving the associated keys against <paramref name="sdd"/>.
    /// </summary>
    /// <param name="sdd">The dictionary whose <c>Aggregators</c> and <c>Auxillaries</c> are indexed with the associated keys.</param>
    /// <returns>A record holding this resource and the resolved sources; a source is <c>null</c> when its key is <c>null</c>.</returns>
    public TextResourceRecord ToRecord(BeamDataDictionary sdd) {
        // Resolve the source first, then the metadata source, matching the original evaluation order.
        WebResource? source = AssociatedSource is not null ? sdd.Aggregators[AssociatedSource] : null;
        WebResource? metaSource = AssociatedMetaSource is not null ? sdd.Auxillaries[AssociatedMetaSource] : null;

        return new TextResourceRecord(this, source, metaSource);
    }
}
|
||||
|
||||
/// <summary>
/// A <see cref="TextResource"/> bundled with the <see cref="WebResource"/> instances resolved for it.
/// </summary>
/// <param name="Resource">The text resource itself.</param>
/// <param name="AssociatedSource">The resolved associated source, or <c>null</c> when there is none.</param>
/// <param name="AssociatedMetaSource">The resolved associated metadata source, or <c>null</c> when there is none.</param>
public record TextResourceRecord(TextResource Resource, WebResource? AssociatedSource, WebResource? AssociatedMetaSource);
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Pairs an object with a dirty flag.
/// </summary>
/// <typeparam name="T">Type of the tracked object.</typeparam>
internal class Tracked<T> {
    /// <summary>
    /// Wraps <paramref name="obj"/>; a new instance starts out dirty.
    /// </summary>
    /// <param name="obj">The object to track.</param>
    public Tracked(T obj) {
        TrackedObject = obj;
    }

    /// <summary>
    /// The object being tracked.
    /// </summary>
    public T TrackedObject { get; set; }

    /// <summary>
    /// Dirty flag; initialised to <c>true</c>.
    /// </summary>
    public bool IsDirty { get; set; } = true;

    /// <summary>
    /// Sets <see cref="IsDirty"/> to <c>true</c>.
    /// </summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public Tracked<T> SetDirty() {
        IsDirty = true;
        return this;
    }
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// A keyed web resource that refers to its <see cref="DataBindings"/> by key.
/// </summary>
/// <param name="key">The key assigned to <see cref="Key"/>.</param>
public class WebResource(DataKey<WebResource> key) : IKeyed<WebResource> {
    /// <summary>
    /// The key identifying this resource.
    /// </summary>
    public DataKey<WebResource> Key { get; set; } = key;

    /// <summary>
    /// Key of the bindings; <see cref="ToRecord"/> looks it up in <c>Bindings</c> of the dictionary. Must be supplied on construction.
    /// </summary>
    public required DataKey<DataBindings> Bindings { get; set; }

    /// <summary>
    /// Optional name of the resource.
    /// </summary>
    public string? Name { get; set; }

    /// <summary>
    /// Optional domain of the resource.
    /// </summary>
    public string? Domain { get; set; }

    /// <summary>
    /// Optional description of the resource.
    /// </summary>
    public string? Description { get; set; }

    /// <summary>
    /// Creates a resource whose key is built from the empty string.
    /// </summary>
    public WebResource() : this(new DataKey<WebResource>(string.Empty)) { }

    /// <summary>
    /// Builds a <see cref="WebResourceRecord"/> by resolving <see cref="Bindings"/> against <paramref name="sdd"/>.
    /// </summary>
    /// <param name="sdd">The dictionary whose <c>Bindings</c> are indexed with this resource's bindings key.</param>
    /// <returns>A record holding this resource and the resolved bindings.</returns>
    public WebResourceRecord ToRecord(BeamDataDictionary sdd) => new(this, sdd.Bindings[Bindings]);
}
|
||||
|
||||
/// <summary>
/// A <see cref="WebResource"/> bundled with the <see cref="DataBindings"/> resolved for it.
/// </summary>
/// <param name="Resource">The web resource itself.</param>
/// <param name="Bindings">The bindings resolved for the resource.</param>
public record WebResourceRecord(WebResource Resource, DataBindings Bindings);
|
||||
}
|
||||
Reference in New Issue
Block a user