2317db9d3f
Refactor the transformers in the downloader classes to use ByteDocument instead of byte arrays. This change improves type safety and clarity in handling document content during downloads, ensuring that the transformations are more consistent and maintainable.
246 lines
12 KiB
C#
246 lines
12 KiB
C#
using aeqw89.DataKeys;
|
||
using Beam.Dynamic;
|
||
using Beam;
|
||
using Microsoft.Extensions.Logging;
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using HtmlAgilityPack;
|
||
|
||
namespace Beam.Temporary.Cli {
|
||
/// <summary>
|
||
/// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps
|
||
/// (source → link selection → transformer) and surfaces operational knobs as first‑class
|
||
/// methods instead of magic parameters.
|
||
/// </summary>
|
||
public static class DownloadBuilder<RawType, OutType> {
|
||
/* ──────────────────────────── Entry points ─────────────────────────── */
|
||
|
||
/// <summary>
/// Starts a builder for the novel stored under <paramref name="novelKey"/>, using the
/// novel's meta source and meta template data as the starting point.
/// </summary>
/// <param name="novelKey">Key of the novel to look up in <paramref name="data"/>.</param>
/// <param name="data">Dictionary the novel, its sources and its templates are read from.</param>
/// <returns>The link-selection stage of the builder.</returns>
/// <exception cref="KeyNotFoundException">The novel key is not present in <paramref name="data"/>.</exception>
/// <exception cref="InvalidOperationException">The novel has no meta source or no meta template data.</exception>
public static ILinkStage FromMeta(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Meta);
}
|
||
|
||
/// <summary>
/// Starts a builder for the novel stored under <paramref name="novelKey"/>, using the
/// novel's text source and text template data as the starting point.
/// </summary>
/// <param name="novelKey">Key of the novel to look up in <paramref name="data"/>.</param>
/// <param name="data">Dictionary the novel, its sources and its templates are read from.</param>
/// <returns>The link-selection stage of the builder.</returns>
/// <exception cref="KeyNotFoundException">The novel key is not present in <paramref name="data"/>.</exception>
/// <exception cref="InvalidOperationException">The novel has no text source.</exception>
public static ILinkStage FromText(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Text);
}
|
||
|
||
/// <summary>
/// Starts a builder that is not tied to any novel; the caller supplies the links directly
/// through <see cref="IAlternativeLinkStage.WithLinks"/>.
/// </summary>
/// <returns>The alternative link-selection stage, backed by a fresh context builder.</returns>
public static IAlternativeLinkStage FromScratch() {
    var freshContextBuilder = new DownloadContextBuilder<RawType>();

    // Source, initial state and data dictionary are intentionally null here: the stage is
    // only exposed as IAlternativeLinkStage, whose WithLinks path never dereferences them.
    return new LinkStage(null!, null!, null!, freshContextBuilder);
}
|
||
|
||
/* ────────────────────────────── Stages ─────────────────────────────── */
|
||
|
||
/// <summary>
/// Link-selection stage for builders that were started from a novel
/// (<c>FromMeta</c> / <c>FromText</c>).
/// </summary>
public interface ILinkStage {
    /// <summary>
    /// Restricts the download to <paramref name="range"/>. Must be called before
    /// <see cref="WithLinkGenerator"/>.
    /// </summary>
    /// <param name="range">Start and end positions; both are read through <c>Index.Value</c>.</param>
    /// <returns>The same link stage, for chaining.</returns>
    ILinkStage WithRange(Range range);

    /// <summary>Selects a single link and advances to the transformer stage.</summary>
    ITransformStage WithLink();

    /// <summary>Selects a generated sequence of links and advances to the transformer stage.</summary>
    ITransformStage WithLinkGenerator();
}
|
||
|
||
/// <summary>
/// Link-selection stage for builders started with <c>FromScratch</c>, where the caller
/// provides the links instead of having them derived from a novel.
/// </summary>
public interface IAlternativeLinkStage {
    /// <summary>Uses the given links for the download and advances to the transformer stage.</summary>
    /// <param name="links">The links to download.</param>
    IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
}
|
||
|
||
/// <summary>
/// Transformer stage for novel-based builders: the transformer is produced by a factory
/// that receives the data bindings of the resolved source.
/// </summary>
public interface ITransformStage {
    /// <summary>Creates the transformer through <paramref name="factory"/> and advances to the context stage.</summary>
    /// <param name="factory">Builds the transformer from the source's <see cref="DataBindings"/>.</param>
    IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
}
|
||
|
||
/// <summary>
/// Transformer stage for builders started with <c>FromScratch</c>: the transformer is
/// passed in directly, without a bindings-based factory.
/// </summary>
public interface IAlternativeTransformStage {
    /// <summary>Uses the given asynchronous transformer and advances to the context stage.</summary>
    IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);

    /// <summary>
    /// Convenience overload for synchronous transformers: the function's result is wrapped
    /// in an already-completed task and handed to the asynchronous overload.
    /// </summary>
    IContextStage WithTransformer(Func<RawType, OutType> transformer)
        => WithTransformer(rt => Task.FromResult(transformer(rt)));
}
|
||
|
||
/// <summary>
/// Final stage of the builder: optional operational settings followed by
/// <see cref="Build"/>. Every setting returns the stage itself so calls can be chained.
/// </summary>
public interface IContextStage {
    /// <summary>Gives the caller direct access to the underlying download-context builder.</summary>
    IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);

    /// <summary>Sets the degree of parallelism.</summary>
    IContextStage WithParallelism(int degree);

    /// <summary>Sets the download timeout.</summary>
    IContextStage WithTimeout(TimeSpan timeout);

    /// <summary>Registers a reporter that receives retry reports.</summary>
    IContextStage WithRetryReporter(IProgress<RetryReport> reporter);

    /// <summary>Switches the download to the fragment-based downloaders.</summary>
    IContextStage UseFragments();

    /// <summary>Builds the configured download.</summary>
    DownloadEnumerable<OutType> Build();
}
|
||
|
||
/* ────────────────────────── Implementation ────────────────────────── */
|
||
|
||
/// <summary>Which of a novel's two sources a builder is resolved against.</summary>
private enum SourceKind {
    /// <summary>The novel's associated meta source and meta template data.</summary>
    Meta,

    /// <summary>The novel's associated text source and text template data.</summary>
    Text
}
|
||
|
||
/// <summary>
/// Shared implementation of the novel-based entry points: resolves the source and initial
/// state for <paramref name="kind"/> and wraps them in the first builder stage.
/// </summary>
/// <param name="novelKey">Key of the novel to resolve.</param>
/// <param name="data">Dictionary the novel is resolved against.</param>
/// <param name="kind">Whether the meta or the text source is used.</param>
/// <returns>The link-selection stage.</returns>
private static ILinkStage Create(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
    var resolved = Resolve(novelKey, data, kind);

    // The real links are chosen in the link stage; start with an empty placeholder.
    var contextBuilder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>());

    return new LinkStage(resolved.Source, resolved.Initial, data, contextBuilder);
}
|
||
|
||
/// <summary>
/// Looks up the novel in <paramref name="data"/> and returns the web source plus the
/// initial template state that belong to the requested <paramref name="kind"/>.
/// </summary>
/// <param name="novelKey">Key of the novel to look up.</param>
/// <param name="data">Dictionary holding the novels.</param>
/// <param name="kind">Selects the meta or the text source / template data.</param>
/// <returns>The resolved source and its initial state; neither is null.</returns>
/// <exception cref="KeyNotFoundException">The novel key is not present in <paramref name="data"/>.</exception>
/// <exception cref="InvalidOperationException">
/// The source or the template data for the requested kind is missing.
/// </exception>
private static (WebResource Source, State Initial) Resolve(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
    if (!data.Novels.TryGetValue(novelKey, out var tr)) {
        throw new KeyNotFoundException($"Novel '{novelKey}' not found in BeamDataDictionary.");
    }

    var textRecord = tr.ToRecord(data);

    if (kind == SourceKind.Meta) {
        WebResource metaSource = textRecord.AssociatedMetaSource
            ?? throw new InvalidOperationException($"Meta source missing for '{novelKey}'.");
        State metaInitial = textRecord.Resource.MetaTemplateInitialData
            ?? throw new InvalidOperationException($"Meta template data missing for '{novelKey}'.");
        return (metaSource, metaInitial);
    }

    WebResource textSource = textRecord.AssociatedSource
        ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");

    // Guard the text template data the same way as the meta branch, so a missing state is
    // reported here instead of surfacing later as a NullReferenceException in a builder stage.
    State textInitial = textRecord.Resource.TemplateInitialData
        ?? throw new InvalidOperationException($"Text template data missing for '{novelKey}'.");

    return (textSource, textInitial);
}
|
||
|
||
/* ──────────────────────────── Stage types ─────────────────────────── */
|
||
|
||
|
||
/// <summary>
/// First builder stage: decides which links are downloaded. Implements both the
/// novel-based (<see cref="ILinkStage"/>) and the caller-supplied
/// (<see cref="IAlternativeLinkStage"/>) flavour.
/// </summary>
/// <param name="Source">Resolved web source; null when created through <c>FromScratch</c>.</param>
/// <param name="Initial">Initial template state; null when created through <c>FromScratch</c>. Never mutated by this stage.</param>
/// <param name="Data">Data dictionary used for template lookups; null when created through <c>FromScratch</c>.</param>
/// <param name="CtxBuilder">Context builder that receives the selected links.</param>
private sealed record LinkStage(
    WebResource Source,
    State Initial,
    BeamDataDictionary Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {

    // Range boundaries computed by WithRange; both stay null until a range is requested.
    private State? startState;
    private State? endState;

    // Set once the link generator has captured the boundaries, after which WithRange is rejected.
    private bool linksFrozen = false;

    /// <summary>
    /// Builds one link from the template builder, using the range start if a range was set
    /// and the initial state otherwise, then advances to the transformer stage.
    /// </summary>
    public ITransformStage WithLink() {
        var link = Data.Templates[Source.Key].Builder.Build(startState ?? Initial);
        CtxBuilder.WithLinks(new[] { link });
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Installs an ordered link generator that starts at the range start (or the initial
    /// state) and is given the range end (null when no range was set), then advances to the
    /// transformer stage. After this call <see cref="WithRange"/> throws.
    /// </summary>
    public ITransformStage WithLinkGenerator() {
        var template = Data.Templates[Source.Key];
        var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
            template.Builder,
            new NumberedStateChanger(template.Factory.Behavior),
            startState ?? Initial, endState));
        CtxBuilder.WithLinks(generator);
        linksFrozen = true;
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>Uses caller-supplied links and advances to the transformer stage.</summary>
    /// <param name="links">The links to download.</param>
    public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
        CtxBuilder.WithLinks(links);
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Restricts the download to <paramref name="range"/>. The start and end states are
    /// derived from copies of the initial state, so calling this method again replaces the
    /// previous range instead of stacking on top of it.
    /// </summary>
    /// <param name="range">
    /// Start and end positions. Each boundary is applied as <c>Value - 1</c>.
    /// NOTE(review): only <c>Index.Value</c> is read, so from-end indices (<c>^n</c>) are not
    /// interpreted relative to any length — confirm callers never pass them.
    /// </param>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="InvalidOperationException">Called after <see cref="WithLinkGenerator"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The range end is smaller than the range start.</exception>
    public ILinkStage WithRange(Range range) {
        if (linksFrozen) {
            throw new InvalidOperationException("WithRange must be called before WithLinkGenerator");
        }
        if (range.End.Value < range.Start.Value) {
            throw new ArgumentOutOfRangeException(nameof(range), "Range start must be <= end.");
        }

        var stateChanger = new NumberedStateChanger(Data.Templates[Source.Key].Factory.Behavior);

        // Work on copies: Initial is the instance handed over by Resolve (presumably owned by
        // the data dictionary), and mutating it in place also made repeated calls cumulative.
        State first = Initial.Copy();
        State last = Initial.Copy();
        stateChanger.Apply(first, range.Start.Value - 1);
        stateChanger.Apply(last, range.End.Value - 1);

        // Publish both boundaries only after both were computed successfully.
        startState = first;
        endState = last;
        return this;
    }
}
|
||
|
||
/// <summary>
/// Second builder stage: pairs the context builder with a transformer and hands both to
/// the context stage.
/// </summary>
/// <param name="Source">Resolved web source; only read by the factory-based overload.</param>
/// <param name="Data">Data dictionary; only read by the factory-based overload.</param>
/// <param name="CtxBuilder">Context builder carried over from the link stage.</param>
private sealed record TransformStage(
    WebResource Source,
    BeamDataDictionary Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {

    /// <summary>
    /// Looks up the bindings registered for the source and lets <paramref name="factory"/>
    /// build the transformer from them.
    /// </summary>
    public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
        var sourceBindings = Data.Bindings[Source.Bindings];
        return new ContextStage(CtxBuilder, factory(sourceBindings));
    }

    /// <summary>Uses <paramref name="transformer"/> as-is.</summary>
    public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer)
        => new ContextStage(CtxBuilder, transformer);
}
|
||
|
||
/// <summary>
/// Last builder stage: collects operational settings and assembles the downloader that
/// matches the transformer type and the configured failure predicates.
/// </summary>
private sealed class ContextStage : IContextStage {
    private const int DefaultParallelism = 4;

    private readonly DownloadContextBuilder<RawType> _ctxBuilder;
    private readonly AsyncTransformer<RawType, OutType> _transformer;
    private int _parallelism = DefaultParallelism;
    private bool _useFragments;

    /// <summary>Creates the stage for the given context builder and transformer.</summary>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
        // Fail at the point of the mistake rather than later while picking a downloader.
        _ctxBuilder = ctxBuilder ?? throw new ArgumentNullException(nameof(ctxBuilder));
        _transformer = transformer ?? throw new ArgumentNullException(nameof(transformer));
    }

    /// <summary>Invokes <paramref name="configure"/> with the underlying context builder.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="configure"/> is null.</exception>
    public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
        if (configure is null) {
            throw new ArgumentNullException(nameof(configure));
        }
        configure(_ctxBuilder);
        return this;
    }

    /// <summary>
    /// Sets the degree of parallelism; values below 1 are raised to 1. The value is only
    /// passed to the fragment downloaders.
    /// </summary>
    public IContextStage WithParallelism(int degree) {
        _parallelism = Math.Max(1, degree);
        return this;
    }

    /// <summary>Forwards the timeout to the context builder.</summary>
    public IContextStage WithTimeout(TimeSpan timeout) {
        _ctxBuilder.WithTimeOut(timeout);
        return this;
    }

    /// <summary>Forwards the retry reporter to the context builder.</summary>
    public IContextStage WithRetryReporter(IProgress<RetryReport> reporter) {
        _ctxBuilder.WithRetryReporter(reporter);
        return this;
    }

    /// <summary>Selects the fragment downloaders for <see cref="Build"/>.</summary>
    public IContextStage UseFragments() {
        _useFragments = true;
        return this;
    }

    /// <summary>
    /// Picks the unit downloader for the combination of fragment mode, transformer input
    /// type (HTML document or byte document) and failure-predicate type.
    /// </summary>
    /// <returns>
    /// The downloader as <see cref="object"/>, because the four candidates share no common
    /// static type here; the caller casts to the interface it needs.
    /// </returns>
    /// <exception cref="NotSupportedException">No downloader exists for the combination.</exception>
    private object ConstructUnitDownloader(DownloadContext<RawType> context) {
        return (_useFragments, _transformer, context.AsyncFailurePredicates) switch {
            // ──────────────── fragmented HTML ────────────────
            (true, AsyncTransformer<HtmlDocument, OutType> htmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] htmlPredicates)
                => new UnitFragmentDownloader<OutType>(
                    context.Web,
                    htmlTransformer,
                    htmlPredicates,
                    _parallelism,
                    context.DownloadLogger),

            // ──────────────── fragmented binary ────────────────
            (true, AsyncTransformer<ByteDocument, OutType> binaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] binaryPredicates)
                => new UnitFragmentDownloaderBinary<OutType>(
                    context.Client,
                    binaryTransformer,
                    binaryPredicates,
                    _parallelism,
                    context.DownloadLogger),

            // ──────────────── single HTML ────────────────
            (false, AsyncTransformer<HtmlDocument, OutType> htmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] htmlPredicates)
                => new UnitDownloader<OutType>(
                    context.Web,
                    htmlTransformer,
                    htmlPredicates),

            // ──────────────── single binary ────────────────
            (false, AsyncTransformer<ByteDocument, OutType> binaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] binaryPredicates)
                => new UnitDownloaderBinary<OutType>(
                    context.Client,
                    binaryTransformer,
                    binaryPredicates),

            // A specific exception type instead of bare System.Exception; callers that
            // catch Exception are unaffected.
            _ => throw new NotSupportedException($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
        };
    }

    /// <summary>
    /// Wraps the unit downloader in the sequential downloader for the selected mode and
    /// normalises both modes to an enumerator of ordered results.
    /// </summary>
    private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
        // The sequential downloader receives a context re-built from the original one
        // (presumably an independent copy — confirm against DownloadContext.CreateBuilder).
        var copyOfContext = context.CreateBuilder().Build();

        if (_useFragments) {
            return new SequentialFragmentDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).UnwrapFragmented();
        }

        return new SequentialDownloader<RawType, OutType>(
            copyOfContext,
            ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
            context.DownloadLogger).WrapOrdered();
    }

    /// <summary>Builds the download context and returns the enumerable over its results.</summary>
    public DownloadEnumerable<OutType> Build() {
        var context = _ctxBuilder.Build();
        return new DownloadEnumerable<OutType>(ConstructDownloader(context));
    }
}
|
||
}
|
||
}
|