Files
Beam/Beam.Temporary.Cli/DownloadBuilder.cs
T
qwsdcvghyu89 056e426572 Enhance async capabilities and refactor project structure
Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Puppeteer`, `Beam.Temporary.Cli`, and `Beam` to include new XML headers, reorganize property groups, and add project references.

Modified `PuppetedUnitDownloader` to support additional parameters for async transformers. Changed return types in `CommonTransformers` to `AsyncTransformer` for asynchronous processing.

Significant refactoring in `DownloadBuilder`, `DownloadContext`, and `DownloadContextBuilder` to introduce generic parameters and improve context management. Updated `SequentialDownloader`, `SequentialFragmentDownloader`, and `UnitDownloader` to accommodate new async transformer types.

Introduced `TypeExtensions` for unique type name generation and added `UnitFragmentDownloaderBinary` for handling binary downloads. Updated solution file to include the new `aeqw89.Beam` project, ensuring proper references across the solution.

These changes enhance the asynchronous capabilities of the Beam library, improve type safety, and streamline the downloading process.
2025-06-23 20:30:09 +03:00

243 lines
12 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using aeqw89.DataKeys;
using Beam.Dynamic;
using Beam;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using HtmlAgilityPack;
namespace Beam.Temporary.Cli {
/// <summary>
/// Type-safe, staged builder that prevents callers from forgetting the mandatory steps
/// (source → link selection → transformer) and surfaces operational knobs as first-class
/// methods instead of magic parameters.
/// </summary>
public static class DownloadBuilder<RawType, OutType> {
/* ──────────────────────────── Entry points ─────────────────────────── */
/// <summary>
/// Starts a build from the meta source associated with the novel identified by
/// <paramref name="novelKey"/>.
/// </summary>
/// <param name="novelKey">Key of the novel to look up in <paramref name="data"/>.</param>
/// <param name="data">Dictionary the novel, its source and its templates are resolved from.</param>
/// <returns>The link-selection stage for the resolved meta source.</returns>
public static ILinkStage FromMeta(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Meta);
}
/// <summary>
/// Starts a build from the text source associated with the novel identified by
/// <paramref name="novelKey"/>.
/// </summary>
/// <param name="novelKey">Key of the novel to look up in <paramref name="data"/>.</param>
/// <param name="data">Dictionary the novel, its source and its templates are resolved from.</param>
/// <returns>The link-selection stage for the resolved text source.</returns>
public static ILinkStage FromText(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Text);
}
/// <summary>
/// Starts a build that is not tied to any novel; the caller supplies the links explicitly.
/// </summary>
/// <returns>A stage that only exposes <c>WithLinks</c>.</returns>
public static IAlternativeLinkStage FromScratch() {
    // Nothing is resolved on this path, so source, initial state and data are
    // deliberately null placeholders; only the fresh context builder is real.
    var freshBuilder = new DownloadContextBuilder<RawType>();
    return new LinkStage(null!, null!, null!, freshBuilder);
}
/* ────────────────────────────── Stages ─────────────────────────────── */
/// <summary>
/// Link-selection stage reached through <c>FromMeta</c> / <c>FromText</c>: decides which
/// link(s) of the resolved source will be downloaded.
/// </summary>
public interface ILinkStage {
/// <summary>
/// Selects a single link, built by the source's template builder from the initial state.
/// </summary>
ITransformStage WithLink();
/// <summary>
/// Selects a generated sequence of links that starts at the initial state and uses the
/// end state set by <see cref="WithRange"/>, if any.
/// </summary>
ITransformStage WithLinkGenerator();
/// <summary>
/// Restricts link generation to <paramref name="range"/>. Must be called before
/// <see cref="WithLinkGenerator"/>.
/// </summary>
/// <exception cref="InvalidOperationException">Called after <see cref="WithLinkGenerator"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException">The range end is below the range start.</exception>
ILinkStage WithRange(Range range);
}
/// <summary>
/// Link-selection stage reached through <c>FromScratch</c>: the caller provides the links
/// instead of having them derived from a novel's source.
/// </summary>
public interface IAlternativeLinkStage {
/// <summary>Uses <paramref name="links"/> as the set of links to download.</summary>
IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
}
/// <summary>
/// Transformer stage for builds that started from a novel's source.
/// </summary>
public interface ITransformStage {
/// <summary>
/// Creates the transformer by invoking <paramref name="factory"/> with the data bindings
/// looked up for the resolved source.
/// </summary>
IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
}
/// <summary>
/// Transformer stage for builds that started from <c>FromScratch</c>; no data bindings are
/// available, so the transformer is passed in ready-made.
/// </summary>
public interface IAlternativeTransformStage {
/// <summary>Uses <paramref name="transformer"/> as-is.</summary>
IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);
}
/// <summary>
/// Final stage: optional operational settings followed by <see cref="Build"/>.
/// Every configuration method returns the same stage so calls can be chained.
/// </summary>
public interface IContextStage {
/// <summary>Invokes <paramref name="configure"/> with the underlying context builder.</summary>
IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);
/// <summary>
/// Sets the degree of parallelism handed to the fragment downloaders (default 4).
/// Values below 1 are raised to 1.
/// </summary>
IContextStage WithParallelism(int degree);
/// <summary>Forwards <paramref name="timeout"/> to the context builder.</summary>
IContextStage WithTimeout(TimeSpan timeout);
/// <summary>Forwards <paramref name="reporter"/> to the context builder.</summary>
IContextStage WithRetryReporter(IProgress<RetryReport> reporter);
/// <summary>
/// Builds the download context and returns the enumerable that wraps the configured downloader.
/// </summary>
DownloadEnumerable<OutType> Build();
/// <summary>Switches the build to the fragment-based downloaders.</summary>
IContextStage UseFragments();
}
/* ────────────────────────── Implementation ────────────────────────── */
/// <summary>Which of a novel's associated sources a build starts from.</summary>
private enum SourceKind {
    /// <summary>The novel's meta source.</summary>
    Meta = 0,
    /// <summary>The novel's text source.</summary>
    Text = 1,
}
/// <summary>
/// Shared implementation of the novel-based entry points: resolves the requested source
/// and pairs it with a fresh context builder.
/// </summary>
/// <param name="novelKey">Key of the novel to resolve.</param>
/// <param name="data">Dictionary the novel is resolved from.</param>
/// <param name="kind">Whether the meta or the text source is wanted.</param>
/// <returns>The link-selection stage for the resolved source.</returns>
private static ILinkStage Create(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
    var resolved = Resolve(novelKey, data, kind);

    // The builder starts with an empty link set; the link stage replaces it later.
    var emptyLinks = Array.Empty<SourceLink>();
    var seededBuilder = new DownloadContextBuilder<RawType>().WithLinks(emptyLinks);

    return new LinkStage(resolved.Source, resolved.Initial, data, seededBuilder);
}
/// <summary>
/// Looks up the novel and returns the requested source together with the initial
/// template state that belongs to it.
/// </summary>
/// <param name="novelKey">Key of the novel to look up in <paramref name="data"/>.</param>
/// <param name="data">Dictionary holding the novels.</param>
/// <param name="kind">
/// <see cref="SourceKind.Meta"/> selects the meta source and meta template data;
/// any other value selects the text source and text template data.
/// </param>
/// <returns>The resolved source and its initial state; neither is null.</returns>
/// <exception cref="KeyNotFoundException">The novel is not present in <paramref name="data"/>.</exception>
/// <exception cref="InvalidOperationException">The requested source or its template data is missing.</exception>
private static (WebResource Source, State Initial) Resolve(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
    if (!data.Novels.TryGetValue(novelKey, out var tr)) {
        throw new KeyNotFoundException($"Novel '{novelKey}' not found in BeamDataDictionary.");
    }

    var textRecord = tr.ToRecord(data);

    if (kind == SourceKind.Meta) {
        WebResource metaSource = textRecord.AssociatedMetaSource
            ?? throw new InvalidOperationException($"Meta source missing for '{novelKey}'.");
        State metaInitial = textRecord.Resource.MetaTemplateInitialData
            ?? throw new InvalidOperationException("Meta template data missing.");
        return (metaSource, metaInitial);
    }

    WebResource textSource = textRecord.AssociatedSource
        ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");
    // Fail fast here, like the meta branch does, instead of letting a null initial
    // state surface later as a NullReferenceException in the link stage.
    State textInitial = textRecord.Resource.TemplateInitialData
        ?? throw new InvalidOperationException("Template data missing.");
    return (textSource, textInitial);
}
/* ──────────────────────────── Stage types ─────────────────────────── */
/// <summary>
/// Link-selection stage. Serves both the novel-based flow (<see cref="ILinkStage"/>) and
/// the <c>FromScratch</c> flow (<see cref="IAlternativeLinkStage"/>); in the latter
/// <c>Source</c>, <c>Initial</c> and <c>Data</c> are null placeholders and only
/// <see cref="WithLinks"/> may be used.
/// </summary>
private sealed record LinkStage(
    WebResource Source,
    State Initial,
    BeamDataDictionary Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {
    // Range-adjusted copy of Initial; null until WithRange has been called.
    private State? startState;
    // Range-adjusted end state; null means no end state is passed to the generator.
    private State? endState;
    // Set once the generator has captured the start/end states.
    private bool linksFrozen = false;

    /// <summary>Selects a single link built from the (possibly range-adjusted) start state.</summary>
    public ITransformStage WithLink() {
        var link = Data.Templates[Source.Key].Builder.Build(startState ?? Initial);
        CtxBuilder.WithLinks(new[] { link });
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Selects a generated sequence of links running from the start state to the end
    /// state (if one was set through <see cref="WithRange"/>).
    /// </summary>
    public ITransformStage WithLinkGenerator() {
        var template = Data.Templates[Source.Key];
        var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
            template.Builder,
            new NumberedStateChanger(template.Factory.Behavior),
            startState ?? Initial, endState));
        CtxBuilder.WithLinks(generator);
        linksFrozen = true;
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>Uses the caller-supplied <paramref name="links"/> directly.</summary>
    public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
        CtxBuilder.WithLinks(links);
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Restricts the links to <paramref name="range"/>. Calling it again replaces the
    /// previous range rather than adding to it.
    /// </summary>
    /// <param name="range">Start and end positions, both counted from the start.</param>
    /// <returns>This stage, so calls can be chained.</returns>
    /// <exception cref="InvalidOperationException">Called after <see cref="WithLinkGenerator"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// A bound is a from-end index, or the end is below the start.
    /// </exception>
    public ILinkStage WithRange(Range range) {
        if (linksFrozen) {
            throw new InvalidOperationException("WithRange must be called before WithLinkGenerator");
        }
        // Index.Value ignores IsFromEnd, so "^3" would silently be read as "3".
        if (range.Start.IsFromEnd || range.End.IsFromEnd) {
            throw new ArgumentOutOfRangeException(nameof(range),
                "Range bounds must be counted from the start; from-end indices (e.g. ^1) are not supported.");
        }
        if (range.End.Value < range.Start.Value) {
            throw new ArgumentOutOfRangeException(nameof(range), "Range start must be <= end.");
        }

        var template = Data.Templates[Source.Key];
        var stateChanger = new NumberedStateChanger(template.Factory.Behavior);

        // Work on copies: Initial comes from the shared data dictionary, and applying the
        // offset to it in place would corrupt it and compound on repeated WithRange calls.
        var rangeStart = Initial.Copy();
        var rangeEnd = Initial.Copy();
        stateChanger.Apply(rangeStart, range.Start.Value - 1);
        stateChanger.Apply(rangeEnd, range.End.Value - 1);

        startState = rangeStart;
        endState = rangeEnd;
        return this;
    }
}
/// <summary>
/// Transformer stage: pairs the context builder prepared by the link stage with the
/// transformer chosen by the caller and moves on to the context stage.
/// </summary>
private sealed record TransformStage(
    WebResource Source,
    BeamDataDictionary Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {
    /// <summary>
    /// Builds the transformer from <paramref name="factory"/>, passing it the bindings
    /// looked up for the source.
    /// </summary>
    public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
        var sourceBindings = Data.Bindings[Source.Bindings];
        var built = factory(sourceBindings);
        return new ContextStage(CtxBuilder, built);
    }

    /// <summary>Uses an already constructed <paramref name="transformer"/>.</summary>
    public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer)
        => new ContextStage(CtxBuilder, transformer);
}
/// <summary>
/// Final stage: collects the operational settings and, on <see cref="Build"/>, wires the
/// matching unit downloader into a sequential downloader.
/// </summary>
private sealed class ContextStage : IContextStage {
    private readonly DownloadContextBuilder<RawType> _ctxBuilder;
    private readonly AsyncTransformer<RawType, OutType> _transformer;
    private int _parallelism = 4;
    private bool _useFragments;

    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
        // A null transformer can never match a downloader pattern; reject it here rather
        // than crashing while formatting the "unsupported combination" message at Build time.
        _ctxBuilder = ctxBuilder ?? throw new ArgumentNullException(nameof(ctxBuilder));
        _transformer = transformer ?? throw new ArgumentNullException(nameof(transformer));
    }

    /// <summary>Invokes <paramref name="configure"/> with the underlying context builder.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="configure"/> is null.</exception>
    public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
        if (configure is null) {
            throw new ArgumentNullException(nameof(configure));
        }
        configure(_ctxBuilder);
        return this;
    }

    /// <summary>Sets the parallelism for fragment downloads; values below 1 become 1.</summary>
    public IContextStage WithParallelism(int degree) {
        _parallelism = Math.Max(1, degree);
        return this;
    }

    /// <summary>Forwards <paramref name="timeout"/> to the context builder.</summary>
    public IContextStage WithTimeout(TimeSpan timeout) {
        _ctxBuilder.WithTimeOut(timeout);
        return this;
    }

    /// <summary>Forwards <paramref name="reporter"/> to the context builder.</summary>
    public IContextStage WithRetryReporter(IProgress<RetryReport> reporter) {
        _ctxBuilder.WithRetryReporter(reporter);
        return this;
    }

    /// <summary>Switches the build to the fragment-based downloaders.</summary>
    public IContextStage UseFragments() {
        _useFragments = true;
        return this;
    }

    /// <summary>
    /// Picks the unit downloader matching the fragment flag, the transformer's raw type
    /// (HTML document or byte array) and the context's failure predicates.
    /// </summary>
    /// <returns>
    /// The downloader as <see cref="object"/>; the caller casts it to the interface it needs.
    /// </returns>
    /// <exception cref="NotSupportedException">No downloader exists for the combination.</exception>
    private object ConstructUnitDownloader(DownloadContext<RawType> context) {
        return (_useFragments, _transformer, context.AsyncFailurePredicates) switch {
            // ──────────────── fragmented HTML ────────────────
            (true, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new UnitFragmentDownloader<OutType>(
                    context.Web,
                    asyncHtmlTransformer,
                    documentFailurePredicates,
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── fragmented binary ────────────────
            (true, AsyncTransformer<byte[], OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates)
                => new UnitFragmentDownloaderBinary<OutType>(
                    context.Client,
                    asyncBinaryTransformer,
                    responseFailurePredicates,
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── single HTML ────────────────
            (false, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new UnitDownloader<OutType>(
                    context.Web,
                    asyncHtmlTransformer,
                    documentFailurePredicates),
            // ──────────────── single binary ────────────────
            (false, AsyncTransformer<byte[], OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates)
                => new UnitDownloaderBinary<OutType>(
                    context.Client,
                    asyncBinaryTransformer,
                    responseFailurePredicates),
            // A specific exception type instead of the reserved base Exception.
            _ => throw new NotSupportedException($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
        };
    }

    /// <summary>
    /// Wraps the unit downloader in the sequential (or sequential-fragment) downloader,
    /// running on a rebuilt copy of <paramref name="context"/>.
    /// </summary>
    private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
        var copyOfContext = context.CreateBuilder().Build();

        if (_useFragments) {
            return new SequentialFragmentDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).UnwrapFragmented();
        }

        return new SequentialDownloader<RawType, OutType>(
            copyOfContext,
            ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
            context.DownloadLogger).WrapOrdered();
    }

    /// <summary>Builds the context and returns the enumerable over the configured downloader.</summary>
    public DownloadEnumerable<OutType> Build() {
        var context = _ctxBuilder.Build();
        return new DownloadEnumerable<OutType>(ConstructDownloader(context));
    }
}
}
}