diff --git a/Beam.Dynamic/Beam.Dynamic.csproj b/Beam.Dynamic/Beam.Dynamic.csproj index 2a6cd5c..0fe0a46 100644 --- a/Beam.Dynamic/Beam.Dynamic.csproj +++ b/Beam.Dynamic/Beam.Dynamic.csproj @@ -1,24 +1,19 @@ - - - - net9.0 - enable - enable - Beam Dynamic - aeqw89 - qwsdcvghyu - - Beam utilities facilitating dynamic fetching of elements of webpages - https://github.com/qwsdcvghyu89/Beam - aeqw89.Beam.Dynamic - - - - - - - - - - - + + + + net9.0 + enable + enable + + + + + + + + + + all + + + \ No newline at end of file diff --git a/Beam.Exports/Beam.Exports.csproj b/Beam.Exports/Beam.Exports.csproj index 1ad0212..9ff9a61 100644 --- a/Beam.Exports/Beam.Exports.csproj +++ b/Beam.Exports/Beam.Exports.csproj @@ -1,20 +1,16 @@ - - - - net9.0 - enable - enable - Beam.Exports - aeqw89 - qwsdcvghyu - Beam library that facilitates exporting different kinds of views for IDocuments - https://github.com/qwsdcvghyu89/Beam - https://github.com/qwsdcvghyu89/Beam - aeqw89.Beam.Exports - - - - + + + + net9.0 + enable + enable + + + - - + + + all + + + \ No newline at end of file diff --git a/Beam.Puppeteer/Beam.Puppeteer.csproj b/Beam.Puppeteer/Beam.Puppeteer.csproj index 6e23fcf..bc23420 100644 --- a/Beam.Puppeteer/Beam.Puppeteer.csproj +++ b/Beam.Puppeteer/Beam.Puppeteer.csproj @@ -1,17 +1,16 @@ - - - - net9.0 - enable - enable - - - - - - - - - - - + + + + net9.0 + enable + enable + + + + + + + all + + + \ No newline at end of file diff --git a/Beam.Puppeteer/PuppetedUnitDownloader.cs b/Beam.Puppeteer/PuppetedUnitDownloader.cs index dd571af..2e62992 100644 --- a/Beam.Puppeteer/PuppetedUnitDownloader.cs +++ b/Beam.Puppeteer/PuppetedUnitDownloader.cs @@ -11,8 +11,8 @@ namespace Beam.Puppeteer { public class PuppetUnitDownloader : UnitDownloader { public PuppetContext Context { get; } - public PuppetUnitDownloader(PuppetContext pc, DownloadContext context) - : base(context.Web, context.AsyncTranformer, context.AsyncFailurePredicates) { + public PuppetUnitDownloader(PuppetContext pc, DownloadContext context, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] asyncDownloadFailurePredicates) + : base(context.Web, asyncHtmlTransformer, asyncDownloadFailurePredicates) { Context = pc; } diff --git a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj index a404a59..4015cee 100644 --- a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj +++ b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj @@ -1,32 +1,30 @@ - - - - Exe - net9.0 - enable - enable - - Beam.Temporary.Cli - aeqw89 - qwsdcvghyu - A temporary CLI for Beam providing several useful mechanisms - https://github.com/qwsdcvghyu89/Beam - https://github.com/qwsdcvghyu89/Beam - aeqw89.Beam.Temporary.Cli - - - - - - - - - - - - - - - - - + + + + Exe + net9.0 + enable + enable + + + + + + + + + + + + + + all + + + all + + + all + + + \ No newline at end of file diff --git a/Beam.Temporary.Cli/CommonTransformers.cs b/Beam.Temporary.Cli/CommonTransformers.cs index 08c6c91..ceeb090 100644 --- a/Beam.Temporary.Cli/CommonTransformers.cs +++ b/Beam.Temporary.Cli/CommonTransformers.cs @@ -1,5 +1,6 @@ using aeqw89.DataKeys; using Beam.Dynamic; +using HtmlAgilityPack; using System; using System.Collections.Generic; using System.Linq; @@ -8,16 +9,16 @@ using System.Threading.Tasks; namespace Beam.Temporary.Cli { public static class CommonTransformers { - public static HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => { - return new ArticleData() { + public static AsyncTransformer ArticleDataTransformer(DataBindings? binding) => (x) => { + return Task.FromResult(new ArticleData() { Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [], Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") - }; + }); }; - public static HtmlTransformer DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => { + public static AsyncTransformer DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => { var resolved = binding?.Resolve(x); var articleData = new ArticleData() { Name = OnlineCleaner.Clean(resolved?.Title), @@ -26,9 +27,9 @@ namespace Beam.Temporary.Cli { meta.Add(IArchitecture.Default.ChapterKey, articleData); if (metaData is not null) meta.Add(IArchitecture.Default.BookKey, metaData); - return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) { + return Task.FromResult(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) { MetaData = meta - }; + }); }; } } diff --git a/Beam.Temporary.Cli/DownloadBuilder.cs b/Beam.Temporary.Cli/DownloadBuilder.cs index 8d71ead..87cb95e 100644 --- a/Beam.Temporary.Cli/DownloadBuilder.cs +++ b/Beam.Temporary.Cli/DownloadBuilder.cs @@ -4,6 +4,7 @@ using Beam; using Microsoft.Extensions.Logging; using System; using System.Collections.Generic; +using HtmlAgilityPack; namespace Beam.Temporary.Cli { /// @@ -11,7 +12,7 @@ namespace Beam.Temporary.Cli { /// (source → link selection → transformer) and surfaces operational knobs as first‑class /// methods instead of magic parameters. /// - public static class DownloadBuilder { + public static class DownloadBuilder { /* ──────────────────────────── Entry points ─────────────────────────── */ public static ILinkStage FromMeta(DataKey novelKey, BeamDataDictionary data) => @@ -20,6 +21,9 @@ namespace Beam.Temporary.Cli { public static ILinkStage FromText(DataKey novelKey, BeamDataDictionary data) => Create(novelKey, data, SourceKind.Text); + public static IAlternativeLinkStage FromScratch() + => new LinkStage(null!, null!, null!, new()); + /* ────────────────────────────── Stages ─────────────────────────────── */ public interface ILinkStage { @@ -28,16 +32,25 @@ namespace Beam.Temporary.Cli { ILinkStage WithRange(Range range); } - public interface ITransformStage { - IContextStage WithTransformer(Func> factory); + public interface IAlternativeLinkStage { + IAlternativeTransformStage WithLinks(IEnumerable links); } - public interface IContextStage { - IContextStage Configure(Action> configure); - IContextStage WithParallelism(int degree); - IContextStage WithTimeout(TimeSpan timeout); - IContextStage WithRetryReporter(IProgress reporter); - DownloadEnumerable Build(); + public interface ITransformStage { + IContextStage WithTransformer(Func> factory); + } + + public interface IAlternativeTransformStage { + IContextStage WithTransformer(AsyncTransformer transformer); + } + + public interface IContextStage { + IContextStage Configure(Action> configure); + IContextStage WithParallelism(int degree); + IContextStage WithTimeout(TimeSpan timeout); + IContextStage WithRetryReporter(IProgress reporter); + DownloadEnumerable Build(); + IContextStage UseFragments(); } /* ────────────────────────── Implementation ────────────────────────── */ @@ -46,7 +59,7 @@ namespace Beam.Temporary.Cli { private static ILinkStage Create(DataKey novelKey, BeamDataDictionary data, SourceKind kind) { var (source, initial) = Resolve(novelKey, data, kind); - var ctxBuilder = new DownloadContextBuilder().WithLinks(Array.Empty()); // placeholder, filled later. + var ctxBuilder = new DownloadContextBuilder().WithLinks(Array.Empty()); // placeholder, filled later. return new LinkStage(source, initial, data, ctxBuilder); } @@ -71,11 +84,12 @@ namespace Beam.Temporary.Cli { /* ──────────────────────────── Stage types ─────────────────────────── */ + private sealed record LinkStage( WebResource Source, State Initial, BeamDataDictionary Data, - DownloadContextBuilder CtxBuilder) : ILinkStage { + DownloadContextBuilder CtxBuilder) : ILinkStage, IAlternativeLinkStage { private State? endState; private bool linksFrozen = false; @@ -97,6 +111,11 @@ namespace Beam.Temporary.Cli { return new TransformStage(Source, Data, CtxBuilder); } + public IAlternativeTransformStage WithLinks(IEnumerable links) { + CtxBuilder.WithLinks(links); + return new TransformStage(Source, Data, CtxBuilder); + } + public ILinkStage WithRange(Range range) { if (linksFrozen) throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator"); @@ -114,24 +133,29 @@ namespace Beam.Temporary.Cli { private sealed record TransformStage( WebResource Source, BeamDataDictionary Data, - DownloadContextBuilder CtxBuilder) : ITransformStage { - public IContextStage WithTransformer(Func> factory) { + DownloadContextBuilder CtxBuilder) : ITransformStage, IAlternativeTransformStage { + public IContextStage WithTransformer(Func> factory) { var transformer = factory(Data.Bindings[Source.Bindings]); - return new ContextStage(CtxBuilder, transformer); + return new ContextStage(CtxBuilder, transformer); + } + + public IContextStage WithTransformer(AsyncTransformer transformer) { + return new ContextStage(CtxBuilder, transformer); } } - private sealed class ContextStage : IContextStage { - private readonly DownloadContextBuilder _ctxBuilder; - private readonly Func _transformer; + private sealed class ContextStage : IContextStage { + private readonly DownloadContextBuilder _ctxBuilder; + private readonly AsyncTransformer _transformer; private int _parallelism = 4; + private bool useFragments = false; - public ContextStage(DownloadContextBuilder ctxBuilder, Func transformer) { + public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer) { _ctxBuilder = ctxBuilder; _transformer = transformer; } - public IContextStage Configure(Action> configure) { + public IContextStage Configure(Action> configure) { configure(_ctxBuilder); return this; } @@ -151,21 +175,66 @@ namespace Beam.Temporary.Cli { return this; } - public DownloadEnumerable Build() { - var context = _ctxBuilder.Build(); - SequentialFragmentDownloader sequentialDownloader = new( - context, - ctx => new UnitFragmentDownloader( + public IContextStage UseFragments() { + useFragments = true; + return this; + } + + private object ConstructUnitDownloader(DownloadContext context) { + return (useFragments, _transformer, context.AsyncFailurePredicates) switch { + // ──────────────── fragmented HTML ──────────────── + (true, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new UnitFragmentDownloader( context.Web, - _transformer, - context.AsyncFailurePredicates, + asyncHtmlTransformer, + documentFailurePredicates, _parallelism, context.DownloadLogger), - context.DownloadLogger); - var enumerable = new DownloadEnumerable( - sequentialDownloader - .UnwrapFragmented()); - sequentialDownloader.DisposeAsync().AsTask().Wait(); + // ──────────────── fragmented binary ──────────────── + (true, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new UnitFragmentDownloaderBinary( + context.Client, + asyncBinaryTransformer, + responseFailurePredicates, + _parallelism, + context.DownloadLogger), + // ──────────────── single HTML ──────────────── + (false, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new UnitDownloader( + context.Web, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single binary ──────────────── + (false, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new UnitDownloaderBinary( + context.Client, + asyncBinaryTransformer, + responseFailurePredicates), + _ => throw new Exception($"Unsupported transformer / failure-predicate combination. Missing pattern: {useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"), + }; + } + + private IAsyncEnumerator> ConstructDownloader(DownloadContext context) { + var copyOfContext = context.CreateBuilder().Build(); + return useFragments switch { + true => new SequentialFragmentDownloader( + copyOfContext, + ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx), + context.DownloadLogger).UnwrapFragmented(), + false => new SequentialDownloader( + copyOfContext, + ctx => (IUnitDownloader)ConstructUnitDownloader(ctx), + context.DownloadLogger).WrapOrdered() + }; + } + + public DownloadEnumerable Build() { + var context = _ctxBuilder.Build(); + var enumerable = new DownloadEnumerable(ConstructDownloader(context)); return enumerable; } } diff --git a/Beam.Temporary.Cli/MainArchitecture.cs b/Beam.Temporary.Cli/MainArchitecture.cs index b5d4f30..d44d8b9 100644 --- a/Beam.Temporary.Cli/MainArchitecture.cs +++ b/Beam.Temporary.Cli/MainArchitecture.cs @@ -11,15 +11,6 @@ namespace Beam.Temporary.Cli { public DataKey ChapterKey { get; set; } = new("ma:chapter"); public DataKey BookKey { get; set; } = new("ma:book"); - public HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => { - return new ArticleData() { - Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], - Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), - Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [], - Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") - }; - }; - public DownloadContext? GetMeta(HtmlWeb web, DataKey pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) { var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd @@ -34,14 +25,7 @@ namespace Beam.Temporary.Cli { var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData); var binding = auxiliary.Bindings; - return new DownloadContext(web, [link], downloadLogger: logger, transformer: (x) => { - return new ArticleData() { - Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], - Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), - Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [], - Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") - }; - }); + return new DownloadContext(web, new(), [link], downloadLogger: logger); } public DownloadContext? GetTextRecord(HtmlWeb web, DataKey resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) { @@ -60,20 +44,7 @@ namespace Beam.Temporary.Cli { template.Builder, new NumberedStateChanger(template.Factory.Behavior), res.Resource.TemplateInitialData)); - return new DownloadContext(web, sle, - transformer: (x) => { - var resolved = aggregator.Bindings.Resolve(x); - var articleData = new ArticleData() { - Name = OnlineCleaner.Clean(resolved.Title), - }; - Dictionary, IDocumentMetaData> meta = []; - meta.Add(ChapterKey, articleData); - if (metaData is not null) - meta.Add(BookKey, metaData); - return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) { - MetaData = meta - }; - }, + return new DownloadContext(web, new(), sle, retryReporter: new Progress((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")), //downloadReporter: new Progress((x) => Console.WriteLine($"Downloaded ({x})")), asyncFailurePredicates: [ diff --git a/Beam.Temporary.Cli/Program.cs b/Beam.Temporary.Cli/Program.cs index 0c1917d..ea7ba98 100644 --- a/Beam.Temporary.Cli/Program.cs +++ b/Beam.Temporary.Cli/Program.cs @@ -53,34 +53,11 @@ namespace Beam.Temporary.Cli { CancellationTokenSource cts = new(); - HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => { - return new ArticleData() { - Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], - Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), - Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [], - Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") - }; - }; - - HtmlTransformer DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => { - var resolved = binding?.Resolve(x); - var articleData = new ArticleData() { - Name = OnlineCleaner.Clean(resolved?.Title), - }; - Dictionary, IDocumentMetaData> meta = []; - meta.Add(IArchitecture.Default.ChapterKey, articleData); - if (metaData is not null) - meta.Add(IArchitecture.Default.BookKey, metaData); - return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) { - MetaData = meta - }; - }; - var novel = new DataKey("novels:house_of_horrors"); - var metadata2 = await DownloadBuilder.FromMeta(novel, BeamData) + var metadata2 = await DownloadBuilder.FromMeta(novel, BeamData) .WithLink() - .WithTransformer(ArticleDataTransformer) + .WithTransformer(CommonTransformers.ArticleDataTransformer) .Configure((x) => x .WithDownloadLogger(logger) .WithRetryReporter(new Progress()) @@ -88,10 +65,10 @@ namespace Beam.Temporary.Cli { .Build() .FirstAsync(); - var downloader2 = DownloadBuilder.FromText(novel, BeamData) + var downloader2 = DownloadBuilder.FromText(novel, BeamData) .WithRange(1..5) .WithLinkGenerator() - .WithTransformer((x) => DocumentTransformer(x, metadata2.Data)) + .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data)) .Configure((x) => x .WithDownloadLogger(logger) .WithDownloadReporter(new Progress((x) => logger.LogInformation(x.ToString()))) diff --git a/Beam.sln b/Beam.sln index e482b22..b92fe2d 100644 --- a/Beam.sln +++ b/Beam.sln @@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Tests", "Beam.Tests\Be EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Puppeteer", "Beam.Puppeteer\Beam.Puppeteer.csproj", "{1A967563-D643-401D-A031-68DD43FACE8D}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "aeqw89.Beam", "aeqw89.Beam\aeqw89.Beam.csproj", "{583236EC-0CE8-4FA3-ADA3-860405E1F16F}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -45,6 +47,10 @@ Global {1A967563-D643-401D-A031-68DD43FACE8D}.Debug|Any CPU.Build.0 = Debug|Any CPU {1A967563-D643-401D-A031-68DD43FACE8D}.Release|Any CPU.ActiveCfg = Release|Any CPU {1A967563-D643-401D-A031-68DD43FACE8D}.Release|Any CPU.Build.0 = Release|Any CPU + {583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Beam/Beam.csproj b/Beam/Beam.csproj index f5421bc..cb6f8cd 100644 --- a/Beam/Beam.csproj +++ b/Beam/Beam.csproj @@ -1,29 +1,18 @@ - - - - net9.0 - enable - enable - - True - Beam - aeqw89 - qwsdcvghyu - A library for downloading internet resources - https://github.com/qwsdcvghyu89/Beam - https://github.com/qwsdcvghyu89/Beam - aeqw89.Beam - - - - - - - - - - + + + + net9.0 + enable + enable + True + + + + + + + + - - + \ No newline at end of file diff --git a/Beam/DownloadContext.cs b/Beam/DownloadContext.cs index ba4789d..56e4fce 100644 --- a/Beam/DownloadContext.cs +++ b/Beam/DownloadContext.cs @@ -8,21 +8,22 @@ using System.Threading.Tasks; using Microsoft.Extensions.Logging; namespace Beam { - public delegate T HtmlTransformer(HtmlDocument doc); + //public delegate T HtmlTransformer(HtmlDocument doc); public delegate Task AsyncTransformer(T elem); - public delegate Task AsyncHtmlTransformer(HtmlDocument doc); - public delegate Task AsyncBinaryTransformer(byte[] bin); + //public delegate Task AsyncHtmlTransformer(HtmlDocument doc); + //public delegate Task AsyncBinaryTransformer(byte[] bin); - public class DownloadContext : IDisposable { + public class DownloadContext : IDisposable { private bool disposedValue; - public DownloadContextBuilder CreateBuilder() - => DownloadContextBuilder.FromContext(this); + public DownloadContextBuilder CreateBuilder() + => DownloadContextBuilder.FromContext(this); + public HttpClient Client { get; } public HtmlWeb Web { get; } public IProgress? DownloadReporter { get; set; } public IProgress? RetryReporter { get; set; } - public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; } + public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; } public TimeSpan TimeOut { get; set; } public IEnumerable Links { get; } public CancellationToken CancellationToken { get; } @@ -30,17 +31,19 @@ namespace Beam { public ILogger? DownloadLogger { get; set; } public DownloadContext(HtmlWeb web, + HttpClient client, IEnumerable links, CancellationToken cancellationToken = default, IProgress? downloadReporter = null, IProgress? retryReporter = null, - AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null, + AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null, TimeSpan? timeOut = null, ILogger? downloadLogger = null) { ArgumentNullException.ThrowIfNull(web, nameof(web)); ArgumentNullException.ThrowIfNull(links, nameof(links)); Web = web; + Client = client; Links = links; CancellationToken = cancellationToken; DownloadReporter = downloadReporter; diff --git a/Beam/DownloadContextBuilder.cs b/Beam/DownloadContextBuilder.cs index ac12fc4..d25e764 100644 --- a/Beam/DownloadContextBuilder.cs +++ b/Beam/DownloadContextBuilder.cs @@ -8,75 +8,83 @@ using System.Threading.Tasks; namespace Beam { - public class DownloadContextBuilder { + public class DownloadContextBuilder { private HtmlWeb _web; + private HttpClient _client; private IProgress? _downloadReporter; private IProgress? _retryReporter; - private AsyncDownloadFailurePredicate?[]? _asyncFailurePredicates; + private AsyncDownloadFailurePredicate?[] _asyncFailurePredicates = []; private TimeSpan _timeOut; private IEnumerable _links; private CancellationToken _cancellationToken; private DocumentCache _cache; private ILogger? _downloadLogger; - public DownloadContextBuilder(HtmlWeb? web = null) { + public DownloadContextBuilder(HttpClient? client = null, HtmlWeb? web = null) { // You can initialize defaults here if needed, e.g.: // _timeOut = TimeSpan.FromSeconds(30); // _cancellationToken = CancellationToken.None; + _client = client ?? new(); _web = web ?? new(); _links = []; } - public DownloadContextBuilder WithWeb(HtmlWeb web) { + public DownloadContextBuilder WithWeb(HtmlWeb web) { _web = web; return this; } - public DownloadContextBuilder WithDownloadReporter(IProgress downloadReporter) { + public DownloadContextBuilder WithClient(HttpClient client) { + _client = client; + return this; + } + + public DownloadContextBuilder WithDownloadReporter(IProgress downloadReporter) { _downloadReporter = downloadReporter; return this; } - public DownloadContextBuilder WithRetryReporter(IProgress retryReporter) { + public DownloadContextBuilder WithRetryReporter(IProgress retryReporter) { _retryReporter = retryReporter; return this; } - public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate[] predicates) { + public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate[] predicates) { _asyncFailurePredicates = predicates; return this; } - public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) { + public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) { _timeOut = timeOut; return this; } - public DownloadContextBuilder WithLinks(IEnumerable links) { + public DownloadContextBuilder WithLinks(IEnumerable links) { _links = links; return this; } - public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) { + public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) { _cancellationToken = cancellationToken; return this; } - public DownloadContextBuilder WithCache(DocumentCache cache) { + public DownloadContextBuilder WithCache(DocumentCache cache) { _cache = cache; return this; } - public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) { + public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) { _downloadLogger = downloadLogger; return this; } - public DownloadContext Build() { + public DownloadContext Build() { // Construct the DownloadContext using the collected values. - var context = new DownloadContext( + var context = new DownloadContext( web: _web, + client: _client, links: _links, cancellationToken: _cancellationToken, downloadReporter: _downloadReporter, @@ -95,15 +103,15 @@ namespace Beam { return context; } - public static DownloadContextBuilder FromContext(DownloadContext existing) { + public static DownloadContextBuilder FromContext(DownloadContext existing) { if (existing == null) throw new ArgumentNullException(nameof(existing)); - return new DownloadContextBuilder(existing.Web) + return new DownloadContextBuilder(existing.Client, existing.Web) .WithLinks(existing.Links) .WithCancellationToken(existing.CancellationToken) .WithDownloadReporter(existing.DownloadReporter!) .WithRetryReporter(existing.RetryReporter!) - .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty>()) + .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty>()) .WithTimeOut(existing.TimeOut) .WithDownloadLogger(existing.DownloadLogger!) .WithCache(existing.Cache); diff --git a/Beam/IAsyncEnumeratorExtensions.cs b/Beam/IAsyncEnumeratorExtensions.cs index 4446557..bed95ce 100644 --- a/Beam/IAsyncEnumeratorExtensions.cs +++ b/Beam/IAsyncEnumeratorExtensions.cs @@ -19,5 +19,20 @@ await fragmented.DisposeAsync().ConfigureAwait(false); } } + + public static async IAsyncEnumerator> WrapOrdered(this IAsyncEnumerator enumerator) { + if (enumerator is null) + throw new ArgumentNullException(); + try { + int index = 0; + while (await enumerator.MoveNextAsync().ConfigureAwait(false)) + if (enumerator.Current is null) + yield break; + else + yield return new Ordered(enumerator.Current, index); + } finally { + await enumerator.DisposeAsync().ConfigureAwait(false); + } + } } } diff --git a/Beam/SequentialDownloader.cs b/Beam/SequentialDownloader.cs index b0d4896..d2a9171 100644 --- a/Beam/SequentialDownloader.cs +++ b/Beam/SequentialDownloader.cs @@ -2,24 +2,24 @@ using Microsoft.Extensions.Logging; namespace Beam { - public class SequentialDownloader : IAsyncEnumerator { - public TOutput Current { get; protected set; } - public DownloadContext Context { get; } + public class SequentialDownloader : IAsyncEnumerator { + public OutType Current { get; protected set; } + public DownloadContext Context { get; } public ILogger? Logger { get; set; } public int LastOrder { get; set; } = 0; protected IEnumerator LinksEnumerator; - public Func> GetUnitDownloader { get; set; } + public Func> GetUnitDownloader { get; set; } - public SequentialDownloader(DownloadContext context, Func, IUnitDownloader> getUnitDownloader, ILogger? logger = null) { + public SequentialDownloader(DownloadContext context, Func, IUnitDownloader> getUnitDownloader, ILogger? logger = null) { Context = context; Logger = logger; LinksEnumerator = Context.Links.GetEnumerator(); LinksEnumerator.Reset(); if (!LinksEnumerator.MoveNext()) throw new ArgumentOutOfRangeException(S.M.LinksCannotBeEmpty); - Current = default(TOutput); + Current = default(OutType); GetUnitDownloader = () => getUnitDownloader(Context); } @@ -51,11 +51,11 @@ namespace Beam { tryProgress: Context.RetryReporter); if (!result) { - Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name); + Logger?.LogWarning("Failed to download Unit<{}>", typeof(OutType).Name); return false; // unit download failed } if (downloadedT is null) { - Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name); + Logger?.LogWarning("Failed to download Unit<{}>", typeof(OutType).Name); return false; // unit download failed } diff --git a/Beam/SequentialFragmentDownloader.cs b/Beam/SequentialFragmentDownloader.cs index c2fdd28..46dd51b 100644 --- a/Beam/SequentialFragmentDownloader.cs +++ b/Beam/SequentialFragmentDownloader.cs @@ -2,10 +2,10 @@ using System.Collections.Concurrent; namespace Beam { - public class SequentialFragmentDownloader : SequentialDownloader>> { + public class SequentialFragmentDownloader : SequentialDownloader>> { public SequentialFragmentDownloader( - DownloadContext context, - Func, IUnitDownloader>>> getUnitDownloader, + DownloadContext context, + Func, IUnitDownloader>>> getUnitDownloader, ILogger? logger = null) : base(context, getUnitDownloader, logger) {} } diff --git a/Beam/TypeExtensions.cs b/Beam/TypeExtensions.cs new file mode 100644 index 0000000..80998e3 --- /dev/null +++ b/Beam/TypeExtensions.cs @@ -0,0 +1,41 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam { + /// + /// Source-style type name (e.g. Foo<T>, Dictionary<int,string>[,], Span<byte>*, MyClass.Nested). + /// + public static class TypeExtensions { + public static string AsUniqueName(this Type type) { + if (type.IsGenericParameter) + return type.Name; + + if (type.IsArray) { + var commas = new string(',', type.GetArrayRank() - 1); + return $"{type.GetElementType().AsUniqueName()}[{commas}]"; + } + + if (type.IsPointer) + return $"{type.GetElementType().AsUniqueName()}*"; + + if (type.IsByRef) + return $"{type.GetElementType().AsUniqueName()}&"; + + if (type.IsNested && !type.IsGenericParameter) + return $"{type.DeclaringType!.AsUniqueName()}.{BuildSimpleName(type)}"; + + return BuildSimpleName(type); + + static string BuildSimpleName(Type t) { + if (!t.IsGenericType) return t.Name; + + var baseName = t.Name[..t.Name.IndexOf('`')]; + var args = t.GetGenericArguments().Select(AsUniqueName); + return $"{baseName}<{string.Join(", ", args)}>"; + } + } + } +} diff --git a/Beam/UnitDownloader.cs b/Beam/UnitDownloader.cs index e1035d2..0a200e6 100644 --- a/Beam/UnitDownloader.cs +++ b/Beam/UnitDownloader.cs @@ -10,9 +10,9 @@ namespace Beam { /// /// /// - public class UnitDownloader(HtmlWeb web, AsyncHtmlTransformer transformer, AsyncDownloadFailurePredicate?[]? failurePredicate = null) : IUnitDownloader { + public class UnitDownloader(HtmlWeb web, AsyncTransformer transformer, AsyncDownloadFailurePredicate?[]? failurePredicate = null) : IUnitDownloader { public HtmlWeb Web { get; } = web; - public virtual AsyncHtmlTransformer Transformer { get; } = transformer; + public virtual AsyncTransformer Transformer { get; } = transformer; public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicate; public int LinksPerDownload { get; } = 1; diff --git a/Beam/UnitDownloaderBinary.cs b/Beam/UnitDownloaderBinary.cs index 17434b3..375aecc 100644 --- a/Beam/UnitDownloaderBinary.cs +++ b/Beam/UnitDownloaderBinary.cs @@ -10,12 +10,12 @@ namespace Beam { /// public class UnitDownloaderBinary( HttpClient client, - AsyncBinaryTransformer transformer, - AsyncDownloadFailurePredicate?[]? failurePredicates = null) + AsyncTransformer transformer, + AsyncDownloadFailurePredicate?[]? failurePredicates = null) : IUnitDownloader { public HttpClient Client { get; } = client; - public virtual AsyncBinaryTransformer Transformer { get; } = transformer; - public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicates; + public virtual AsyncTransformer Transformer { get; } = transformer; + public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicates; public int LinksPerDownload { get; } = 1; @@ -24,9 +24,9 @@ namespace Beam { if (FailurePredicates is null) return false; var failed = false; - await Parallel.ForEachAsync(FailurePredicates, async (pred, _) => { + await Parallel.ForEachAsync(FailurePredicates, async (pred, ct) => { if (failed || pred is null) return; - if (await pred(response)) failed = true; + if (await pred(await response.Content.ReadAsByteArrayAsync(ct))) failed = true; }); return failed; } diff --git a/Beam/UnitFragmentDownloader.cs b/Beam/UnitFragmentDownloader.cs index 466768d..4addda4 100644 --- a/Beam/UnitFragmentDownloader.cs +++ b/Beam/UnitFragmentDownloader.cs @@ -10,7 +10,7 @@ using System.Threading.Tasks; namespace Beam { public class UnitFragmentDownloader : IUnitDownloader>> { public UnitFragmentDownloader(HtmlWeb web, - AsyncHtmlTransformer transformer, + AsyncTransformer transformer, AsyncDownloadFailurePredicate?[]? failurePredicate = null, int fragmentSize = 4, ILogger? logger = null, @@ -24,7 +24,7 @@ namespace Beam { } public HtmlWeb Web { get; } - public AsyncHtmlTransformer Transformer { get; } + public AsyncTransformer Transformer { get; } public AsyncDownloadFailurePredicate?[]? FailurePredicate { get; } public int LinksPerDownload { get; set; } public ILogger? Logger { get; set; } diff --git a/Beam/UnitFragmentDownloaderBinary.cs b/Beam/UnitFragmentDownloaderBinary.cs new file mode 100644 index 0000000..c3ad884 --- /dev/null +++ b/Beam/UnitFragmentDownloaderBinary.cs @@ -0,0 +1,75 @@ +using System; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; + +namespace Beam { + /// + /// Groups multiple binary downloads into a single Fragment, applying + /// failure detection and exponential-back-off retries for each link. + /// + public class UnitFragmentDownloaderBinary + : IUnitDownloader>> { + public UnitFragmentDownloaderBinary(HttpClient client, + AsyncTransformer transformer, + AsyncDownloadFailurePredicate?[]? failurePredicate = null, + int fragmentSize = 4, + ILogger? logger = null, + IUnitDownloader? internalDownloader = null) { + Client = client; + Transformer = transformer; + FailurePredicate = failurePredicate; + UnitDownloader = internalDownloader + ?? new UnitDownloaderBinary(Client, Transformer, FailurePredicate); + LinksPerDownload = fragmentSize; + Logger = logger; + } + + public HttpClient Client { get; } + public AsyncTransformer Transformer { get; } + public AsyncDownloadFailurePredicate?[]? FailurePredicate { get; } + public int LinksPerDownload { get; set; } + public ILogger? Logger { get; set; } + + private readonly IUnitDownloader UnitDownloader; + + async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload( + Ordered[] link, + CancellationToken ct, + int maximumRetryCount, + IProgress? tryProgress) { + var fragment = new Fragment>(link.Length); + if (!Fragment>.TryAcquireUpdater(fragment, out var updater)) + throw new S.AssertionException(S.M.NewFragmentShouldBeFree); + + var isFailure = false; + + await Parallel.ForEachAsync(link, async (orderedLink, pct) => { + pct.ThrowIfCancellationRequested(); + ct.ThrowIfCancellationRequested(); + + var (success, downloaded) = + await UnitDownloader.TryDownload([orderedLink], + ct, + maximumRetryCount, + tryProgress); + + if (!success || downloaded is null) { + Interlocked.Exchange(ref isFailure, true); + Logger?.LogError("Failed to retrieve {Link} order={Order}", + orderedLink.Data, orderedLink.Order); + return; + } + + updater(new Ordered(downloaded, orderedLink.Order)); + }); + + if (!isFailure) + Fragment>.SetComplete(fragment, true); + + Fragment>.TryReleaseUpdater(fragment, updater); + return (!isFailure, fragment); + } + } +} diff --git a/aeqw89.Beam/Class1.cs b/aeqw89.Beam/Class1.cs new file mode 100644 index 0000000..4302ca1 --- /dev/null +++ b/aeqw89.Beam/Class1.cs @@ -0,0 +1,5 @@ +namespace aeqw89.Beam { + public class Class1 { + + } +} diff --git a/aeqw89.Beam/aeqw89.Beam.csproj b/aeqw89.Beam/aeqw89.Beam.csproj new file mode 100644 index 0000000..2b89c7c --- /dev/null +++ b/aeqw89.Beam/aeqw89.Beam.csproj @@ -0,0 +1,121 @@ + + + + net9.0 + enable + enable + Beam + aeqw89 + qwsdcvghyu + 1.2.4 + A library for downloading internet resources + https://github.com/qwsdcvghyu89/Beam + https://github.com/qwsdcvghyu89/Beam + aeqw89.Beam + + + + all + + + all + + + all + + + all + + + all + + + + true + + + + + lib\$(TargetFramework)\ + true + + + lib\$(TargetFramework)\ + true + + + lib\$(TargetFramework)\ + true + + + lib\$(TargetFramework)\ + true + + + lib\$(TargetFramework)\ + true + + + lib\$(TargetFramework)\ + true + + + + true + + + + + + true + + + + + + true + + + + + + true + + + + + + true + + + + + + true + + + + + + true + + + + + + true + + + + + + true + + + + + + true + + + \ No newline at end of file