From 2317db9d3fe316859edb1a59a95ecc1f0cd7e5b1 Mon Sep 17 00:00:00 2001 From: qwsdcvghyu89 <61093706+qwsdcvghyu89@users.noreply.github.com> Date: Tue, 24 Jun 2025 23:45:07 +0300 Subject: [PATCH] feat: update transformers to use ByteDocument type Refactor the transformers in the downloader classes to use ByteDocument instead of byte arrays. This change improves type safety and clarity in handling document content during downloads, ensuring that the transformations are more consistent and maintainable. Also included in this change: ByteDocument becomes public, IAlternativeTransformStage gains a synchronous WithTransformer overload, SequentialDownloader tolerates enumerators whose Reset() throws NotSupportedException and returns false from MoveNextAsync when the current link has no value, the SourceLink equality operators handle links without a value, and the value in aeqw89.Beam.csproj changes from 1.2.4 to 1.2.10. --- Beam.Temporary.Cli/DownloadBuilder.cs | 11 +++++++---- Beam/ByteDocument.cs | 2 +- Beam/SequentialDownloader.cs | 16 ++++++++++++++-- Beam/SourceLink.cs | 10 ++++++++-- Beam/UnitDownloaderBinary.cs | 13 +++++++------ Beam/UnitFragmentDownloaderBinary.cs | 8 ++++---- aeqw89.Beam/aeqw89.Beam.csproj | 2 +- 7 files changed, 42 insertions(+), 20 deletions(-) diff --git a/Beam.Temporary.Cli/DownloadBuilder.cs b/Beam.Temporary.Cli/DownloadBuilder.cs index 87cb95e..5b30139 100644 --- a/Beam.Temporary.Cli/DownloadBuilder.cs +++ b/Beam.Temporary.Cli/DownloadBuilder.cs @@ -42,6 +42,9 @@ namespace Beam.Temporary.Cli { public interface IAlternativeTransformStage { IContextStage WithTransformer(AsyncTransformer transformer); + IContextStage WithTransformer(Func transformer) { + return WithTransformer(rt => Task.FromResult(transformer(rt))); + } } public interface IContextStage { @@ -192,8 +195,8 @@ namespace Beam.Temporary.Cli { _parallelism, context.DownloadLogger), // ──────────────── fragmented binary ──────────────── - (true, AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) + (true, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) => new UnitFragmentDownloaderBinary( context.Client, asyncBinaryTransformer, @@ -208,8 +211,8 @@ namespace Beam.Temporary.Cli { asyncHtmlTransformer, documentFailurePredicates), // ──────────────── single binary ──────────────── - (false,
AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) + (false, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) => new UnitDownloaderBinary( context.Client, asyncBinaryTransformer, diff --git a/Beam/ByteDocument.cs b/Beam/ByteDocument.cs index 7a6cdfe..fa2d047 100644 --- a/Beam/ByteDocument.cs +++ b/Beam/ByteDocument.cs @@ -1,7 +1,7 @@ using System.Text; namespace Beam { - internal class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) { + public class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) { public byte[] Content { get; set; } = content; public override byte[] ToBytes() { diff --git a/Beam/SequentialDownloader.cs b/Beam/SequentialDownloader.cs index d2a9171..f4b5d1b 100644 --- a/Beam/SequentialDownloader.cs +++ b/Beam/SequentialDownloader.cs @@ -16,7 +16,13 @@ namespace Beam { Context = context; Logger = logger; LinksEnumerator = Context.Links.GetEnumerator(); - LinksEnumerator.Reset(); + + try { + LinksEnumerator.Reset(); + } catch (NotSupportedException) { + Logger?.LogWarning("Enumerator of type {} does not support resets. This may cause buggy behavior", LinksEnumerator.GetType()); + } + if (!LinksEnumerator.MoveNext()) throw new ArgumentOutOfRangeException(S.M.LinksCannotBeEmpty); Current = default(OutType); @@ -29,14 +35,20 @@ namespace Beam { } public async ValueTask MoveNextAsync() { + if (!LinksEnumerator.Current.HasValue) + return false; + + //Logger?.LogInformation("MoveNextAsync()"); var unit = GetUnitDownloader(); // safe to instantiate per request. 
var idealLinkCount = unit.LinksPerDownload; List> links = []; - + + //Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri); links.Add(new Ordered(LinksEnumerator.Current.Link.ToString(), LastOrder++)); while (LinksEnumerator.MoveNext() && LinksEnumerator.Current != SourceLink.InvalidLink && links.Count < idealLinkCount) links.Add(new Ordered(LinksEnumerator.Current.Link.ToString(), LastOrder++)); + //Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count); if (links.Count == 0) { Logger?.LogInformation("Out of links!"); return false; diff --git a/Beam/SourceLink.cs b/Beam/SourceLink.cs index 721b442..c1a5e9d 100644 --- a/Beam/SourceLink.cs +++ b/Beam/SourceLink.cs @@ -14,10 +14,16 @@ namespace Beam { public static SourceLink InvalidLink { get; } = new("invalid://link"); public static bool operator ==(SourceLink lhs, SourceLink rhs) { - return lhs.Link == rhs.Link; + if (lhs.HasValue && rhs.HasValue) + return lhs.Link == rhs.Link; + else + return lhs.Link_ == rhs.Link_; } public static bool operator !=(SourceLink lhs, SourceLink rhs) { - return lhs.Link != rhs.Link; + if (lhs.HasValue && rhs.HasValue) + return lhs.Link != rhs.Link; + else + return lhs.Link_ != rhs.Link_; } public override bool Equals(object? obj) { diff --git a/Beam/UnitDownloaderBinary.cs b/Beam/UnitDownloaderBinary.cs index 375aecc..8d45471 100644 --- a/Beam/UnitDownloaderBinary.cs +++ b/Beam/UnitDownloaderBinary.cs @@ -10,12 +10,12 @@ namespace Beam { /// public class UnitDownloaderBinary( HttpClient client, - AsyncTransformer transformer, - AsyncDownloadFailurePredicate?[]? failurePredicates = null) + AsyncTransformer transformer, + AsyncDownloadFailurePredicate?[]? failurePredicates = null) : IUnitDownloader { public HttpClient Client { get; } = client; - public virtual AsyncTransformer Transformer { get; } = transformer; - public virtual AsyncDownloadFailurePredicate?[]? 
FailurePredicates { get; } = failurePredicates; + public virtual AsyncTransformer Transformer { get; } = transformer; + public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicates; public int LinksPerDownload { get; } = 1; @@ -26,7 +26,8 @@ namespace Beam { var failed = false; await Parallel.ForEachAsync(FailurePredicates, async (pred, ct) => { if (failed || pred is null) return; - if (await pred(await response.Content.ReadAsByteArrayAsync(ct))) failed = true; + if (await pred(new ByteDocument(response.RequestMessage?.RequestUri?.AbsolutePath ?? "", await response.Content.ReadAsByteArrayAsync(ct)))) + failed = true; }); return failed; } @@ -40,7 +41,7 @@ namespace Beam { if (await IsFailure(response)) return (false, default); var bytes = await response.Content.ReadAsByteArrayAsync(ct); - return (true, await Transformer(bytes)); + return (true, await Transformer(new ByteDocument(link, bytes))); } catch { return (false, default); } diff --git a/Beam/UnitFragmentDownloaderBinary.cs b/Beam/UnitFragmentDownloaderBinary.cs index c3ad884..7c5a342 100644 --- a/Beam/UnitFragmentDownloaderBinary.cs +++ b/Beam/UnitFragmentDownloaderBinary.cs @@ -12,8 +12,8 @@ namespace Beam { public class UnitFragmentDownloaderBinary : IUnitDownloader>> { public UnitFragmentDownloaderBinary(HttpClient client, - AsyncTransformer transformer, - AsyncDownloadFailurePredicate?[]? failurePredicate = null, + AsyncTransformer transformer, + AsyncDownloadFailurePredicate?[]? failurePredicate = null, int fragmentSize = 4, ILogger? logger = null, IUnitDownloader? internalDownloader = null) { @@ -27,8 +27,8 @@ namespace Beam { } public HttpClient Client { get; } - public AsyncTransformer Transformer { get; } - public AsyncDownloadFailurePredicate?[]? FailurePredicate { get; } + public AsyncTransformer Transformer { get; } + public AsyncDownloadFailurePredicate?[]? FailurePredicate { get; } public int LinksPerDownload { get; set; } public ILogger? 
Logger { get; set; } diff --git a/aeqw89.Beam/aeqw89.Beam.csproj b/aeqw89.Beam/aeqw89.Beam.csproj index 2b89c7c..69f6b5e 100644 --- a/aeqw89.Beam/aeqw89.Beam.csproj +++ b/aeqw89.Beam/aeqw89.Beam.csproj @@ -7,7 +7,7 @@ Beam aeqw89 qwsdcvghyu - 1.2.4 + 1.2.10 A library for downloading internet resources https://github.com/qwsdcvghyu89/Beam https://github.com/qwsdcvghyu89/Beam