feat: update transformers to use ByteDocument type

Refactor the transformers in the downloader classes to use
ByteDocument instead of byte arrays. This change improves type
safety and clarity in handling document content during
downloads, ensuring that the transformations are more
consistent and maintainable.
This commit is contained in:
qwsdcvghyu89
2025-06-24 23:45:07 +03:00
parent 056e426572
commit 2317db9d3f
7 changed files with 42 additions and 20 deletions
+7 -4
View File
@@ -42,6 +42,9 @@ namespace Beam.Temporary.Cli {
public interface IAlternativeTransformStage { public interface IAlternativeTransformStage {
IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer); IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);
/// <summary>
/// Convenience overload that accepts a synchronous transformer.
/// Each result produced by <paramref name="transformer"/> is wrapped in an
/// already-completed task via <see cref="Task.FromResult{TResult}(TResult)"/>,
/// and the resulting lambda is forwarded to the other <c>WithTransformer</c> overload.
/// </summary>
/// <param name="transformer">Synchronous function mapping the raw value to the output value.</param>
/// <returns>Whatever the forwarded <c>WithTransformer</c> call returns.</returns>
IContextStage WithTransformer(Func<RawType, OutType> transformer) =>
    WithTransformer(raw => Task.FromResult(transformer(raw)));
} }
public interface IContextStage { public interface IContextStage {
@@ -192,8 +195,8 @@ namespace Beam.Temporary.Cli {
_parallelism, _parallelism,
context.DownloadLogger), context.DownloadLogger),
// ──────────────── fragmented binary ──────────────── // ──────────────── fragmented binary ────────────────
(true, AsyncTransformer<byte[], OutType> asyncBinaryTransformer, (true, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates) AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
=> new UnitFragmentDownloaderBinary<OutType>( => new UnitFragmentDownloaderBinary<OutType>(
context.Client, context.Client,
asyncBinaryTransformer, asyncBinaryTransformer,
@@ -208,8 +211,8 @@ namespace Beam.Temporary.Cli {
asyncHtmlTransformer, asyncHtmlTransformer,
documentFailurePredicates), documentFailurePredicates),
// ──────────────── single binary ──────────────── // ──────────────── single binary ────────────────
(false, AsyncTransformer<byte[], OutType> asyncBinaryTransformer, (false, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates) AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
=> new UnitDownloaderBinary<OutType>( => new UnitDownloaderBinary<OutType>(
context.Client, context.Client,
asyncBinaryTransformer, asyncBinaryTransformer,
+1 -1
View File
@@ -1,7 +1,7 @@
using System.Text; using System.Text;
namespace Beam { namespace Beam {
internal class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) { public class ByteDocument(string filename, byte[] content, Encoding? encoding = null) : Document(filename, encoding) {
public byte[] Content { get; set; } = content; public byte[] Content { get; set; } = content;
public override byte[] ToBytes() { public override byte[] ToBytes() {
+12
View File
@@ -16,7 +16,13 @@ namespace Beam {
Context = context; Context = context;
Logger = logger; Logger = logger;
LinksEnumerator = Context.Links.GetEnumerator(); LinksEnumerator = Context.Links.GetEnumerator();
try {
LinksEnumerator.Reset(); LinksEnumerator.Reset();
} catch (NotSupportedException) {
Logger?.LogWarning("Enumerator of type {} does not support resets. This may cause buggy behavior", LinksEnumerator.GetType());
}
if (!LinksEnumerator.MoveNext()) if (!LinksEnumerator.MoveNext())
throw new ArgumentOutOfRangeException(S.M.LinksCannotBeEmpty); throw new ArgumentOutOfRangeException(S.M.LinksCannotBeEmpty);
Current = default(OutType); Current = default(OutType);
@@ -29,14 +35,20 @@ namespace Beam {
} }
public async ValueTask<bool> MoveNextAsync() { public async ValueTask<bool> MoveNextAsync() {
if (!LinksEnumerator.Current.HasValue)
return false;
//Logger?.LogInformation("MoveNextAsync()");
var unit = GetUnitDownloader(); // safe to instantiate per request. var unit = GetUnitDownloader(); // safe to instantiate per request.
var idealLinkCount = unit.LinksPerDownload; var idealLinkCount = unit.LinksPerDownload;
List<Ordered<string>> links = []; List<Ordered<string>> links = [];
//Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri);
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++)); links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
while (LinksEnumerator.MoveNext() && LinksEnumerator.Current != SourceLink.InvalidLink && links.Count < idealLinkCount) while (LinksEnumerator.MoveNext() && LinksEnumerator.Current != SourceLink.InvalidLink && links.Count < idealLinkCount)
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++)); links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
//Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count);
if (links.Count == 0) { if (links.Count == 0) {
Logger?.LogInformation("Out of links!"); Logger?.LogInformation("Out of links!");
return false; return false;
+6
View File
@@ -14,10 +14,16 @@ namespace Beam {
public static SourceLink InvalidLink { get; } = new("invalid://link"); public static SourceLink InvalidLink { get; } = new("invalid://link");
/// <summary>
/// Equality operator for two <see cref="SourceLink"/> operands.
/// </summary>
/// <param name="lhs">Left-hand operand.</param>
/// <param name="rhs">Right-hand operand.</param>
/// <returns>
/// The result of comparing the <c>Link</c> members when both operands report
/// <c>HasValue</c>; otherwise the result of comparing the <c>Link_</c> members.
/// </returns>
public static bool operator ==(SourceLink lhs, SourceLink rhs) { public static bool operator ==(SourceLink lhs, SourceLink rhs) {
// Only go through Link when both sides report HasValue.
if (lhs.HasValue && rhs.HasValue)
return lhs.Link == rhs.Link; return lhs.Link == rhs.Link;
else
// Fallback: compare the Link_ members directly
// (presumably the storage behind Link — confirm against the type definition).
return lhs.Link_ == rhs.Link_;
} }
/// <summary>
/// Inequality operator for two <see cref="SourceLink"/> operands.
/// </summary>
/// <param name="lhs">Left-hand operand.</param>
/// <param name="rhs">Right-hand operand.</param>
/// <returns>
/// The result of the <c>!=</c> comparison of the <c>Link</c> members when both
/// operands report <c>HasValue</c>; otherwise the <c>!=</c> comparison of the
/// <c>Link_</c> members.
/// </returns>
public static bool operator !=(SourceLink lhs, SourceLink rhs) { public static bool operator !=(SourceLink lhs, SourceLink rhs) {
// Only go through Link when both sides report HasValue.
if (lhs.HasValue && rhs.HasValue)
return lhs.Link != rhs.Link; return lhs.Link != rhs.Link;
else
// Fallback: compare the Link_ members directly
// (presumably the storage behind Link — confirm against the type definition).
return lhs.Link_ != rhs.Link_;
} }
public override bool Equals(object? obj) { public override bool Equals(object? obj) {
+7 -6
View File
@@ -10,12 +10,12 @@ namespace Beam {
/// </summary> /// </summary>
public class UnitDownloaderBinary<T>( public class UnitDownloaderBinary<T>(
HttpClient client, HttpClient client,
AsyncTransformer<byte[], T> transformer, AsyncTransformer<ByteDocument, T> transformer,
AsyncDownloadFailurePredicate<byte[]>?[]? failurePredicates = null) AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null)
: IUnitDownloader<T> { : IUnitDownloader<T> {
public HttpClient Client { get; } = client; public HttpClient Client { get; } = client;
public virtual AsyncTransformer<byte[], T> Transformer { get; } = transformer; public virtual AsyncTransformer<ByteDocument, T> Transformer { get; } = transformer;
public virtual AsyncDownloadFailurePredicate<byte[]>?[]? FailurePredicates { get; } = failurePredicates; public virtual AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicates { get; } = failurePredicates;
public int LinksPerDownload { get; } = 1; public int LinksPerDownload { get; } = 1;
@@ -26,7 +26,8 @@ namespace Beam {
var failed = false; var failed = false;
await Parallel.ForEachAsync(FailurePredicates, async (pred, ct) => { await Parallel.ForEachAsync(FailurePredicates, async (pred, ct) => {
if (failed || pred is null) return; if (failed || pred is null) return;
if (await pred(await response.Content.ReadAsByteArrayAsync(ct))) failed = true; if (await pred(new ByteDocument(response.RequestMessage?.RequestUri?.AbsolutePath ?? "", await response.Content.ReadAsByteArrayAsync(ct))))
failed = true;
}); });
return failed; return failed;
} }
@@ -40,7 +41,7 @@ namespace Beam {
if (await IsFailure(response)) return (false, default); if (await IsFailure(response)) return (false, default);
var bytes = await response.Content.ReadAsByteArrayAsync(ct); var bytes = await response.Content.ReadAsByteArrayAsync(ct);
return (true, await Transformer(bytes)); return (true, await Transformer(new ByteDocument(link, bytes)));
} catch { } catch {
return (false, default); return (false, default);
} }
+4 -4
View File
@@ -12,8 +12,8 @@ namespace Beam {
public class UnitFragmentDownloaderBinary<T> public class UnitFragmentDownloaderBinary<T>
: IUnitDownloader<Fragment<Ordered<T>>> { : IUnitDownloader<Fragment<Ordered<T>>> {
public UnitFragmentDownloaderBinary(HttpClient client, public UnitFragmentDownloaderBinary(HttpClient client,
AsyncTransformer<byte[], T> transformer, AsyncTransformer<ByteDocument, T> transformer,
AsyncDownloadFailurePredicate<byte[]>?[]? failurePredicate = null, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicate = null,
int fragmentSize = 4, int fragmentSize = 4,
ILogger? logger = null, ILogger? logger = null,
IUnitDownloader<T>? internalDownloader = null) { IUnitDownloader<T>? internalDownloader = null) {
@@ -27,8 +27,8 @@ namespace Beam {
} }
public HttpClient Client { get; } public HttpClient Client { get; }
public AsyncTransformer<byte[], T> Transformer { get; } public AsyncTransformer<ByteDocument, T> Transformer { get; }
public AsyncDownloadFailurePredicate<byte[]>?[]? FailurePredicate { get; } public AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicate { get; }
public int LinksPerDownload { get; set; } public int LinksPerDownload { get; set; }
public ILogger? Logger { get; set; } public ILogger? Logger { get; set; }
+1 -1
View File
@@ -7,7 +7,7 @@
<Title>Beam</Title> <Title>Beam</Title>
<Authors>aeqw89</Authors> <Authors>aeqw89</Authors>
<Company>qwsdcvghyu</Company> <Company>qwsdcvghyu</Company>
<Version>1.2.4</Version> <Version>1.2.10</Version>
<Description>A library for downloading internet resources</Description> <Description>A library for downloading internet resources</Description>
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl> <PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl> <RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>