Enhance async capabilities and refactor project structure

Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Puppeteer`, `Beam.Temporary.Cli`, and `Beam` to include new XML headers, reorganize property groups, and add project references.

Modified `PuppetedUnitDownloader` to support additional parameters for async transformers. Changed return types in `CommonTransformers` to `AsyncTransformer` for asynchronous processing.

Significant refactoring in `DownloadBuilder`, `DownloadContext`, and `DownloadContextBuilder` to introduce generic parameters and improve context management. Updated `SequentialDownloader`, `SequentialFragmentDownloader`, and `UnitDownloader` to accommodate new async transformer types.

Introduced `TypeExtensions` for unique type name generation and added `UnitFragmentDownloaderBinary` for handling binary downloads. Updated solution file to include the new `aeqw89.Beam` project, ensuring proper references across the solution.

These changes enhance the asynchronous capabilities of the Beam library, improve type safety, and streamline the downloading process.
This commit is contained in:
qwsdcvghyu89
2025-06-23 20:30:09 +03:00
parent 482a46b568
commit 056e426572
23 changed files with 531 additions and 262 deletions
+30 -32
View File
@@ -1,32 +1,30 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Title>Beam.Temporary.Cli</Title>
<Authors>aeqw89</Authors>
<Company>qwsdcvghyu</Company>
<Description>A temporary CLI for Beam providing several useful mechanisms</Description>
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
<PackageId>aeqw89.Beam.Temporary.Cli</PackageId>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="aeqw89.Beam.Exports" Version="1.0.0" />
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
<PackageReference Include="aeqw89.Beam.Dynamic" Version="1.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
<PackageReference Include="OpenAI" Version="2.1.0" />
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
</Project>
<?xml version='1.0' encoding='utf-8'?>
<!-- SDK-style project file: builds an executable targeting net9.0. -->
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: executable output, implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- NuGet package dependencies, each pinned to an exact version. -->
<ItemGroup>
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
<PackageReference Include="OpenAI" Version="2.1.0" />
<!-- Note: Spectre.Console is pinned to a preview build. -->
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
<!-- Sibling Beam projects, referenced from source by relative path. -->
<!-- NOTE(review): every reference sets PrivateAssets=all; presumably this keeps the references from flowing to consumers of this project. Confirm that is the intent. -->
<ItemGroup>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam\Beam.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
</ItemGroup>
</Project>
+7 -6
View File
@@ -1,5 +1,6 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
@@ -8,16 +9,16 @@ using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
public static class CommonTransformers {
public static HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
return new ArticleData() {
public static AsyncTransformer<HtmlDocument, IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
return Task.FromResult<IDocumentMetaData>(new ArticleData() {
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
};
});
};
public static HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
public static AsyncTransformer<HtmlDocument, IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
var resolved = binding?.Resolve(x);
var articleData = new ArticleData() {
Name = OnlineCleaner.Clean(resolved?.Title),
@@ -26,9 +27,9 @@ namespace Beam.Temporary.Cli {
meta.Add(IArchitecture.Default.ChapterKey, articleData);
if (metaData is not null)
meta.Add(IArchitecture.Default.BookKey, metaData);
return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
return Task.FromResult<IDocument>(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
MetaData = meta
};
});
};
}
}
+100 -31
View File
@@ -4,6 +4,7 @@ using Beam;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using HtmlAgilityPack;
namespace Beam.Temporary.Cli {
/// <summary>
@@ -11,7 +12,7 @@ namespace Beam.Temporary.Cli {
/// (source → link selection → transformer) and surfaces operational knobs as first-class
/// methods instead of magic parameters.
/// </summary>
public static class DownloadBuilder<T> {
public static class DownloadBuilder<RawType, OutType> {
/* ──────────────────────────── Entry points ─────────────────────────── */
public static ILinkStage FromMeta(DataKey<TextResource> novelKey, BeamDataDictionary data) =>
@@ -20,6 +21,9 @@ namespace Beam.Temporary.Cli {
/// <summary>
/// Entry point that starts a pipeline from the text source registered for
/// <paramref name="novelKey"/>.
/// </summary>
/// <param name="novelKey">Key of the text resource to download.</param>
/// <param name="data">Data dictionary the key is resolved against.</param>
/// <returns>The link stage produced by <c>Create</c> for <c>SourceKind.Text</c>.</returns>
public static ILinkStage FromText(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Text);
}
/// <summary>
/// Entry point for a pipeline that is not backed by a data dictionary entry.
/// </summary>
/// <remarks>
/// The returned stage is created with no source, no initial state and no data
/// dictionary (all three are passed as <c>null!</c>) and with a freshly
/// constructed context builder.
/// </remarks>
/// <returns>A link stage exposed through <see cref="IAlternativeLinkStage"/>.</returns>
public static IAlternativeLinkStage FromScratch() {
    var emptyBuilder = new DownloadContextBuilder<RawType>();
    return new LinkStage(
        Source: null!,
        Initial: null!,
        Data: null!,
        CtxBuilder: emptyBuilder);
}
/* ────────────────────────────── Stages ─────────────────────────────── */
public interface ILinkStage {
@@ -28,16 +32,25 @@ namespace Beam.Temporary.Cli {
ILinkStage WithRange(Range range);
}
public interface ITransformStage {
IContextStage<U> WithTransformer<U>(Func<DataBindings, AsyncTransformer<T, U>> factory);
/// <summary>
/// Link stage returned by <c>FromScratch</c>: the caller supplies the links directly.
/// </summary>
public interface IAlternativeLinkStage {
/// <summary>
/// Hands <paramref name="links"/> to the context builder and advances to the transformer stage.
/// </summary>
/// <param name="links">Links to register with the context builder.</param>
/// <returns>The stage on which a ready-made transformer is supplied.</returns>
IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
}
public interface IContextStage<U> {
IContextStage<U> Configure(Action<DownloadContextBuilder<T>> configure);
IContextStage<U> WithParallelism(int degree);
IContextStage<U> WithTimeout(TimeSpan timeout);
IContextStage<U> WithRetryReporter(IProgress<RetryReport> reporter);
DownloadEnumerable<T> Build();
/// <summary>
/// Transformer stage for pipelines that are backed by a data dictionary entry.
/// </summary>
public interface ITransformStage {
/// <summary>
/// Builds the transformer by calling <paramref name="factory"/> with the data bindings
/// looked up for the pipeline's source, then advances to the context stage.
/// </summary>
/// <param name="factory">Creates the transformer from the resolved <c>DataBindings</c>.</param>
/// <returns>The context stage used to configure and build the download.</returns>
IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
}
/// <summary>
/// Transformer stage that follows <see cref="IAlternativeLinkStage"/>: the caller passes a
/// ready-made transformer instead of a factory taking data bindings.
/// </summary>
public interface IAlternativeTransformStage {
/// <summary>
/// Uses <paramref name="transformer"/> as-is and advances to the context stage.
/// </summary>
/// <param name="transformer">Transformer from <c>RawType</c> to <c>OutType</c>.</param>
/// <returns>The context stage used to configure and build the download.</returns>
IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);
}
/// <summary>
/// Final builder stage: exposes the operational settings and produces the download enumerable.
/// Every method except <see cref="Build"/> returns an <see cref="IContextStage"/> so calls can be chained.
/// </summary>
public interface IContextStage {
/// <summary>Invokes <paramref name="configure"/> on the underlying context builder.</summary>
IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);
/// <summary>
/// Sets the degree of parallelism.
/// NOTE(review): in the visible implementation the value is only forwarded to the fragment
/// downloaders and the field defaults to 4 — confirm how it is applied.
/// </summary>
IContextStage WithParallelism(int degree);
/// <summary>Sets a timeout. NOTE(review): the implementation is not visible here; confirm what the timeout covers.</summary>
IContextStage WithTimeout(TimeSpan timeout);
/// <summary>Registers a reporter for <c>RetryReport</c> values. NOTE(review): the implementation is not visible here.</summary>
IContextStage WithRetryReporter(IProgress<RetryReport> reporter);
/// <summary>Builds the download context and returns the enumerable that yields the transformed results.</summary>
DownloadEnumerable<OutType> Build();
/// <summary>Switches <see cref="Build"/> to the fragment-based downloaders.</summary>
IContextStage UseFragments();
}
/* ────────────────────────── Implementation ────────────────────────── */
@@ -46,7 +59,7 @@ namespace Beam.Temporary.Cli {
private static ILinkStage Create(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
var (source, initial) = Resolve(novelKey, data, kind);
var ctxBuilder = new DownloadContextBuilder<T>().WithLinks(Array.Empty<SourceLink>()); // placeholder, filled later.
var ctxBuilder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>()); // placeholder, filled later.
return new LinkStage(source, initial, data, ctxBuilder);
}
@@ -71,11 +84,12 @@ namespace Beam.Temporary.Cli {
/* ──────────────────────────── Stage types ─────────────────────────── */
private sealed record LinkStage(
WebResource Source,
State Initial,
BeamDataDictionary Data,
DownloadContextBuilder<T> CtxBuilder) : ILinkStage {
DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {
private State? endState;
private bool linksFrozen = false;
@@ -97,6 +111,11 @@ namespace Beam.Temporary.Cli {
return new TransformStage(Source, Data, CtxBuilder);
}
/// <summary>
/// Registers caller-supplied links with the context builder and moves on to the
/// transformer stage.
/// </summary>
/// <param name="links">Links to pass to the context builder.</param>
/// <returns>A transform stage sharing this stage's source, data and context builder.</returns>
public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
    // The value returned by the builder is not used; the same CtxBuilder instance is handed on.
    CtxBuilder.WithLinks(links);
    var nextStage = new TransformStage(Source, Data, CtxBuilder);
    return nextStage;
}
public ILinkStage WithRange(Range range) {
if (linksFrozen)
throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator");
@@ -114,24 +133,29 @@ namespace Beam.Temporary.Cli {
private sealed record TransformStage(
WebResource Source,
BeamDataDictionary Data,
DownloadContextBuilder<T> CtxBuilder) : ITransformStage {
public IContextStage WithTransformer<U>(Func<DataBindings, Func<object, T>> factory) {
DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {
public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
var transformer = factory(Data.Bindings[Source.Bindings]);
return new ContextStage<U>(CtxBuilder, transformer);
return new ContextStage(CtxBuilder, transformer);
}
/// <summary>
/// Advances to the context stage using a ready-made transformer; no data bindings are
/// looked up on this path.
/// </summary>
/// <param name="transformer">Transformer from <c>RawType</c> to <c>OutType</c>.</param>
/// <returns>A context stage wrapping this stage's context builder and the transformer.</returns>
public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer) =>
    new ContextStage(CtxBuilder, transformer);
}
private sealed class ContextStage<U> : IContextStage {
private readonly DownloadContextBuilder<T> _ctxBuilder;
private readonly Func<object, T> _transformer;
private sealed class ContextStage : IContextStage {
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
private readonly AsyncTransformer<RawType, OutType> _transformer;
private int _parallelism = 4;
private bool useFragments = false;
public ContextStage(DownloadContextBuilder<T> ctxBuilder, Func<object, T> transformer) {
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
_ctxBuilder = ctxBuilder;
_transformer = transformer;
}
public IContextStage Configure(Action<DownloadContextBuilder<T>> configure) {
/// <summary>
/// Lets the caller adjust the underlying context builder directly.
/// </summary>
/// <param name="configure">Callback invoked once with the stage's context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
    configure.Invoke(_ctxBuilder);
    return this;
}
@@ -151,21 +175,66 @@ namespace Beam.Temporary.Cli {
return this;
}
public DownloadEnumerable<T> Build() {
var context = _ctxBuilder.Build();
SequentialFragmentDownloader<T> sequentialDownloader = new(
context,
ctx => new UnitFragmentDownloader<T>(
/// <summary>
/// Opts in to the fragment-based downloaders; the flag is read when the downloader is
/// constructed during <c>Build</c>.
/// </summary>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseFragments() {
    this.useFragments = true;
    return this;
}
/// <summary>
/// Picks the unit downloader matching the current configuration. The choice is driven by
/// three values: the <c>useFragments</c> flag, the runtime type of the transformer
/// (HTML document vs. byte array input) and the runtime type of the context's async
/// failure predicates.
/// </summary>
/// <param name="context">Download context supplying the web/client objects, failure predicates and logger.</param>
/// <returns>
/// The downloader as <c>object</c>; the caller casts it to the unit-downloader interface it
/// expects, because the fragmented and single variants are consumed through different interface types.
/// </returns>
/// <exception cref="Exception">No arm matches the flag / transformer / predicate combination.</exception>
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
return (useFragments, _transformer, context.AsyncFailurePredicates) switch {
// ──────────────── fragmented HTML ────────────────
(true, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
=> new UnitFragmentDownloader<OutType>(
context.Web,
// NOTE(review): the next two arguments duplicate the narrowed asyncHtmlTransformer /
// documentFailurePredicates that follow them; they look like superseded lines — confirm and remove.
_transformer,
context.AsyncFailurePredicates,
asyncHtmlTransformer,
documentFailurePredicates,
_parallelism,
context.DownloadLogger),
// NOTE(review): the five lines below reference T and sequentialDownloader, neither of which is
// declared in this method; they look like leftovers from an earlier Build() body — confirm and remove.
context.DownloadLogger);
var enumerable = new DownloadEnumerable<T>(
sequentialDownloader
.UnwrapFragmented());
sequentialDownloader.DisposeAsync().AsTask().Wait();
// ──────────────── fragmented binary ────────────────
(true, AsyncTransformer<byte[], OutType> asyncBinaryTransformer,
AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates)
=> new UnitFragmentDownloaderBinary<OutType>(
context.Client,
asyncBinaryTransformer,
responseFailurePredicates,
_parallelism,
context.DownloadLogger),
// ──────────────── single HTML ────────────────
(false, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
=> new UnitDownloader<OutType>(
context.Web,
asyncHtmlTransformer,
documentFailurePredicates),
// ──────────────── single binary ────────────────
(false, AsyncTransformer<byte[], OutType> asyncBinaryTransformer,
AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates)
=> new UnitDownloaderBinary<OutType>(
context.Client,
asyncBinaryTransformer,
responseFailurePredicates),
// Fallback: the message reports the flag and the runtime type names of the transformer and predicates.
// NOTE(review): this throws the base Exception type; a more specific type such as
// NotSupportedException would be easier for callers to handle.
_ => throw new Exception($"Unsupported transformer / failure-predicate combination. Missing pattern: {useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
};
}
/// <summary>
/// Creates the sequential downloader for the current configuration and adapts it to an
/// enumerator of ordered results.
/// </summary>
/// <param name="context">Context to download with; the downloader itself receives a rebuilt copy.</param>
/// <returns>
/// The fragment downloader unwrapped via <c>UnwrapFragmented</c> when fragments are enabled,
/// otherwise the plain sequential downloader wrapped via <c>WrapOrdered</c>.
/// </returns>
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
    // The downloader gets its own context instance, rebuilt from the original's builder.
    var copyOfContext = context.CreateBuilder().Build();

    if (useFragments) {
        var fragmentDownloader = new SequentialFragmentDownloader<RawType, OutType>(
            copyOfContext,
            ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
            context.DownloadLogger);
        return fragmentDownloader.UnwrapFragmented();
    }

    var plainDownloader = new SequentialDownloader<RawType, OutType>(
        copyOfContext,
        ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
        context.DownloadLogger);
    return plainDownloader.WrapOrdered();
}
/// <summary>
/// Finalises the pipeline: builds the download context from the accumulated settings and
/// wraps the resulting downloader in a <c>DownloadEnumerable</c>.
/// </summary>
/// <returns>The enumerable that yields the transformed results.</returns>
public DownloadEnumerable<OutType> Build() =>
    new DownloadEnumerable<OutType>(ConstructDownloader(_ctxBuilder.Build()));
}
+2 -31
View File
@@ -11,15 +11,6 @@ namespace Beam.Temporary.Cli {
public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");
public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");
public HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
return new ArticleData() {
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
};
};
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
@@ -34,14 +25,7 @@ namespace Beam.Temporary.Cli {
var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
var binding = auxiliary.Bindings;
return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
return new ArticleData() {
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
};
});
return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
}
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
@@ -60,20 +44,7 @@ namespace Beam.Temporary.Cli {
template.Builder, new NumberedStateChanger(template.Factory.Behavior),
res.Resource.TemplateInitialData));
return new DownloadContext<IDocument>(web, sle,
transformer: (x) => {
var resolved = aggregator.Bindings.Resolve(x);
var articleData = new ArticleData() {
Name = OnlineCleaner.Clean(resolved.Title),
};
Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
meta.Add(ChapterKey, articleData);
if (metaData is not null)
meta.Add(BookKey, metaData);
return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) {
MetaData = meta
};
},
return new DownloadContext<IDocument>(web, new(), sle,
retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
//downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
asyncFailurePredicates: [
+4 -27
View File
@@ -53,34 +53,11 @@ namespace Beam.Temporary.Cli {
CancellationTokenSource cts = new();
HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
return new ArticleData() {
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
};
};
HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
var resolved = binding?.Resolve(x);
var articleData = new ArticleData() {
Name = OnlineCleaner.Clean(resolved?.Title),
};
Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
meta.Add(IArchitecture.Default.ChapterKey, articleData);
if (metaData is not null)
meta.Add(IArchitecture.Default.BookKey, metaData);
return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
MetaData = meta
};
};
var novel = new DataKey<TextResource>("novels:house_of_horrors");
var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
var metadata2 = await DownloadBuilder<HtmlDocument, IDocumentMetaData>.FromMeta(novel, BeamData)
.WithLink()
.WithTransformer(ArticleDataTransformer)
.WithTransformer(CommonTransformers.ArticleDataTransformer)
.Configure((x) => x
.WithDownloadLogger(logger)
.WithRetryReporter(new Progress<RetryReport>())
@@ -88,10 +65,10 @@ namespace Beam.Temporary.Cli {
.Build()
.FirstAsync();
var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
.WithRange(1..5)
.WithLinkGenerator()
.WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
.WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
.Configure((x) => x
.WithDownloadLogger(logger)
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))