Enhance async capabilities and refactor project structure
Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Puppeteer`, `Beam.Temporary.Cli`, and `Beam` to include new XML headers, reorganize property groups, and add project references. Modified `PuppetUnitDownloader` so that its constructor takes the async transformer and the async failure predicates as explicit parameters. Changed return types in `CommonTransformers` to `AsyncTransformer` for asynchronous processing. Significant refactoring in `DownloadBuilder`, `DownloadContext`, and `DownloadContextBuilder` to introduce generic parameters and improve context management. Updated `SequentialDownloader`, `SequentialFragmentDownloader`, and `UnitDownloader` to accommodate new async transformer types. Introduced `TypeExtensions` for unique type name generation and added `UnitFragmentDownloaderBinary` for handling binary downloads. Updated solution file to include the new `aeqw89.Beam` project, ensuring proper references across the solution. These changes enhance the asynchronous capabilities of the Beam library, improve type safety, and streamline the downloading process.
This commit is contained in:
@@ -1,24 +1,19 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<Title>Beam Dynamic</Title>
|
||||
<Authors>aeqw89</Authors>
|
||||
<Company>qwsdcvghyu</Company>
|
||||
<Product />
|
||||
<Description>Beam utilities facilitating dynamic fetching of elements of webpages</Description>
|
||||
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||
<PackageId>aeqw89.Beam.Dynamic</PackageId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,20 +1,16 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<Title>Beam.Exports</Title>
|
||||
<Authors>aeqw89</Authors>
|
||||
<Company>qwsdcvghyu</Company>
|
||||
<Description>Beam library that facilitates exporting different kinds of views for IDocuments</Description>
|
||||
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||
<PackageId>aeqw89.Beam.Exports</PackageId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,17 +1,16 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Playwright" Version="1.52.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -11,8 +11,8 @@ namespace Beam.Puppeteer {
|
||||
public class PuppetUnitDownloader<T> : UnitDownloader<T> {
|
||||
public PuppetContext Context { get; }
|
||||
|
||||
public PuppetUnitDownloader(PuppetContext pc, DownloadContext<T> context)
|
||||
: base(context.Web, context.AsyncTranformer, context.AsyncFailurePredicates) {
|
||||
public PuppetUnitDownloader(PuppetContext pc, DownloadContext<T> context, AsyncTransformer<HtmlDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<HtmlDocument>[] asyncDownloadFailurePredicates)
|
||||
: base(context.Web, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
|
||||
Context = pc;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,26 +1,14 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
|
||||
<Title>Beam.Temporary.Cli</Title>
|
||||
<Authors>aeqw89</Authors>
|
||||
<Company>qwsdcvghyu</Company>
|
||||
<Description>A temporary CLI for Beam providing several useful mechanisms</Description>
|
||||
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||
<PackageId>aeqw89.Beam.Temporary.Cli</PackageId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.Beam.Exports" Version="1.0.0" />
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
|
||||
<PackageReference Include="aeqw89.Beam.Dynamic" Version="1.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
||||
@@ -28,5 +16,15 @@
|
||||
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,5 +1,6 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
@@ -8,16 +9,16 @@ using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
public static class CommonTransformers {
|
||||
public static HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return new ArticleData() {
|
||||
public static AsyncTransformer<HtmlDocument, IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult<IDocumentMetaData>(new ArticleData() {
|
||||
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
|
||||
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
public static HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
public static AsyncTransformer<HtmlDocument, IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
var resolved = binding?.Resolve(x);
|
||||
var articleData = new ArticleData() {
|
||||
Name = OnlineCleaner.Clean(resolved?.Title),
|
||||
@@ -26,9 +27,9 @@ namespace Beam.Temporary.Cli {
|
||||
meta.Add(IArchitecture.Default.ChapterKey, articleData);
|
||||
if (metaData is not null)
|
||||
meta.Add(IArchitecture.Default.BookKey, metaData);
|
||||
return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
return Task.FromResult<IDocument>(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
MetaData = meta
|
||||
};
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ using Beam;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
|
||||
@@ -11,7 +12,7 @@ namespace Beam.Temporary.Cli {
|
||||
/// (source → link selection → transformer) and surfaces operational knobs as first‑class
|
||||
/// methods instead of magic parameters.
|
||||
/// </summary>
|
||||
public static class DownloadBuilder<T> {
|
||||
public static class DownloadBuilder<RawType, OutType> {
|
||||
/* ──────────────────────────── Entry points ─────────────────────────── */
|
||||
|
||||
public static ILinkStage FromMeta(DataKey<TextResource> novelKey, BeamDataDictionary data) =>
|
||||
@@ -20,6 +21,9 @@ namespace Beam.Temporary.Cli {
|
||||
public static ILinkStage FromText(DataKey<TextResource> novelKey, BeamDataDictionary data) =>
|
||||
Create(novelKey, data, SourceKind.Text);
|
||||
|
||||
public static IAlternativeLinkStage FromScratch()
|
||||
=> new LinkStage(null!, null!, null!, new());
|
||||
|
||||
/* ────────────────────────────── Stages ─────────────────────────────── */
|
||||
|
||||
public interface ILinkStage {
|
||||
@@ -28,16 +32,25 @@ namespace Beam.Temporary.Cli {
|
||||
ILinkStage WithRange(Range range);
|
||||
}
|
||||
|
||||
public interface ITransformStage {
|
||||
IContextStage<U> WithTransformer<U>(Func<DataBindings, AsyncTransformer<T, U>> factory);
|
||||
public interface IAlternativeLinkStage {
|
||||
IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
|
||||
}
|
||||
|
||||
public interface IContextStage<U> {
|
||||
IContextStage<U> Configure(Action<DownloadContextBuilder<T>> configure);
|
||||
IContextStage<U> WithParallelism(int degree);
|
||||
IContextStage<U> WithTimeout(TimeSpan timeout);
|
||||
IContextStage<U> WithRetryReporter(IProgress<RetryReport> reporter);
|
||||
DownloadEnumerable<T> Build();
|
||||
public interface ITransformStage {
|
||||
IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
|
||||
}
|
||||
|
||||
public interface IAlternativeTransformStage {
|
||||
IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);
|
||||
}
|
||||
|
||||
public interface IContextStage {
|
||||
IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);
|
||||
IContextStage WithParallelism(int degree);
|
||||
IContextStage WithTimeout(TimeSpan timeout);
|
||||
IContextStage WithRetryReporter(IProgress<RetryReport> reporter);
|
||||
DownloadEnumerable<OutType> Build();
|
||||
IContextStage UseFragments();
|
||||
}
|
||||
|
||||
/* ────────────────────────── Implementation ────────────────────────── */
|
||||
@@ -46,7 +59,7 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
private static ILinkStage Create(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
|
||||
var (source, initial) = Resolve(novelKey, data, kind);
|
||||
var ctxBuilder = new DownloadContextBuilder<T>().WithLinks(Array.Empty<SourceLink>()); // placeholder, filled later.
|
||||
var ctxBuilder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>()); // placeholder, filled later.
|
||||
return new LinkStage(source, initial, data, ctxBuilder);
|
||||
}
|
||||
|
||||
@@ -71,11 +84,12 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
/* ──────────────────────────── Stage types ─────────────────────────── */
|
||||
|
||||
|
||||
private sealed record LinkStage(
|
||||
WebResource Source,
|
||||
State Initial,
|
||||
BeamDataDictionary Data,
|
||||
DownloadContextBuilder<T> CtxBuilder) : ILinkStage {
|
||||
DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {
|
||||
|
||||
private State? endState;
|
||||
private bool linksFrozen = false;
|
||||
@@ -97,6 +111,11 @@ namespace Beam.Temporary.Cli {
|
||||
return new TransformStage(Source, Data, CtxBuilder);
|
||||
}
|
||||
|
||||
public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
|
||||
CtxBuilder.WithLinks(links);
|
||||
return new TransformStage(Source, Data, CtxBuilder);
|
||||
}
|
||||
|
||||
public ILinkStage WithRange(Range range) {
|
||||
if (linksFrozen)
|
||||
throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator");
|
||||
@@ -114,24 +133,29 @@ namespace Beam.Temporary.Cli {
|
||||
private sealed record TransformStage(
|
||||
WebResource Source,
|
||||
BeamDataDictionary Data,
|
||||
DownloadContextBuilder<T> CtxBuilder) : ITransformStage {
|
||||
public IContextStage WithTransformer<U>(Func<DataBindings, Func<object, T>> factory) {
|
||||
DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {
|
||||
public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
|
||||
var transformer = factory(Data.Bindings[Source.Bindings]);
|
||||
return new ContextStage<U>(CtxBuilder, transformer);
|
||||
return new ContextStage(CtxBuilder, transformer);
|
||||
}
|
||||
|
||||
public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer) {
|
||||
return new ContextStage(CtxBuilder, transformer);
|
||||
}
|
||||
}
|
||||
|
||||
private sealed class ContextStage<U> : IContextStage {
|
||||
private readonly DownloadContextBuilder<T> _ctxBuilder;
|
||||
private readonly Func<object, T> _transformer;
|
||||
private sealed class ContextStage : IContextStage {
|
||||
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
|
||||
private readonly AsyncTransformer<RawType, OutType> _transformer;
|
||||
private int _parallelism = 4;
|
||||
private bool useFragments = false;
|
||||
|
||||
public ContextStage(DownloadContextBuilder<T> ctxBuilder, Func<object, T> transformer) {
|
||||
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
|
||||
_ctxBuilder = ctxBuilder;
|
||||
_transformer = transformer;
|
||||
}
|
||||
|
||||
public IContextStage Configure(Action<DownloadContextBuilder<T>> configure) {
|
||||
public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
|
||||
configure(_ctxBuilder);
|
||||
return this;
|
||||
}
|
||||
@@ -151,21 +175,66 @@ namespace Beam.Temporary.Cli {
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadEnumerable<T> Build() {
|
||||
var context = _ctxBuilder.Build();
|
||||
SequentialFragmentDownloader<T> sequentialDownloader = new(
|
||||
context,
|
||||
ctx => new UnitFragmentDownloader<T>(
|
||||
public IContextStage UseFragments() {
|
||||
useFragments = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
|
||||
return (useFragments, _transformer, context.AsyncFailurePredicates) switch {
|
||||
// ──────────────── fragmented HTML ────────────────
|
||||
(true, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
|
||||
=> new UnitFragmentDownloader<OutType>(
|
||||
context.Web,
|
||||
_transformer,
|
||||
context.AsyncFailurePredicates,
|
||||
asyncHtmlTransformer,
|
||||
documentFailurePredicates,
|
||||
_parallelism,
|
||||
context.DownloadLogger),
|
||||
context.DownloadLogger);
|
||||
var enumerable = new DownloadEnumerable<T>(
|
||||
sequentialDownloader
|
||||
.UnwrapFragmented());
|
||||
sequentialDownloader.DisposeAsync().AsTask().Wait();
|
||||
// ──────────────── fragmented binary ────────────────
|
||||
(true, AsyncTransformer<byte[], OutType> asyncBinaryTransformer,
|
||||
AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates)
|
||||
=> new UnitFragmentDownloaderBinary<OutType>(
|
||||
context.Client,
|
||||
asyncBinaryTransformer,
|
||||
responseFailurePredicates,
|
||||
_parallelism,
|
||||
context.DownloadLogger),
|
||||
// ──────────────── single HTML ────────────────
|
||||
(false, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
|
||||
=> new UnitDownloader<OutType>(
|
||||
context.Web,
|
||||
asyncHtmlTransformer,
|
||||
documentFailurePredicates),
|
||||
// ──────────────── single binary ────────────────
|
||||
(false, AsyncTransformer<byte[], OutType> asyncBinaryTransformer,
|
||||
AsyncDownloadFailurePredicate<byte[]>[] responseFailurePredicates)
|
||||
=> new UnitDownloaderBinary<OutType>(
|
||||
context.Client,
|
||||
asyncBinaryTransformer,
|
||||
responseFailurePredicates),
|
||||
_ => throw new Exception($"Unsupported transformer / failure-predicate combination. Missing pattern: {useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
|
||||
};
|
||||
}
|
||||
|
||||
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
|
||||
var copyOfContext = context.CreateBuilder().Build();
|
||||
return useFragments switch {
|
||||
true => new SequentialFragmentDownloader<RawType, OutType>(
|
||||
copyOfContext,
|
||||
ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
|
||||
context.DownloadLogger).UnwrapFragmented(),
|
||||
false => new SequentialDownloader<RawType, OutType>(
|
||||
copyOfContext,
|
||||
ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
|
||||
context.DownloadLogger).WrapOrdered()
|
||||
};
|
||||
}
|
||||
|
||||
public DownloadEnumerable<OutType> Build() {
|
||||
var context = _ctxBuilder.Build();
|
||||
var enumerable = new DownloadEnumerable<OutType>(ConstructDownloader(context));
|
||||
return enumerable;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,15 +11,6 @@ namespace Beam.Temporary.Cli {
|
||||
public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");
|
||||
public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");
|
||||
|
||||
public HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return new ArticleData() {
|
||||
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
|
||||
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
|
||||
};
|
||||
};
|
||||
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
|
||||
var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
|
||||
@@ -34,14 +25,7 @@ namespace Beam.Temporary.Cli {
|
||||
var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
|
||||
var binding = auxiliary.Bindings;
|
||||
|
||||
return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
|
||||
return new ArticleData() {
|
||||
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
|
||||
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
|
||||
};
|
||||
});
|
||||
return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
|
||||
}
|
||||
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
|
||||
@@ -60,20 +44,7 @@ namespace Beam.Temporary.Cli {
|
||||
template.Builder, new NumberedStateChanger(template.Factory.Behavior),
|
||||
res.Resource.TemplateInitialData));
|
||||
|
||||
return new DownloadContext<IDocument>(web, sle,
|
||||
transformer: (x) => {
|
||||
var resolved = aggregator.Bindings.Resolve(x);
|
||||
var articleData = new ArticleData() {
|
||||
Name = OnlineCleaner.Clean(resolved.Title),
|
||||
};
|
||||
Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
|
||||
meta.Add(ChapterKey, articleData);
|
||||
if (metaData is not null)
|
||||
meta.Add(BookKey, metaData);
|
||||
return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) {
|
||||
MetaData = meta
|
||||
};
|
||||
},
|
||||
return new DownloadContext<IDocument>(web, new(), sle,
|
||||
retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
|
||||
//downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
|
||||
asyncFailurePredicates: [
|
||||
|
||||
@@ -53,34 +53,11 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
CancellationTokenSource cts = new();
|
||||
|
||||
HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return new ArticleData() {
|
||||
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
|
||||
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
|
||||
};
|
||||
};
|
||||
|
||||
HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
var resolved = binding?.Resolve(x);
|
||||
var articleData = new ArticleData() {
|
||||
Name = OnlineCleaner.Clean(resolved?.Title),
|
||||
};
|
||||
Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
|
||||
meta.Add(IArchitecture.Default.ChapterKey, articleData);
|
||||
if (metaData is not null)
|
||||
meta.Add(IArchitecture.Default.BookKey, metaData);
|
||||
return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
MetaData = meta
|
||||
};
|
||||
};
|
||||
|
||||
var novel = new DataKey<TextResource>("novels:house_of_horrors");
|
||||
|
||||
var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
|
||||
var metadata2 = await DownloadBuilder<HtmlDocument, IDocumentMetaData>.FromMeta(novel, BeamData)
|
||||
.WithLink()
|
||||
.WithTransformer(ArticleDataTransformer)
|
||||
.WithTransformer(CommonTransformers.ArticleDataTransformer)
|
||||
.Configure((x) => x
|
||||
.WithDownloadLogger(logger)
|
||||
.WithRetryReporter(new Progress<RetryReport>())
|
||||
@@ -88,10 +65,10 @@ namespace Beam.Temporary.Cli {
|
||||
.Build()
|
||||
.FirstAsync();
|
||||
|
||||
var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
|
||||
var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
|
||||
.WithRange(1..5)
|
||||
.WithLinkGenerator()
|
||||
.WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
|
||||
.WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
|
||||
.Configure((x) => x
|
||||
.WithDownloadLogger(logger)
|
||||
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||
|
||||
@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Tests", "Beam.Tests\Be
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Puppeteer", "Beam.Puppeteer\Beam.Puppeteer.csproj", "{1A967563-D643-401D-A031-68DD43FACE8D}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "aeqw89.Beam", "aeqw89.Beam\aeqw89.Beam.csproj", "{583236EC-0CE8-4FA3-ADA3-860405E1F16F}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -45,6 +47,10 @@ Global
|
||||
{1A967563-D643-401D-A031-68DD43FACE8D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{1A967563-D643-401D-A031-68DD43FACE8D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{1A967563-D643-401D-A031-68DD43FACE8D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
+2
-13
@@ -1,29 +1,18 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
|
||||
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
|
||||
<Title>Beam</Title>
|
||||
<Authors>aeqw89</Authors>
|
||||
<Company>qwsdcvghyu</Company>
|
||||
<Description>A library for downloading internet resources</Description>
|
||||
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||
<PackageId>aeqw89.Beam</PackageId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
+11
-8
@@ -8,21 +8,22 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam {
|
||||
public delegate T HtmlTransformer<out T>(HtmlDocument doc);
|
||||
//public delegate T HtmlTransformer<out T>(HtmlDocument doc);
|
||||
public delegate Task<U> AsyncTransformer<in T, U>(T elem);
|
||||
public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);
|
||||
public delegate Task<T> AsyncBinaryTransformer<T>(byte[] bin);
|
||||
//public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);
|
||||
//public delegate Task<T> AsyncBinaryTransformer<T>(byte[] bin);
|
||||
|
||||
public class DownloadContext<T> : IDisposable {
|
||||
public class DownloadContext<RawType> : IDisposable {
|
||||
private bool disposedValue;
|
||||
|
||||
public DownloadContextBuilder<T> CreateBuilder()
|
||||
=> DownloadContextBuilder<T>.FromContext(this);
|
||||
public DownloadContextBuilder<RawType> CreateBuilder()
|
||||
=> DownloadContextBuilder<RawType>.FromContext(this);
|
||||
|
||||
public HttpClient Client { get; }
|
||||
public HtmlWeb Web { get; }
|
||||
public IProgress<DownloadReport>? DownloadReporter { get; set; }
|
||||
public IProgress<RetryReport>? RetryReporter { get; set; }
|
||||
public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }
|
||||
public AsyncDownloadFailurePredicate<RawType>?[]? AsyncFailurePredicates { get; }
|
||||
public TimeSpan TimeOut { get; set; }
|
||||
public IEnumerable<SourceLink> Links { get; }
|
||||
public CancellationToken CancellationToken { get; }
|
||||
@@ -30,17 +31,19 @@ namespace Beam {
|
||||
public ILogger? DownloadLogger { get; set; }
|
||||
|
||||
public DownloadContext(HtmlWeb web,
|
||||
HttpClient client,
|
||||
IEnumerable<SourceLink> links,
|
||||
CancellationToken cancellationToken = default,
|
||||
IProgress<DownloadReport>? downloadReporter = null,
|
||||
IProgress<RetryReport>? retryReporter = null,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>?[]? asyncFailurePredicates = null,
|
||||
AsyncDownloadFailurePredicate<RawType>?[]? asyncFailurePredicates = null,
|
||||
TimeSpan? timeOut = null,
|
||||
ILogger? downloadLogger = null) {
|
||||
ArgumentNullException.ThrowIfNull(web, nameof(web));
|
||||
ArgumentNullException.ThrowIfNull(links, nameof(links));
|
||||
|
||||
Web = web;
|
||||
Client = client;
|
||||
Links = links;
|
||||
CancellationToken = cancellationToken;
|
||||
DownloadReporter = downloadReporter;
|
||||
|
||||
@@ -8,75 +8,83 @@ using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
|
||||
public class DownloadContextBuilder<T> {
|
||||
public class DownloadContextBuilder<RawType> {
|
||||
private HtmlWeb _web;
|
||||
private HttpClient _client;
|
||||
private IProgress<DownloadReport>? _downloadReporter;
|
||||
private IProgress<RetryReport>? _retryReporter;
|
||||
private AsyncDownloadFailurePredicate<HtmlDocument>?[]? _asyncFailurePredicates;
|
||||
private AsyncDownloadFailurePredicate<RawType>?[] _asyncFailurePredicates = [];
|
||||
private TimeSpan _timeOut;
|
||||
private IEnumerable<SourceLink> _links;
|
||||
private CancellationToken _cancellationToken;
|
||||
private DocumentCache _cache;
|
||||
private ILogger? _downloadLogger;
|
||||
|
||||
public DownloadContextBuilder(HtmlWeb? web = null) {
|
||||
public DownloadContextBuilder(HttpClient? client = null, HtmlWeb? web = null) {
|
||||
// You can initialize defaults here if needed, e.g.:
|
||||
// _timeOut = TimeSpan.FromSeconds(30);
|
||||
// _cancellationToken = CancellationToken.None;
|
||||
_client = client ?? new();
|
||||
_web = web ?? new();
|
||||
_links = [];
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithWeb(HtmlWeb web) {
|
||||
public DownloadContextBuilder<RawType> WithWeb(HtmlWeb web) {
|
||||
_web = web;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithDownloadReporter(IProgress<DownloadReport> downloadReporter) {
|
||||
public DownloadContextBuilder<RawType> WithClient(HttpClient client) {
|
||||
_client = client;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithDownloadReporter(IProgress<DownloadReport> downloadReporter) {
|
||||
_downloadReporter = downloadReporter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithRetryReporter(IProgress<RetryReport> retryReporter) {
|
||||
public DownloadContextBuilder<RawType> WithRetryReporter(IProgress<RetryReport> retryReporter) {
|
||||
_retryReporter = retryReporter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate<HtmlDocument>[] predicates) {
|
||||
public DownloadContextBuilder<RawType> WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate<RawType>[] predicates) {
|
||||
_asyncFailurePredicates = predicates;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithTimeOut(TimeSpan timeOut) {
|
||||
public DownloadContextBuilder<RawType> WithTimeOut(TimeSpan timeOut) {
|
||||
_timeOut = timeOut;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithLinks(IEnumerable<SourceLink> links) {
|
||||
public DownloadContextBuilder<RawType> WithLinks(IEnumerable<SourceLink> links) {
|
||||
_links = links;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithCancellationToken(CancellationToken cancellationToken) {
|
||||
public DownloadContextBuilder<RawType> WithCancellationToken(CancellationToken cancellationToken) {
|
||||
_cancellationToken = cancellationToken;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithCache(DocumentCache cache) {
|
||||
public DownloadContextBuilder<RawType> WithCache(DocumentCache cache) {
|
||||
_cache = cache;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<T> WithDownloadLogger(ILogger downloadLogger) {
|
||||
public DownloadContextBuilder<RawType> WithDownloadLogger(ILogger downloadLogger) {
|
||||
_downloadLogger = downloadLogger;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public DownloadContext<T> Build() {
|
||||
public DownloadContext<RawType> Build() {
|
||||
// Construct the DownloadContext<T> using the collected values.
|
||||
var context = new DownloadContext<T>(
|
||||
var context = new DownloadContext<RawType>(
|
||||
web: _web,
|
||||
client: _client,
|
||||
links: _links,
|
||||
cancellationToken: _cancellationToken,
|
||||
downloadReporter: _downloadReporter,
|
||||
@@ -95,15 +103,15 @@ namespace Beam {
|
||||
return context;
|
||||
}
|
||||
|
||||
public static DownloadContextBuilder<T> FromContext(DownloadContext<T> existing) {
|
||||
public static DownloadContextBuilder<RawType> FromContext(DownloadContext<RawType> existing) {
|
||||
if (existing == null) throw new ArgumentNullException(nameof(existing));
|
||||
|
||||
return new DownloadContextBuilder<T>(existing.Web)
|
||||
return new DownloadContextBuilder<RawType>(existing.Client, existing.Web)
|
||||
.WithLinks(existing.Links)
|
||||
.WithCancellationToken(existing.CancellationToken)
|
||||
.WithDownloadReporter(existing.DownloadReporter!)
|
||||
.WithRetryReporter(existing.RetryReporter!)
|
||||
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<HtmlDocument>>())
|
||||
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<RawType>>())
|
||||
.WithTimeOut(existing.TimeOut)
|
||||
.WithDownloadLogger(existing.DownloadLogger!)
|
||||
.WithCache(existing.Cache);
|
||||
|
||||
@@ -19,5 +19,20 @@
|
||||
await fragmented.DisposeAsync().ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Wraps an asynchronous enumerator so that each element is paired with its
/// zero-based position in the sequence.
/// </summary>
/// <typeparam name="T">Element type of the wrapped enumerator.</typeparam>
/// <param name="enumerator">The enumerator to wrap. It is disposed when the wrapper finishes or is disposed.</param>
/// <returns>An enumerator yielding <c>Ordered&lt;T&gt;</c> values built from each element and its index.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="enumerator"/> is <c>null</c>. Because this is an async iterator,
/// the exception surfaces on the first <c>MoveNextAsync</c> call, not at the call site.
/// </exception>
public static async IAsyncEnumerator<Ordered<T>> WrapOrdered<T>(this IAsyncEnumerator<T> enumerator) {
    if (enumerator is null)
        throw new ArgumentNullException(nameof(enumerator));

    try {
        int index = 0;
        while (await enumerator.MoveNextAsync().ConfigureAwait(false)) {
            var item = enumerator.Current;

            // A null element terminates the wrapped sequence early.
            if (item is null)
                yield break;

            // Post-increment so consecutive elements receive 0, 1, 2, ...
            // (previously the counter was never advanced and every item got 0).
            yield return new Ordered<T>(item, index++);
        }
    } finally {
        await enumerator.DisposeAsync().ConfigureAwait(false);
    }
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,24 +2,24 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam {
|
||||
public class SequentialDownloader<TInput, TOutput> : IAsyncEnumerator<TOutput> {
|
||||
public TOutput Current { get; protected set; }
|
||||
public DownloadContext<TInput> Context { get; }
|
||||
public class SequentialDownloader<RawType, OutType> : IAsyncEnumerator<OutType> {
|
||||
public OutType Current { get; protected set; }
|
||||
public DownloadContext<RawType> Context { get; }
|
||||
public ILogger? Logger { get; set; }
|
||||
public int LastOrder { get; set; } = 0;
|
||||
|
||||
protected IEnumerator<SourceLink> LinksEnumerator;
|
||||
|
||||
public Func<IUnitDownloader<TOutput>> GetUnitDownloader { get; set; }
|
||||
public Func<IUnitDownloader<OutType>> GetUnitDownloader { get; set; }
|
||||
|
||||
public SequentialDownloader(DownloadContext<TInput> context, Func<DownloadContext<TInput>, IUnitDownloader<TOutput>> getUnitDownloader, ILogger? logger = null) {
|
||||
/// <summary>
/// Creates a downloader over the links of <paramref name="context"/> and
/// positions the link enumerator on the first link.
/// </summary>
/// <param name="context">Download context whose <c>Links</c> are enumerated.</param>
/// <param name="getUnitDownloader">
/// Factory that receives <paramref name="context"/>; it is invoked every time
/// <see cref="GetUnitDownloader"/> is called.
/// </param>
/// <param name="logger">Optional logger stored in <see cref="Logger"/>.</param>
/// <exception cref="ArgumentOutOfRangeException">The context contains no links.</exception>
public SequentialDownloader(DownloadContext<RawType> context, Func<DownloadContext<RawType>, IUnitDownloader<OutType>> getUnitDownloader, ILogger? logger = null) {
    Context = context;
    Logger = logger;

    // An enumerator returned by GetEnumerator() already sits before the first
    // element. Calling Reset() here is redundant and throws
    // NotSupportedException for iterator-based sequences (yield / LINQ).
    LinksEnumerator = Context.Links.GetEnumerator();

    if (!LinksEnumerator.MoveNext()) {
        // Do not leak the enumerator when construction fails.
        LinksEnumerator.Dispose();
        throw new ArgumentOutOfRangeException(nameof(context), S.M.LinksCannotBeEmpty);
    }

    Current = default(OutType)!;
    GetUnitDownloader = () => getUnitDownloader(Context);
}
|
||||
|
||||
@@ -51,11 +51,11 @@ namespace Beam {
|
||||
tryProgress: Context.RetryReporter);
|
||||
|
||||
if (!result) {
|
||||
Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name);
|
||||
Logger?.LogWarning("Failed to download Unit<{}>", typeof(OutType).Name);
|
||||
return false; // unit download failed
|
||||
}
|
||||
if (downloadedT is null) {
|
||||
Logger?.LogWarning("Failed to download Unit<{}>", typeof(TOutput).Name);
|
||||
Logger?.LogWarning("Failed to download Unit<{}>", typeof(OutType).Name);
|
||||
return false; // unit download failed
|
||||
}
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace Beam {
|
||||
public class SequentialFragmentDownloader<T> : SequentialDownloader<T, Fragment<Ordered<T>>> {
|
||||
public class SequentialFragmentDownloader<RawType, OutType> : SequentialDownloader<RawType, Fragment<Ordered<OutType>>> {
|
||||
public SequentialFragmentDownloader(
|
||||
DownloadContext<T> context,
|
||||
Func<DownloadContext<T>, IUnitDownloader<Fragment<Ordered<T>>>> getUnitDownloader,
|
||||
DownloadContext<RawType> context,
|
||||
Func<DownloadContext<RawType>, IUnitDownloader<Fragment<Ordered<OutType>>>> getUnitDownloader,
|
||||
ILogger? logger = null)
|
||||
: base(context, getUnitDownloader, logger) {}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Renders a <see cref="Type"/> as a compact, source-style name, e.g.
/// <c>Foo&lt;T&gt;</c>, <c>Dictionary&lt;Int32, String&gt;[,]</c>,
/// <c>Span&lt;Byte&gt;*</c> or <c>MyClass.Nested</c>.
/// </summary>
/// <remarks>
/// Names are built from <see cref="System.Reflection.MemberInfo.Name"/>, so CLR names
/// (<c>Int32</c>, not <c>int</c>) are produced and namespaces are omitted.
/// </remarks>
public static class TypeExtensions {
    /// <summary>
    /// Builds the source-style name of <paramref name="type"/>.
    /// </summary>
    /// <param name="type">The type to describe.</param>
    /// <returns>
    /// The type name with generic arguments in angle brackets, array ranks as
    /// <c>[,]</c>, pointers as <c>*</c>, by-ref types as <c>&amp;</c> and nested
    /// types joined to their declaring types with <c>.</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="type"/> is <c>null</c>.</exception>
    public static string AsUniqueName(this Type type) {
        ArgumentNullException.ThrowIfNull(type);

        if (type.IsGenericParameter)
            return type.Name;

        if (type.IsArray) {
            var commas = new string(',', type.GetArrayRank() - 1);
            return $"{type.GetElementType()!.AsUniqueName()}[{commas}]";
        }

        if (type.IsPointer)
            return $"{type.GetElementType()!.AsUniqueName()}*";

        if (type.IsByRef)
            return $"{type.GetElementType()!.AsUniqueName()}&";

        return BuildNestedName(type);
    }

    /// <summary>
    /// Renders a (possibly nested, possibly generic) type, outermost declaring type first.
    /// </summary>
    private static string BuildNestedName(Type type) {
        // Pushing from the innermost type outwards means enumerating the stack
        // visits the outermost declaring type first.
        var chain = new Stack<Type>();
        for (Type? level = type; level is not null; level = level.DeclaringType)
            chain.Push(level);

        // A nested type reports the generic arguments of all enclosing types followed
        // by its own, while DeclaringType only yields open definitions. So the arguments
        // are taken from the original type and handed out level by level.
        var allArgs = type.GetGenericArguments();
        var consumed = 0;
        var segments = new List<string>(chain.Count);

        foreach (var level in chain) {
            var name = level.Name;

            // Only types that declare their own type parameters carry a `N suffix;
            // e.g. Dictionary<,>.KeyCollection is generic but is named "KeyCollection".
            var tick = name.IndexOf('`');
            if (tick >= 0)
                name = name[..tick];

            // Parameter counts are cumulative along the nesting chain, so the
            // arguments introduced at this level are the ones not yet consumed.
            var own = Math.Min(level.GetGenericArguments().Length, allArgs.Length) - consumed;
            if (own > 0) {
                var rendered = allArgs.Skip(consumed).Take(own).Select(AsUniqueName);
                name = $"{name}<{string.Join(", ", rendered)}>";
                consumed += own;
            }

            segments.Add(name);
        }

        return string.Join(".", segments);
    }
}
|
||||
}
|
||||
@@ -10,9 +10,9 @@ namespace Beam {
|
||||
/// <param name="web"></param>
|
||||
/// <param name="transformer"></param>
|
||||
/// <param name="failurePredicate"></param>
|
||||
public class UnitDownloader<T>(HtmlWeb web, AsyncHtmlTransformer<T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
|
||||
public class UnitDownloader<T>(HtmlWeb web, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
|
||||
public HtmlWeb Web { get; } = web;
|
||||
public virtual AsyncHtmlTransformer<T> Transformer { get; } = transformer;
|
||||
public virtual AsyncTransformer<HtmlDocument, T> Transformer { get; } = transformer;
|
||||
public virtual AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicates { get; } = failurePredicate;
|
||||
|
||||
public int LinksPerDownload { get; } = 1;
|
||||
|
||||
@@ -10,12 +10,12 @@ namespace Beam {
|
||||
/// </summary>
|
||||
public class UnitDownloaderBinary<T>(
|
||||
HttpClient client,
|
||||
AsyncBinaryTransformer<T> transformer,
|
||||
AsyncDownloadFailurePredicate<HttpResponseMessage>?[]? failurePredicates = null)
|
||||
AsyncTransformer<byte[], T> transformer,
|
||||
AsyncDownloadFailurePredicate<byte[]>?[]? failurePredicates = null)
|
||||
: IUnitDownloader<T> {
|
||||
public HttpClient Client { get; } = client;
|
||||
public virtual AsyncBinaryTransformer<T> Transformer { get; } = transformer;
|
||||
public virtual AsyncDownloadFailurePredicate<HttpResponseMessage>?[]? FailurePredicates { get; } = failurePredicates;
|
||||
public virtual AsyncTransformer<byte[], T> Transformer { get; } = transformer;
|
||||
public virtual AsyncDownloadFailurePredicate<byte[]>?[]? FailurePredicates { get; } = failurePredicates;
|
||||
|
||||
public int LinksPerDownload { get; } = 1;
|
||||
|
||||
@@ -24,9 +24,9 @@ namespace Beam {
|
||||
if (FailurePredicates is null) return false;
|
||||
|
||||
var failed = false;
|
||||
await Parallel.ForEachAsync(FailurePredicates, async (pred, _) => {
|
||||
await Parallel.ForEachAsync(FailurePredicates, async (pred, ct) => {
|
||||
if (failed || pred is null) return;
|
||||
if (await pred(response)) failed = true;
|
||||
if (await pred(await response.Content.ReadAsByteArrayAsync(ct))) failed = true;
|
||||
});
|
||||
return failed;
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ using System.Threading.Tasks;
|
||||
namespace Beam {
|
||||
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
|
||||
public UnitFragmentDownloader(HtmlWeb web,
|
||||
AsyncHtmlTransformer<T> transformer,
|
||||
AsyncTransformer<HtmlDocument, T> transformer,
|
||||
AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
|
||||
int fragmentSize = 4,
|
||||
ILogger? logger = null,
|
||||
@@ -24,7 +24,7 @@ namespace Beam {
|
||||
}
|
||||
|
||||
public HtmlWeb Web { get; }
|
||||
public AsyncHtmlTransformer<T> Transformer { get; }
|
||||
public AsyncTransformer<HtmlDocument, T> Transformer { get; }
|
||||
public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }
|
||||
public int LinksPerDownload { get; set; }
|
||||
public ILogger? Logger { get; set; }
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
using System;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam {
|
||||
/// <summary>
/// Downloads a batch of binary links and gathers the transformed results into a
/// single fragment. Each link is handed to an inner single-link downloader, which
/// receives the failure predicates and the retry settings.
/// </summary>
/// <typeparam name="T">Type produced by the transformer for each downloaded payload.</typeparam>
/// <param name="client">HTTP client exposed through <see cref="Client"/> and used for the default inner downloader.</param>
/// <param name="transformer">Converts downloaded bytes into <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional predicates passed to the default inner downloader.</param>
/// <param name="fragmentSize">Initial value of <see cref="LinksPerDownload"/>.</param>
/// <param name="logger">Optional logger used to report links that could not be retrieved.</param>
/// <param name="internalDownloader">
/// Optional single-link downloader; when omitted a <c>UnitDownloaderBinary&lt;T&gt;</c>
/// is created from the other arguments.
/// </param>
public class UnitFragmentDownloaderBinary<T>(
    HttpClient client,
    AsyncTransformer<byte[], T> transformer,
    AsyncDownloadFailurePredicate<byte[]>?[]? failurePredicate = null,
    int fragmentSize = 4,
    ILogger? logger = null,
    IUnitDownloader<T>? internalDownloader = null)
    : IUnitDownloader<Fragment<Ordered<T>>> {

    // Performs the download of one link at a time.
    private readonly IUnitDownloader<T> _perLinkDownloader =
        internalDownloader ?? new UnitDownloaderBinary<T>(client, transformer, failurePredicate);

    public HttpClient Client { get; } = client;

    public AsyncTransformer<byte[], T> Transformer { get; } = transformer;

    public AsyncDownloadFailurePredicate<byte[]>?[]? FailurePredicate { get; } = failurePredicate;

    public int LinksPerDownload { get; set; } = fragmentSize;

    public ILogger? Logger { get; set; } = logger;

    /// <summary>
    /// Downloads every link in <paramref name="link"/> in parallel and stores each
    /// result, tagged with the link's order, in a new fragment.
    /// </summary>
    /// <returns>
    /// A flag that is <c>true</c> only when every link succeeded, together with the
    /// fragment (which is marked complete only in that case).
    /// </returns>
    async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(
        Ordered<string>[] link,
        CancellationToken ct,
        int maximumRetryCount,
        IProgress<RetryReport>? tryProgress) {
        var fragment = new Fragment<Ordered<T>>(link.Length);

        // A brand-new fragment must hand out its updater; anything else is a logic error.
        if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater))
            throw new S.AssertionException(S.M.NewFragmentShouldBeFree);

        var isFailure = false;

        await Parallel.ForEachAsync(link, async (orderedLink, pct) => {
            pct.ThrowIfCancellationRequested();
            ct.ThrowIfCancellationRequested();

            var (ok, payload) = await _perLinkDownloader.TryDownload(
                [orderedLink], ct, maximumRetryCount, tryProgress);

            if (ok && payload is not null) {
                updater(new Ordered<T>(payload, orderedLink.Order));
                return;
            }

            // Record the failure; the remaining links are still attempted.
            Interlocked.Exchange(ref isFailure, true);
            Logger?.LogError("Failed to retrieve {Link} order={Order}",
                orderedLink.Data, orderedLink.Order);
        });

        var succeeded = !isFailure;
        if (succeeded)
            Fragment<Ordered<T>>.SetComplete(fragment, true);

        Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
        return (succeeded, fragment);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
namespace aeqw89.Beam {
    /// <summary>
    /// Empty placeholder type; it declares no members.
    /// </summary>
    public class Class1 { }
}
|
||||
@@ -0,0 +1,121 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<Title>Beam</Title>
|
||||
<Authors>aeqw89</Authors>
|
||||
<Company>qwsdcvghyu</Company>
|
||||
<Version>1.2.4</Version>
|
||||
<Description>A library for downloading internet resources</Description>
|
||||
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||
<PackageId>aeqw89.Beam</PackageId>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Puppeteer\Beam.Puppeteer.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Temporary.Cli\Beam.Temporary.Cli.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="..\Beam\bin\$(Configuration)\$(TargetFramework)\Beam.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Dynamic\bin\$(Configuration)\$(TargetFramework)\Beam.Dynamic.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Exports\bin\$(Configuration)\$(TargetFramework)\Beam.Exports.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Puppeteer\bin\$(Configuration)\$(TargetFramework)\Beam.Puppeteer.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Temporary.Cli\bin\$(Configuration)\$(TargetFramework)\Beam.Temporary.Cli.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Temporary.Cli\bin\$(Configuration)\$(TargetFramework)\HtmlAgilityPack.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Playwright" Version="1.52.0">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="OpenAI" Version="2.1.0">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
Reference in New Issue
Block a user