Introduce Beam.Fluent and Beam.Models projects
Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
    /// <summary>
    /// Reads the <c>href</c> attribute of every descendant of the node selected by <see cref="Content"/>
    /// and exposes the non-blank values either as raw strings or as <see cref="SourceLink"/> instances.
    /// </summary>
    public class AnchorCollectionDataProvider : IDataProvider<string[]>, IDataProvider<SourceLink[]> {
        /// <summary>Binding that selects the container node whose descendants are scanned for hrefs.</summary>
        public IBinding? Content { get; set; }

        /// <summary>
        /// Optional base that hrefs are appended to when building <see cref="SourceLink"/>s.
        /// When null, hrefs are used exactly as found.
        /// </summary>
        public string? RelativeTo { get; set; }

        /// <summary>
        /// Joins <paramref name="relative"/> onto <paramref name="base"/> with exactly one '/' between them.
        /// </summary>
        /// <param name="base">Base to prepend; when null, <paramref name="relative"/> is returned unchanged.</param>
        /// <param name="relative">The href taken from the document.</param>
        /// <returns>The joined string, or <paramref name="relative"/> itself when it needs no base.</returns>
        private string GetAbsolute(string? @base, string relative) {
            if (@base is null)
                return relative;

            // An href that already carries its own scheme (https://…, mailto:…) must not be glued onto
            // the base. The IsFile check matters because rooted paths such as "/chapter/2" can parse as
            // absolute file URIs on Unix, and those still need the base.
            if (Uri.TryCreate(relative, UriKind.Absolute, out var parsed) && !parsed.IsFile)
                return relative;

            if (@base.EndsWith('/'))
                @base = @base[..^1];
            if (relative.StartsWith('/'))
                relative = relative[1..];
            return @base + '/' + relative;
        }

        /// <summary>
        /// Returns the non-blank <c>href</c> values of all descendants of the bound node, in document order.
        /// </summary>
        /// <param name="document">Document the binding is evaluated against.</param>
        /// <returns>An empty array when <see cref="Content"/> is unset or selects nothing.</returns>
        public string[] Get(HtmlDocument document) {
            var node = Content?.Select(document);
            if (node is null)
                return [];

            return node.Descendants()
                .Select(child => child.GetAttributeValue("href", ""))
                .Where(href => !string.IsNullOrWhiteSpace(href))
                .ToArray();
        }

        /// <summary>
        /// Same as <see cref="Get(HtmlDocument)"/>, but each href is resolved against
        /// <see cref="RelativeTo"/> and wrapped in a <see cref="SourceLink"/>.
        /// </summary>
        SourceLink[] IDataProvider<SourceLink[]>.Get(HtmlDocument document) {
            List<SourceLink> resolved = [];

            foreach (var href in Get(document)) {
                // Resolve once and reuse the result for both the validity check and the link itself.
                var target = GetAbsolute(RelativeTo, href);
                if (Uri.TryCreate(target, UriKind.RelativeOrAbsolute, out _))
                    resolved.Add(new SourceLink(target));
            }

            return resolved.ToArray();
        }
    }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
    /// <summary>
    /// Reads the <c>href</c> attribute of the single node selected by <see cref="Content"/>,
    /// either as a raw string or as a <see cref="SourceLink"/>.
    /// </summary>
    public class AnchorDataProvider : IDataProvider<SourceLink>, IDataProvider<string> {
        /// <summary>Binding that selects the anchor node.</summary>
        public IBinding? Content { get; set; }

        /// <summary>
        /// Returns the <c>href</c> of the bound node.
        /// </summary>
        /// <param name="document">Document the binding is evaluated against.</param>
        /// <returns>
        /// The attribute value, or an empty string when <see cref="Content"/> is unset, the binding
        /// selects nothing, or the node has no <c>href</c>. Never null.
        /// </returns>
        public string Get(HtmlDocument document) {
            if (Content is null)
                return "";

            return Content.Select(document)?.GetAttributeValue("href", "") ?? "";
        }

        /// <summary>
        /// Returns the bound node's <c>href</c> as a <see cref="SourceLink"/>, or
        /// <see cref="SourceLink.InvalidLink"/> when there is no usable href.
        /// </summary>
        SourceLink IDataProvider<SourceLink>.Get(HtmlDocument document) {
            var href = Get(document);

            // Get() signals "nothing found" with an empty string, not null, and an empty string is
            // accepted by Uri.TryCreate as a relative URI, so it has to be rejected explicitly.
            if (string.IsNullOrWhiteSpace(href))
                return SourceLink.InvalidLink;

            if (!Uri.TryCreate(href, UriKind.RelativeOrAbsolute, out _))
                return SourceLink.InvalidLink;

            return new SourceLink(href);
        }
    }
}
|
||||
@@ -6,8 +6,8 @@
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" />
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
|
||||
</ItemGroup>
|
||||
|
||||
+169
-10
@@ -1,22 +1,161 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Beam.Dynamic {
|
||||
public record class DataBindings {
|
||||
public IDataProvider<string>? Title { get; set; }
|
||||
public IDataProvider<string[]>? Authors { get; set; }
|
||||
public IDataProvider<string>? Description { get; set; }
|
||||
public IDataProvider<string>? Content { get; set; }
|
||||
public IDataProvider<string[]>? Language { get; set; }
|
||||
public IDataProvider<string[]>? Tags { get; set; }
|
||||
#region ---------------------- Common Bindings ----------------------
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string>? Title {
|
||||
get => Get<string>(nameof(Title));
|
||||
set => Providers[nameof(Title)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string[]>? Authors {
|
||||
get => Get<string[]>(nameof(Authors));
|
||||
set => Providers[nameof(Authors)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string>? Description {
|
||||
get => Get<string>(nameof(Description));
|
||||
set => Providers[nameof(Description)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string>? Content {
|
||||
get => Get<string>(nameof(Content));
|
||||
set => Providers[nameof(Content)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string[]>? Language {
|
||||
get => Get<string[]>(nameof(Language));
|
||||
set => Providers[nameof(Language)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string[]>? Tags {
|
||||
get => Get<string[]>(nameof(Tags));
|
||||
set => Providers[nameof(Tags)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string>? Publisher {
|
||||
get => Get<string>(nameof(Publisher));
|
||||
set => Providers[nameof(Publisher)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<DateTimeOffset>? PublicationDate {
|
||||
get => Get<DateTimeOffset>(nameof(PublicationDate));
|
||||
set => Providers[nameof(PublicationDate)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string>? ISBN {
|
||||
get => Get<string>(nameof(ISBN));
|
||||
set => Providers[nameof(ISBN)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<int>? PageCount {
|
||||
get => Get<int>(nameof(PageCount));
|
||||
set => Providers[nameof(PageCount)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<SourceLink>? CoverImage {
|
||||
get => Get<SourceLink>(nameof(CoverImage));
|
||||
set => Providers[nameof(CoverImage)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string[]>? Series {
|
||||
get => Get<string[]>(nameof(Series));
|
||||
set => Providers[nameof(Series)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<int>? Edition {
|
||||
get => Get<int>(nameof(Edition));
|
||||
set => Providers[nameof(Edition)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string[]>? Contributors {
|
||||
get => Get<string[]>(nameof(Contributors));
|
||||
set => Providers[nameof(Contributors)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string[]>? Subjects {
|
||||
get => Get<string[]>(nameof(Subjects));
|
||||
set => Providers[nameof(Subjects)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<string>? Rights {
|
||||
get => Get<string>(nameof(Rights));
|
||||
set => Providers[nameof(Rights)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<SourceLink[]>? TableOfContents {
|
||||
get => Get<SourceLink[]>(nameof(TableOfContents));
|
||||
set => Providers[nameof(TableOfContents)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<SourceLink[]>? PagesDropDown {
|
||||
get => Get<SourceLink[]>(nameof(PagesDropDown));
|
||||
set => Providers[nameof(PagesDropDown)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<SourceLink>? NextPageButton {
|
||||
get => Get<SourceLink>(nameof(NextPageButton));
|
||||
set => Providers[nameof(NextPageButton)] = value;
|
||||
}
|
||||
[JsonIgnore]
|
||||
public IDataProvider<SourceLink>? PreviousPageButton {
|
||||
get => Get<SourceLink>(nameof(PreviousPageButton));
|
||||
set => Providers[nameof(PreviousPageButton)] = value;
|
||||
}
|
||||
#endregion
|
||||
|
||||
public Dictionary<string, IDataProvider?> Providers { get; set; } = [];
|
||||
|
||||
/// <summary>
/// Looks up the provider registered under <paramref name="key"/> and returns it typed as
/// <see cref="IDataProvider{T}"/>.
/// </summary>
/// <returns>
/// The provider, or null when the key is absent, maps to null, or maps to a provider
/// that is not an <see cref="IDataProvider{T}"/>.
/// </returns>
private IDataProvider<T>? Get<T>(string key)
    => Providers.TryGetValue(key, out var registered)
        ? registered as IDataProvider<T>
        : null;
|
||||
|
||||
public virtual ResolvedBindings Resolve(HtmlDocument doc) {
|
||||
return new ResolvedBindings() {
|
||||
// explicit fields already handled below
|
||||
var mappedKeys = new HashSet<string> {
|
||||
nameof(Title), nameof(Authors), nameof(Description), nameof(Content),
|
||||
nameof(Language), nameof(Tags), nameof(Publisher), nameof(PublicationDate),
|
||||
nameof(ISBN), nameof(PageCount), nameof(CoverImage), nameof(Series),
|
||||
nameof(Edition), nameof(Contributors), nameof(Subjects), nameof(Rights),
|
||||
nameof(TableOfContents), nameof(PagesDropDown), nameof(NextPageButton),
|
||||
nameof(PreviousPageButton)
|
||||
};
|
||||
|
||||
var additional = new Dictionary<string, object?>();
|
||||
|
||||
foreach (var (key, provider) in Providers) {
|
||||
if (!mappedKeys.Contains(key) && provider is not null) {
|
||||
// dynamic call so any IDataProvider<T> works
|
||||
additional[key] = ((dynamic)provider).Get(doc);
|
||||
}
|
||||
}
|
||||
|
||||
return new ResolvedBindings {
|
||||
Title = Title?.Get(doc),
|
||||
Authors = Authors?.Get(doc) ?? [],
|
||||
Language = Language?.Get(doc),
|
||||
Content = Content?.Get(doc),
|
||||
Description = Description?.Get(doc),
|
||||
Tags = Tags?.Get(doc) ?? []
|
||||
Content = Content?.Get(doc),
|
||||
Language = Language?.Get(doc),
|
||||
Tags = Tags?.Get(doc) ?? [],
|
||||
Publisher = Publisher?.Get(doc),
|
||||
PublicationDate = PublicationDate?.Get(doc),
|
||||
ISBN = ISBN?.Get(doc),
|
||||
PageCount = PageCount?.Get(doc),
|
||||
CoverImage = CoverImage?.Get(doc),
|
||||
Series = Series?.Get(doc) ?? [],
|
||||
Edition = Edition?.Get(doc),
|
||||
Contributors = Contributors?.Get(doc) ?? [],
|
||||
Subjects = Subjects?.Get(doc) ?? [],
|
||||
Rights = Rights?.Get(doc),
|
||||
TableOfContents = TableOfContents?.Get(doc) ?? [],
|
||||
PagesDropDown = PagesDropDown?.Get(doc),
|
||||
NextPageButton = NextPageButton?.Get(doc),
|
||||
PreviousPageButton = PreviousPageButton?.Get(doc),
|
||||
Additional = additional
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -28,5 +167,25 @@ namespace Beam.Dynamic {
|
||||
public string? Content { get; set; }
|
||||
public string[]? Language { get; set; }
|
||||
public string[]? Tags { get; set; }
|
||||
public string? Publisher { get; set; }
|
||||
public DateTimeOffset? PublicationDate { get; set; }
|
||||
public string? ISBN { get; set; }
|
||||
public int? PageCount { get; set; }
|
||||
public SourceLink? CoverImage { get; set; }
|
||||
public string[]? Series { get; set; }
|
||||
public int? Edition { get; set; }
|
||||
public string[]? Contributors { get; set; }
|
||||
public string[]? Subjects { get; set; }
|
||||
public string? Rights { get; set; }
|
||||
public SourceLink[]? TableOfContents { get; set; }
|
||||
public SourceLink[]? PagesDropDown { get; set; }
|
||||
public SourceLink? NextPageButton { get; set; }
|
||||
public SourceLink? PreviousPageButton { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Values resolved from any providers whose keys aren’t represented
|
||||
/// by the named properties above.
|
||||
/// </summary>
|
||||
public Dictionary<string, object?> Additional { get; set; } = [];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.InteropServices.Marshalling;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
@@ -12,6 +14,18 @@ namespace Beam.Dynamic {
|
||||
IDataProvider<string[]>,
|
||||
IDataProvider<SourceLink[]> {
|
||||
public IBinding? Content { get; set; }
|
||||
public string? RelativeTo { get; set; }
|
||||
|
||||
/// <summary>
/// Joins <paramref name="relative"/> onto <paramref name="base"/> with exactly one '/' between them.
/// </summary>
/// <param name="base">Base to prepend; when null, <paramref name="relative"/> is returned unchanged.</param>
/// <param name="relative">
/// The option value taken from the document. The caller reads it with a null default, so null is
/// tolerated here and passed straight through for the caller's URI check to reject.
/// </param>
/// <returns>The joined string, or <paramref name="relative"/> itself when it needs no base.</returns>
private string GetAbsolute(string? @base, string relative) {
    // A null value previously crashed on relative.StartsWith once a base was configured.
    if (@base is null || relative is null)
        return relative!;

    // A value that already carries its own scheme (https://…) must not be glued onto the base.
    // The IsFile check matters because rooted paths such as "/page/2" can parse as absolute
    // file URIs on Unix, and those still need the base.
    if (Uri.TryCreate(relative, UriKind.Absolute, out var parsed) && !parsed.IsFile)
        return relative;

    if (@base.EndsWith('/'))
        @base = @base[..^1];
    if (relative.StartsWith('/'))
        relative = relative[1..];
    return @base + '/' + relative;
}
|
||||
|
||||
public SourceLink[] Get(HtmlDocument document) {
|
||||
if (Content is null)
|
||||
@@ -22,9 +36,9 @@ namespace Beam.Dynamic {
|
||||
List<SourceLink> links = [];
|
||||
foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) {
|
||||
var childValue = child.GetAttributeValue("value", null);
|
||||
if (!Uri.TryCreate(childValue, UriKind.Absolute, out _))
|
||||
if (!Uri.TryCreate(GetAbsolute(RelativeTo, childValue), UriKind.Absolute, out _))
|
||||
continue;
|
||||
links.Add(new SourceLink(childValue));
|
||||
links.Add(new SourceLink(GetAbsolute(RelativeTo, childValue)));
|
||||
}
|
||||
|
||||
return links.ToArray();
|
||||
|
||||
@@ -7,7 +7,14 @@ namespace Beam.Dynamic {
|
||||
[JsonDerivedType(typeof(ContentsArrayDataProvider), "array")]
|
||||
[JsonDerivedType(typeof(ContentsDataProvider), "single")]
|
||||
[JsonDerivedType(typeof(DropDownDataProvider), "dropdown")]
|
||||
public interface IDataProvider<T> {
|
||||
[JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list")]
|
||||
[JsonDerivedType(typeof(AnchorDataProvider), "anchor")]
|
||||
/// <summary>
/// Non-generic root of all data providers, so providers with different result types can be
/// stored side by side.
/// </summary>
public interface IDataProvider {
    /// <summary>
    /// Resolves this provider against <paramref name="document"/> and returns the result's
    /// <c>ToString()</c> text.
    /// </summary>
    /// <returns>
    /// The text form of the resolved value, or an empty string when this instance is not usable as
    /// <c>IDataProvider&lt;object&gt;</c> or the resolved value is null. Interface variance only
    /// applies to reference types, so providers of value types (e.g. <c>int</c>) yield an empty string.
    /// </returns>
    public string GetString(HtmlDocument document) {
        if (this is not IDataProvider<object> typed)
            return "";

        return typed.Get(document)?.ToString() ?? "";
    }
}
|
||||
|
||||
/// <summary>
/// A data provider that extracts a value of type <typeparamref name="T"/> from an HTML document.
/// </summary>
/// <typeparam name="T">Type of the extracted value; covariant.</typeparam>
public interface IDataProvider<out T> : IDataProvider {
    /// <summary>Extracts the value from <paramref name="document"/>.</summary>
    /// <param name="document">The parsed HTML document to read from.</param>
    public T Get(HtmlDocument document);

    //public HtmlNode? GetNode(HtmlDocument document);
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Dynamic {
    /// <summary>
    /// Serializable handle for an <see cref="IStateChangeBehaviour"/>: only the string key is
    /// persisted, and the behaviour is recreated from <see cref="FactoryTable"/> on demand.
    /// </summary>
    public class StateChangerFactory {
        private string _stateChangerKey;

        /// <summary>
        /// Creates the behaviour registered under <see cref="StateChangerKey"/>.
        /// The factory delegate is invoked on every access.
        /// </summary>
        [JsonIgnore]
        public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey]();

        /// <summary>Key identifying the behaviour; must be one of <see cref="Keys"/>.</summary>
        /// <exception cref="ArgumentException">The assigned value is not in <see cref="Keys"/>.</exception>
        [JsonInclude]
        public string StateChangerKey {
            get => _stateChangerKey;
            // Validate on assignment as well, so an unknown key cannot slip past the constructor
            // check and surface later as a KeyNotFoundException from Behavior.
            set => _stateChangerKey = EnsureKnownKey(value, nameof(value));
        }

        /// <summary>Creates a factory for the behaviour registered under <paramref name="stateChangerKey"/>.</summary>
        /// <exception cref="ArgumentException"><paramref name="stateChangerKey"/> is not in <see cref="Keys"/>.</exception>
        [JsonConstructor]
        public StateChangerFactory(string stateChangerKey) {
            _stateChangerKey = EnsureKnownKey(stateChangerKey, nameof(stateChangerKey));
        }

        /// <summary>Returns <paramref name="key"/> when it is listed in <see cref="Keys"/>; throws otherwise.</summary>
        private string EnsureKnownKey(string key, string paramName) {
            if (!Keys.Contains(key))
                throw new ArgumentException($"{key} not in keys list", paramName);
            return key;
        }

        /// <summary>Maps each supported key to a delegate that produces its behaviour.</summary>
        public static Dictionary<string, Func<IStateChangeBehaviour>> FactoryTable = new() {
            { LastAsNumber, () => CommonStateChangers.LastAsNumber },
            { LastAsNumberPrefixed, () => CommonStateChangers.NthAsNumber(^1, true) },
            { Constant, () => CommonStateChangers.Constant },
        };

        /// <summary>Keys accepted by this instance. Initialised before the constructor body runs.</summary>
        public HashSet<string> Keys = [LastAsNumber, LastAsNumberPrefixed, Constant];

        public const string LastAsNumber = "LastAsNumber";
        public const string LastAsNumberPrefixed = "LastAsNumberPrefixed";
        public const string Constant = "Constant";
    }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
|
||||
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,186 @@
|
||||
using HtmlAgilityPack;
|
||||
using Beam.Playwright;
|
||||
using Beam.Stealth;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Last builder stage: collects operational settings (parallelism, timeout, retry reporting,
        /// download strategy) and turns them into a <see cref="DownloadEnumerable{OutType}"/>.
        /// </summary>
        private sealed class ContextStage : IContextStage {
            private readonly DownloadContextBuilder<RawType> _ctxBuilder;
            private readonly AsyncTransformer<RawType, OutType> _transformer;
            private int _parallelism = 4;
            private bool _useFragments = false;
            private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null;
            private StealthAsyncManipulator? _useStealthManipulator = null;
            private StealthConfig? _stealthConfig = null;

            /// <param name="ctxBuilder">Context builder that the fluent calls configure.</param>
            /// <param name="transformer">Transformer applied to each downloaded raw document.</param>
            public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
                _ctxBuilder = ctxBuilder;
                _transformer = transformer;
            }

            /// <summary>Gives the caller direct access to the underlying context builder.</summary>
            public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
                configure(_ctxBuilder);
                return this;
            }

            /// <summary>Sets the degree of parallelism used by fragment downloaders; values below 1 are clamped to 1.</summary>
            public IContextStage WithParallelism(int degree) {
                _parallelism = Math.Max(1, degree);
                return this;
            }

            /// <summary>Forwards the timeout to the context builder.</summary>
            public IContextStage WithTimeout(TimeSpan timeout) {
                _ctxBuilder.WithTimeOut(timeout);
                return this;
            }

            /// <summary>Forwards the retry reporter to the context builder.</summary>
            public IContextStage WithRetryReporter(IProgress<RetryReport> reporter) {
                _ctxBuilder.WithRetryReporter(reporter);
                return this;
            }

            /// <summary>
            /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>
            /// </summary>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage UseFragments() {
                // Selecting fragments discards any previously chosen Playwright manipulator.
                _usePlaywrightManipulator = null;
                _useFragments = true;
                return this;
            }

            /// <summary>
            /// Use a puppet browser to download the links. This strategy is mutually exclusive with <see cref="UseFragments"/>
            /// </summary>
            /// <param name="manipulator">The page manipulator</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
                // Selecting Playwright discards both the fragment flag and any stealth manipulator.
                _useFragments = false;
                _useStealthManipulator = null;
                _usePlaywrightManipulator = manipulator;
                return this;
            }

            /// <summary>
            /// Downloads through the stealth manipulator. Replaces a previously chosen Playwright
            /// manipulator; the fragment flag is left as it is.
            /// </summary>
            /// <param name="manipulator">The stealth page manipulator.</param>
            /// <param name="config">Stealth configuration handed to the stealth downloaders.</param>
            /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
            public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
                // Fail here rather than at Build(), where a missing config would otherwise surface.
                ArgumentNullException.ThrowIfNull(config);

                _usePlaywrightManipulator = null;
                _useStealthManipulator = manipulator;
                _stealthConfig = config;
                return this;
            }

            /// <summary>Returns the stealth configuration, or throws when a stealth strategy was selected without one.</summary>
            private StealthConfig RequireStealthConfig()
                => _stealthConfig ?? throw new InvalidOperationException("Stealth config is null");

            /// <summary>
            /// Picks the unit downloader matching the selected strategy (fragments / stealth / Playwright)
            /// and the document type handled by the transformer and the failure predicates.
            /// </summary>
            /// <returns>
            /// The downloader as <see cref="object"/>; callers cast it to the unit-downloader interface they expect.
            /// </returns>
            /// <exception cref="NotSupportedException">No arm matches the current combination.</exception>
            private object ConstructUnitDownloader(DownloadContext<RawType> context) {
                return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch {
                    // ──────────────── fragmented HTML ────────────────
                    (true, null, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new UnitFragmentDownloader<OutType>(
                            context.Web,
                            asyncHtmlTransformer,
                            documentFailurePredicates,
                            _parallelism,
                            context.DownloadLogger),
                    // ──────────────── fragmented binary ────────────────
                    (true, null, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new UnitFragmentDownloaderBinary<OutType>(
                            context.Client,
                            asyncBinaryTransformer,
                            responseFailurePredicates,
                            _parallelism,
                            context.DownloadLogger),
                    // ──────────────── single HTML ────────────────
                    (false, null, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new UnitDownloader<OutType>(
                            context.Web,
                            asyncHtmlTransformer,
                            documentFailurePredicates),
                    // ──────────────── single binary ────────────────
                    (false, null, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new UnitDownloaderBinary<OutType>(
                            context.Client,
                            asyncBinaryTransformer,
                            responseFailurePredicates),
                    // ──────────────── single playwright binary ────────────────
                    (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new PlaywrightUnitDownloader<OutType>(
                            context.Client,
                            manipulator,
                            asyncBinaryTransformer,
                            responseFailurePredicates),
                    // ──────────────── single playwright HTML ────────────────
                    (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new PlaywrightUnitPageDownloader<OutType>(
                            context.Web,
                            manipulator,
                            asyncHtmlTransformer,
                            documentFailurePredicates),
                    // ──────────────── single stealth HTML ────────────────
                    (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new StealthUnitPageDownloader<OutType>(
                            context.Web,
                            RequireStealthConfig(),
                            manipulator,
                            asyncHtmlTransformer,
                            documentFailurePredicates),
                    // ──────────────── single stealth binary ────────────────
                    (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new StealthUnitDownloader<OutType>(
                            context.Client,
                            RequireStealthConfig(),
                            manipulator,
                            asyncBinaryTransformer,
                            responseFailurePredicates),
                    // ──────────────── fragment stealth HTML ────────────────
                    // NOTE(review): the failure predicates are matched but not passed on here or in the
                    // next arm — confirm the stealth fragment downloaders really take none.
                    (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] downloadFailurePredicates)
                        => new StealthFragmentPageDownloader<OutType>(
                            context.Web,
                            RequireStealthConfig(),
                            manipulator,
                            asyncHtmlTransformer),
                    // ──────────────── fragment stealth binary ────────────────
                    (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new StealthFragmentDownloader<OutType>(
                            context.Client,
                            RequireStealthConfig(),
                            manipulator,
                            asyncBinaryTransformer),
                    _ => throw new NotSupportedException($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
                };
            }

            /// <summary>
            /// Wraps the unit downloader in a sequential driver (fragmented or plain, depending on the
            /// selected strategy) operating on a rebuilt copy of <paramref name="context"/>.
            /// </summary>
            private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
                var copyOfContext = context.CreateBuilder().Build();
                return _useFragments switch {
                    true => new SequentialFragmentDownloader<RawType, OutType>(
                        copyOfContext,
                        ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                        context.DownloadLogger).UnwrapFragmented(),
                    false => new SequentialDownloader<RawType, OutType>(
                        copyOfContext,
                        ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
                        context.DownloadLogger).WrapOrdered()
                };
            }

            /// <summary>Builds the download context and returns the enumerable that drives the download.</summary>
            public DownloadEnumerable<OutType> Build() {
                var context = _ctxBuilder.Build();
                return new DownloadEnumerable<OutType>(ConstructDownloader(context));
            }
        }
    }
}
|
||||
@@ -0,0 +1,43 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Post-build stage that decorates a download stream with persistence steps.
        /// Only <see cref="SaveToDirectory"/> and <see cref="AsAsyncEnumerable"/> are implemented.
        /// </summary>
        private sealed class DownloadStage(DownloadEnumerable<OutType> download) : IDownloadStage {
            private IAsyncEnumerable<Ordered<OutType>> _download = download;

            /// <summary>Returns the (possibly decorated) stream as a <see cref="DownloadEnumerable{OutType}"/>.</summary>
            public DownloadEnumerable<OutType> AsAsyncEnumerable() {
                return new DownloadEnumerable<OutType>(_download.GetAsyncEnumerator());
            }

            /// <summary>
            /// Passes every item through unchanged while writing it as JSON into <paramref name="dir"/>.
            /// The directory is created when enumeration starts; each file name is a random name
            /// followed by the item's order and ".json".
            /// </summary>
            private async IAsyncEnumerable<Ordered<OutType>> _SaveToDirectory(string dir) {
                Directory.CreateDirectory(dir);
                await foreach (var item in _download) {
                    var path = Path.Combine(dir, $"{Path.GetRandomFileName()}.{item.Order}.json");
                    // Persist the downloaded item; the previous code serialized the directory path
                    // itself, so every file contained just that string.
                    // NOTE(review): this writes the whole ordered wrapper — confirm whether only the
                    // payload should be stored.
                    await System.IO.File.WriteAllTextAsync(path, JsonSerializer.Serialize(item));
                    yield return item;
                }
            }

            /// <summary>Adds a lazy step that saves each item to <paramref name="dir"/> as it is enumerated.</summary>
            public IDownloadStage SaveToDirectory(string dir) {
                _download = _SaveToDirectory(dir);
                return this;
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public IDownloadStage SaveToFiles(IEnumerable<string> files) {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag) {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public void WaitForDownload() {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public Task WaitForDownloadAsync() {
                throw new NotImplementedException();
            }
        }
    }
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Link-selection stage for builders that are given their links explicitly
        /// instead of deriving them from a resource template.
        /// </summary>
        public interface IAlternativeLinkStage {
            /// <summary>Supplies the links to download and advances to the transformer stage.</summary>
            /// <param name="links">The links to download.</param>
            IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
        }
    }
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Transformer-selection stage that takes the transformer directly,
        /// without going through data bindings.
        /// </summary>
        public interface IAlternativeTransformStage {
            /// <summary>Sets the asynchronous transformer and advances to the context stage.</summary>
            IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);

            /// <summary>
            /// Convenience overload for a synchronous transformer: its result is wrapped in a
            /// completed task and passed to the asynchronous overload.
            /// </summary>
            IContextStage WithTransformer(Func<RawType, OutType> transformer)
                => WithTransformer(raw => Task.FromResult(transformer(raw)));
        }
    }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
using Beam.Playwright;
|
||||
using Beam.Stealth;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Final builder stage: operational settings and download-strategy selection,
        /// ending in <see cref="Build"/>.
        /// </summary>
        public interface IContextStage {
            /// <summary>Lets the caller configure the underlying context builder directly.</summary>
            IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);

            /// <summary>Sets the degree of parallelism.</summary>
            IContextStage WithParallelism(int degree);

            /// <summary>Sets the download timeout.</summary>
            IContextStage WithTimeout(TimeSpan timeout);

            /// <summary>Registers a reporter that receives retry reports.</summary>
            IContextStage WithRetryReporter(IProgress<RetryReport> reporter);

            /// <summary>Builds the configured download.</summary>
            DownloadEnumerable<OutType> Build();

            /// <summary>Selects the fragment download strategy.</summary>
            IContextStage UseFragments();

            /// <summary>Selects the Playwright download strategy with the given manipulator.</summary>
            IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator);

            /// <summary>Selects the stealth download strategy with the given manipulator and configuration.</summary>
            IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
        }
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Stage operating on an already-built download: chooses where results are stored
        /// and how the download is consumed.
        /// </summary>
        public interface IDownloadStage {
            /// <summary>Stores results in the directory <paramref name="dir"/>.</summary>
            IDownloadStage SaveToDirectory(string dir);

            /// <summary>Stores results in the given files.</summary>
            IDownloadStage SaveToFiles(IEnumerable<string> files);

            /// <summary>Stores results in <paramref name="bag"/>.</summary>
            IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag);

            /// <summary>Exposes the download as a <see cref="DownloadEnumerable{OutType}"/>.</summary>
            DownloadEnumerable<OutType> AsAsyncEnumerable();

            /// <summary>Synchronous counterpart of <see cref="WaitForDownloadAsync"/>.</summary>
            void WaitForDownload();

            /// <summary>Asynchronous counterpart of <see cref="WaitForDownload"/>.</summary>
            Task WaitForDownloadAsync();
        }
    }
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ────────────────────────────── Stages ─────────────────────────────── */

        /// <summary>
        /// Link-selection stage for builders created from a resource:
        /// links come from the resource's template.
        /// </summary>
        public interface ILinkStage {
            /// <summary>Selects a single link and advances to the transformer stage.</summary>
            ITransformStage WithLink();

            /// <summary>Selects a generated sequence of links and advances to the transformer stage.</summary>
            ITransformStage WithLinkGenerator();

            /// <summary>Restricts link generation to <paramref name="range"/>.</summary>
            ILinkStage WithRange(Range range);
        }
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
using Beam.Dynamic;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Transformer-selection stage for resource-based builders: the transformer is
        /// produced from the resource's <see cref="DataBindings"/>.
        /// </summary>
        public interface ITransformStage {
            /// <summary>
            /// Builds the transformer by passing the data bindings to <paramref name="factory"/>,
            /// then advances to the context stage.
            /// </summary>
            IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
        }
    }
}
|
||||
@@ -0,0 +1,53 @@
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ──────────────────────────── Stage types ─────────────────────────── */

        /// <summary>
        /// First builder stage: decides which links are downloaded, either from the resource's
        /// template (<see cref="WithLink"/>, <see cref="WithLinkGenerator"/>) or from an explicit
        /// list (<see cref="WithLinks"/>).
        /// </summary>
        private sealed record LinkStage(
            WebResource Source,
            State Initial,
            BeamDataContext Data,
            DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {

            // Upper bound handed to the link generator; stays null unless WithRange was called.
            private State? endState;
            // Set once WithLinkGenerator has consumed the range, after which WithRange is rejected.
            private bool linksFrozen = false;

            /// <summary>Builds one link from the resource's template and the initial state.</summary>
            public ITransformStage WithLink() {
                var link = Data.Templates[Source.Key].Builder.Build(Initial);
                CtxBuilder.WithLinks([link]);
                return new TransformStage(Source, Data, CtxBuilder);
            }

            /// <summary>
            /// Registers a link generator that starts at the initial state and uses the end state
            /// set by <see cref="WithRange"/>, if any.
            /// </summary>
            public ITransformStage WithLinkGenerator() {
                var template = Data.Templates[Source.Key];
                var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
                    template.Builder,
                    new NumberedStateChanger(template.Factory.Behavior),
                    Initial, endState));
                CtxBuilder.WithLinks(generator);
                linksFrozen = true;
                return new TransformStage(Source, Data, CtxBuilder);
            }

            /// <summary>Registers an explicit list of links.</summary>
            public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
                CtxBuilder.WithLinks(links);
                return new TransformStage(Source, Data, CtxBuilder);
            }

            /// <summary>
            /// Moves the initial state to the range's start and derives the end state from its end.
            /// Both bounds are passed to the state changer minus one.
            /// </summary>
            /// <param name="range">Range with from-start bounds and <c>start &lt;= end</c>.</param>
            /// <exception cref="InvalidOperationException">Called after <see cref="WithLinkGenerator"/>.</exception>
            /// <exception cref="ArgumentOutOfRangeException">
            /// A bound is a from-end index (<c>^n</c>), or the end precedes the start.
            /// </exception>
            public ILinkStage WithRange(Range range) {
                if (linksFrozen)
                    throw new InvalidOperationException("WithRange must be called before WithLinkGenerator");

                // Index.Value drops the from-end marker, so "^3" would silently be read as "3".
                if (range.Start.IsFromEnd || range.End.IsFromEnd)
                    throw new ArgumentOutOfRangeException(nameof(range), "from-end indices (^n) are not supported");

                if (range.End.Value < range.Start.Value)
                    throw new ArgumentOutOfRangeException(nameof(range), "start must be <= end");

                var template = Data.Templates[Source.Key];
                var stateChanger = new NumberedStateChanger(template.Factory.Behavior);

                // Copy before advancing Initial so the end state is derived from the untouched start.
                endState = Initial.Copy();
                stateChanger.Apply(Initial, range.Start.Value - 1);
                stateChanger.Apply(endState, range.End.Value - 1);
                return this;
            }
        }
    }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ────────────────────────── Implementation ────────────────────────── */

        /// <summary>Kinds of source distinguished by the builder.</summary>
        private enum SourceKind {
            Meta,
            Text
        }
    }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
using Beam.Dynamic;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Second builder stage: fixes the transformer and hands over to <see cref="ContextStage"/>.
        /// </summary>
        private sealed record TransformStage(
            WebResource Source,
            BeamDataContext Data,
            DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {

            /// <summary>
            /// Looks up the bindings named by the source, lets <paramref name="factory"/> build
            /// the transformer from them, and moves on to the context stage.
            /// </summary>
            public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory)
                => new ContextStage(CtxBuilder, factory(Data.Bindings[Source.Bindings]));

            /// <summary>Moves on to the context stage with a ready-made transformer.</summary>
            public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer)
                => new ContextStage(CtxBuilder, transformer);
        }
    }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent {
|
||||
/// <summary>
/// Type-safe, staged builder that prevents callers from forgetting the mandatory steps
/// (source → link selection → transformer) and surfaces operational knobs as first-class
/// methods instead of magic parameters.
/// </summary>
public static partial class DownloadBuilder<RawType, OutType> {
    /* ──────────────────────────── Entry points ─────────────────────────── */

    /// <summary>
    /// Starts a builder chain from a resource registered in the data context.
    /// </summary>
    /// <param name="dict">Key of the resource dictionary to look up.</param>
    /// <param name="kind">Name of the entry inside that dictionary's <c>Resources</c> map.</param>
    /// <param name="beamDataDictionary">The data context that all lookups are performed against.</param>
    /// <returns>The link-selection stage for the resolved resource.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="kind"/> or <paramref name="beamDataDictionary"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">Thrown when any of the lookups fails.</exception>
    public static ILinkStage FromResource(DataKey<ResourceDictionary> dict, string kind, BeamDataContext beamDataDictionary) {
        ArgumentNullException.ThrowIfNull(kind);
        ArgumentNullException.ThrowIfNull(beamDataDictionary);

        return Create(dict, beamDataDictionary, kind);
    }

    /// <summary>
    /// Starts a builder chain without a registered resource; source, initial state and data
    /// context are all passed as null to the link stage.
    /// </summary>
    /// <returns>The alternative link-selection stage.</returns>
    public static IAlternativeLinkStage FromScratch()
        => new LinkStage(null!, null!, null!, new());

    /// <summary>
    /// Resolves the resource and its initial state and creates the link stage for them.
    /// </summary>
    private static ILinkStage Create(DataKey<ResourceDictionary> resourceDict, BeamDataContext data, string kind) {
        var (source, initial) = Resolve(resourceDict, kind, data);
        var ctxBuilder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>()); // placeholder, filled later.
        return new LinkStage(source, initial, data, ctxBuilder);
    }

    /// <summary>
    /// Looks up, in order: the resource dictionary, the resource key for <paramref name="kind"/>,
    /// the web resource itself and the initial state stored under the converted resource key.
    /// </summary>
    /// <exception cref="KeyNotFoundException">Thrown, naming the missing item, when any lookup fails.</exception>
    private static (WebResource Source, State Initial) Resolve(DataKey<ResourceDictionary> resourceDict, string kind, BeamDataContext data) {
        // Messages name the current model types (ResourceDictionary / BeamDataContext) so that
        // a failed lookup points at the collection that was actually searched.
        if (!data.ResourceDictionaries.TryGetValue(resourceDict, out var dict))
            throw new KeyNotFoundException($"Resource dictionary '{resourceDict}' not found in BeamDataContext.");

        if (!dict.Resources.TryGetValue(kind, out var sourceKey))
            throw new KeyNotFoundException($"Resource kind '{kind}' not found in resource dictionary '{resourceDict}'.");

        if (!data.Resources.TryGetValue(sourceKey, out var source))
            throw new KeyNotFoundException($"Web resource '{sourceKey}' was not found in BeamDataContext.");

        if (!data.InitialStates.TryGetValue(sourceKey.To<ImmutableState>(), out var istate))
            throw new KeyNotFoundException($"Initial state for resource '{sourceKey}' (kind '{kind}') not found in BeamDataContext.");

        return (source, istate);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
|
||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="EntityFramework" Version="6.5.1" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="9.0.8">
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="9.0.8">
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,50 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
using System.Data.Entity;
|
||||
|
||||
|
||||
namespace Beam.Models {
|
||||
/// <summary>
/// Data dictionary exposing one typed dictionary per model kind. Every property reads through
/// <c>GetOrCreateDictionary</c> and writes through <c>Set</c>, using the property name as the entry name.
/// </summary>
public class BeamDataContext : BaseDataDictionary {
    /// <summary>Templates by template key.</summary>
    public Dictionary<DataKey<Template>, Template> Templates {
        get { return GetOrCreateDictionary<DataKey<Template>, Template>(nameof(Templates)); }
        set { Set(nameof(Templates), value); }
    }

    /// <summary>Data bindings by bindings key.</summary>
    public Dictionary<DataKey<DataBindings>, DataBindings> Bindings {
        get { return GetOrCreateDictionary<DataKey<DataBindings>, DataBindings>(nameof(Bindings)); }
        set { Set(nameof(Bindings), value); }
    }

    /// <summary>Sets of resource-dictionary keys, grouped by web-resource key.</summary>
    public Dictionary<DataKey<WebResource>, HashSet<DataKey<ResourceDictionary>>> AggregatorNovels {
        get { return GetOrCreateDictionary<DataKey<WebResource>, HashSet<DataKey<ResourceDictionary>>>(nameof(AggregatorNovels)); }
        set { Set(nameof(AggregatorNovels), value); }
    }

    /// <summary>Web resources by resource key.</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Resources {
        get { return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Resources)); }
        set { Set(nameof(Resources), value); }
    }

    /// <summary>Resource dictionaries by their key.</summary>
    public Dictionary<DataKey<ResourceDictionary>, ResourceDictionary> ResourceDictionaries {
        get { return GetOrCreateDictionary<DataKey<ResourceDictionary>, ResourceDictionary>(nameof(ResourceDictionaries)); }
        set { Set(nameof(ResourceDictionaries), value); }
    }

    /// <summary>Immutable initial states by state key.</summary>
    public Dictionary<DataKey<ImmutableState>, ImmutableState> InitialStates {
        get { return GetOrCreateDictionary<DataKey<ImmutableState>, ImmutableState>(nameof(InitialStates)); }
        set { Set(nameof(InitialStates), value); }
    }

    /// <summary>Files by file key; internal to the assembly.</summary>
    internal Dictionary<DataKey<File>, File> Files {
        get { return GetOrCreateDictionary<DataKey<File>, File>(nameof(Files)); }
        set { Set(nameof(Files), value); }
    }
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
namespace Beam.Models {
|
||||
internal class File(string path, params string[] tags) {
|
||||
public string Path { get; set; } = path;
|
||||
public string[] Tags { get; set; } = tags;
|
||||
@@ -5,7 +5,7 @@ using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
namespace Beam.Models {
|
||||
internal class LinkCollection(DataKey<string> key, List<SourceLink> links) {
|
||||
public DataKey<string> Key { get; set; } = key;
|
||||
public List<SourceLink> Links { get; set; } = links;
|
||||
@@ -0,0 +1,25 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Models {
|
||||
/// <summary>
/// Keyed model that maps names to web-resource keys and web-resource keys to initial states.
/// </summary>
public class ResourceDictionary : IKeyed<ResourceDictionary> {
    /// <summary>Key identifying this dictionary; must be set on construction.</summary>
    public required DataKey<ResourceDictionary> Key { get; set; }

    /// <summary>Optional display name; left out of JSON output when null.</summary>
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string? FriendlyName { get; set; }

    /// <summary>Web-resource keys by name; starts out empty.</summary>
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
    public Dictionary<string, DataKey<WebResource>> Resources { get; set; } = new();

    /// <summary>Initial states by web-resource key; starts out empty.</summary>
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
    public Dictionary<DataKey<WebResource>, ImmutableState> InitialStates { get; set; } = new();
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Models {
|
||||
/// <summary>
/// Keyed record bundling a state-changer factory with a source-link builder.
/// All three members must be set on construction.
/// </summary>
public record Template : IKeyed<Template> {
    /// <summary>Key identifying this template.</summary>
    public required DataKey<Template> Key { get; set; }

    /// <summary>Factory whose <c>Behavior</c> is used when creating state changers.</summary>
    public required StateChangerFactory Factory { get; set; }

    /// <summary>Builder that turns a state into a source link.</summary>
    public required SourceLinkBuilder Builder { get; set; }
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
namespace Beam.Models {
|
||||
internal class Tracked<T>(T obj) {
|
||||
public T TrackedObject { get; set; } = obj;
|
||||
public bool IsDirty { get; set; } = true;
|
||||
@@ -7,7 +7,14 @@ using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
namespace Beam.Models {
|
||||
/// <summary>
|
||||
/// Represents a specific resource accessible online; e.g. a book's contents.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// A resource should be one-to-one with a web request.
|
||||
/// </remarks>
|
||||
/// <param name="key"></param>
|
||||
public class WebResource(DataKey<WebResource> key) : IKeyed<WebResource> {
|
||||
public DataKey<WebResource> Key { get; set; } = key;
|
||||
|
||||
@@ -19,10 +26,10 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
public WebResource() : this(new(string.Empty)) { }
|
||||
|
||||
public WebResourceRecord ToRecord(BeamDataDictionary sdd) {
|
||||
return new WebResourceRecord(this, sdd.Bindings[Bindings]);
|
||||
}
|
||||
}
|
||||
//public Entity ToRecord(BeamDataDictionary sdd) {
|
||||
// return new Entity(this, sdd.Bindings[Bindings]);
|
||||
//}
|
||||
|
||||
public record WebResourceRecord(WebResource Resource, DataBindings Bindings);
|
||||
//public record class Entity(WebResource Resource, DataBindings Bindings);
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,7 @@ using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Puppeteer {
|
||||
internal class PuppetConfig {
|
||||
namespace Beam.Playwright {
|
||||
internal class PlaywrightConfig {
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
using Microsoft.Playwright;
|
||||
|
||||
namespace Beam.Puppeteer {
|
||||
public delegate Task AsyncManipulator(IPage page);
|
||||
namespace Beam.Playwright {
|
||||
public delegate Task PlaywrightAsyncManipulator(IPage page);
|
||||
|
||||
public static class PuppetContext {
|
||||
public static class PlaywrightContext {
|
||||
public static Lazy<IPlaywright> Playwright { get; set; }
|
||||
public static Lazy<IBrowser> Browser { get; set; }
|
||||
|
||||
@@ -11,7 +11,7 @@ namespace Beam.Puppeteer {
|
||||
|
||||
//}
|
||||
|
||||
static PuppetContext() {
|
||||
static PlaywrightContext() {
|
||||
Playwright = new Lazy<IPlaywright>(() => Microsoft.Playwright.Playwright.CreateAsync().Result);
|
||||
Browser = new Lazy<IBrowser>(() => Playwright.Value.Chromium.LaunchAsync().Result);
|
||||
}
|
||||
+5
-5
@@ -1,16 +1,16 @@
|
||||
using Microsoft.Playwright;
|
||||
|
||||
namespace Beam.Puppeteer {
|
||||
public class PuppetUnitDownloader<T> : UnitDownloaderBinary<T> {
|
||||
public AsyncManipulator PuppetManipulator { get; }
|
||||
namespace Beam.Playwright {
|
||||
public class PlaywrightUnitDownloader<T> : UnitDownloaderBinary<T> {
|
||||
public PlaywrightAsyncManipulator PuppetManipulator { get; }
|
||||
|
||||
public PuppetUnitDownloader(HttpClient client, AsyncManipulator puppetManipulator, AsyncTransformer<ByteDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<ByteDocument>[] asyncDownloadFailurePredicates)
|
||||
public PlaywrightUnitDownloader(HttpClient client, PlaywrightAsyncManipulator puppetManipulator, AsyncTransformer<ByteDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<ByteDocument>[] asyncDownloadFailurePredicates)
|
||||
: base(client, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
|
||||
PuppetManipulator = puppetManipulator;
|
||||
}
|
||||
|
||||
protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
|
||||
var page = await PuppetContext.Browser.Value.NewPageAsync();
|
||||
var page = await PlaywrightContext.Browser.Value.NewPageAsync();
|
||||
try {
|
||||
await page.GotoAsync(link);
|
||||
await PuppetManipulator(page);
|
||||
+5
-5
@@ -2,17 +2,17 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Playwright;
|
||||
|
||||
namespace Beam.Puppeteer {
|
||||
public class PuppetUnitPageDownloader<T> : UnitDownloader<T> {
|
||||
public AsyncManipulator PuppetManipulator { get; }
|
||||
namespace Beam.Playwright {
|
||||
public class PlaywrightUnitPageDownloader<T> : UnitDownloader<T> {
|
||||
public PlaywrightAsyncManipulator PuppetManipulator { get; }
|
||||
|
||||
public PuppetUnitPageDownloader(HtmlWeb web, AsyncManipulator puppetManipulator, AsyncTransformer<HtmlDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<HtmlDocument>[] asyncDownloadFailurePredicates)
|
||||
public PlaywrightUnitPageDownloader(HtmlWeb web, PlaywrightAsyncManipulator puppetManipulator, AsyncTransformer<HtmlDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<HtmlDocument>[] asyncDownloadFailurePredicates)
|
||||
: base(web, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
|
||||
PuppetManipulator = puppetManipulator;
|
||||
}
|
||||
|
||||
protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
|
||||
var page = await PuppetContext.Browser.Value.NewPageAsync();
|
||||
var page = await PlaywrightContext.Browser.Value.NewPageAsync();
|
||||
try {
|
||||
await page.GotoAsync(link);
|
||||
await PuppetManipulator(page);
|
||||
@@ -0,0 +1,17 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
|
||||
<PackageReference Include="Selenium.WebDriver" Version="4.34.0" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,10 @@
|
||||
using OpenQA.Selenium;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Asynchronous callback that receives the Selenium driver. The downloaders in this namespace
/// await it right after navigating to a link, before reading the result.
/// </summary>
/// <param name="driver">The driver that has just navigated to the link.</param>
public delegate Task StealthAsyncManipulator(IWebDriver driver);
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.Firefox;
|
||||
using OpenQA.Selenium.Chrome;
|
||||
using OpenQA.Selenium.Edge;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Browsers that <c>StealthConfig.Create</c> can start a driver for.
/// </summary>
public enum Browser {
    Firefox = 0,
    Chrome = 1,
    Chromium = 2,
    Edge = 3
}
|
||||
|
||||
/// <summary>
/// Owns a Selenium driver configured to save downloads into a dedicated directory without
/// prompting. Instances are created through <see cref="Create"/>; disposing the config
/// disposes the driver.
/// </summary>
public sealed class StealthConfig : IDisposable {
    /// <summary>Whether the browser window is shown (otherwise it is started headless).</summary>
    public bool ShowBrowser { get; init; }

    /// <summary>Download timeout; defaults to <see cref="Timeout.InfiniteTimeSpan"/>.</summary>
    public TimeSpan TimeOut { get; init; } = Timeout.InfiniteTimeSpan;

    /// <summary>Absolute path of the directory the browser is configured to download into.</summary>
    public string DownloadsDirectory { get; }

    /// <summary>Optional logger.</summary>
    public ILogger? Logger { get; init; }

    /// <summary>The driver owned by this config.</summary>
    public required IWebDriver Driver { get; init; }

    private StealthConfig(string downloadDir) => DownloadsDirectory = downloadDir;

    /* ---------- browser-specific option builders ---------- */

    /// <summary>Firefox options: optional headless mode plus custom download directory preferences.</summary>
    private static FirefoxOptions GetFirefoxOptions(string downloadDir, bool headless) {
        var o = new FirefoxOptions();
        if (headless) o.AddArgument("--headless");

        o.SetPreference("browser.download.folderList", 2);            // use custom dir
        o.SetPreference("browser.download.dir", downloadDir);
        o.SetPreference("browser.download.useDownloadDir", true);
        o.SetPreference("browser.helperApps.neverAsk.saveToDisk",
                        "application/octet-stream,application/pdf,application/zip");
        o.SetPreference("pdfjs.disabled", true);                      // open PDFs externally
        o.SetPreference("browser.download.manager.showWhenStarting", false);

        return o;
    }

    /// <summary>Chrome options: optional headless mode, download preferences and stability flags.</summary>
    private static ChromeOptions GetChromeOptions(string downloadDir, bool headless) {
        var o = new ChromeOptions();
        if (headless) o.AddArgument("--headless=new");

        // download prefs
        o.AddUserProfilePreference("download.default_directory", downloadDir);
        o.AddUserProfilePreference("download.prompt_for_download", false);
        o.AddUserProfilePreference("safebrowsing.enabled", false);

        // common stability flags
        o.AddArgument("--no-sandbox");
        o.AddArgument("--disable-dev-shm-usage");

        return o;
    }

    /// <summary>Edge options: optional headless mode plus download preferences.</summary>
    private static EdgeOptions GetEdgeOptions(string downloadDir, bool headless) {
        var o = new EdgeOptions();
        if (headless) o.AddArgument("--headless=new");

        o.AddUserProfilePreference("download.default_directory", downloadDir);
        o.AddUserProfilePreference("download.prompt_for_download", false);
        o.AddUserProfilePreference("safebrowsing.enabled", false);

        return o;
    }

    /* ---------- factory ---------- */

    /// <summary>
    /// Creates the download directory if needed, starts the requested browser and returns a
    /// config that owns the resulting driver.
    /// </summary>
    /// <param name="showBrowser">When false (the default) the browser is started headless.</param>
    /// <param name="downloadDir">
    /// Directory to download into; when null, a randomly named directory under the temp path is used.
    /// A relative path is converted to an absolute one.
    /// </param>
    /// <param name="timeOut">Download timeout; null means <see cref="Timeout.InfiniteTimeSpan"/>.</param>
    /// <param name="browser">Browser to start. Chrome and Chromium both use the Chrome driver; any other value uses Firefox.</param>
    /// <param name="logger">Optional logger stored on the config.</param>
    /// <returns>The configured <see cref="StealthConfig"/>.</returns>
    public static StealthConfig Create(
        bool showBrowser = false,
        string? downloadDir = null,
        TimeSpan? timeOut = null,
        Browser browser = Browser.Firefox,
        ILogger? logger = null) {
        // pick or create a dedicated download folder
        downloadDir ??= Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

        // Normalise to an absolute path: the browser is a separate process, so a relative path
        // is not guaranteed to resolve to the same directory that this process polls.
        downloadDir = Path.GetFullPath(downloadDir);
        Directory.CreateDirectory(downloadDir);

        bool headless = !showBrowser;

        IWebDriver driver = browser switch {
            Browser.Chrome or Browser.Chromium
                => new ChromeDriver(GetChromeOptions(downloadDir, headless)),
            Browser.Edge
                => new EdgeDriver(GetEdgeOptions(downloadDir, headless)),
            Browser.Firefox or _
                => new FirefoxDriver(GetFirefoxOptions(downloadDir, headless)),
        };

        return new StealthConfig(downloadDir) {
            ShowBrowser = showBrowser,
            TimeOut = timeOut ?? Timeout.InfiniteTimeSpan,
            Logger = logger,
            Driver = driver
        };
    }

    /// <summary>Disposes the owned driver. The download directory is left in place.</summary>
    public void Dispose() {
        Driver.Dispose();
    }
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Binary fragment downloader whose unit downloader is a <see cref="StealthUnitDownloader{T}"/>
/// built from the same client, config, manipulator, transformer and failure predicates.
/// </summary>
public class StealthFragmentDownloader<T> : UnitFragmentDownloaderBinary<T> {
    /// <summary>
    /// Forwards every argument to the base class and supplies a new
    /// <see cref="StealthUnitDownloader{T}"/> as the unit downloader.
    /// </summary>
    public StealthFragmentDownloader(
        HttpClient client,
        StealthConfig config,
        StealthAsyncManipulator manipulator,
        AsyncTransformer<ByteDocument, T> transformer,
        AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicate = null,
        int fragmentSize = 4,
        ILogger? logger = null)
        : base(
            client,
            transformer,
            failurePredicate,
            fragmentSize,
            logger,
            new StealthUnitDownloader<T>(client, config, manipulator, transformer, failurePredicate)) {
    }
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// HTML fragment downloader whose unit downloader is a <see cref="StealthUnitPageDownloader{T}"/>
/// built from the same web client, config, manipulator, transformer and failure predicates.
/// </summary>
public class StealthFragmentPageDownloader<T> : UnitFragmentDownloader<T> {
    /// <summary>
    /// Forwards every argument to the base class and supplies a new
    /// <see cref="StealthUnitPageDownloader{T}"/> as the unit downloader.
    /// </summary>
    public StealthFragmentPageDownloader(
        HtmlWeb web,
        StealthConfig config,
        StealthAsyncManipulator manipulator,
        AsyncTransformer<HtmlDocument, T> transformer,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
        int fragmentSize = 4,
        ILogger? logger = null)
        : base(
            web,
            transformer,
            failurePredicate,
            fragmentSize,
            logger,
            new StealthUnitPageDownloader<T>(web, config, manipulator, transformer, failurePredicate)) {
    }
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium.Chrome;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Downloads a single binary document by navigating a Selenium driver to a link, running the
/// manipulator, and then polling the configured download directory until a new file has
/// finished downloading.
/// </summary>
public class StealthUnitDownloader<T> : UnitDownloaderBinary<T> {
    private const int PollDelayMs = 250;      // how often we look
    private const int StableDelayMs = 1000;   // size-unchanged window
    private const int LockRetryDelayMs = 200; // wait between attempts to open the finished file

    /// <summary>The config providing the driver, download directory, timeout and logger.</summary>
    public StealthConfig Config { get; }

    /// <summary>Callback awaited after navigation.</summary>
    public StealthAsyncManipulator Manipulator { get; }

    private ILogger? Logger => Config.Logger;

    public StealthUnitDownloader(HttpClient client, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<ByteDocument, T> transformer, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null) : base(client, transformer, failurePredicates) {
        Config = config;
        Manipulator = manipulator;
    }

    /// <summary>
    /// Performs one download attempt for <paramref name="link"/>.
    /// </summary>
    /// <param name="link">The URL to navigate to.</param>
    /// <param name="ct">Token observed while waiting for the download.</param>
    /// <returns>
    /// <c>(true, result)</c> when a file was downloaded, passed the failure check and was transformed;
    /// <c>(false, default)</c> on timeout, failure-predicate match or any exception (which is logged).
    /// </returns>
    protected override async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(
        string link, CancellationToken ct) {
        try {
            Logger?.LogInformation("Navigating to {Link}", link);

            // Remember what is already in the directory so that files left behind by an
            // earlier download are not mistaken for the result of this one.
            var preexisting = new HashSet<string>(
                Directory.EnumerateFiles(Config.DownloadsDirectory, "*", SearchOption.TopDirectoryOnly),
                StringComparer.Ordinal);

            var driver = Config.Driver;
            await driver.Navigate().GoToUrlAsync(link);
            await Manipulator(driver);

            var sw = Stopwatch.StartNew();
            ByteDocument? doc = await WaitForDownloadAsync(sw, preexisting, ct);

            if (doc is null || await IsFailure(doc))
                return (false, default);

            Logger?.LogInformation("Download finished in {Elapsed}", sw.Elapsed);
            return (true, await Transformer(doc));
        } catch (Exception ex) {
            Logger?.LogError(ex, "Error occurred downloading {Link}", link);
            return (false, default);
        }
    }

    /* --------------------------------------------------------------------- */

    /// <summary>
    /// Polls the download directory until a new, non-temporary file has stopped growing, no new
    /// temporary files remain and the file can be opened exclusively; then reads it.
    /// </summary>
    /// <param name="sw">Stopwatch measuring the time spent waiting.</param>
    /// <param name="preexisting">Paths that were present before navigation and must be ignored.</param>
    /// <param name="ct">Token that ends the wait.</param>
    /// <returns>The downloaded document, or null when the wait timed out or was cancelled between polls.</returns>
    private async Task<ByteDocument?> WaitForDownloadAsync(
        Stopwatch sw, HashSet<string> preexisting, CancellationToken ct) {
        string dir = Config.DownloadsDirectory;
        string? finalPath = null;
        long lastSize = -1;
        DateTime lastChange = DateTime.UtcNow;

        // Timeout.InfiniteTimeSpan is -1 ms, so a plain "elapsed < timeout" comparison would
        // report a timeout immediately; treat it as "no deadline" instead.
        bool hasDeadline = Config.TimeOut != Timeout.InfiniteTimeSpan;

        static bool IsTemp(string p) =>
            p.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
            p.EndsWith(".part", StringComparison.OrdinalIgnoreCase);

        Logger?.LogDebug("Polling {Dir} for download files", dir);

        while ((!hasDeadline || sw.Elapsed < Config.TimeOut) && !ct.IsCancellationRequested) {
            // files that appeared in the directory since navigation started
            var files = Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly)
                                 .Where(f => !preexisting.Contains(f))
                                 .ToArray();

            // ignore temp names; pick the first real candidate and stick with it
            finalPath ??= files.FirstOrDefault(f => !IsTemp(f));

            // still nothing but temps – keep waiting
            if (finalPath is null) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // track growth
            long size = new FileInfo(finalPath).Length;
            if (size == 0 || size != lastSize) {
                lastSize = size;
                lastChange = DateTime.UtcNow;
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // size stable long enough *and* no temp files left?
            bool tempsRemain = files.Any(IsTemp);
            if ((DateTime.UtcNow - lastChange).TotalMilliseconds < StableDelayMs || tempsRemain) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // wait until writer releases lock
            while (true) {
                try {
                    using FileStream _ =
                        File.Open(finalPath, FileMode.Open, FileAccess.Read, FileShare.None);
                    break;
                } catch (IOException) {
                    await Task.Delay(LockRetryDelayMs, ct);
                }
            }

            byte[] bytes = await File.ReadAllBytesAsync(finalPath, ct);
            Logger?.LogInformation("Download completed {Path} ({Size} bytes)",
                                   finalPath, bytes.Length);

            return new ByteDocument(Path.GetFileName(finalPath), bytes);
        }

        Logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
        return null;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Downloads a single HTML page by navigating a Selenium driver to a link, running the
/// manipulator, and parsing the driver's page source.
/// </summary>
public class StealthUnitPageDownloader<T> : UnitDownloader<T> {
    /// <summary>The config providing the driver and logger.</summary>
    public StealthConfig Config { get; }

    /// <summary>Callback awaited after navigation, before the page source is read.</summary>
    public StealthAsyncManipulator Manipulator { get; }

    private ILogger? Logger => Config.Logger;

    public StealthUnitPageDownloader(HtmlWeb web, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : base(web, transformer, failurePredicate) {
        Config = config;
        Manipulator = manipulator;
    }

    /// <summary>
    /// Performs one download attempt for <paramref name="link"/>.
    /// </summary>
    /// <param name="link">The URL to navigate to.</param>
    /// <param name="ct">Cancellation token; not consulted by this implementation.</param>
    /// <returns>
    /// <c>(true, result)</c> when the page passed the failure check and was transformed;
    /// <c>(false, default)</c> when the failure check matched or an exception occurred (which is logged).
    /// </returns>
    protected async override Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var driver = Config.Driver;

            await driver.Navigate().GoToUrlAsync(link);
            await Manipulator(driver);

            HtmlDocument doc = new();
            doc.LoadHtml(driver.PageSource);

            if (await IsFailure(doc))
                return (false, default);

            return (true, await Transformer(doc));
        } catch (Exception e) {
            // Named placeholder, matching StealthUnitDownloader, so the link is recorded
            // under a usable property name by structured logging providers.
            Logger?.LogError(e, "Error occurred downloading {Link}", link);
            return (false, default);
        }
    }
}
|
||||
}
|
||||
@@ -14,25 +14,25 @@ namespace Beam.Temporary.Cli {
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public partial interface IArchitecture {
|
||||
/// <summary>
|
||||
/// Gets the metadata associated with a <see cref="TextResource"/>
|
||||
/// </summary>
|
||||
/// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
/// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
|
||||
/// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
/// <param name="logger">Optional logger for logging debug information</param>
|
||||
/// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null);
|
||||
/// <summary>
|
||||
/// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="TextResource"/>
|
||||
/// </summary>
|
||||
/// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
/// <param name="pieceKey">The key of the <see cref="TextResource"/> stored in the <paramref name="sdd"/></param>
|
||||
/// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
/// <param name="metadata">Optional book metadata to include with the final text record</param>
|
||||
/// <param name="logger">Optional logger for logging debug information</param>
|
||||
/// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, IDocumentMetaData? metadata = null, CancellationToken ct = default, ILogger? logger = null);
|
||||
///// <summary>
|
||||
///// Gets the metadata associated with a <see cref="ResourceDictionary"/>
|
||||
///// </summary>
|
||||
///// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
///// <param name="pieceKey">The key of the <see cref="ResourceDictionary"/> stored in the <paramref name="sdd"/></param>
|
||||
///// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
///// <param name="logger">Optional logger for logging debug information</param>
|
||||
///// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
//public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null);
|
||||
///// <summary>
|
||||
///// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="ResourceDictionary"/>
|
||||
///// </summary>
|
||||
///// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
|
||||
///// <param name="pieceKey">The key of the <see cref="ResourceDictionary"/> stored in the <paramref name="sdd"/></param>
|
||||
///// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
|
||||
///// <param name="metadata">Optional book metadata to include with the final text record</param>
|
||||
///// <param name="logger">Optional logger for logging debug information</param>
|
||||
///// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
|
||||
//public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, IDocumentMetaData? metadata = null, CancellationToken ct = default, ILogger? logger = null);
|
||||
|
||||
/// <summary>
|
||||
/// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the chapter metadata
|
||||
|
||||
@@ -7,11 +7,15 @@
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
||||
<Compile Include="HtmlBook.cs.obsolete" />
|
||||
<Compile Include="HtmlBookTemplates.cs.obsolete" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7" />
|
||||
<PackageReference Include="OpenAI" Version="2.1.0" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.49.2-preview.0.70" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
@@ -23,7 +27,12 @@
|
||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Puppeteer\Beam.Puppeteer.csproj">
|
||||
<ProjectReference Include="..\Beam.Fluent\Beam.Fluent.csproj" />
|
||||
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
|
||||
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
using aeqw89.PersistentData;
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Typed facade over <see cref="BaseDataDictionary"/>. Each property reads its dictionary
/// through <c>GetOrCreateDictionary</c> and writes it back into <c>Data</c>, always using the
/// property's own name as the storage key.
/// </summary>
public class BeamDataDictionary : BaseDataDictionary {
    /// <summary>Templates keyed by web-resource key (storage key: "Templates").</summary>
    public Dictionary<DataKey<WebResource>, Template> Templates {
        get { return GetOrCreateDictionary<DataKey<WebResource>, Template>(nameof(Templates)); }
        set { Data[nameof(Templates)] = value; }
    }

    /// <summary>Web resources registered as aggregators (storage key: "Aggregators").</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Aggregators {
        get { return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Aggregators)); }
        set { Data[nameof(Aggregators)] = value; }
    }

    /// <summary>Web resources registered as auxiliaries (storage key: "Auxillaries").</summary>
    public Dictionary<DataKey<WebResource>, WebResource> Auxillaries {
        get { return GetOrCreateDictionary<DataKey<WebResource>, WebResource>(nameof(Auxillaries)); }
        set { Data[nameof(Auxillaries)] = value; }
    }

    /// <summary>Data bindings keyed by binding key (storage key: "Bindings").</summary>
    public Dictionary<DataKey<DataBindings>, DataBindings> Bindings {
        get { return GetOrCreateDictionary<DataKey<DataBindings>, DataBindings>(nameof(Bindings)); }
        set { Data[nameof(Bindings)] = value; }
    }

    /// <summary>For each aggregator key, the set of text-resource keys associated with it (storage key: "AggregatorNovels").</summary>
    public Dictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>> AggregatorNovels {
        get { return GetOrCreateDictionary<DataKey<WebResource>, HashSet<DataKey<TextResource>>>(nameof(AggregatorNovels)); }
        set { Data[nameof(AggregatorNovels)] = value; }
    }

    /// <summary>Text resources keyed by their own key (storage key: "Novels").</summary>
    public Dictionary<DataKey<TextResource>, TextResource> Novels {
        get { return GetOrCreateDictionary<DataKey<TextResource>, TextResource>(nameof(Novels)); }
        set { Data[nameof(Novels)] = value; }
    }

    /// <summary>File entries keyed by file key (storage key: "Files"); internal to the assembly.</summary>
    internal Dictionary<DataKey<File>, File> Files {
        get { return GetOrCreateDictionary<DataKey<File>, File>(nameof(Files)); }
        set { Data[nameof(Files)] = value; }
    }
}
|
||||
}
|
||||
@@ -9,8 +9,8 @@ using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
public static class CommonTransformers {
|
||||
public static AsyncTransformer<HtmlDocument, IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult<IDocumentMetaData>(new ArticleData() {
|
||||
public static AsyncTransformer<HtmlDocument, ArticleData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult(new ArticleData() {
|
||||
Authors = binding?.Authors?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.Get(x) ?? ""),
|
||||
Categories = binding?.Tags?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
@@ -18,7 +18,18 @@ namespace Beam.Temporary.Cli {
|
||||
});
|
||||
};
|
||||
|
||||
public static AsyncTransformer<HtmlDocument, IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
public static AsyncTransformer<HtmlDocument, TableOfContentsData> TableOfContentsTransformer(DataBindings? binding) => (x) => {
|
||||
return Task.FromResult(new TableOfContentsData() {
|
||||
Authors = binding?.Authors?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.Get(x) ?? ""),
|
||||
Categories = binding?.Tags?.Get(x)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.Get(x) ?? ""),
|
||||
ContentLinks = binding?.TableOfContents?.Get(x) ?? [],
|
||||
PagesLinks = binding?.PagesDropDown?.Get(x) ?? []
|
||||
});
|
||||
};
|
||||
|
||||
public static AsyncTransformer<HtmlDocument, StringDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
|
||||
var resolved = binding?.Resolve(x);
|
||||
var articleData = new ArticleData() {
|
||||
Name = OnlineCleaner.Clean(resolved?.Title),
|
||||
@@ -27,7 +38,7 @@ namespace Beam.Temporary.Cli {
|
||||
meta.Add(IArchitecture.Default.ChapterKey, articleData);
|
||||
if (metaData is not null)
|
||||
meta.Add(IArchitecture.Default.BookKey, metaData);
|
||||
return Task.FromResult<IDocument>(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
return Task.FromResult(new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
|
||||
MetaData = meta
|
||||
});
|
||||
};
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Plain settings bag for CSS theming values. Every property is a mutable string
/// with a built-in default.
/// </summary>
public class CssData {
    /// <summary>Primary background color (e.g., for the body).</summary>
    public string PrimaryColor { get; set; } = "#f5f5f5";

    /// <summary>Secondary color (e.g., for header background).</summary>
    public string SecondaryColor { get; set; } = "#e0e0e0";

    /// <summary>Tertiary color (e.g., for content sections).</summary>
    public string TertiaryColor { get; set; } = "#ffffff";

    /// <summary>Button background color.</summary>
    public string ButtonColor { get; set; } = "#007bff";

    /// <summary>Foreground text color.</summary>
    public string ForegroundColor { get; set; } = "#333333";

    /// <summary>Font family for main content.</summary>
    public string ContentFont { get; set; } = "Arial, sans-serif";

    /// <summary>Font size for main content.</summary>
    public string ContentFontSize { get; set; } = "16px";

    /// <summary>Font family for titles.</summary>
    public string TitleFont { get; set; } = "Georgia, serif";

    /// <summary>Font size for titles.</summary>
    public string TitleFontSize { get; set; } = "32px";
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
//namespace Beam.Temporary.Cli {
|
||||
// public class CssData {
|
||||
// // Primary background color (e.g., for the body)
|
||||
// public string PrimaryColor { get; set; } = "#f5f5f5";
|
||||
|
||||
// // Secondary color (e.g., for header background)
|
||||
// public string SecondaryColor { get; set; } = "#e0e0e0";
|
||||
|
||||
// // Tertiary color (e.g., for content sections)
|
||||
// public string TertiaryColor { get; set; } = "#ffffff";
|
||||
|
||||
// // Button background color
|
||||
// public string ButtonColor { get; set; } = "#007bff";
|
||||
|
||||
// // Foreground text color
|
||||
// public string ForegroundColor { get; set; } = "#333333";
|
||||
|
||||
// // Font family for main content
|
||||
// public string ContentFont { get; set; } = "Arial, sans-serif";
|
||||
|
||||
// // Font size for main content
|
||||
// public string ContentFontSize { get; set; } = "16px";
|
||||
|
||||
// // Font family for titles
|
||||
// public string TitleFont { get; set; } = "Georgia, serif";
|
||||
|
||||
// // Font size for titles
|
||||
// public string TitleFontSize { get; set; } = "32px";
|
||||
// }
|
||||
//}
|
||||
@@ -1,34 +0,0 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Helpers for re-namespacing and re-typing <see cref="DataKey"/> identifiers.
/// Identifiers are treated as colon-separated segments (e.g. <c>a:b:c</c>).
/// </summary>
internal static class DataKeyExtensions {
    /// <summary>
    /// Returns a key whose identifier lives under <paramref name="namespace"/>.
    /// If the identifier already starts with the namespace, or with a leading part of it,
    /// that leading part is replaced by the full namespace; otherwise the namespace is
    /// prepended with a ':' separator.
    /// </summary>
    /// <param name="dk">The key to re-namespace.</param>
    /// <param name="namespace">Colon-separated namespace, e.g. <c>aeqw89:document:aggregators</c>.</param>
    /// <returns>A new key carrying the namespaced identifier.</returns>
    /// <exception cref="System.ArgumentException"><paramref name="namespace"/> is null or empty.</exception>
    public static DataKey WithNamespace(this DataKey dk, string @namespace) {
        // The previous code failed on an empty namespace only by accident (inside string.Replace);
        // reject it up front instead.
        System.ArgumentException.ThrowIfNullOrEmpty(@namespace);

        string identifier = dk.Identifier;
        string[] names = @namespace.Split(':');

        // Try the longest leading part of the namespace first, then progressively shorter ones.
        for (int take = names.Length; take > 0; take--) {
            string prefix = string.Join(":", names, 0, take);
            if (!identifier.StartsWith(prefix, System.StringComparison.Ordinal))
                continue;

            // Only accept matches that end on a segment boundary, so that "a" does not
            // match the unrelated identifier "ab:x".
            bool onBoundary = identifier.Length == prefix.Length
                || identifier[prefix.Length] == ':'
                || prefix.EndsWith(':');
            if (!onBoundary)
                continue;

            // Swap only the leading prefix. string.Replace would also rewrite any later
            // occurrence of the same text inside the identifier.
            return new DataKey(@namespace + identifier[prefix.Length..]);
        }

        return new DataKey(@namespace + ":" + identifier);
    }

    /// <summary>Typed overload of <see cref="WithNamespace(DataKey, string)"/>; the result keeps the type argument.</summary>
    public static DataKey<T> WithNamespace<T>(this DataKey<T> dk, string @namespace) {
        return ((DataKey)dk).WithNamespace(@namespace).As<T>();
    }

    /// <summary>Returns a key whose identifier is the original identifier followed by <paramref name="suffix"/>.</summary>
    public static DataKey<T> WithSuffix<T>(this DataKey<T> dk, string suffix) {
        return new DataKey<T>(dk.Identifier + suffix);
    }

    /// <summary>Places the key under the <c>aeqw89:document:aggregators</c> namespace.</summary>
    public static DataKey ToAggregator(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:aggregators");

    /// <summary>Places the key under the <c>aeqw89:document:auxillaries</c> namespace (spelling is part of the stored key).</summary>
    public static DataKey ToAuxiliary(this DataKey dk)
        => dk.WithNamespace("aeqw89:document:auxillaries");

    /// <summary>Re-types an untyped key as <see cref="DataKey{T}"/> with the same identifier.</summary>
    public static DataKey<T> As<T>(this DataKey dk) => new DataKey<T>(dk.Identifier);
}
|
||||
}
|
||||
@@ -1,284 +0,0 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using Beam;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using HtmlAgilityPack;
|
||||
using Beam.Puppeteer;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
|
||||
/// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps
|
||||
/// (source → link selection → transformer) and surfaces operational knobs as first‑class
|
||||
/// methods instead of magic parameters.
|
||||
/// </summary>
|
||||
public static class DownloadBuilder<RawType, OutType> {
|
||||
/* ──────────────────────────── Entry points ─────────────────────────── */
|
||||
|
||||
/// <summary>
/// Starts a download pipeline from the meta source of the text resource identified by
/// <paramref name="novelKey"/>.
/// </summary>
public static ILinkStage FromMeta(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Meta);
}
|
||||
|
||||
/// <summary>
/// Starts a download pipeline from the text source of the text resource identified by
/// <paramref name="novelKey"/>.
/// </summary>
public static ILinkStage FromText(DataKey<TextResource> novelKey, BeamDataDictionary data) {
    return Create(novelKey, data, SourceKind.Text);
}
|
||||
|
||||
/// <summary>
/// Starts a pipeline that is not backed by a data dictionary: source, initial state and
/// data are deliberately left null, and the links must be supplied by the caller.
/// </summary>
public static IAlternativeLinkStage FromScratch() {
    return new LinkStage(
        Source: null!,
        Initial: null!,
        Data: null!,
        CtxBuilder: new DownloadContextBuilder<RawType>());
}
|
||||
|
||||
/* ────────────────────────────── Stages ─────────────────────────────── */
|
||||
|
||||
/// <summary>Link-selection stage for pipelines that were resolved from a data dictionary.</summary>
public interface ILinkStage {
    /// <summary>Selects a single link and advances to the transformer stage.</summary>
    ITransformStage WithLink();

    /// <summary>Selects a generated sequence of links and advances to the transformer stage.</summary>
    ITransformStage WithLinkGenerator();

    /// <summary>Restricts link selection to <paramref name="range"/> and stays on this stage.</summary>
    ILinkStage WithRange(Range range);
}
|
||||
|
||||
/// <summary>Link-selection stage for pipelines whose links are supplied directly by the caller.</summary>
public interface IAlternativeLinkStage {
    /// <summary>Uses <paramref name="links"/> as the download targets and advances to the transformer stage.</summary>
    IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
}
|
||||
|
||||
/// <summary>Transformer stage for dictionary-backed pipelines.</summary>
public interface ITransformStage {
    /// <summary>
    /// Builds the transformer from a <see cref="DataBindings"/> instance passed to
    /// <paramref name="factory"/> and advances to the context stage.
    /// </summary>
    IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
}
|
||||
|
||||
/// <summary>Transformer stage for pipelines whose links were supplied directly.</summary>
public interface IAlternativeTransformStage {
    /// <summary>Uses <paramref name="transformer"/> as-is and advances to the context stage.</summary>
    IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);

    /// <summary>
    /// Convenience overload for synchronous transformers: the result is wrapped in a
    /// completed task and forwarded to the asynchronous overload.
    /// </summary>
    IContextStage WithTransformer(Func<RawType, OutType> transformer)
        => WithTransformer(raw => Task.FromResult(transformer(raw)));
}
|
||||
|
||||
/// <summary>
/// Final stage: operational settings for the download, followed by <see cref="Build"/>.
/// All configuration methods return the stage itself so calls can be chained.
/// </summary>
public interface IContextStage {
    /// <summary>Hands the underlying context builder to <paramref name="configure"/> for arbitrary adjustments.</summary>
    IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);

    /// <summary>Sets the requested degree of parallelism.</summary>
    IContextStage WithParallelism(int degree);

    /// <summary>Sets the download timeout.</summary>
    IContextStage WithTimeout(TimeSpan timeout);

    /// <summary>Registers a reporter that receives <see cref="RetryReport"/> notifications.</summary>
    IContextStage WithRetryReporter(IProgress<RetryReport> reporter);

    /// <summary>Creates the download enumerable from the accumulated configuration.</summary>
    DownloadEnumerable<OutType> Build();

    /// <summary>Selects the fragment-based download strategy.</summary>
    IContextStage UseFragments();

    /// <summary>Selects the puppet-browser download strategy driven by <paramref name="manipulator"/>.</summary>
    IContextStage UsePuppet(AsyncManipulator manipulator);
}
|
||||
|
||||
/* ────────────────────────── Implementation ────────────────────────── */
|
||||
|
||||
/// <summary>Which of a text resource's associated sources a pipeline is built from.</summary>
private enum SourceKind {
    /// <summary>The associated meta source.</summary>
    Meta,

    /// <summary>The associated text source.</summary>
    Text
}
|
||||
|
||||
/// <summary>
/// Resolves the source and initial state for <paramref name="novelKey"/> and wraps them,
/// together with a fresh context builder, in the first builder stage.
/// </summary>
private static ILinkStage Create(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
    var resolved = Resolve(novelKey, data, kind);

    // The empty link list is only a placeholder; the link stage supplies the real links.
    var contextBuilder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>());

    return new LinkStage(resolved.Source, resolved.Initial, data, contextBuilder);
}
|
||||
|
||||
/// <summary>
/// Looks up the text resource for <paramref name="novelKey"/> and returns the web source
/// and initial template state that match <paramref name="kind"/>.
/// </summary>
/// <exception cref="KeyNotFoundException">The key is not present in <c>data.Novels</c>.</exception>
/// <exception cref="InvalidOperationException">
/// The requested source is missing, or (meta only) the meta template data is missing.
/// </exception>
private static (WebResource Source, State Initial) Resolve(DataKey<TextResource> novelKey, BeamDataDictionary data, SourceKind kind) {
    if (!data.Novels.TryGetValue(novelKey, out var tr)) {
        throw new KeyNotFoundException($"Novel '{novelKey}' not found in BeamDataDictionary.");
    }

    var textRecord = tr.ToRecord(data);

    if (kind == SourceKind.Meta) {
        WebResource metaSource = textRecord.AssociatedMetaSource
            ?? throw new InvalidOperationException($"Meta source missing for '{novelKey}'.");
        State metaInitial = textRecord.Resource.MetaTemplateInitialData
            ?? throw new InvalidOperationException("Meta template data missing.");
        return (metaSource, metaInitial);
    }

    WebResource textSource = textRecord.AssociatedSource
        ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");

    // Unlike the meta branch, the text template data is passed through without a null check.
    return (textSource, textRecord.Resource.TemplateInitialData);
}
|
||||
|
||||
/* ──────────────────────────── Stage types ─────────────────────────── */
|
||||
|
||||
|
||||
/// <summary>
/// First builder stage: decides which links are downloaded. Serves both the
/// dictionary-backed flow (<see cref="ILinkStage"/>) and the caller-supplied-links flow
/// (<see cref="IAlternativeLinkStage"/>); in the latter, <c>Source</c>, <c>Initial</c> and
/// <c>Data</c> are null and only <see cref="WithLinks"/> is reachable.
/// </summary>
private sealed record LinkStage(
    WebResource Source,
    State Initial,
    BeamDataDictionary Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {

    // Range-adjusted copies of Initial; both stay null until WithRange is called.
    // Working on copies keeps the caller's Initial state untouched.
    private State? startState;
    private State? endState;
    private bool linksFrozen = false;

    /// <summary>Builds one link from the (possibly range-adjusted) start state.</summary>
    public ITransformStage WithLink() {
        var link = Data.Templates[Source.Key].Builder.Build(startState ?? Initial);
        CtxBuilder.WithLinks(new[] { link });
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Uses a generated link sequence that begins at the start state and is bounded by the
    /// end state when <see cref="WithRange"/> was called. After this call the range is frozen.
    /// </summary>
    public ITransformStage WithLinkGenerator() {
        var template = Data.Templates[Source.Key];
        var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
            template.Builder,
            new NumberedStateChanger(template.Factory.Behavior),
            startState ?? Initial, endState));
        CtxBuilder.WithLinks(generator);
        linksFrozen = true;
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>Uses the caller-supplied <paramref name="links"/> directly.</summary>
    public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
        CtxBuilder.WithLinks(links);
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Restricts the links to <paramref name="range"/>. Start and end are each applied to a
    /// copy of the initial state with an offset of <c>value - 1</c>. Calling this method
    /// again replaces the previous range instead of stacking on top of it.
    /// </summary>
    /// <exception cref="InvalidOperationException">Called after <see cref="WithLinkGenerator"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The range uses from-end indices, or its end is smaller than its start.
    /// </exception>
    public ILinkStage WithRange(Range range) {
        if (linksFrozen)
            throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator");
        // The total number of links is unknown here, so "^n" indices cannot be resolved;
        // reading only Index.Value would silently treat them as from-start values.
        if (range.Start.IsFromEnd || range.End.IsFromEnd)
            throw new ArgumentOutOfRangeException(nameof(range), "Range must not use from-end (^) indices.");
        if (range.End.Value < range.Start.Value)
            throw new ArgumentOutOfRangeException(nameof(range), "Range start must be <= end.");

        var template = Data.Templates[Source.Key];
        var stateChanger = new NumberedStateChanger(template.Factory.Behavior);

        // NOTE(review): Apply appears to modify the state it is given (its result was never
        // used by the original code) - confirm. Both bounds are therefore derived from
        // fresh copies so the shared Initial instance is never altered.
        State first = Initial.Copy();
        State last = Initial.Copy();
        stateChanger.Apply(first, range.Start.Value - 1);
        stateChanger.Apply(last, range.End.Value - 1);

        startState = first;
        endState = last;
        return this;
    }
}
|
||||
|
||||
/// <summary>
/// Second builder stage: captures the transformer and hands over to the context stage.
/// Implements both the dictionary-backed and the caller-supplied-links variants.
/// </summary>
private sealed record TransformStage(
    WebResource Source,
    BeamDataDictionary Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {

    /// <summary>
    /// Looks up the bindings referenced by the source in the data dictionary, lets
    /// <paramref name="factory"/> build the transformer from them, and advances.
    /// </summary>
    public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
        DataBindings sourceBindings = Data.Bindings[Source.Bindings];
        return new ContextStage(CtxBuilder, factory(sourceBindings));
    }

    /// <summary>Advances with a ready-made transformer.</summary>
    public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer)
        => new ContextStage(CtxBuilder, transformer);
}
|
||||
|
||||
/// <summary>
/// Final builder stage: collects operational settings (parallelism, timeout, retry reporting,
/// download strategy) and constructs the <see cref="DownloadEnumerable{T}"/>.
/// </summary>
private sealed class ContextStage : IContextStage {
    private readonly DownloadContextBuilder<RawType> _ctxBuilder;
    private readonly AsyncTransformer<RawType, OutType> _transformer;
    private int _parallelism = 4;
    private bool _useFragments = false;
    private AsyncManipulator? _useManipulator = null;

    public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
        _ctxBuilder = ctxBuilder;
        _transformer = transformer;
    }

    /// <summary>Lets the caller adjust the underlying context builder directly.</summary>
    public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
        configure(_ctxBuilder);
        return this;
    }

    /// <summary>Sets the degree of parallelism; values below 1 are raised to 1.</summary>
    public IContextStage WithParallelism(int degree) {
        _parallelism = Math.Max(1, degree);
        return this;
    }

    /// <summary>Forwards the timeout to the context builder.</summary>
    public IContextStage WithTimeout(TimeSpan timeout) {
        _ctxBuilder.WithTimeOut(timeout);
        return this;
    }

    /// <summary>Forwards the retry reporter to the context builder.</summary>
    public IContextStage WithRetryReporter(IProgress<RetryReport> reporter) {
        _ctxBuilder.WithRetryReporter(reporter);
        return this;
    }

    /// <summary>
    /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePuppet(AsyncManipulator)"/>
    /// </summary>
    /// <returns></returns>
    public IContextStage UseFragments() {
        // Selecting fragments discards any previously chosen puppet manipulator.
        _useManipulator = null;
        _useFragments = true;
        return this;
    }

    /// <summary>
    /// Use a puppet browser to download the links. This strategy is mutually exclusive with <see cref="UseFragments"/>
    /// </summary>
    /// <param name="manipulator">The page manipulator</param>
    /// <returns></returns>
    public IContextStage UsePuppet(AsyncManipulator manipulator) {
        // Selecting the puppet strategy switches fragment mode off.
        _useFragments = false;
        _useManipulator = manipulator;
        return this;
    }

    /// <summary>
    /// Picks the unit downloader that matches the chosen strategy, the transformer's input
    /// type (HTML or binary) and the type of the context's failure predicates.
    /// The result is returned as <see cref="object"/> because the fragmented and
    /// non-fragmented downloaders are cast to different interfaces by the caller.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// No downloader exists for the combination; this includes a context whose failure
    /// predicates are null, since a type pattern never matches null.
    /// </exception>
    private object ConstructUnitDownloader(DownloadContext<RawType> context) {
        return (_useFragments, _useManipulator, _transformer, context.AsyncFailurePredicates) switch {
            // ──────────────── fragmented HTML ────────────────
            (true, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new UnitFragmentDownloader<OutType>(
                    context.Web,
                    asyncHtmlTransformer,
                    documentFailurePredicates,
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── fragmented binary ────────────────
            (true, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new UnitFragmentDownloaderBinary<OutType>(
                    context.Client,
                    asyncBinaryTransformer,
                    responseFailurePredicates,
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── single HTML ────────────────
            (false, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new UnitDownloader<OutType>(
                    context.Web,
                    asyncHtmlTransformer,
                    documentFailurePredicates),
            // ──────────────── single binary ────────────────
            (false, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new UnitDownloaderBinary<OutType>(
                    context.Client,
                    asyncBinaryTransformer,
                    responseFailurePredicates),
            // ──────────────── single puppet binary ────────────────
            (false, AsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new PuppetUnitDownloader<OutType>(
                    context.Client,
                    manipulator,
                    asyncBinaryTransformer,
                    responseFailurePredicates),
            // ──────────────── single puppet HTML ────────────────
            (false, AsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new PuppetUnitPageDownloader<OutType>(
                    context.Web,
                    manipulator,
                    asyncHtmlTransformer,
                    documentFailurePredicates),
            // A specific exception type instead of bare System.Exception (CA2201);
            // existing catch (Exception) handlers still catch it.
            _ => throw new NotSupportedException($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
        };
    }

    /// <summary>
    /// Wraps the unit downloader in the sequential downloader that matches the strategy and
    /// exposes it as an ordered async enumerator. The sequential downloader receives a
    /// context rebuilt from <paramref name="context"/> rather than the instance itself.
    /// </summary>
    private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
        var copyOfContext = context.CreateBuilder().Build();
        return _useFragments switch {
            true => new SequentialFragmentDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).UnwrapFragmented(),
            false => new SequentialDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).WrapOrdered()
        };
    }

    /// <summary>Builds the context from the accumulated settings and returns the download enumerable.</summary>
    public DownloadEnumerable<OutType> Build() {
        var context = _ctxBuilder.Build();
        return new DownloadEnumerable<OutType>(ConstructDownloader(context));
    }
}
|
||||
}
|
||||
}
|
||||
@@ -11,49 +11,49 @@ namespace Beam.Temporary.Cli {
|
||||
public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");
|
||||
public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");
|
||||
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
|
||||
var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
|
||||
//public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
|
||||
// var piece = sdd.ResourceDictionaries[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
|
||||
// var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
|
||||
|
||||
// null checks
|
||||
if (auxiliary is null) // aux is required to get metadata
|
||||
return null;
|
||||
if (piece?.Resource?.MetaTemplateInitialData is null) // sanity check to avoid null warnings
|
||||
return null;
|
||||
// // null checks
|
||||
// if (auxiliary is null) // aux is required to get metadata
|
||||
// return null;
|
||||
// if (piece?.Resource?.MetaTemplateInitialData is null) // sanity check to avoid null warnings
|
||||
// return null;
|
||||
|
||||
// gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
|
||||
var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
|
||||
var binding = auxiliary.Bindings;
|
||||
// // gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
|
||||
// var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
|
||||
// var binding = auxiliary.Bindings;
|
||||
|
||||
return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
|
||||
}
|
||||
// return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
|
||||
//}
|
||||
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
|
||||
var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
|
||||
//public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<ResourceDictionary> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
|
||||
// var res = sdd.ResourceDictionaries[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
|
||||
// var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
|
||||
|
||||
if (aggregator is null) // ensure aggergator data was retrieved successfully
|
||||
return null;
|
||||
if (res is null) // ensure novel data was retrieved successfully
|
||||
return null;
|
||||
// if (aggregator is null) // ensure aggregator data was retrieved successfully
|
||||
// return null;
|
||||
// if (res is null) // ensure novel data was retrieved successfully
|
||||
// return null;
|
||||
|
||||
var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator
|
||||
// var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator
|
||||
|
||||
// creates a generative enumerable of type link from 'template'
|
||||
var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||
template.Builder, new NumberedStateChanger(template.Factory.Behavior),
|
||||
res.Resource.TemplateInitialData));
|
||||
// // creates a generative enumerable of type link from 'template'
|
||||
// var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||
// template.Builder, new NumberedStateChanger(template.Factory.Behavior),
|
||||
// res.Resource.TemplateInitialData));
|
||||
|
||||
return new DownloadContext<IDocument>(web, new(), sle,
|
||||
retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
|
||||
//downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
|
||||
asyncFailurePredicates: [
|
||||
//(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
|
||||
],
|
||||
timeOut: TimeSpan.FromSeconds(15),
|
||||
downloadLogger: logger
|
||||
);
|
||||
}
|
||||
// return new DownloadContext<IDocument>(web, new(), sle,
|
||||
// retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
|
||||
// //downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
|
||||
// asyncFailurePredicates: [
|
||||
// //(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
|
||||
// ],
|
||||
// timeOut: TimeSpan.FromSeconds(15),
|
||||
// downloadLogger: logger
|
||||
// );
|
||||
//}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
using aeqw89.DataKeys;
|
||||
using Beam.Dynamic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Models;
|
||||
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
|
||||
/// <summary>
/// Builder for a site-level resource dictionary identified by <c>SiteKey</c>.
/// NOTE(review): this type looks unfinished - it does not yet implement
/// <see cref="IResourceDictionaryBuilder"/> and nothing populates <c>_builders</c>.
/// </summary>
public record class ResourceDictionaryBuilder(string SiteKey) {
    // Initialised eagerly so that later Add calls cannot hit a null list.
    private List<Func<WebResourceBuilder>> _builders = new();

    /// <summary>
    /// Holds the pieces needed to build one web resource.
    /// NOTE(review): the fields are never assigned or read in the visible code.
    /// </summary>
    private record class WebResourceBuilder(string ResourceKey) {
        private Func<Template> _template;
        private Func<IReadOnlyDictionary<DataKey<DataBindings>, DataBindings>> _bindings;
        private string _name;
        private string _description;
        private Uri _domain;
    }

    /// <summary>
    /// Collects the resources, templates, bindings and initial states of one site and
    /// copies them into a <see cref="BeamDataContext"/> on <see cref="Register"/>.
    /// </summary>
    private record class ResourceDictionaryRegistrar(
        string SiteKey,
        string FriendlyName,
        IEnumerable<WebResource> Resources,
        IReadOnlyDictionary<string, Template> Templates,
        IReadOnlyDictionary<string, DataBindings> Bindings) : IResourceDictionaryRegistrar {

        // Must be initialised: AddInitialState and Register both dereference it, and a
        // positional record has no constructor body that could assign it.
        private Dictionary<string, ImmutableState> _states = new();

        /// <summary>Stores <paramref name="state"/> under <paramref name="key"/>, replacing any earlier entry.</summary>
        public IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state) {
            _states[key] = state;
            return this;
        }

        /// <summary>
        /// Adds every collected item to <paramref name="sdd"/>. All additions use
        /// <c>TryAdd</c>, so entries that already exist in the context are left as they are.
        /// </summary>
        public void Register(BeamDataContext sdd) {
            foreach (var resource in Resources)
                sdd.Resources.TryAdd(resource.Key, resource);
            foreach (var template in Templates)
                sdd.Templates.TryAdd(new DataKey<WebResource>(template.Key), template.Value);
            foreach (var binding in Bindings)
                sdd.Bindings.TryAdd(new DataKey<DataBindings>(binding.Key), binding.Value);
            foreach (var state in _states)
                sdd.InitialStates.TryAdd(new DataKey<ImmutableState>(state.Key), state.Value);

            sdd.ResourceDictionaries.TryAdd(new DataKey<ResourceDictionary>(SiteKey), new ResourceDictionary() {
                Key = new DataKey<ResourceDictionary>(SiteKey),
                FriendlyName = FriendlyName,
                // NOTE(review): the value for InitialStates is missing in the source and the
                // property's type is not visible here - supply it before this can compile.
                InitialStates =
            });
        }
    }
}
|
||||
|
||||
/// <summary>Last step of resource-dictionary construction: optional initial states, then registration.</summary>
public interface IResourceDictionaryRegistrar {
    /// <summary>Associates <paramref name="state"/> with <paramref name="key"/>; returns the registrar for chaining.</summary>
    IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state);

    /// <summary>Writes the collected definitions into <paramref name="sdd"/>.</summary>
    void Register(BeamDataContext sdd);
}
|
||||
|
||||
/// <summary>Accumulates <see cref="DataBindings"/> and produces them as a keyed, read-only dictionary.</summary>
public interface IBindingsBuilder {
    /// <summary>Adds a ready-made bindings instance.</summary>
    IBindingsBuilder AddBinding(DataBindings bindings);

    /// <summary>Adds a bindings instance that is set up by <paramref name="configure"/>.</summary>
    IBindingsBuilder AddBinding(Action<DataBindings> configure);

    /// <summary>Returns the accumulated bindings.</summary>
    IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> Build();
}
|
||||
|
||||
/// <summary>Fluent entry point for describing a resource dictionary and its web resources.</summary>
public interface IResourceDictionaryBuilder {
    /// <summary>Adds one web resource, described by walking the staged builder inside <paramref name="configure"/>.</summary>
    IResourceDictionaryBuilder AddResource(Func<ITemplateBuilderStage, IWebResourceBuilderStage> configure);

    /// <summary>Supplies several web-resource descriptions at once.</summary>
    IResourceDictionaryBuilder WithResources(Func<ITemplateBuilderStage, IWebResourceBuilderStage>[] configure);

    /// <summary>Sets the human-readable name of the dictionary.</summary>
    IResourceDictionaryBuilder WithFriendlyName(string friendlyName);

    /// <summary>Finishes the description and moves on to the registrar.</summary>
    IResourceDictionaryRegistrar Then();
}
|
||||
|
||||
/// <summary>Stage 3 of the web-resource builder: descriptive properties, then <see cref="Build"/>.</summary>
public interface IWebResourceBuilderStage {
    /// <summary>Sets the resource name.</summary>
    IWebResourceBuilderStage WithName(string name); // Stage 3

    /// <summary>Sets the resource description.</summary>
    IWebResourceBuilderStage WithDescription(string description); // Stage 3

    /// <summary>Sets the resource domain.</summary>
    IWebResourceBuilderStage WithDomain(Uri domain); // Stage 3

    /// <summary>Creates the <see cref="WebResource"/>.</summary>
    WebResource Build();
}
|
||||
|
||||
/// <summary>Builder stage that attaches bindings and advances to <see cref="IWebResourceBuilderStage"/>.</summary>
public interface IBindingBuilderStage {
    /// <summary>Defines the bindings through an <see cref="IBindingsBuilder"/> callback.</summary>
    IWebResourceBuilderStage WithBindings(Action<IBindingsBuilder> configure);

    /// <summary>Uses an existing bindings dictionary.</summary>
    IWebResourceBuilderStage WithBindings(IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> bindings);
}
|
||||
|
||||
/// <summary>Builder stage that attaches a template and advances to <see cref="IBindingBuilderStage"/>.</summary>
public interface ITemplateBuilderStage {
    /// <summary>Defines the template through an <see cref="ITemplateBuilder"/> callback.</summary>
    IBindingBuilderStage WithTemplate(Action<ITemplateBuilder> configure);

    /// <summary>Uses an existing template.</summary>
    IBindingBuilderStage WithTemplate(Template template);
}
|
||||
|
||||
/// <summary>Fluent builder for a <see cref="Template"/> (state-changer factory plus URL builder).</summary>
public interface ITemplateBuilder {
    /// <summary>Sets the state-changer factory.</summary>
    ITemplateBuilder WithFactory(StateChangerFactory factory);

    /// <summary>Uses an existing URL builder.</summary>
    ITemplateBuilder WithUrlBuilder(SourceLinkBuilder builder);

    /// <summary>Sets up the URL builder through a callback.</summary>
    ITemplateBuilder WithUrlBuilder(Action<SourceLinkBuilder> configure);

    /// <summary>Creates the <see cref="Template"/>.</summary>
    Template Build();
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -8,6 +8,7 @@ using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
|
||||
@@ -43,97 +44,372 @@ namespace Beam.Temporary.Cli {
|
||||
// sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]);
|
||||
//}
|
||||
|
||||
public static void Define_WoDuShu_HouseOfHorrors(BeamDataDictionary sdd) {
|
||||
var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
var novel = new TextResource() {
|
||||
Key = new DataKey<TextResource>("novels:house_of_horrors"),
|
||||
FriendlyName = "My House Of Horrors",
|
||||
AssociatedSource = wdsAgg,
|
||||
AssociatedMetaSource = wdsAux,
|
||||
TemplateInitialData = new ImmutableState(["24349", "2896325"]),
|
||||
MetaTemplateInitialData = new ImmutableState(["24349"])
|
||||
//// -----------------------------------------------------------------------------
|
||||
//// Helper: same as in the WoDuShu file
|
||||
//private static (DataKey<T>, DataKey<T>) CreateKeyPair<T>(
|
||||
// string pref1, string pref2, string common, string @namespace) {
|
||||
// return (
|
||||
// new DataKey<T>($"{pref1}:{common}").WithNamespace(@namespace),
|
||||
// new DataKey<T>($"{pref2}:{common}").WithNamespace(@namespace)
|
||||
// );
|
||||
//}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 1) Site-wide definition – YeBiQuge (m.yebiquge.com)
|
||||
/// <summary>
/// Registers the site-wide YeBiQuge (m.yebiquge.com) definitions in <paramref name="sdd"/>:
/// three web resources (chapter page, book-info page, table-of-contents page), one URL
/// template per resource and one set of XPath data bindings per resource.
/// </summary>
/// <param name="sdd">The data context that receives the resources, templates and bindings.</param>
public static void Define_YeBiQuge(BeamDataContext sdd) {
    // ---------- keys ----------
    var yb = new DataKey<WebResource>("aeqw89:yebiquge");

    // Lower-case with the invariant culture: a culture-sensitive ToLower() can map the
    // leading 'I' of "IDocument" to a dotless 'ı' (Turkish cultures), which would change the key text.
    var documentSegment = nameof(IDocument).ToLowerInvariant();
    var articleSegment = nameof(ArticleData).ToLowerInvariant();
    var tocSegment = nameof(TableOfContentsData).ToLowerInvariant();

    var bindings = new DataKey<DataBindings>($"aeqw89:yebiquge:{documentSegment}:bindings");
    var bindings_info = new DataKey<DataBindings>($"aeqw89:yebiquge:{articleSegment}:bindings");
    var bindings_toc = new DataKey<DataBindings>($"aeqw89:yebiquge:{tocSegment}:bindings");

    // ---------- web resources ----------
    var aggregator = new WebResource(yb.InsertEnd(documentSegment)) {
        Name = "YeBiQuge – Chapters",
        Description = "Chapter pages (mobile)",
        Domain = "https://m.yebiquge.com",
        Bindings = bindings
    };

    var bookInfo = new WebResource(yb.InsertEnd(articleSegment)) {
        Name = "YeBiQuge – Book Info",
        Description = "Book information / latest updates page",
        Domain = "https://m.yebiquge.com",
        Bindings = bindings_info
    };

    var tocPage = new WebResource(yb.InsertEnd(tocSegment)) {
        Name = "YeBiQuge – TOC",
        Description = "Full chapter list (index*.html)",
        Domain = "https://m.yebiquge.com",
        Bindings = bindings_toc
    };

    sdd.Resources.TryAdd(aggregator.Key, aggregator);
    sdd.Resources.TryAdd(bookInfo.Key, bookInfo);
    sdd.Resources.TryAdd(tocPage.Key, tocPage);

    // ---------- URL templates ----------
    // 1-a) Chapter page   /{catId}/{bookId}/{chapterId}.html
    sdd.Templates.TryAdd(aggregator.Key, new() {
        Factory = new(StateChangerFactory.LastAsNumber),
        Builder = new SourceLinkBuilder("m.yebiquge.com")
            .WithSegments("", "", "")                       // /<cat>/<book>/<chap>
            .WithParameters(0, "")
            .WithParameters(1, "")
            .WithParameters(2, (".html", Position.After))   // chapId.html
    });

    // 1-b) Book-info page   /{catId}/{bookId}/
    sdd.Templates.TryAdd(bookInfo.Key, new() {
        Factory = new(StateChangerFactory.Constant),
        Builder = new SourceLinkBuilder("m.yebiquge.com")
            .WithSegments("", "")                           // /<cat>/<book>/
            .WithParameters(0, "")
            .WithParameters(1, "")
    });

    // 1-c) TOC page   /{catId}/{bookId}/index.html (first page)
    sdd.Templates.TryAdd(tocPage.Key, new() {
        Factory = new(StateChangerFactory.Constant),
        Builder = new SourceLinkBuilder("m.yebiquge.com")
            .WithSegments("", "", "index.html")             // /<cat>/<book>/index.html
            .WithParameters(0, "")
            .WithParameters(1, "")
    });

    // ---------- bindings ----------
    // NOTE(review): bindings are registered with Add while resources/templates use TryAdd;
    // presumably Add fails on an already-present key — confirm this asymmetry is intended.

    // ── chapter page ────────────────────────────────────────────────
    sdd.Bindings.Add(bindings, new() {
        Title = new ContentsDataProvider {
            Content = new Binding { XPath = "//*[@id='nr_title']" }
        },
        Content = new ParagraphedContentDataProvider {
            Content = new Binding { XPath = "//*[@id='chaptercontent']" }
        },
    });

    // ── book-info page ──────────────────────────────────────────────
    sdd.Bindings.Add(bindings_info, new() {
        Title = new ContentsDataProvider {
            Content = new Binding { XPath = "//div[@class='book_info']//dt[@class='name']" }
        },
        Authors = new ContentsArrayDataProvider {
            Content = new Binding {
                XPath = "//div[@class='book_info']//span[contains(text(),'作者')]"
            }
        },
        Description = new ParagraphedContentDataProvider {
            Content = new Binding { XPath = "//div[@class='book_about']/dl/dd" }
        }
    });

    // ── TOC page ────────────────────────────────────────────────────
    sdd.Bindings.Add(bindings_toc, new() {
        PagesDropDown = new DropDownDataProvider {
            Content = new Binding { XPath = "//div[@class='fenye']//select" },
            RelativeTo = tocPage.Domain
        },
        TableOfContents = new AnchorCollectionDataProvider {
            Content = new Binding { XPath = "//div[@class='book_last']/dl" },
            RelativeTo = tocPage.Domain
        }
    });
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 2) Concrete novel – 《诡秘之主》 / Lord of the Mysteries
|
||||
/// <summary>
/// Registers the novel "Lord of the Mysteries" as hosted on YeBiQuge: a
/// <c>ResourceDictionary</c> that maps each data kind to a YeBiQuge web-resource key,
/// together with the initial template state for each of those resources.
/// </summary>
/// <param name="sdd">The data context that receives the resource dictionary.</param>
public static void Define_YeBiQuge_LordOfMysteries(BeamDataContext sdd) {
    // The per-kind resource keys are derived from the SITE key, the same way
    // Define_YeBiQuge derives the keys it registers in sdd.Resources. Deriving them from
    // the novel key (as before) produced keys that Define_YeBiQuge never registers.
    var site = new DataKey<WebResource>("aeqw89:yebiquge");
    var novelKey = new DataKey<WebResource>("aeqw89:yebiquge:novels:lord_of_the_mysteries");

    // Invariant lower-casing keeps the key text independent of the current culture.
    var ybAgg = site.InsertEnd(nameof(IDocument).ToLowerInvariant());
    var ybInfo = site.InsertEnd(nameof(ArticleData).ToLowerInvariant());
    var ybToc = site.InsertEnd(nameof(TableOfContentsData).ToLowerInvariant());

    var novel = new ResourceDictionary {
        Key = novelKey.To<ResourceDictionary>(),
        FriendlyName = "Lord of the Mysteries",
        Resources = {
            { nameof(IDocument), ybAgg },             // chapters
            { nameof(ArticleData), ybInfo },          // book info
            { nameof(TableOfContentsData), ybToc }    // full TOC
        },

        // catId = 2 , bookId = 2958 , sample chapterId = 8699808
        InitialStates = new Dictionary<DataKey<WebResource>, ImmutableState> {
            { ybAgg, new ImmutableState(["2", "2958", "8699808"]) },
            { ybInfo, new ImmutableState(["2", "2958"]) },
            { ybToc, new ImmutableState(["2", "2958"]) },
        }
    };

    sdd.ResourceDictionaries.TryAdd(novel.Key, novel);
    sdd.AggregatorNovels.TryAdd(ybAgg, [novel.Key]);
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 1) Site-wide definition – KuaiShu5 (www.kuaishu5.com)
|
||||
/// <summary>
/// Registers the site-wide KuaiShu5 (www.kuaishu5.com) definitions in <paramref name="sdd"/>:
/// three web resources (chapter page, book-info page, table-of-contents page), one URL
/// template per resource and one set of XPath data bindings per resource.
/// </summary>
/// <param name="sdd">The data context that receives the resources, templates and bindings.</param>
public static void Define_KuaiShu5(BeamDataContext sdd) {
    // ---------- keys ----------
    var ks = new DataKey<WebResource>("aeqw89:kuaishu5");

    // Lower-case with the invariant culture: a culture-sensitive ToLower() can map the
    // leading 'I' of "IDocument" to a dotless 'ı' (Turkish cultures), which would change the key text.
    var documentSegment = nameof(IDocument).ToLowerInvariant();
    var articleSegment = nameof(ArticleData).ToLowerInvariant();
    var tocSegment = nameof(TableOfContentsData).ToLowerInvariant();

    var bindings_chapter = new DataKey<DataBindings>($"aeqw89:kuaishu5:{documentSegment}:bindings");
    var bindings_info = new DataKey<DataBindings>($"aeqw89:kuaishu5:{articleSegment}:bindings");
    var bindings_toc = new DataKey<DataBindings>($"aeqw89:kuaishu5:{tocSegment}:bindings");

    // ---------- web resources ----------
    var chapters = new WebResource(ks.InsertEnd(documentSegment)) {
        Name = "KuaiShu5 – Chapters",
        Description = "Chapter pages",
        Domain = "https://www.kuaishu5.com",
        Bindings = bindings_chapter
    };

    var bookInfo = new WebResource(ks.InsertEnd(articleSegment)) {
        Name = "KuaiShu5 – Book Info",
        Description = "Book information / landing page",
        Domain = "https://www.kuaishu5.com",
        Bindings = bindings_info
    };

    var tocPage = new WebResource(ks.InsertEnd(tocSegment)) {
        Name = "KuaiShu5 – TOC",
        Description = "Full chapter list (index page)",
        Domain = "https://www.kuaishu5.com",
        Bindings = bindings_toc
    };

    sdd.Resources.TryAdd(chapters.Key, chapters);
    sdd.Resources.TryAdd(bookInfo.Key, bookInfo);
    sdd.Resources.TryAdd(tocPage.Key, tocPage);

    // ---------- URL templates ----------
    // 1-a) Chapter page   /b{bookId}/{chapterId}.html
    sdd.Templates.TryAdd(chapters.Key, new() {
        Factory = new(StateChangerFactory.LastAsNumber),
        Builder = new SourceLinkBuilder("www.kuaishu5.com")
            .WithSegments("", "")                           // /<seg0>/<seg1>
            .WithParameters(0, ("b", Position.Before))      // seg0: b{bookId}
            .WithParameters(1, (".html", Position.After))   // seg1: {chapterId}.html
    });

    // 1-b) Book-info page   /b{bookId}/
    sdd.Templates.TryAdd(bookInfo.Key, new() {
        Factory = new(StateChangerFactory.Constant),
        Builder = new SourceLinkBuilder("www.kuaishu5.com")
            .WithSegments("")                               // /<seg0>
            .WithParameters(0, ("b", Position.Before))      // seg0: b{bookId}
    });

    // 1-c) TOC page   /b{bookId}/   (same as book-info)
    sdd.Templates.TryAdd(tocPage.Key, new() {
        Factory = new(StateChangerFactory.Constant),
        Builder = new SourceLinkBuilder("www.kuaishu5.com")
            .WithSegments("")                               // /<seg0>
            .WithParameters(0, ("b", Position.Before))      // seg0: b{bookId}
    });

    // ---------- bindings ----------
    // NOTE(review): bindings are registered with Add while resources/templates use TryAdd;
    // presumably Add fails on an already-present key — confirm this asymmetry is intended.

    // ── chapter page ────────────────────────────────────────────────
    sdd.Bindings.Add(bindings_chapter, new() {
        Title = new ContentsDataProvider {
            Content = new Binding { XPath = "//h1[@class='bookname']" }
        },
        Content = new ParagraphedContentDataProvider {
            Content = new Binding { XPath = "//*[@id='booktxt']" }
        }
    });

    // ── book-info page ──────────────────────────────────────────────
    sdd.Bindings.Add(bindings_info, new() {
        Title = new ContentsDataProvider {
            Content = new Binding { XPath = "//*[@id='info']/h1" }
        },
        Authors = new ContentsArrayDataProvider {
            Content = new Binding {
                XPath = "//*[@id='info']//p[contains(text(),'作者')]/a"
            }
        },
        Description = new ParagraphedContentDataProvider {
            Content = new Binding { XPath = "//*[@id='intro']" }
        }
    });

    // ── TOC page ────────────────────────────────────────────────────
    sdd.Bindings.Add(bindings_toc, new() {
        PagesDropDown = new DropDownDataProvider {
            Content = new Binding { XPath = "//*[@id='indexselect']" },
            RelativeTo = tocPage.Domain
        },
        TableOfContents = new AnchorCollectionDataProvider {
            Content = new Binding { XPath = "//*[@id='content_1']" },
            RelativeTo = tocPage.Domain
        }
    });
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 2) Concrete novel – 《诡秘之主》 / Lord of the Mysteries
|
||||
/// <summary>
/// Registers the novel "Lord of the Mysteries" as hosted on KuaiShu5: a
/// <c>ResourceDictionary</c> that maps each data kind to a KuaiShu5 web-resource key,
/// plus the initial template state for each of those resources.
/// </summary>
/// <param name="sdd">The data context that receives the resource dictionary and initial states.</param>
public static void Define_KuaiShu5_LordOfMysteries(BeamDataContext sdd) {
    var ks = new DataKey<WebResource>("aeqw89:kuaishu5");

    // Invariant lower-casing keeps the key text independent of the current culture
    // (a culture-sensitive ToLower() can turn the 'I' of "IDocument" into a dotless 'ı').
    var ksChapters = ks.InsertEnd(nameof(IDocument).ToLowerInvariant());
    var ksInfo = ks.InsertEnd(nameof(ArticleData).ToLowerInvariant());
    var ksToc = ks.InsertEnd(nameof(TableOfContentsData).ToLowerInvariant());

    var novel = new ResourceDictionary {
        Key = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries"),
        FriendlyName = "Lord of the Mysteries",
        Resources = {
            { nameof(IDocument), ksChapters },
            { nameof(ArticleData), ksInfo },
            { nameof(TableOfContentsData), ksToc }
        }
    };

    // bookId = 122722 , sample chapterId = 288372
    // NOTE(review): this assigns a brand-new dictionary to sdd.InitialStates, so whatever
    // the property held before is replaced — confirm that is intended.
    sdd.InitialStates = new Dictionary<DataKey<ImmutableState>, ImmutableState> {
        { ksChapters.To<ImmutableState>(), new ImmutableState(["122722", "288372"]) },
        { ksInfo.To<ImmutableState>(), new ImmutableState(["122722"]) },
        { ksToc.To<ImmutableState>(), new ImmutableState(["122722"]) }
    };

    sdd.ResourceDictionaries.TryAdd(novel.Key, novel);
    sdd.AggregatorNovels.TryAdd(ksChapters, [novel.Key]);
}
|
||||
//public static void Define_WoDuShu_HouseOfHorrors(BeamDataDictionary sdd) {
|
||||
// var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
// var novel = new ResourceDictionary() {
|
||||
// Key = new DataKey<ResourceDictionary>("novels:house_of_horrors"),
|
||||
// FriendlyName = "My House Of Horrors",
|
||||
// AssociatedSource = wdsAgg,
|
||||
// AssociatedMetaSource = wdsAux,
|
||||
// TemplateInitialData = new ImmutableState(["24349", "2896325"]),
|
||||
// MetaTemplateInitialData = new ImmutableState(["24349"])
|
||||
// };
|
||||
|
||||
// sdd.ResourceDictionaries.TryAdd(novel.Key, novel);
|
||||
|
||||
// sdd.AggregatorNovels.TryAdd(wdsAgg, [novel.Key]);
|
||||
//}
|
||||
|
||||
/// <summary>
/// Builds two keys that share a common part and a namespace but differ in their prefix.
/// Each key is created from <c>"{prefix}:{common}"</c> and then passed through
/// <c>InsertStart(@namespace)</c>.
/// </summary>
/// <typeparam name="T">The type the keys refer to.</typeparam>
/// <param name="pref1">Prefix of the first key.</param>
/// <param name="pref2">Prefix of the second key.</param>
/// <param name="common">Part shared by both keys, placed after the prefix.</param>
/// <param name="namespace">Value handed to <c>InsertStart</c> for both keys.</param>
/// <returns>The pair (first key, second key).</returns>
private static (DataKey<T>, DataKey<T>) CreateKeyPair<T>(string pref1, string pref2, string common, string @namespace) {
    // Only the InsertStart form is kept; the older WithNamespace pair was left behind
    // in the tuple and made it a malformed four-element expression.
    return (
        new DataKey<T>($"{pref1}:{common}").InsertStart(@namespace),
        new DataKey<T>($"{pref2}:{common}").InsertStart(@namespace)
    );
}
|
||||
|
||||
public static void Define_WoDuShu(BeamDataDictionary sdd) {
|
||||
var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
var bindings = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
var aggregator = new WebResource(wdsAgg) {
|
||||
Name = "WoDuShu.com",
|
||||
Description = "A Chinese novel aggregator site",
|
||||
Domain = "https://wodushu.com",
|
||||
Bindings = bindings
|
||||
};
|
||||
var auxiliary = new WebResource(wdsAux) {
|
||||
Name = "WoDuShu.com",
|
||||
Description = "A Chinese novel aggregator site",
|
||||
Domain = "https://wodushu.com",
|
||||
Bindings = bindings.WithSuffix("_aux")
|
||||
};
|
||||
//public static void Define_WoDuShu(BeamDataDictionary sdd) {
|
||||
// var (wdsAgg, wdsAux) = CreateKeyPair<WebResource>("aggregators", "auxillaries", "wodushu", "aeqw89:document");
|
||||
// var bindings = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
// var aggregator = new WebResource(wdsAgg) {
|
||||
// Name = "WoDuShu.com",
|
||||
// Description = "A Chinese novel aggregator site",
|
||||
// Domain = "https://wodushu.com",
|
||||
// Bindings = bindings
|
||||
// };
|
||||
// var auxiliary = new WebResource(wdsAux) {
|
||||
// Name = "WoDuShu.com",
|
||||
// Description = "A Chinese novel aggregator site",
|
||||
// Domain = "https://wodushu.com",
|
||||
// Bindings = bindings.WithSuffix("_aux")
|
||||
// };
|
||||
|
||||
sdd.Templates.TryAdd(wdsAgg, new() {
|
||||
Factory = new(StateChangerFactory.LastAsNumber),
|
||||
Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
.WithSegments("read", "", "")
|
||||
.WithParameters(1, "")
|
||||
.WithParameters(2, (".html", Position.After))
|
||||
});
|
||||
sdd.Templates.TryAdd(wdsAux, new() {
|
||||
Factory = new(StateChangerFactory.Constant),
|
||||
Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
.WithSegments("book", "")
|
||||
.WithParameters(1, "")
|
||||
});
|
||||
// sdd.Templates.TryAdd(wdsAgg, new() {
|
||||
// Factory = new(StateChangerFactory.LastAsNumber),
|
||||
// Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
// .WithSegments("read", "", "")
|
||||
// .WithParameters(1, "")
|
||||
// .WithParameters(2, (".html", Position.After))
|
||||
// });
|
||||
// sdd.Templates.TryAdd(wdsAux, new() {
|
||||
// Factory = new(StateChangerFactory.Constant),
|
||||
// Builder = new SourceLinkBuilder("www.wodushu.com")
|
||||
// .WithSegments("book", "")
|
||||
// .WithParameters(1, "")
|
||||
// });
|
||||
|
||||
sdd.Aggregators.TryAdd(wdsAgg, aggregator);
|
||||
sdd.Auxillaries.TryAdd(wdsAux, auxiliary);
|
||||
// sdd.Resources.TryAdd(wdsAgg, aggregator);
|
||||
// sdd.Auxillaries.TryAdd(wdsAux, auxiliary);
|
||||
|
||||
var binding_agg = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
var binding_aux = new DataKey<DataBindings>("aeqw89:bindings:wodushu_aux");
|
||||
// var binding_agg = new DataKey<DataBindings>("aeqw89:bindings:wodushu");
|
||||
// var binding_aux = new DataKey<DataBindings>("aeqw89:bindings:wodushu_aux");
|
||||
|
||||
sdd.Bindings.Add(binding_agg, new() {
|
||||
Title = new ContentsDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[4]/div/div/div[2]/h1"
|
||||
}
|
||||
},
|
||||
// sdd.Bindings.Add(binding_agg, new() {
|
||||
// Title = new ContentsDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[4]/div/div/div[2]/h1"
|
||||
// }
|
||||
// },
|
||||
|
||||
Content = new ParagraphedContentDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "//*[@id=\"content\"]"
|
||||
}
|
||||
},
|
||||
});
|
||||
// Content = new ParagraphedContentDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "//*[@id=\"content\"]"
|
||||
// }
|
||||
// },
|
||||
// });
|
||||
|
||||
sdd.Bindings.Add(binding_aux, new() {
|
||||
Title = new ContentsDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/h1"
|
||||
}
|
||||
},
|
||||
Authors = new ContentsArrayDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/div/p[1]/a"
|
||||
}
|
||||
},
|
||||
Description = new ParagraphedContentDataProvider() {
|
||||
Content = new Binding() {
|
||||
XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[2]"
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
// sdd.Bindings.Add(binding_aux, new() {
|
||||
// Title = new ContentsDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/h1"
|
||||
// }
|
||||
// },
|
||||
// Authors = new ContentsArrayDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/div/p[1]/a"
|
||||
// }
|
||||
// },
|
||||
// Description = new ParagraphedContentDataProvider() {
|
||||
// Content = new Binding() {
|
||||
// XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[2]"
|
||||
// }
|
||||
// },
|
||||
// });
|
||||
//}
|
||||
|
||||
//public static void Define_NovelFull(SharedDataDictionary sdd) {
|
||||
// var docNamespace = "aeqw89:document";
|
||||
|
||||
+136
-65
@@ -9,13 +9,16 @@ using System.Text.Json.Serialization;
|
||||
using System.Text.Json.Serialization.Metadata;
|
||||
using Beam.Temporary.Cli.Templates.Classic;
|
||||
using Beam.Exports;
|
||||
using System.Diagnostics;
|
||||
using Beam.Models;
|
||||
using Beam.Stealth;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
internal class Program {
|
||||
|
||||
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
|
||||
|
||||
public static BeamDataDictionary BeamData { get; set; } = [];
|
||||
public static BeamDataContext BeamData { get; set; } = [];
|
||||
|
||||
public static IArchitecture Architecture = IArchitecture.Default;
|
||||
|
||||
@@ -27,14 +30,15 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
var web = new HtmlWeb();
|
||||
|
||||
var lf = LoggerFactory.Create((x) => {
|
||||
x.AddConsole();
|
||||
});
|
||||
var lf = LoggerFactory.Create((x) => x
|
||||
.AddConsole()
|
||||
.SetMinimumLevel(LogLevel.Trace)
|
||||
);
|
||||
|
||||
ILogger logger = lf
|
||||
.CreateLogger("Program");
|
||||
|
||||
await using var sharedContext = await DataDictionaryContext<BeamDataDictionary>.Create(
|
||||
await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
|
||||
BeamDataPath,
|
||||
false,
|
||||
DataKind.Shared,
|
||||
@@ -45,82 +49,149 @@ namespace Beam.Temporary.Cli {
|
||||
BeamData = sharedContext.Data;
|
||||
|
||||
BeamData.Clear();
|
||||
NovelStatics.Define_WoDuShu(BeamData);
|
||||
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
|
||||
NovelStatics.Define_YeBiQuge(BeamData);
|
||||
NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
|
||||
NovelStatics.Define_KuaiShu5(BeamData);
|
||||
NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
|
||||
ClassicTemplates.Register(BeamData);
|
||||
|
||||
await sharedContext.ForceSave();
|
||||
BeamData = sharedContext.Data; // need to refresh instance after forced save!
|
||||
|
||||
CancellationTokenSource cts = new();
|
||||
|
||||
var novel = new DataKey<TextResource>("novels:house_of_horrors");
|
||||
using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
|
||||
var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
|
||||
return Task.CompletedTask;
|
||||
}, x => Task.FromResult(x));
|
||||
|
||||
var metadata2 = await DownloadBuilder<HtmlDocument, IDocumentMetaData>.FromMeta(novel, BeamData)
|
||||
.WithLink()
|
||||
.WithTransformer(CommonTransformers.ArticleDataTransformer)
|
||||
.Configure((x) => x
|
||||
.WithDownloadLogger(logger)
|
||||
.WithRetryReporter(new Progress<RetryReport>())
|
||||
.WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
.Build()
|
||||
.FirstAsync();
|
||||
var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
|
||||
if (success)
|
||||
logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
|
||||
else
|
||||
logger?.LogError("Failed to download!");
|
||||
|
||||
var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
|
||||
.WithRange(1..5)
|
||||
.WithLinkGenerator()
|
||||
.WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
|
||||
.Configure((x) => x
|
||||
.WithDownloadLogger(logger)
|
||||
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||
.WithTimeOut(TimeSpan.FromSeconds(15))
|
||||
)
|
||||
.Build();
|
||||
Console.WriteLine(result?.DocumentNode.OuterHtml);
|
||||
|
||||
//var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
|
||||
|
||||
//var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
|
||||
// .WithLink()
|
||||
// .WithTransformer(CommonTransformers.TableOfContentsTransformer)
|
||||
// .Configure((x) => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithRetryReporter(new Progress<RetryReport>())
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
// .Build()
|
||||
// .FirstAsync();
|
||||
|
||||
//if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
|
||||
// Debugger.Break();
|
||||
|
||||
//var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
|
||||
// .WithLinks(metadata2.Data.PagesLinks)
|
||||
// .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
|
||||
// .Configure(x => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithRetryReporter(new Progress<RetryReport>())
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
// .Build();
|
||||
|
||||
//var links = (await pageLinks
|
||||
// .ToListAsync())
|
||||
// .Where(x => x?.Data?.ContentLinks is not null)
|
||||
// .SelectMany(x => x.Data.ContentLinks!)
|
||||
// .DistinctBy(x => x.Link.AbsoluteUri);
|
||||
|
||||
//var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
|
||||
// .WithLinks(links)
|
||||
// .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
|
||||
// .Configure(x => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
// .WithParallelism(4)
|
||||
// .UseFragments()
|
||||
// .Build();
|
||||
|
||||
//HashSet<Ordered<StringDocument>> downloaded = [];
|
||||
//try {
|
||||
// await foreach (var download in downloader) {
|
||||
// logger?.LogInformation("Downloaded chapter with order={}", download.Order);
|
||||
// try {
|
||||
// downloaded.Add(download);
|
||||
// } catch (Exception e) {
|
||||
// logger?.LogError(e, "Unknown error occurred");
|
||||
// }
|
||||
// }
|
||||
//} catch (Exception e) {
|
||||
// logger?.LogError(e, "Uncaught error detected!");
|
||||
//} finally {
|
||||
// logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
|
||||
// try {
|
||||
// string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
|
||||
// System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
|
||||
// } catch (Exception e) {
|
||||
// logger?.LogInformation(e, "Failed to serialize chapters");
|
||||
// }
|
||||
//}
|
||||
|
||||
//var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
|
||||
// .WithRange(1..5)
|
||||
// .WithLinkGenerator()
|
||||
// .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
|
||||
// .Configure((x) => x
|
||||
// .WithDownloadLogger(logger)
|
||||
// .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||
// .WithTimeOut(TimeSpan.FromSeconds(15))
|
||||
// )
|
||||
// .Build();
|
||||
|
||||
|
||||
|
||||
List<Task<Ordered<IDocument>>> translationTasks = [];
|
||||
List<Ordered<IDocument>> documents = [];
|
||||
//List<Task<Ordered<IDocument>>> translationTasks = [];
|
||||
//List<Ordered<IDocument>> documents = [];
|
||||
|
||||
await foreach (var download in downloader2.Take(10)) {
|
||||
if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
|
||||
continue;
|
||||
if (meta is not ArticleData articleMetaData)
|
||||
continue;
|
||||
if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
|
||||
continue;
|
||||
if (meta is not ArticleData bookMetaData)
|
||||
continue;
|
||||
//Console.WriteLine($"Title: {data.Name}");
|
||||
//Console.WriteLine($"Description: {data.Description}");
|
||||
//Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
|
||||
//Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
|
||||
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
||||
Console.WriteLine($"Book title: {bookMetaData.Name}");
|
||||
//Console.WriteLine($"Content: {download}");
|
||||
//await foreach (var download in downloader2.Take(10)) {
|
||||
// if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
|
||||
// continue;
|
||||
// if (meta is not ArticleData articleMetaData)
|
||||
// continue;
|
||||
// if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
|
||||
// continue;
|
||||
// if (meta is not ArticleData bookMetaData)
|
||||
// continue;
|
||||
// //Console.WriteLine($"Title: {data.Name}");
|
||||
// //Console.WriteLine($"Description: {data.Description}");
|
||||
// //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
|
||||
// //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
|
||||
// Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
||||
// Console.WriteLine($"Book title: {bookMetaData.Name}");
|
||||
// //Console.WriteLine($"Content: {download}");
|
||||
|
||||
//translationTasks.Add(Task.Run(async () => {
|
||||
// logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
|
||||
// logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// return ret;
|
||||
//}));
|
||||
}
|
||||
// //translationTasks.Add(Task.Run(async () => {
|
||||
// // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
|
||||
// // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// // return ret;
|
||||
// //}));
|
||||
//}
|
||||
|
||||
documents = (await Task.WhenAll(translationTasks)).ToList();
|
||||
//documents = (await Task.WhenAll(translationTasks)).ToList();
|
||||
|
||||
string testDir = Path.Combine("txt", Path.GetRandomFileName());
|
||||
Directory.CreateDirectory(testDir);
|
||||
//string testDir = Path.Combine("txt", Path.GetRandomFileName());
|
||||
//Directory.CreateDirectory(testDir);
|
||||
|
||||
int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
|
||||
foreach (var document in documents.OrderBy((x) => x.Order)) {
|
||||
document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
|
||||
Dictionary<string, string> linkButtons = new();
|
||||
if (document.Order != 0)
|
||||
linkButtons.Add("Previous", $"{document.Order - 1}.html");
|
||||
if (document.Order != len)
|
||||
linkButtons.Add("Next", $"{document.Order + 1}.html");
|
||||
new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
|
||||
}
|
||||
//int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
|
||||
//foreach (var document in documents.OrderBy((x) => x.Order)) {
|
||||
// document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
|
||||
// Dictionary<string, string> linkButtons = new();
|
||||
// if (document.Order != 0)
|
||||
// linkButtons.Add("Previous", $"{document.Order - 1}.html");
|
||||
// if (document.Order != len)
|
||||
// linkButtons.Add("Next", $"{document.Order + 1}.html");
|
||||
// new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
|
||||
//}
|
||||
|
||||
Console.ReadKey();
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ namespace Beam.Temporary.Cli {
|
||||
public string StateChangerKey { get; set; }
|
||||
|
||||
/// <summary>
/// Creates a factory for the given state-changer key after checking that the key is
/// contained in <c>Keys</c>, and stores it in <see cref="StateChangerKey"/>.
/// </summary>
/// <param name="stateChangerKey">
/// The key to store. The parameter name deliberately mirrors the
/// <see cref="StateChangerKey"/> property: System.Text.Json binds constructor parameters
/// to properties by (case-insensitive) name when deserializing through a
/// <c>[JsonConstructor]</c>.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="stateChangerKey"/> is not contained in <c>Keys</c>.
/// </exception>
[JsonConstructor]
public StateChangerFactory(string stateChangerKey) {
    if (!Keys.Contains(stateChangerKey))
        throw new ArgumentException($"{stateChangerKey} not in keys list", nameof(stateChangerKey));

    StateChangerKey = stateChangerKey;
}
|
||||
|
||||
public static Dictionary<string, Func<IStateChangeBehaviour>> FactoryTable = new() {
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// <see cref="ArticleData"/> extended with the two link collections of a
/// table-of-contents page.
/// </summary>
public record TableOfContentsData : ArticleData {
    /// <summary>
    /// Links pointing at the actual content; may be <see langword="null"/>.
    /// </summary>
    public SourceLink[]? ContentLinks { get; set; }

    /// <summary>
    /// Links to all table-of-contents pages of this specific resource; may be
    /// <see langword="null"/>.
    /// </summary>
    public SourceLink[]? PagesLinks { get; set; }
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// Bundles a <see cref="StateChangerFactory"/> with a <see cref="SourceLinkBuilder"/>.
/// </summary>
public class Template {
    /// <summary>The state-changer factory of this template.</summary>
    public StateChangerFactory Factory { get; set; }

    /// <summary>The link builder of this template.</summary>
    public SourceLinkBuilder Builder { get; set; }
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
|
||||
|
||||
using aeqw89.DataKeys;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
/// <summary>
/// A keyed text resource that optionally references a source web resource and a
/// meta-source web resource, along with the initial template data for each.
/// </summary>
public class TextResource : IKeyed<TextResource> {
    /// <summary>Key identifying this resource.</summary>
    public required DataKey<TextResource> Key { get; set; }

    /// <summary>Optional human-readable name.</summary>
    public string? FriendlyName { get; set; }

    /// <summary>Optional key of the associated source; looked up in <c>Aggregators</c> by <see cref="ToRecord"/>.</summary>
    public DataKey<WebResource>? AssociatedSource { get; set; }

    /// <summary>Optional key of the associated meta source; looked up in <c>Auxillaries</c> by <see cref="ToRecord"/>.</summary>
    public DataKey<WebResource>? AssociatedMetaSource { get; set; }

    /// <summary>Initial template data (required).</summary>
    public required ImmutableState TemplateInitialData { get; set; }

    /// <summary>Optional initial data for the meta template.</summary>
    public ImmutableState? MetaTemplateInitialData { get; set; }

    /// <summary>
    /// Resolves the associated keys against <paramref name="sdd"/> and packs the results,
    /// together with this instance, into a <see cref="TextResourceRecord"/>.
    /// A key that is <see langword="null"/> yields a <see langword="null"/> entry.
    /// </summary>
    /// <param name="sdd">Dictionary whose <c>Aggregators</c> and <c>Auxillaries</c> are indexed.</param>
    /// <returns>The record holding this resource and the resolved web resources.</returns>
    public TextResourceRecord ToRecord(BeamDataDictionary sdd) {
        // Source is resolved first, then the meta source.
        WebResource? source = null;
        if (AssociatedSource is not null)
            source = sdd.Aggregators[AssociatedSource];

        WebResource? metaSource = null;
        if (AssociatedMetaSource is not null)
            metaSource = sdd.Auxillaries[AssociatedMetaSource];

        return new TextResourceRecord(this, source, metaSource);
    }
}
|
||||
|
||||
public record TextResourceRecord(TextResource Resource, WebResource? AssociatedSource, WebResource? AssociatedMetaSource);
|
||||
}
|
||||
@@ -13,10 +13,16 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Export
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Tests", "Beam.Tests\Beam.Tests.csproj", "{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Puppeteer", "Beam.Puppeteer\Beam.Puppeteer.csproj", "{1A967563-D643-401D-A031-68DD43FACE8D}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Playwright", "Beam.Playwright\Beam.Playwright.csproj", "{1A967563-D643-401D-A031-68DD43FACE8D}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "aeqw89.Beam", "aeqw89.Beam\aeqw89.Beam.csproj", "{583236EC-0CE8-4FA3-ADA3-860405E1F16F}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Stealth", "Beam.Stealth\Beam.Stealth.csproj", "{81E3F6F2-AE16-43DB-93FC-8FDFE14ACA83}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Fluent", "Beam.Fluent\Beam.Fluent.csproj", "{0EFE0D86-2809-426A-AC57-52BDCDD25D26}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Models", "Beam.Models\Beam.Models.csproj", "{DD98868D-D1CA-4B6F-AC0F-4ADCBCD6DAD1}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -51,6 +57,18 @@ Global
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{583236EC-0CE8-4FA3-ADA3-860405E1F16F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{81E3F6F2-AE16-43DB-93FC-8FDFE14ACA83}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{81E3F6F2-AE16-43DB-93FC-8FDFE14ACA83}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{81E3F6F2-AE16-43DB-93FC-8FDFE14ACA83}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{81E3F6F2-AE16-43DB-93FC-8FDFE14ACA83}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{0EFE0D86-2809-426A-AC57-52BDCDD25D26}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{0EFE0D86-2809-426A-AC57-52BDCDD25D26}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{0EFE0D86-2809-426A-AC57-52BDCDD25D26}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{0EFE0D86-2809-426A-AC57-52BDCDD25D26}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{DD98868D-D1CA-4B6F-AC0F-4ADCBCD6DAD1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{DD98868D-D1CA-4B6F-AC0F-4ADCBCD6DAD1}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{DD98868D-D1CA-4B6F-AC0F-4ADCBCD6DAD1}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{DD98868D-D1CA-4B6F-AC0F-4ADCBCD6DAD1}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
+30
-3
@@ -5,6 +5,7 @@ using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Http.Json;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
@@ -14,15 +15,25 @@ namespace Beam {
|
||||
public object? Body { get; }
|
||||
public string Uri { get; } = uri;
|
||||
public HttpMethod Method { get; } = method;
|
||||
public KeyValuePair<string, string[]>[] Headers { get; } = headers;
|
||||
public KeyValuePair<string, string[]>[] Headers { get; private set; } = headers;
|
||||
public HashSet<HttpStatusCode> SuccessCodes { get; } = successCodes;
|
||||
|
||||
private string? ContentType = "application/json";
|
||||
|
||||
public async Task<ApiResponse> GetResponse(ILogger<ApiResponse>? logger, (int @try, int max)? tries = null, CancellationToken ct = default) {
|
||||
logger?.LogInformation("Fetching '{}' with method '{}'", Uri, Method);
|
||||
SanitizeHeaders();
|
||||
|
||||
var request = new HttpRequestMessage(Method, Uri);
|
||||
request.Content = body is null ? request.Content : JsonContent.Create(body);
|
||||
request.Content = body is null ? request.Content :
|
||||
body is string stringBody ? new StringContent(stringBody) : JsonContent.Create(body);
|
||||
|
||||
if (request.Content is not null)
|
||||
request.Content.Headers.ContentType = ContentType is null ? null : new System.Net.Http.Headers.MediaTypeHeaderValue(ContentType);
|
||||
|
||||
foreach (var header in Headers)
|
||||
request.Headers.Add(header.Key, header.Value);
|
||||
|
||||
logger?.LogInformation("Fetching '{}' with method '{}', content-type '{}', and headers '{}'", Uri, Method, ContentType, JsonSerializer.Serialize(request.Headers.ToDictionary()));
|
||||
var response = await Client.SendAsync(request, ct);
|
||||
|
||||
if (tries is not null && tries?.@try < tries?.max && !SuccessCodes.Contains(response.StatusCode)) {
|
||||
@@ -33,6 +44,22 @@ namespace Beam {
|
||||
return await ApiResponse.CreateAsync(response, logger, RequestData, ct);
|
||||
}
|
||||
|
||||
/// <summary>
/// Rebuilds <see cref="Headers"/> so that it only contains entries meant for the request's own headers.
/// </summary>
/// <remarks>
/// <para>Entries without any value are dropped.</para>
/// <para>
/// A <c>Content-Type</c> entry is removed from the list and its first value is stored in
/// <c>ContentType</c> instead. The name is matched case-insensitively because HTTP header names
/// are case-insensitive: a <c>content-type</c> entry left in the list would be added to the request
/// headers, where content headers are rejected, and would not override the content type.
/// </para>
/// <para>When the same key appears more than once, the last occurrence wins.</para>
/// </remarks>
private void SanitizeHeaders() {
    Dictionary<string, string[]> headers = [];
    foreach (var kvp in Headers) {
        // Nothing to send for a header that carries no values.
        if (kvp.Value.Length == 0)
            continue;

        if (string.Equals(kvp.Key, "Content-Type", System.StringComparison.OrdinalIgnoreCase)) {
            // Content-Type belongs to the content, not the request headers; remember it separately.
            ContentType = kvp.Value[0];
        } else {
            headers[kvp.Key] = kvp.Value;
        }
    }

    Headers = headers.ToArray();
}
|
||||
|
||||
/// <summary>
/// Convenience helper: issues a GET request to <paramref name="url"/> with no extra headers.
/// </summary>
/// <param name="client">HTTP client handed to the created <c>ApiCall</c>.</param>
/// <param name="url">Address to request.</param>
/// <param name="factory">Factory used to create the <see cref="ApiResponse"/> logger.</param>
/// <returns>The response produced by <c>GetResponse</c>.</returns>
public static async Task<ApiResponse> Get(HttpClient client, string url, ILoggerFactory factory) {
    var call = new ApiCall(client, url, HttpMethod.Get, [], null, null);
    var responseLogger = factory.CreateLogger<ApiResponse>();
    return await call.GetResponse(responseLogger);
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,14 @@ namespace Beam {
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Sets the target address from a <see cref="System.Uri"/> by forwarding its
/// <see cref="System.Uri.AbsoluteUri"/> to the other <c>WithUri</c> overload.
/// </summary>
/// <param name="uri">The address to use.</param>
/// <returns>Whatever the forwarded <c>WithUri</c> call returns.</returns>
public ApiCallBuilder WithUri(Uri uri)
    => WithUri(uri.AbsoluteUri);
|
||||
|
||||
/// <summary>
/// Sets the target address from a <see cref="SourceLink"/> by forwarding its
/// <c>Link</c> to the matching <c>WithUri</c> overload.
/// </summary>
/// <param name="uri">The source link whose <c>Link</c> is used.</param>
/// <returns>Whatever the forwarded <c>WithUri</c> call returns.</returns>
public ApiCallBuilder WithUri(SourceLink uri)
    => WithUri(uri.Link);
|
||||
|
||||
public ApiCallBuilder WithRequestData(object? data) {
|
||||
Data = data;
|
||||
return this;
|
||||
@@ -59,6 +67,9 @@ namespace Beam {
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Adds an <c>Authorization</c> header whose value is <c>"Bearer "</c> followed by <paramref name="value"/>.
/// </summary>
/// <param name="value">The token text appended after the <c>Bearer </c> prefix.</param>
/// <returns>Whatever <c>AddHeader</c> returns.</returns>
public ApiCallBuilder AddBearer(string value) {
    return AddHeader("Authorization", "Bearer " + value);
}
|
||||
|
||||
public ApiCall Build() {
|
||||
if (Uri is null)
|
||||
throw new InvalidOperationException();
|
||||
|
||||
+2
-1
@@ -3,10 +3,11 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
public class ArticleData : IDocumentMetaData {
|
||||
public record class ArticleData : IDocumentMetaData {
|
||||
public string? Name { get; set; }
|
||||
public string[] Authors { get; set; } = [];
|
||||
public string? Language { get; set; }
|
||||
|
||||
+2
-2
@@ -7,9 +7,9 @@
|
||||
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
||||
@@ -10,7 +10,7 @@ namespace Beam {
|
||||
object last = x[^1];
|
||||
if (!int.TryParse(last.ToString(), out var number))
|
||||
throw new InvalidOperationException(S.M.StateChangeError);
|
||||
x[^1] = number + i;
|
||||
x[^1] = (number + i).ToString();
|
||||
});
|
||||
|
||||
public static IStateChangeBehaviour Constant => new ConstantStateChanger();
|
||||
@@ -29,7 +29,7 @@ namespace Beam {
|
||||
return;
|
||||
} else
|
||||
throw new InvalidOperationException(S.M.StateChangeError);
|
||||
x[n] = number + i;
|
||||
x[n] = (number + i).ToString();
|
||||
});
|
||||
|
||||
|
||||
|
||||
+13
-5
@@ -2,18 +2,26 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
public readonly struct ImmutableState(object[] state) {
|
||||
readonly object[] state = state;
|
||||
public readonly struct ImmutableState {
|
||||
readonly string[] state;
|
||||
|
||||
public readonly Span<object> GetState() => state;
|
||||
[JsonConstructor]
|
||||
public ImmutableState(string[] state) {
|
||||
this.state = state ?? [];
|
||||
}
|
||||
|
||||
public string[] State => state ?? [];
|
||||
|
||||
public readonly Span<string> AsSpan() => state ?? [];
|
||||
|
||||
public readonly State Copy()
|
||||
=> new((object[])state.Clone());
|
||||
=> new((string[])(state ?? []).Clone());
|
||||
|
||||
public readonly object this[Index i] {
|
||||
public readonly string this[Index i] {
|
||||
get => state[i];
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ namespace Beam {
|
||||
private State? EndState;
|
||||
private State InitialState;
|
||||
|
||||
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState)
|
||||
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params string[] initialState)
|
||||
: this(builder, behaviour, new State(initialState)) { }
|
||||
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, State initialState, State? endState = null) {
|
||||
Builder = builder;
|
||||
|
||||
@@ -37,6 +37,9 @@ namespace Beam {
|
||||
public const string StimulusMustBeInt = "Stimulus must be an integer";
|
||||
public const string StateCastException = "State cannot be cast to T";
|
||||
public const string StateChangeError = "Something went wrong while changing the state.";
|
||||
public const string RequiredArgumentMissing = "A required argument is missing";
|
||||
public const string QueryFlagIncompatibleWithAfterFlag = "The query flag is incompatible with the after flag.";
|
||||
public const string QueryParametersOnlyAtLastSegment = "Query parameters can only go in the last segment of a url";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,9 +22,6 @@ namespace Beam {
|
||||
} catch (NotSupportedException) {
|
||||
Logger?.LogWarning("Enumerator of type {} does not support resets. This may cause buggy behavior", LinksEnumerator.GetType());
|
||||
}
|
||||
|
||||
if (!LinksEnumerator.MoveNext())
|
||||
throw new ArgumentOutOfRangeException(S.M.LinksCannotBeEmpty);
|
||||
Current = default(OutType);
|
||||
GetUnitDownloader = () => getUnitDownloader(Context);
|
||||
}
|
||||
@@ -35,7 +32,7 @@ namespace Beam {
|
||||
}
|
||||
|
||||
public async ValueTask<bool> MoveNextAsync() {
|
||||
if (!LinksEnumerator.Current.HasValue)
|
||||
if (!LinksEnumerator.MoveNext())
|
||||
return false;
|
||||
|
||||
//Logger?.LogInformation("MoveNextAsync()");
|
||||
|
||||
@@ -27,7 +27,17 @@ namespace Beam {
|
||||
/// The parameter name is written both before and after the value
|
||||
/// (e.g. <c>id42id</c>).
|
||||
/// </summary>
|
||||
BeforeAndAfter = 0b11
|
||||
BeforeAndAfter = 0b11,
|
||||
|
||||
/// <summary>
|
||||
/// The parameter is optional, and is omitted if missing.
|
||||
/// </summary>
|
||||
Optional = 0b100,
|
||||
|
||||
/// <summary>
|
||||
/// The parameter is a query parameter, and should be decorated with <c>?</c> and <c>&amp;</c>
|
||||
/// </summary>
|
||||
Query = 0b1000,
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -326,10 +336,10 @@ namespace Beam {
|
||||
/// <summary>
|
||||
/// Produces a concrete <see cref="SourceLink"/> by substituting <paramref name="parameterValues"/> into the template.
|
||||
/// </summary>
|
||||
/// <param name="parameterValues">Flat array of values that will be written in the order that parameters appear when segments are enumerated left‑to‑right.</param>
|
||||
/// <param name="parameterValues">Flat array of values that will be written in the order that parameters appear when segments are enumerated left‑to‑right. Any optional parameters must still appear as null if missing.</param>
|
||||
/// <returns>The completed <see cref="SourceLink"/>.</returns>
|
||||
/// <exception cref="ArgumentOutOfRangeException">If the supplied value count does not match <see cref="GetParameterCount"/>().</exception>
|
||||
public SourceLink Build(params object[] parameterValues) {
|
||||
public SourceLink Build(params object?[] parameterValues) {
|
||||
ArgumentOutOfRangeException.ThrowIfNotEqual(parameterValues.Length, GetParameterCount());
|
||||
|
||||
StringBuilder link = new();
|
||||
@@ -341,11 +351,37 @@ namespace Beam {
|
||||
foreach (var segment in Segments) {
|
||||
link.Append('/');
|
||||
link.Append(segment.Name);
|
||||
bool startedQueryString = false;
|
||||
for (int i = 0; i < segment.Parameters.Count; i++) {
|
||||
if (parameterValues[pvC] is null)
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.Optional))
|
||||
continue;
|
||||
else
|
||||
throw new ArgumentException(S.M.RequiredArgumentMissing);
|
||||
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.Query) && Segments[^1] != segment)
|
||||
throw new ArgumentException(S.M.QueryParametersOnlyAtLastSegment);
|
||||
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.Query))
|
||||
if (!startedQueryString) {
|
||||
link.Append('?');
|
||||
startedQueryString = true;
|
||||
} else
|
||||
link.Append('&');
|
||||
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.Before))
|
||||
link.Append(segment.Parameters[i].Name);
|
||||
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.Query))
|
||||
link.Append('=');
|
||||
|
||||
if (parameterValues[pvC] is not null)
|
||||
link.Append(parameterValues[pvC++]);
|
||||
else if (!segment.Parameters[i].Position.HasFlag(Position.Optional))
|
||||
throw new ArgumentException(S.M.RequiredArgumentMissing);
|
||||
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.Query | Position.After))
|
||||
throw new ArgumentException(S.M.QueryFlagIncompatibleWithAfterFlag);
|
||||
|
||||
if (segment.Parameters[i].Position.HasFlag(Position.After))
|
||||
link.Append(segment.Parameters[i].Name);
|
||||
|
||||
+6
-6
@@ -5,16 +5,16 @@ using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam {
|
||||
public class State(object[] state) {
|
||||
object[] state = state;
|
||||
public class State(string[] state) {
|
||||
string[] state = state;
|
||||
|
||||
public object[] GetState() => state;
|
||||
public void SetState(object[] state) => this.state = state;
|
||||
public string[] GetState() => state;
|
||||
public void SetState(string[] state) => this.state = state;
|
||||
|
||||
public State Copy()
|
||||
=> new((object[])state.Clone());
|
||||
=> new((string[])state.Clone());
|
||||
|
||||
public object this[Index i] {
|
||||
public string this[Index i] {
|
||||
get => state[i];
|
||||
set => state[i] = value;
|
||||
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
namespace aeqw89.Beam {
    /// <summary>
    /// Empty placeholder type; it declares no members.
    /// </summary>
    public class Class1 { }
}
|
||||
@@ -7,7 +7,7 @@
|
||||
<Title>Beam</Title>
|
||||
<Authors>aeqw89</Authors>
|
||||
<Company>qwsdcvghyu</Company>
|
||||
<Version>1.3.6</Version>
|
||||
<Version>2.0.2</Version>
|
||||
<Description>A library for downloading internet resources</Description>
|
||||
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||
@@ -20,7 +20,10 @@
|
||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Puppeteer\Beam.Puppeteer.csproj">
|
||||
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\Beam.Temporary.Cli\Beam.Temporary.Cli.csproj">
|
||||
@@ -29,7 +32,7 @@
|
||||
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1">
|
||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
@@ -47,7 +50,11 @@
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Puppeteer\bin\$(Configuration)\$(TargetFramework)\Beam.Puppeteer.dll">
|
||||
<Content Include="..\Beam.Playwright\bin\$(Configuration)\$(TargetFramework)\Beam.Playwright.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
<Content Include="..\Beam.Stealth\bin\$(Configuration)\$(TargetFramework)\Beam.Stealth.dll">
|
||||
<PackagePath>lib\$(TargetFramework)\</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
</Content>
|
||||
@@ -65,7 +72,7 @@
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1">
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
@@ -77,7 +84,7 @@
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0">
|
||||
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
@@ -95,13 +102,13 @@
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1">
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1">
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
@@ -118,4 +125,10 @@
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Selenium.WebDriver" Version="4.34.0">
|
||||
<PrivateAssets />
|
||||
<Transitive>true</Transitive>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
Reference in New Issue
Block a user