Compare commits

20 Commits
Author SHA1 Message Date
qwsdcvghyu89 08447ca8dd version bump: 2.9.0 -> 2.9.1 2026-04-03 12:02:31 +11:00
qwsdcvghyu89 0c0c11b278 fix: use ResponseHeadersRead for streaming-friendly HTTP responses 2026-04-03 12:01:46 +11:00
qwsdcvghyu89 2965270928 feat: add deferred response buffering, TableDataProvider, and stealth improvements
- ApiResponse: add readToBuffer option to defer/stream body instead of eagerly buffering
- TableDataProvider: implement HTML table parser with per-column provider support
- StealthConfig: add 10s page load timeout and copyCookiesFrom parameter for cookie sharing
- StealthUnitDownloader: catch WebDriverTimeoutException on navigation, log warning instead of throwing
- Bump version to 2.9.0
2026-04-03 11:51:19 +11:00
qwsdcvghyu89 cf75d4a5d5 feat: add deferred response buffering, TableDataProvider, and stealth improvements
- ApiResponse: add readToBuffer option to defer/stream body instead of eagerly buffering
- TableDataProvider: implement HTML table parser with per-column provider support
- StealthConfig: add 10s page load timeout and copyCookiesFrom parameter for cookie sharing
- StealthUnitDownloader: catch WebDriverTimeoutException on navigation, log warning instead of throwing
- Bump version to 2.9.0
2026-04-03 11:51:12 +11:00
qwsdcvghyu89 b16d17631e Add IDE configs, update Beam version, and enhance RelationalDataProvider
Added JetBrains Rider IDE configuration files and a backup for Beam.Api.csproj. Updated aeqw89.Beam project version to 2.7.0 and package references, including Selenium.WebDriver and System.IO.Hashing. Enhanced RelationalDataProvider to support NextSibling and PreviousSibling relations and configurable traversal distance.
2025-11-23 01:47:53 +11:00
qwsdcvghyu89 580ceb8c3c Add FollowRedirects option to downloader
Introduces a FollowRedirects property to UnitDownloaderOptions and its builder, allowing control over HTTP redirect behavior. Updates UnitDownloader to use this option, following redirects when enabled and reporting progress accordingly.
2025-11-16 01:11:22 +11:00
qwsdcvghyu89 6f37d217db Add Addon record and support for utility addons
Introduces the Addon record to represent browser addons and updates StealthConfig to support loading multiple utility addons per browser. The Firefox driver now installs specified addons from the UtilityAddons array, improving extensibility for browser automation.
2025-11-16 00:37:17 +11:00
qwsdcvghyu89 a20d48ef02 Add uBlock extension support for Firefox driver
Upgrades Selenium.WebDriver to 4.38.0 and adds logic to automatically install the uBlock extension for FirefoxDriver instances. The uBlock extension file is now included in the project and set to copy to output. Warnings are logged if the extension fails to load.
2025-11-16 00:26:56 +11:00
qwsdcvghyu89 f52aa6123b Refactor downloaders to use ByteDocument and add options builders
Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
2025-11-15 22:51:46 +11:00
qwsdcvghyu89 647b2b0f37 feat: introduce new composable data providers and increment version
- Added `AnchorDataProvider`, `AnchorCollectionDataProvider`, `ContentsDataProvider`, `ContentsArrayDataProvider`, `DropDownDataProvider`, `ListContentDataProvider`, and `ParagraphedContentDataProvider` for enhanced data extraction flexibility.
- Updated project version to 2.5.0.
2025-11-15 20:51:18 +11:00
qwsdcvghyu89 b5faf58b1a feat: add support for remote WebDriver and improve StealthConfig browser logic
- Added `RemoteAddress` property to `StealthConfig` for remote WebDriver support.
- Refactored browser driver creation logic with `DriverDefinition` for enhanced consistency.
- Improved error handling in browser fallback mechanism.
- Incremented project version to 2.4.6.
2025-11-14 04:36:03 +11:00
qwsdcvghyu89 76cf78006b fix: add missing break in StealthConfig browser driver fallback logic 2025-11-14 04:08:34 +11:00
qwsdcvghyu89 18c5ad83da Refactor data providers and update abstractions
- Removed obsolete data providers: `AnchorCollectionDataProvider`, `ContentsDataProvider`, and others, consolidating logic into new composable providers.
- Added `ComposeDataProviders`, `SelectDataProvider`, and `RelationalDataProvider` for improved flexibility and reusability.
- Introduced `IManySelectionComposableDataProvider` interface to support multiple-node selection.
- Enhanced `UnitDownloader` with more robust progress tracking.
- Updated package references and project dependencies for consistency.
- Improved error handling in `StealthConfig` initialization for better fallback on browser drivers.
- Incremented project version to 2.4.5.
2025-11-14 03:41:13 +11:00
qwsdcvghyu89 2958a26e4f Refactor downloaders to use generic options and unify logic
Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
2025-09-29 21:27:56 +10:00
qwsdcvghyu89 8e60109f5e Add required modifiers and generalize behaviour type
Marked UrlLocation properties as required in ResourceDefinition for improved null safety. Changed OrderedLinkGenerator to use the more general IStateChangeBehaviour instead of NumberedStateChanger, increasing flexibility.
2025-09-27 15:48:14 +10:00
qwsdcvghyu89 94b6c0645c Refactor fluent download pipelines 2025-09-27 15:38:58 +10:00
qwsdcvghyu89 13c6fbaf5f save 2025-09-27 13:37:40 +10:00
qwsdcvghyu89 db9bdecea6 Overall; fixed design of IState.cs and IReadOnlyState.cs, and fixed namespaces in Beam.Abstractions to remove all references of Beam.Abstract. 2025-09-26 14:21:38 +10:00
qwsdcvghyu89 67c6a46b09 chore: update package versions and package references
- Bumped Microsoft.Extensions.Logging packages to version 9.0.9 across all projects.
- Updated aeqw89.Beam project version to 2.1.4.
- Added new transitive package references, including Microsoft.Recognizers.Text.Number, Microsoft.Playwright, EntityFramework, and others.
- Commented out or removed Beam.Temporary.Cli references.
- Enhanced package structure by rearranging content includes and cleaning up redundant package references.
2025-09-24 15:14:30 +10:00
qwsdcvghyu89 7ed05abdb8 refactor: modularize Beam into new projects and interfaces
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders.
- Refactored existing classes into appropriate namespaces and projects.
- Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.).
- Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility.
- Removed deprecated classes like SourceLinkBuilder and StateChangerFactory.
- Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`.
- Consolidated shared logic under Beam.Abstractions.
2025-09-22 01:51:46 +10:00
203 changed files with 3978 additions and 3392 deletions
+8
View File
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="UserContentModel">
<attachedFolders />
<explicitIncludes />
<explicitExcludes />
</component>
</project>
+8
View File
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RiderProjectSettingsUpdater">
<option name="singleClickDiffPreview" value="1" />
<option name="unhandledExceptionsIgnoreList" value="1" />
<option name="vcsConfiguration" value="3" />
</component>
</project>
+6
View File
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
+278
View File
@@ -0,0 +1,278 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="AutoGeneratedRunConfigurationManager">
<projectFile profileName="Beam.Temporary.Cli">Beam.Temporary.Cli/Beam.Temporary.Cli.csproj</projectFile>
</component>
<component name="AutoImportSettings">
<option name="autoReloadType" value="SELECTIVE" />
</component>
<component name="ChangeListManager">
<list default="true" id="a7e2f92c-8039-47f7-8b93-1c7d5c5d92cc" name="Changes" comment="">
<change afterPath="$PROJECT_DIR$/Beam.Dynamic/DataProviders/TableDataProvider.cs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/.idea.Beam/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/.idea.Beam/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Beam.Stealth/StealthConfig.cs" beforeDir="false" afterPath="$PROJECT_DIR$/Beam.Stealth/StealthConfig.cs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Beam.Stealth/StealthUnitDownloader.cs" beforeDir="false" afterPath="$PROJECT_DIR$/Beam.Stealth/StealthUnitDownloader.cs" afterDir="false" />
<change beforePath="$PROJECT_DIR$/aeqw89.Beam/aeqw89.Beam.csproj" beforeDir="false" afterPath="$PROJECT_DIR$/aeqw89.Beam/aeqw89.Beam.csproj" afterDir="false" />
<change beforePath="$PROJECT_DIR$/aeqw89.Beam/aeqw89.Beam.csproj.bak" beforeDir="false" afterPath="$PROJECT_DIR$/aeqw89.Beam/aeqw89.Beam.csproj.bak" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
<option name="RESET_MODE" value="HARD" />
</component>
<component name="GitHubPullRequestSearchHistory">{
&quot;lastFilter&quot;: {
&quot;state&quot;: &quot;OPEN&quot;,
&quot;assignee&quot;: &quot;qwsdcvghyu89&quot;
}
}</component>
<component name="GithubPullRequestsUISettings">{
&quot;selectedUrlAndAccountId&quot;: {
&quot;url&quot;: &quot;https://github.com/qwsdcvghyu89/Beam.git&quot;,
&quot;accountId&quot;: &quot;0754c5af-63f9-496e-afca-14f2e32ea4f5&quot;
}
}</component>
<component name="HighlightingSettingsPerFile">
<setting file="file://$PROJECT_DIR$/Beam.Abstractions/IComposableDataProvider.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Abstractions/IManySelectionComposableDataProvider.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Abstractions/IRetryReport.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Downloaders/UnitDownloaderOptions.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/BindingsCollection.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviderJsonTypeInfoResolver.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviders/ComposeDataProviders.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviders/ManyAnchorsDataProvider.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviders/ManyComposeDataProviders.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviders/RelationalDataProvider.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviders/SelectDataProvider.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Dynamic/DataProviders/TableDataProvider.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Exceptions/AssertionException.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Exceptions/Exceptions.Designer.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Exceptions/Exceptions.resx" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Exceptions/MapException.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Models/ResourceDefinition.cs" root0="FORCE_HIGHLIGHTING" />
<setting file="file://$PROJECT_DIR$/Beam.Stealth/StealthConfig.cs" root0="FORCE_HIGHLIGHTING" />
</component>
<component name="MetaFilesCheckinStateConfiguration" checkMetaFiles="true" />
<component name="ProblemsViewState">
<option name="selectedTabId" value="SWEA" />
</component>
<component name="ProjectColorInfo">{
&quot;associatedIndex&quot;: 6
}</component>
<component name="ProjectId" id="32zouDNHv5eijOBwT2F3u5bkOoV" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;ASKED_SHARE_PROJECT_CONFIGURATION_FILES&quot;: &quot;true&quot;,
&quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252&quot;: &quot;true&quot;,
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
&quot;git-widget-placeholder&quot;: &quot;master&quot;,
&quot;ignore.virus.scanning.warn.message&quot;: &quot;true&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;,
&quot;settings.editor.selected.configurable&quot;: &quot;ml.llm.LLMConfigurable&quot;,
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
}
}</component>
<component name="RunManager">
<configuration name="Beam.Temporary.Cli" type="LaunchSettings" factoryName=".NET Launch Settings Profile">
<option name="LAUNCH_PROFILE_PROJECT_FILE_PATH" value="$PROJECT_DIR$/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj" />
<option name="LAUNCH_PROFILE_TFM" value="net9.0" />
<option name="LAUNCH_PROFILE_NAME" value="Beam.Temporary.Cli" />
<option name="USE_EXTERNAL_CONSOLE" value="0" />
<option name="USE_MONO" value="0" />
<option name="RUNTIME_ARGUMENTS" value="" />
<option name="GENERATE_APPLICATIONHOST_CONFIG" value="1" />
<option name="SHOW_IIS_EXPRESS_OUTPUT" value="0" />
<option name="SEND_DEBUG_REQUEST" value="1" />
<option name="ADDITIONAL_IIS_EXPRESS_ARGUMENTS" value="" />
<option name="AUTO_ATTACH_CHILDREN" value="0" />
<method v="2">
<option name="Build" />
</method>
</configuration>
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="a7e2f92c-8039-47f7-8b93-1c7d5c5d92cc" name="Changes" comment="" />
<created>1758432983325</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1758432983325</updated>
<workItem from="1758432984690" duration="385000" />
<workItem from="1758439575700" duration="5235000" />
<workItem from="1758449822773" duration="14000" />
<workItem from="1758449873563" duration="12356000" />
<workItem from="1758468407188" duration="4443000" />
<workItem from="1758685019895" duration="5308000" />
<workItem from="1758862226031" duration="725000" />
<workItem from="1758863439326" duration="1581000" />
<workItem from="1758880679847" duration="8763000" />
<workItem from="1758951020923" duration="30109000" />
<workItem from="1759184656833" duration="437000" />
<workItem from="1759185101301" duration="3766000" />
<workItem from="1759482080989" duration="11241000" />
<workItem from="1759845817258" duration="647000" />
<workItem from="1760684473926" duration="4000" />
<workItem from="1763031614537" duration="17900000" />
<workItem from="1763822547278" duration="421000" />
<workItem from="1763824632550" duration="6009000" />
</task>
<task id="LOCAL-00001" summary="refactor: modularize Beam into new projects and interfaces&#10;&#10;- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders.&#10;- Refactored existing classes into appropriate namespaces and projects.&#10;- Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.).&#10;- Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility.&#10;- Removed deprecated classes like SourceLinkBuilder and StateChangerFactory.&#10;- Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. &#10;- Consolidated shared logic under Beam.Abstractions.">
<option name="closed" value="true" />
<created>1758469918944</created>
<option name="number" value="00001" />
<option name="presentableId" value="LOCAL-00001" />
<option name="project" value="LOCAL" />
<updated>1758469918944</updated>
</task>
<task id="LOCAL-00002" summary="chore: update package versions and package references&#10;&#10;- Bumped Microsoft.Extensions.Logging packages to version 9.0.9 across all projects.&#10;- Updated aeqw89.Beam project version to 2.1.4.&#10;- Added new transitive package references, including Microsoft.Recognizers.Text.Number, Microsoft.Playwright, EntityFramework, and others.&#10;- Commented out or removed Beam.Temporary.Cli references.&#10;- Enhanced package structure by rearranging content includes and cleaning up redundant package references.">
<option name="closed" value="true" />
<created>1758690873724</created>
<option name="number" value="00002" />
<option name="presentableId" value="LOCAL-00002" />
<option name="project" value="LOCAL" />
<updated>1758690873724</updated>
</task>
<task id="LOCAL-00003" summary="Overall; fixed design of IState.cs and IReadOnlyState.cs, and fixed namespaces in Beam.Abstractions to remove all references of Beam.Abstract.">
<option name="closed" value="true" />
<created>1758860502921</created>
<option name="number" value="00003" />
<option name="presentableId" value="LOCAL-00003" />
<option name="project" value="LOCAL" />
<updated>1758860502921</updated>
</task>
<task id="LOCAL-00004" summary="save">
<option name="closed" value="true" />
<created>1758944260267</created>
<option name="number" value="00004" />
<option name="presentableId" value="LOCAL-00004" />
<option name="project" value="LOCAL" />
<updated>1758944260267</updated>
</task>
<task id="LOCAL-00005" summary="Refactor data providers and update abstractions&#10;&#10;- Removed obsolete data providers: `AnchorCollectionDataProvider`, `ContentsDataProvider`, and others, consolidating logic into new composable providers.&#10;- Added `ComposeDataProviders`, `SelectDataProvider`, and `RelationalDataProvider` for improved flexibility and reusability.&#10;- Introduced `IManySelectionComposableDataProvider` interface to support multiple-node selection.&#10;- Enhanced `UnitDownloader` with more robust progress tracking.&#10;- Updated package references and project dependencies for consistency.&#10;- Improved error handling in `StealthConfig` initialization for better fallback on browser drivers.&#10;- Incremented project version to 2.4.5.">
<option name="closed" value="true" />
<created>1763052080925</created>
<option name="number" value="00005" />
<option name="presentableId" value="LOCAL-00005" />
<option name="project" value="LOCAL" />
<updated>1763052080925</updated>
</task>
<task id="LOCAL-00006" summary="fix: add missing break in StealthConfig browser driver fallback logic">
<option name="closed" value="true" />
<created>1763053716098</created>
<option name="number" value="00006" />
<option name="presentableId" value="LOCAL-00006" />
<option name="project" value="LOCAL" />
<updated>1763053716098</updated>
</task>
<task id="LOCAL-00007" summary="feat: add support for remote WebDriver and improve StealthConfig browser logic&#10;&#10;- Added `RemoteAddress` property to `StealthConfig` for remote WebDriver support.&#10;- Refactored browser driver creation logic with `DriverDefinition` for enhanced consistency.&#10;- Improved error handling in browser fallback mechanism.&#10;- Incremented project version to 2.4.6.">
<option name="closed" value="true" />
<created>1763055366014</created>
<option name="number" value="00007" />
<option name="presentableId" value="LOCAL-00007" />
<option name="project" value="LOCAL" />
<updated>1763055366014</updated>
</task>
<task id="LOCAL-00008" summary="feat: introduce new composable data providers and increment version&#10;&#10;- Added `AnchorDataProvider`, `AnchorCollectionDataProvider`, `ContentsDataProvider`, `ContentsArrayDataProvider`, `DropDownDataProvider`, `ListContentDataProvider`, and `ParagraphedContentDataProvider` for enhanced data extraction flexibility.&#10;- Updated project version to 2.5.0.">
<option name="closed" value="true" />
<created>1763200281188</created>
<option name="number" value="00008" />
<option name="presentableId" value="LOCAL-00008" />
<option name="project" value="LOCAL" />
<updated>1763200281188</updated>
</task>
<option name="localTasksCounter" value="9" />
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="3" />
</component>
<component name="UnityCheckinConfiguration" checkUnsavedScenes="true" />
<component name="UnityProjectConfiguration" hasMinimizedUI="false" />
<component name="Vcs.Log.Tabs.Properties">
<option name="RECENT_FILTERS">
<map>
<entry key="Branch">
<value>
<list>
<RecentGroup>
<option name="FILTER_VALUES">
<option value="origin/dev" />
</option>
</RecentGroup>
</list>
</value>
</entry>
</map>
</option>
<option name="TAB_STATES">
<map>
<entry key="MAIN">
<value>
<State>
<option name="FILTERS">
<map>
<entry key="branch">
<value>
<list>
<option value="origin/dev" />
</list>
</value>
</entry>
</map>
</option>
</State>
</value>
</entry>
</map>
</option>
</component>
<component name="VcsManagerConfiguration">
<option name="CLEAR_INITIAL_COMMIT_MESSAGE" value="true" />
<MESSAGE value="refactor: modularize Beam into new projects and interfaces&#10;&#10;- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders.&#10;- Refactored existing classes into appropriate namespaces and projects.&#10;- Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.).&#10;- Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility.&#10;- Removed deprecated classes like SourceLinkBuilder and StateChangerFactory.&#10;- Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. &#10;- Consolidated shared logic under Beam.Abstractions." />
<MESSAGE value="chore: update package versions and package references&#10;&#10;- Bumped Microsoft.Extensions.Logging packages to version 9.0.9 across all projects.&#10;- Updated aeqw89.Beam project version to 2.1.4.&#10;- Added new transitive package references, including Microsoft.Recognizers.Text.Number, Microsoft.Playwright, EntityFramework, and others.&#10;- Commented out or removed Beam.Temporary.Cli references.&#10;- Enhanced package structure by rearranging content includes and cleaning up redundant package references." />
<MESSAGE value="Overall; fixed design of IState.cs and IReadOnlyState.cs, and fixed namespaces in Beam.Abstractions to remove all references of Beam.Abstract." />
<MESSAGE value="save" />
<MESSAGE value="Refactor fluent download pipelines" />
<MESSAGE value="Refactor data providers and update abstractions&#10;&#10;- Removed obsolete data providers: `AnchorCollectionDataProvider`, `ContentsDataProvider`, and others, consolidating logic into new composable providers.&#10;- Added `ComposeDataProviders`, `SelectDataProvider`, and `RelationalDataProvider` for improved flexibility and reusability.&#10;- Introduced `IManySelectionComposableDataProvider` interface to support multiple-node selection.&#10;- Enhanced `UnitDownloader` with more robust progress tracking.&#10;- Updated package references and project dependencies for consistency.&#10;- Improved error handling in `StealthConfig` initialization for better fallback on browser drivers.&#10;- Incremented project version to 2.4.5." />
<MESSAGE value="fix: add missing break in StealthConfig browser driver fallback logic" />
<MESSAGE value="feat: add support for remote WebDriver and improve StealthConfig browser logic&#10;&#10;- Added `RemoteAddress` property to `StealthConfig` for remote WebDriver support.&#10;- Refactored browser driver creation logic with `DriverDefinition` for enhanced consistency.&#10;- Improved error handling in browser fallback mechanism.&#10;- Incremented project version to 2.4.6." />
<MESSAGE value="feat: introduce new composable data providers and increment version&#10;&#10;- Added `AnchorDataProvider`, `AnchorCollectionDataProvider`, `ContentsDataProvider`, `ContentsArrayDataProvider`, `DropDownDataProvider`, `ListContentDataProvider`, and `ParagraphedContentDataProvider` for enhanced data extraction flexibility.&#10;- Updated project version to 2.5.0." />
<option name="LAST_COMMIT_MESSAGE" value="feat: introduce new composable data providers and increment version&#10;&#10;- Added `AnchorDataProvider`, `AnchorCollectionDataProvider`, `ContentsDataProvider`, `ContentsArrayDataProvider`, `DropDownDataProvider`, `ListContentDataProvider`, and `ParagraphedContentDataProvider` for enhanced data extraction flexibility.&#10;- Updated project version to 2.5.0." />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<breakpoint enabled="true" type="DotNet_Exception_Breakpoints">
<properties exception="System.OperationCanceledException" breakIfHandledByOtherCode="false" displayValue="System.OperationCanceledException" />
<option name="timeStamp" value="1" />
</breakpoint>
<breakpoint enabled="true" type="DotNet_Exception_Breakpoints">
<properties exception="System.Threading.Tasks.TaskCanceledException" breakIfHandledByOtherCode="false" displayValue="System.Threading.Tasks.TaskCanceledException" />
<option name="timeStamp" value="2" />
</breakpoint>
<breakpoint enabled="true" type="DotNet_Exception_Breakpoints">
<properties exception="System.Threading.ThreadAbortException" breakIfHandledByOtherCode="false" displayValue="System.Threading.ThreadAbortException" />
<option name="timeStamp" value="3" />
</breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
<component name="XSLT-Support.FileAssociations.UIState">
<expand />
<select />
</component>
</project>
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net9.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <!-- Kept in sync with the `namespace Beam.Abstractions;` declarations used by the source files of this project. -->
    <RootNamespace>Beam.Abstractions</RootNamespace>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="aeqw89.DataKeys" Version="2.1.1" />
    <PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
    <PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
  </ItemGroup>
</Project>
+10
View File
@@ -0,0 +1,10 @@
namespace Beam.Abstractions;
/// <summary>
/// Metadata fields for an article. Extends <see cref="IDocumentMetaData"/> and can be
/// compared for equality against another (possibly <c>null</c>) <see cref="IArticleData"/>.
/// </summary>
public interface IArticleData : IDocumentMetaData, IEquatable<IArticleData?> {
/// <summary>Name of the article; may be <c>null</c>.</summary>
string? Name { get; set; }
/// <summary>Authors of the article (non-nullable array).</summary>
string[] Authors { get; set; }
/// <summary>Language of the article; may be <c>null</c>.</summary>
string? Language { get; set; }
/// <summary>Categories of the article (non-nullable array).</summary>
string[] Categories { get; set; }
/// <summary>Version of the article; may be <c>null</c>.</summary>
string? Version { get; set; }
/// <summary>Description of the article; may be <c>null</c>.</summary>
string? Description { get; set; }
}
@@ -0,0 +1,12 @@
using HtmlAgilityPack;
namespace Beam.Abstractions;
/// <summary>
/// Non-generic shorthand for <see cref="IComposableDataProvider{T}"/> closed over <see cref="object"/>.
/// </summary>
public interface IComposableDataProvider : IComposableDataProvider<object> {
}

/// <summary>
/// A data provider that, in addition to working on a whole document, exposes node-level
/// operations: selecting a node and producing a value from a node.
/// </summary>
/// <typeparam name="T">Type of value produced (covariant).</typeparam>
public interface IComposableDataProvider<out T> : IDataProvider<T> {
    /// <summary>Produces a value from <paramref name="node"/>.</summary>
    T Get(HtmlNode node);

    /// <summary>Selects a node starting from <paramref name="doc"/>; the result may be <c>null</c>.</summary>
    HtmlNode? Select(HtmlDocument doc);

    /// <summary>Selects a node starting from <paramref name="node"/>; the result may be <c>null</c>.</summary>
    HtmlNode? Select(HtmlNode node);
}
+25
View File
@@ -0,0 +1,25 @@
namespace Beam.Abstractions;
/// <summary>
/// A set of optional data providers, one per named field, plus a string-keyed dictionary
/// of further providers. Every typed property is nullable, i.e. a binding may be absent.
/// </summary>
public interface IDataBindings {
IDataProvider<string>? Title { get; set; }
IDataProvider<string[]>? Authors { get; set; }
IDataProvider<string>? Description { get; set; }
IDataProvider<string>? Content { get; set; }
// NOTE(review): typed as string[] here, whereas IArticleData.Language is a single string? — confirm this is intended.
IDataProvider<string[]>? Language { get; set; }
IDataProvider<string[]>? Tags { get; set; }
IDataProvider<string>? Publisher { get; set; }
IDataProvider<DateTimeOffset>? PublicationDate { get; set; }
IDataProvider<string>? ISBN { get; set; }
IDataProvider<int>? PageCount { get; set; }
IDataProvider<string>? CoverImage { get; set; }
IDataProvider<string[]>? Series { get; set; }
IDataProvider<int>? Edition { get; set; }
IDataProvider<string[]>? Contributors { get; set; }
IDataProvider<string[]>? Subjects { get; set; }
IDataProvider<string>? Rights { get; set; }
IDataProvider<string[]>? TableOfContents { get; set; }
IDataProvider<string[]>? PagesDropDown { get; set; }
IDataProvider<string>? NextPageButton { get; set; }
IDataProvider<string>? PreviousPageButton { get; set; }
/// <summary>
/// Providers keyed by string; individual values may be <c>null</c>.
/// NOTE(review): presumably holds bindings that have no dedicated property above — confirm against implementers.
/// </summary>
Dictionary<string, IDataProvider?> Providers { get; set; }
}
+13
View File
@@ -0,0 +1,13 @@
using HtmlAgilityPack;
namespace Beam.Abstractions;
/// <summary>
/// Non-generic base of <see cref="IDataProvider{T}"/>, allowing providers with different
/// value types to be handled through one type.
/// </summary>
public interface IDataProvider {
    /// <summary>
    /// Evaluates this provider against <paramref name="document"/> and returns the value as text.
    /// </summary>
    /// <param name="document">Document passed through to <see cref="IDataProvider{T}.Get"/>.</param>
    /// <returns>
    /// <c>ToString()</c> of the produced value, or an empty string when the value (or its
    /// string form) is <c>null</c> or when this instance implements no <see cref="IDataProvider{T}"/>.
    /// </returns>
    public string GetString(HtmlDocument document) {
        // Covariance lets any IDataProvider<T> with a reference-type T be viewed as IDataProvider<object>.
        if (this is IDataProvider<object> referenceProvider) {
            return referenceProvider.Get(document)?.ToString() ?? "";
        }

        // Variance does not apply to value types, so IDataProvider<int>, IDataProvider<DateTimeOffset>, ...
        // fail the check above. Locate the closed interface and call its Get reflectively instead of
        // silently returning an empty string for those providers.
        foreach (Type candidate in this.GetType().GetInterfaces()) {
            if (!candidate.IsGenericType || candidate.GetGenericTypeDefinition() != typeof(IDataProvider<>)) {
                continue;
            }

            // DoNotWrapExceptions keeps exceptions thrown by Get unwrapped, matching a direct call.
            object? value = candidate.GetMethod(nameof(IDataProvider<object>.Get))?.Invoke(
                this,
                System.Reflection.BindingFlags.DoNotWrapExceptions,
                null,
                new object?[] { document },
                null);
            return value?.ToString() ?? "";
        }

        return "";
    }
}

/// <summary>
/// A data provider producing a value of type <typeparamref name="T"/> from an HTML document.
/// </summary>
/// <typeparam name="T">Type of value produced (covariant).</typeparam>
public interface IDataProvider<out T> : IDataProvider {
    /// <summary>Produces the value for <paramref name="document"/>.</summary>
    public T Get(HtmlDocument document);
    //public HtmlNode? GetNode(HtmlDocument document);
}
+28
View File
@@ -0,0 +1,28 @@
using aeqw89.DataKeys;
namespace Beam.Abstractions;
/// <summary>
/// A document that has a file name, keyed metadata, and both binary and string representations.
/// </summary>
public interface IDocument {
/// <summary>
/// The file name of the document. Must be valid on all of the <c>UNIX</c>,
/// <c>WINDOWS</c>, <c>APPLE</c>, and <c>ANDROID</c> file systems.
/// </summary>
string Filename { get; }
/// <summary>
/// Additional descriptive data, stored as <see cref="IDocumentMetaData"/> values keyed by
/// <c>IDataKey&lt;IDocumentMetaData&gt;</c>.
/// </summary>
IDictionary<IDataKey<IDocumentMetaData>, IDocumentMetaData> MetaData { get; }
/// <summary>
/// Retrieves the binary representation of the <see cref="IDocument"/>.
/// </summary>
/// <returns>Binary representation of the <see cref="IDocument"/>.</returns>
byte[] ToBytes();
/// <summary>
/// Retrieves the string representation of the <see cref="IDocument"/>.
/// </summary>
/// <returns>String representation of the <see cref="IDocument"/>.</returns>
string ToString();
}
+7
View File
@@ -0,0 +1,7 @@
using System.Text.Json;
namespace Beam.Abstractions;
/// <summary>
/// Metadata attached to a document that can be rendered as a JSON string.
/// </summary>
public interface IDocumentMetaData {
/// <summary>Returns this metadata as a JSON string.</summary>
/// <param name="options">Optional serializer options; defaults to <c>null</c>.</param>
string AsJson(JsonSerializerOptions? options = null);
}
+6
View File
@@ -0,0 +1,6 @@
namespace Beam.Abstractions;
/// <summary>
/// Progress report for a download, expressed in byte counts. Both values are init-only.
/// </summary>
public interface IDownloadReport {
/// <summary>Number of bytes downloaded.</summary>
long BytesDownloaded { get; init; }
/// <summary>
/// Number of bytes remaining, or <c>null</c>.
/// NOTE(review): presumably <c>null</c> means the total size is unknown — confirm against producers.
/// </summary>
long? BytesRemaining { get; init; }
}
+9
View File
@@ -0,0 +1,9 @@
namespace Beam.Abstractions;
/// <summary>
/// Builds a link string from the values held in a read-only state object.
/// </summary>
public interface ILinkBuilder {
/// <summary>
/// Produces a concrete link using values from an external <see cref="IReadOnlyState"/> object.
/// </summary>
/// <param name="parameterValues">Object providing positional values.</param>
/// <returns>The built link as a string.</returns>
string Build(IReadOnlyState parameterValues);
}
@@ -0,0 +1,9 @@
using HtmlAgilityPack;
namespace Beam.Abstractions;
/// <summary>
/// A data provider whose node-level operations work on arrays of nodes: it can select
/// several nodes at once and produce a single value from a set of nodes.
/// </summary>
/// <typeparam name="T">Type of value produced (covariant).</typeparam>
public interface IManySelectionComposableDataProvider<out T> : IDataProvider<T> {
    /// <summary>Produces a value from the given nodes.</summary>
    T ManyGet(HtmlNode[] node);

    /// <summary>Selects nodes starting from <paramref name="doc"/>; the result may be <c>null</c>.</summary>
    HtmlNode[]? SelectMany(HtmlDocument doc);

    /// <summary>Selects nodes starting from the given nodes; the result may be <c>null</c>.</summary>
    HtmlNode[]? SelectMany(HtmlNode[] node);
}
+6
View File
@@ -0,0 +1,6 @@
namespace Beam.Abstractions;
/// <summary>
/// Pairs a value with an integer order.
/// </summary>
/// <typeparam name="T">Type of the carried value (covariant).</typeparam>
public interface IOrdered<out T> {
/// <summary>The carried value.</summary>
T Data { get; }
/// <summary>
/// The order associated with <see cref="Data"/>.
/// NOTE(review): whether ordering is zero- or one-based is not visible here — confirm with producers.
/// </summary>
int Order { get; }
}
+6
View File
@@ -0,0 +1,6 @@
namespace Beam.Abstractions;
/// <summary>
/// Read-only view of a state made up of string values.
/// </summary>
public interface IReadOnlyState {
    /// <summary>Returns the state values as a read-only span.</summary>
    ReadOnlySpan<string> GetState();

    /// <summary>Returns a copy of this state as an <see cref="IState"/>.</summary>
    IState Copy();
}
+3
View File
@@ -0,0 +1,3 @@
namespace Beam.Abstractions;
/// <summary>
/// Marker interface: declares no members.
/// </summary>
public interface IResourceDictionary { }
+6
View File
@@ -0,0 +1,6 @@
namespace Beam.Abstractions;
/// <summary>
/// Progress report describing a download attempt for a link.
/// </summary>
public interface IRetryReport {
/// <summary>
/// Number of the attempt being reported.
/// NOTE(review): whether counting starts at 0 or 1 is not visible here - confirm.
/// </summary>
int TryNumber { get; }
/// <summary>
/// The link the attempt refers to.
/// </summary>
string Link { get; }
}
+9
View File
@@ -0,0 +1,9 @@
namespace Beam.Abstractions;
/// <summary>
/// Mutable state made of string values, with a read-only view available through <see cref="AsReadOnly"/>.
/// </summary>
public interface IState {
/// <summary>
/// Returns the state values as a string array.
/// </summary>
string[] GetState();
/// <summary>
/// Replaces the state values with <paramref name="state"/>.
/// </summary>
/// <param name="state">The new values.</param>
void SetState(string[] state);
/// <summary>
/// Returns a copy of this state.
/// </summary>
// NOTE(review): this interface declares no base interface, so the 'new' modifier hides nothing
// (compiler warning CS0109). Either the modifier is a leftover, or IState was meant to extend
// IReadOnlyState - confirm which.
new IState Copy();
/// <summary>
/// Returns a read-only view of this state.
/// </summary>
IReadOnlyState AsReadOnly();
}
@@ -0,0 +1,8 @@
namespace Beam.Abstractions;
/// <summary>
/// Defines how a url template should be updated, in what order, and by how much
/// </summary>
public interface IStateChangeBehaviour {
/// <summary>
/// Applies this behaviour to <paramref name="state"/>, given <paramref name="stimulus"/>.
/// </summary>
/// <param name="state">The mutable state to update.</param>
/// <param name="stimulus">The value that drives the change; its expected type is not defined by this interface.</param>
public void Apply(IState state, object stimulus);
}
@@ -0,0 +1,5 @@
namespace Beam.Abstractions;
/// <summary>
/// Exposes the <see cref="IStateChangeBehaviour"/> associated with this factory.
/// </summary>
public interface IStateChangerFactory {
/// <summary>
/// The state change behaviour. Note the property is spelled "Behavior" while the interface type is spelled "Behaviour".
/// </summary>
IStateChangeBehaviour Behavior { get; }
}
+8
View File
@@ -0,0 +1,8 @@
using aeqw89.DataKeys;
namespace Beam.Abstractions;
/// <summary>
/// A keyed template that pairs a state changer factory with a link builder.
/// </summary>
public interface ITemplate : IKeyed<ITemplate> {
/// <summary>
/// The factory exposing the state change behaviour for this template.
/// </summary>
IStateChangerFactory Factory { get; set; }
/// <summary>
/// The builder that turns a state into a link string.
/// </summary>
ILinkBuilder Builder { get; set; }
}
+7
View File
@@ -0,0 +1,7 @@
namespace Beam.Abstractions;
/// <summary>
/// Downloads a value of type <typeparamref name="T"/> from a group of ordered links.
/// </summary>
/// <typeparam name="T">Type of the downloaded result.</typeparam>
public interface IUnitDownloader<T> {
/// <summary>
/// Number of links per download.
/// NOTE(review): presumably the expected length of the array passed to <see cref="TryDownload"/> - confirm.
/// </summary>
public int LinksPerDownload { get; }
/// <summary>
/// Attempts a download for the given links.
/// </summary>
/// <param name="link">The ordered links to download (an array, despite the singular name).</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <param name="maximumRetryCount">Maximum number of retries; defaults to 7.</param>
/// <param name="downProgress">Optional receiver of <see cref="IDownloadReport"/> progress.</param>
/// <param name="tryProgress">Optional receiver of <see cref="IRetryReport"/> progress.</param>
/// <returns>
/// A tuple of a flag and a nullable result.
/// NOTE(review): presumably the flag signals success and the result is only meaningful when it is <c>true</c> - confirm.
/// </returns>
public Task<(bool, T?)> TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<IDownloadReport>? downProgress = null, IProgress<IRetryReport>? tryProgress = null);
}
+8 -6
View File
@@ -3,12 +3,14 @@ using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Net.Http;
using System.Net.Http.Json; using System.Net.Http.Json;
using System.Text; using System.Text;
using System.Text.Json; using System.Text.Json;
using System.Threading;
using System.Threading.Tasks; using System.Threading.Tasks;
namespace Beam { namespace Beam.Api;
public class ApiCall(HttpClient client, string uri, HttpMethod method, KeyValuePair<string, string[]>[] headers, object? requestData, object? body, params HashSet<HttpStatusCode> successCodes) { public class ApiCall(HttpClient client, string uri, HttpMethod method, KeyValuePair<string, string[]>[] headers, object? requestData, object? body, params HashSet<HttpStatusCode> successCodes) {
public HttpClient Client { get; } = client; public HttpClient Client { get; } = client;
public object? RequestData { get; } = requestData; public object? RequestData { get; } = requestData;
@@ -20,7 +22,7 @@ namespace Beam {
private string? ContentType = "application/json"; private string? ContentType = "application/json";
public async Task<ApiResponse> GetResponse(ILogger<ApiResponse>? logger, (int @try, int max)? tries = null, CancellationToken ct = default) { public async Task<ApiResponse> GetResponse(ILogger<ApiResponse>? logger, (int @try, int max)? tries = null, bool readToBuffer = true, CancellationToken ct = default) {
SanitizeHeaders(); SanitizeHeaders();
var request = new HttpRequestMessage(Method, Uri); var request = new HttpRequestMessage(Method, Uri);
@@ -33,15 +35,16 @@ namespace Beam {
foreach (var header in Headers) foreach (var header in Headers)
request.Headers.Add(header.Key, header.Value); request.Headers.Add(header.Key, header.Value);
logger?.LogInformation("Fetching '{}' with method '{}', content-type '{}', and headers '{}'", Uri, Method, ContentType, JsonSerializer.Serialize(request.Headers.ToDictionary())); logger?.LogInformation("Fetching '{}' with method '{}', content-type '{}', and headers '{}'", Uri, Method, ContentType, JsonSerializer.Serialize(request.Headers.ToDictionary()));
var response = await Client.SendAsync(request, ct); var response = await Client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, ct);
if (tries is not null && tries?.@try < tries?.max && !SuccessCodes.Contains(response.StatusCode)) { if (tries is not null && tries?.@try < tries?.max && !SuccessCodes.Contains(response.StatusCode)) {
await Task.Delay((int)Math.Min(Math.Pow(2, tries.Value.@try), 60) * 1000, ct); await Task.Delay((int)Math.Min(Math.Pow(2, tries.Value.@try), 60) * 1000, ct);
return await GetResponse(logger, (tries.Value.@try + 1, tries.Value.max), ct); return await GetResponse(logger, (tries.Value.@try + 1, tries.Value.max), readToBuffer, ct);
} }
return await ApiResponse.CreateAsync(response, logger, RequestData, ct); return await ApiResponse.CreateAsync(response, logger, RequestData, readToBuffer, ct);
} }
private void SanitizeHeaders() { private void SanitizeHeaders() {
@@ -63,4 +66,3 @@ namespace Beam {
public static async Task<ApiResponse> Get(HttpClient client, string url, ILoggerFactory factory) public static async Task<ApiResponse> Get(HttpClient client, string url, ILoggerFactory factory)
=> await new ApiCall(client, url, HttpMethod.Get, [], null, null).GetResponse(factory.CreateLogger<ApiResponse>()); => await new ApiCall(client, url, HttpMethod.Get, [], null, null).GetResponse(factory.CreateLogger<ApiResponse>());
} }
}
@@ -2,11 +2,12 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Net.Http;
using System.Reflection.PortableExecutable; using System.Reflection.PortableExecutable;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
namespace Beam { namespace Beam.Api;
public class ApiCallBuilder(HttpClient client) { public class ApiCallBuilder(HttpClient client) {
HttpClient Client = client; HttpClient Client = client;
string Uri; string Uri;
@@ -25,10 +26,6 @@ namespace Beam {
return WithUri(uri.AbsoluteUri); return WithUri(uri.AbsoluteUri);
} }
public ApiCallBuilder WithUri(SourceLink uri) {
return WithUri(uri.Link);
}
public ApiCallBuilder WithRequestData(object? data) { public ApiCallBuilder WithRequestData(object? data) {
Data = data; Data = data;
return this; return this;
@@ -81,4 +78,4 @@ namespace Beam {
return new ApiCall(Client, Uri, Method, Headers.Select((x) => new KeyValuePair<string, string[]>(x.Key, x.Value.ToArray())).ToArray(), Data, Body, SuccessCodes); return new ApiCall(Client, Uri, Method, Headers.Select((x) => new KeyValuePair<string, string[]>(x.Key, x.Value.ToArray())).ToArray(), Data, Body, SuccessCodes);
} }
} }
}
+10 -4
View File
@@ -1,9 +1,15 @@
// ApiCalls.cs // ApiCalls.cs
using System;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Net; using System.Net;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace Beam { namespace Beam.Api;
/// <summary> /// <summary>
/// Executes a batch of <see cref="ApiCall"/>s using either sequential or parallel strategy. /// Executes a batch of <see cref="ApiCall"/>s using either sequential or parallel strategy.
/// </summary> /// </summary>
@@ -27,7 +33,7 @@ namespace Beam {
// sequential // sequential
var sequential = new List<ApiResponse>(_calls.Count); var sequential = new List<ApiResponse>(_calls.Count);
foreach (var call in _calls) foreach (var call in _calls)
sequential.Add(await call.GetResponse(logger, tries, ct)); sequential.Add(await call.GetResponse(logger, tries, true, ct));
return sequential; return sequential;
} }
@@ -37,7 +43,7 @@ namespace Beam {
_calls.Select((c, i) => (call: c, idx: i)), _calls.Select((c, i) => (call: c, idx: i)),
new ParallelOptions { MaxDegreeOfParallelism = _maxDegree, CancellationToken = ct }, new ParallelOptions { MaxDegreeOfParallelism = _maxDegree, CancellationToken = ct },
async (item, token) => { async (item, token) => {
var response = await item.call.GetResponse(logger, tries, token); var response = await item.call.GetResponse(logger, tries, true, token);
bag.Add((item.idx, response)); bag.Add((item.idx, response));
}); });
@@ -45,4 +51,4 @@ namespace Beam {
return bag.OrderBy(x => x.idx).Select(x => x.res).ToList(); return bag.OrderBy(x => x.idx).Select(x => x.res).ToList();
} }
} }
}
@@ -1,7 +1,10 @@
// ApiCallsBuilder.cs // ApiCallsBuilder.cs
using System;
using System.Collections.Generic;
using System.Net; using System.Net;
namespace Beam { namespace Beam.Api;
/// <summary> /// <summary>
/// Fluent builder for <see cref="ApiCalls"/>. /// Fluent builder for <see cref="ApiCalls"/>.
/// </summary> /// </summary>
@@ -43,5 +46,4 @@ namespace Beam {
throw new InvalidOperationException("At least one ApiCall is required."); throw new InvalidOperationException("At least one ApiCall is required.");
return new ApiCalls(_calls, _parallelism); return new ApiCalls(_calls, _parallelism);
} }
} }
}
+43 -13
View File
@@ -1,19 +1,26 @@
using Microsoft.Extensions.Logging; using System;
using System.IO;
using Microsoft.Extensions.Logging;
using System.Net; using System.Net;
using System.Net.Http;
using System.Net.Http.Json; using System.Net.Http.Json;
using System.Text; using System.Text;
using System.Text.Json; using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
namespace Beam { namespace Beam.Api;
/// <summary> /// <summary>
/// Wrapper that lets the response body be read any number of times (even concurrently). /// Wrapper that lets the response body be read any number of times (even concurrently).
/// </summary> /// </summary>
public sealed class ApiResponse { public sealed class ApiResponse {
private readonly byte[] _buffer; private byte[] _buffer;
private bool _read_has_been_deferred;
private ApiResponse(HttpResponseMessage response, byte[] buffer, ILogger<ApiResponse>? logger, object? requestData = null) { private ApiResponse(HttpResponseMessage response, byte[] buffer, ILogger<ApiResponse>? logger, object? requestData = null) {
Response = response; Response = response;
_buffer = buffer; _buffer = buffer;
_read_has_been_deferred = _buffer.Length == 0;
Logger = logger; Logger = logger;
RequestData = requestData; RequestData = requestData;
} }
@@ -28,8 +35,10 @@ namespace Beam {
HttpResponseMessage response, HttpResponseMessage response,
ILogger<ApiResponse>? logger = null, ILogger<ApiResponse>? logger = null,
object? requestData = null, object? requestData = null,
bool readToBuffer = true,
CancellationToken ct = default) { CancellationToken ct = default) {
if (response is null) throw new ArgumentNullException(nameof(response)); if (response is null) throw new ArgumentNullException(nameof(response));
if (!readToBuffer) return new ApiResponse(response, [], logger, requestData);
var buffer = response.Content is null var buffer = response.Content is null
? [] ? []
@@ -50,33 +59,54 @@ namespace Beam {
if (!Is200) errorHandler(Response.StatusCode); if (!Is200) errorHandler(Response.StatusCode);
return this; return this;
} }
/* ---------- content helpers ---------- */ /* ---------- content helpers ---------- */
public Task<T?> AsSerializedObject<T>(CancellationToken ct = default) { private async Task ReadToBuffer(CancellationToken ct = default) {
if (!_read_has_been_deferred) return;
_buffer = Response.Content is null
? []
: await Response.Content.ReadAsByteArrayAsync(ct).ConfigureAwait(false);
_read_has_been_deferred = false;
}
public async Task<T?> AsSerializedObject<T>(CancellationToken ct = default) {
if (!Is200) throw new InvalidOperationException(); if (!Is200) throw new InvalidOperationException();
if (Response.Content?.Headers.ContentType?.MediaType != "application/json") if (Response.Content?.Headers.ContentType?.MediaType != "application/json")
Logger?.LogWarning("Content-Type is not JSON, yet JSON deserialization was requested."); Logger?.LogWarning("Content-Type is not JSON, yet JSON deserialization was requested.");
return Task.FromResult(JsonSerializer.Deserialize<T>(_buffer)); if (_read_has_been_deferred) {
return await JsonSerializer.DeserializeAsync<T>(await Response.Content!.ReadAsStreamAsync(ct), (JsonSerializerOptions?)null, ct);
} else {
return JsonSerializer.Deserialize<T>(_buffer);
}
} }
public Task<T?> AsDynamicObject<T>(T _, CancellationToken ct = default) public Task<T?> AsDynamicObject<T>(T _, CancellationToken ct = default)
=> AsSerializedObject<T>(ct); => AsSerializedObject<T>(ct);
public Task<string> AsString(CancellationToken ct = default) { public async Task<string> AsString(CancellationToken ct = default) {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content."); if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
return Task.FromResult(Encoding.UTF8.GetString(_buffer)); if (_read_has_been_deferred) {
await ReadToBuffer(ct);
}
return Encoding.UTF8.GetString(_buffer);
} }
public Task<byte[]> AsBinary(CancellationToken ct = default) { public async Task<byte[]> AsBinary(CancellationToken ct = default) {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content."); if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
return Task.FromResult(_buffer); if (_read_has_been_deferred) {
await ReadToBuffer(ct);
}
return _buffer;
} }
public Task<Stream> AsStream(CancellationToken ct = default) { public async Task<Stream> AsStream(CancellationToken ct = default) {
if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content."); if (!Is200) Logger?.LogWarning("Non-success response; attempting to read content.");
return Task.FromResult<Stream>(new MemoryStream(_buffer, writable: false)); if (_read_has_been_deferred) {
return await Response.Content!.ReadAsStreamAsync(ct);
} else {
return new MemoryStream(_buffer, writable: false);
}
} }
}
} }
+17
View File
@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Beam.Abstractions\Beam.Abstractions.csproj" />
</ItemGroup>
</Project>
+17
View File
@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Beam.Abstractions\Beam.Abstractions.csproj" />
</ItemGroup>
</Project>
+14
View File
@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
</ItemGroup>
</Project>
+50
View File
@@ -0,0 +1,50 @@
using aeqw89.DataKeys;
using aeqw89.PersistentData;
using Beam.Dynamic;
using Beam.Models;
namespace Beam.Data {
using BeamFile = Models.File;
/// <summary>
/// Typed facade over <see cref="BaseDataDictionary"/>: each table and junction is exposed as a
/// property that is looked up, and stored, under the property's own name.
/// </summary>
public class BeamDataContext : BaseDataDictionary {
    #region Tables

    /// <summary>Table of <see cref="Template"/> entries.</summary>
    public Table<Template> Templates {
        get { return GetOrCreateTable<Template>(nameof(Templates)); }
        set { Set(nameof(Templates), value); }
    }

    /// <summary>Table of <see cref="DataBindings"/> entries.</summary>
    public Table<DataBindings> Bindings {
        get { return GetOrCreateTable<DataBindings>(nameof(Bindings)); }
        set { Set(nameof(Bindings), value); }
    }

    /// <summary>Table of <see cref="WebResource"/> entries.</summary>
    public Table<WebResource> Resources {
        get { return GetOrCreateTable<WebResource>(nameof(Resources)); }
        set { Set(nameof(Resources), value); }
    }

    /// <summary>Table of <see cref="ResourceDictionary"/> entries.</summary>
    public Table<ResourceDictionary> ResourceDictionaries {
        get { return GetOrCreateTable<ResourceDictionary>(nameof(ResourceDictionaries)); }
        set { Set(nameof(ResourceDictionaries), value); }
    }

    /// <summary>Table of <see cref="ImmutableState"/> entries.</summary>
    public Table<ImmutableState> InitialStates {
        get { return GetOrCreateTable<ImmutableState>(nameof(InitialStates)); }
        set { Set(nameof(InitialStates), value); }
    }

    /// <summary>Table of file entries (<c>Models.File</c>, aliased as <c>BeamFile</c>).</summary>
    public Table<BeamFile> Files {
        get { return GetOrCreateTable<BeamFile>(nameof(Files)); }
        set { Set(nameof(Files), value); }
    }

    #endregion

    #region Junctions

    /// <summary>Junction linking <see cref="WebResource"/> entries to <see cref="ResourceDictionary"/> entries.</summary>
    public Junction<WebResource, ResourceDictionary> WebResourceToResourceDictionaryJunction {
        get { return GetOrCreateJunction<WebResource, ResourceDictionary>(nameof(WebResourceToResourceDictionaryJunction)); }
        set { Set(nameof(WebResourceToResourceDictionaryJunction), value); }
    }

    #endregion

    #region Computed

    /// <summary>
    /// Maps every key in <see cref="Resources"/> to the resource dictionaries the junction lists for it.
    /// The dictionary is rebuilt on every access.
    /// </summary>
    public Dictionary<DataKey<WebResource>, ResourceDictionary[]> ResourceDictionariesByNovel {
        get {
            var dictionariesByResource = new Dictionary<DataKey<WebResource>, ResourceDictionary[]>();
            foreach (var resourceKey in Resources.Keys) {
                // Resolve each linked key through the ResourceDictionaries table.
                var linked = WebResourceToResourceDictionaryJunction[resourceKey]
                    .Select(dictionaryKey => ResourceDictionaries[dictionaryKey])
                    .ToArray();
                dictionariesByResource.Add(resourceKey, linked);
            }
            return dictionariesByResource;
        }
    }

    #endregion
}
}
@@ -1,10 +1,9 @@
using System; using System.Text;
using System.Collections.Generic; using Beam.Abstractions;
using System.Linq; using Beam.Models;
using System.Text; using static Beam.Exceptions.Exceptions;
using System.Threading.Tasks;
namespace Beam { namespace Beam.Data {
/// <summary> /// <summary>
/// Describes where a <see cref="Parameter"/> token should be inserted relative to the runtime value /// Describes where a <see cref="Parameter"/> token should be inserted relative to the runtime value
/// that ultimately replaces it when building a <see cref="SourceLink"/>. /// that ultimately replaces it when building a <see cref="SourceLink"/>.
@@ -129,7 +128,7 @@ namespace Beam {
/// </remarks> /// </remarks>
/// <param name="host">DNS host name (e.g. <c>api.example.com</c>).</param> /// <param name="host">DNS host name (e.g. <c>api.example.com</c>).</param>
/// <param name="protocol">Transport protocol; defaults to <c>https</c>.</param> /// <param name="protocol">Transport protocol; defaults to <c>https</c>.</param>
public class SourceLinkBuilder(string host, string protocol = "https") { public class LinkBuilder(string host, string protocol = "https") : ILinkBuilder {
/// <summary> /// <summary>
/// Gets or sets the scheme part of the URL (e.g. <c>https</c>, <c>http</c>). /// Gets or sets the scheme part of the URL (e.g. <c>https</c>, <c>http</c>).
/// </summary> /// </summary>
@@ -148,8 +147,8 @@ namespace Beam {
/// <summary> /// <summary>
/// Produces a deep copy whose <see cref="Segments"/> and contained collections are detached from the original. /// Produces a deep copy whose <see cref="Segments"/> and contained collections are detached from the original.
/// </summary> /// </summary>
public SourceLinkBuilder Clone() public LinkBuilder Clone()
=> new SourceLinkBuilder(Host, Protocol) { => new LinkBuilder(Host, Protocol) {
Segments = [.. Segments.Select(static x => x.Clone())] Segments = [.. Segments.Select(static x => x.Clone())]
}; };
@@ -241,7 +240,7 @@ namespace Beam {
/// Replaces the whole <see cref="Segments"/> collection with the supplied <paramref name="segments"/>, each represented as a <see cref="LinkSegment"/>. /// Replaces the whole <see cref="Segments"/> collection with the supplied <paramref name="segments"/>, each represented as a <see cref="LinkSegment"/>.
/// </summary> /// </summary>
/// <returns>This instance for fluent calls.</returns> /// <returns>This instance for fluent calls.</returns>
public SourceLinkBuilder WithSegments(params IEnumerable<string> segments) { public LinkBuilder WithSegments(params IEnumerable<string> segments) {
Segments = segments.Select(static x => new LinkSegment(x)).ToList(); Segments = segments.Select(static x => new LinkSegment(x)).ToList();
return this; return this;
} }
@@ -250,7 +249,7 @@ namespace Beam {
/// Replaces the <see cref="Segments"/> collection with <paramref name="count"/> empty segments. /// Replaces the <see cref="Segments"/> collection with <paramref name="count"/> empty segments.
/// </summary> /// </summary>
/// <param name="count">Number of segments to create.</param> /// <param name="count">Number of segments to create.</param>
public SourceLinkBuilder WithSegments(int count) public LinkBuilder WithSegments(int count)
=> WithSegments(Enumerable.Repeat("", count)); => WithSegments(Enumerable.Repeat("", count));
#endregion #endregion
@@ -258,7 +257,7 @@ namespace Beam {
/// <summary> /// <summary>
/// Replaces parameters of the <paramref name="i"/>th segment using the supplied identifiers. /// Replaces parameters of the <paramref name="i"/>th segment using the supplied identifiers.
/// </summary> /// </summary>
public SourceLinkBuilder WithParameters(int i, params string[] parameters) { public LinkBuilder WithParameters(int i, params string[] parameters) {
Segments[i].WithParameters(parameters); Segments[i].WithParameters(parameters);
return this; return this;
} }
@@ -266,7 +265,7 @@ namespace Beam {
/// <summary> /// <summary>
/// Replaces parameters of the <paramref name="i"/>th segment using explicit name/position tuples. /// Replaces parameters of the <paramref name="i"/>th segment using explicit name/position tuples.
/// </summary> /// </summary>
public SourceLinkBuilder WithParameters(int i, params (string, Position)[] parameters) { public LinkBuilder WithParameters(int i, params (string, Position)[] parameters) {
Segments[i].WithParameters(parameters); Segments[i].WithParameters(parameters);
return this; return this;
} }
@@ -325,12 +324,15 @@ namespace Beam {
return count; return count;
} }
public string Build(IReadOnlyState state)
=> Build(state.GetState().ToArray().ToArray<object>());
#region Build #region Build
/// <summary> /// <summary>
/// Produces a concrete <see cref="SourceLink"/> using values from an external <see cref="State"/> object. /// Produces a concrete <see cref="SourceLink"/> using values from an external <see cref="State"/> object.
/// </summary> /// </summary>
/// <param name="parameterValues">Object providing positional values.</param> /// <param name="parameterValues">Object providing positional values.</param>
public SourceLink Build(State parameterValues) public string Build(State parameterValues)
=> Build(parameterValues.GetState()); => Build(parameterValues.GetState());
/// <summary> /// <summary>
@@ -339,7 +341,7 @@ namespace Beam {
/// <param name="parameterValues">Flat array of values that will be written in the order that parameters appear when segments are enumerated lefttoright. Any optional parameters must still appear as null if missing.</param> /// <param name="parameterValues">Flat array of values that will be written in the order that parameters appear when segments are enumerated lefttoright. Any optional parameters must still appear as null if missing.</param>
/// <returns>The completed <see cref="SourceLink"/>.</returns> /// <returns>The completed <see cref="SourceLink"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the supplied value count does not match <see cref="GetParameterCount"/>().</exception> /// <exception cref="ArgumentOutOfRangeException">If the supplied value count does not match <see cref="GetParameterCount"/>().</exception>
public SourceLink Build(params object?[] parameterValues) { public string Build(params object?[] parameterValues) {
ArgumentOutOfRangeException.ThrowIfNotEqual(parameterValues.Length, GetParameterCount()); ArgumentOutOfRangeException.ThrowIfNotEqual(parameterValues.Length, GetParameterCount());
StringBuilder link = new(); StringBuilder link = new();
@@ -357,10 +359,10 @@ namespace Beam {
if (segment.Parameters[i].Position.HasFlag(Position.Optional)) if (segment.Parameters[i].Position.HasFlag(Position.Optional))
continue; continue;
else else
throw new ArgumentException(S.M.RequiredArgumentMissing); throw new ArgumentException(string.Format(link_builder_argument_missing, pvC, segment.Parameters[i].Name));
if (segment.Parameters[i].Position.HasFlag(Position.Query) && Segments[^1] != segment) if (segment.Parameters[i].Position.HasFlag(Position.Query) && Segments[^1] != segment)
throw new ArgumentException(S.M.QueryParametersOnlyAtLastSegment); throw new ArgumentException(string.Format(link_builder_query_only_at_last, i));
if (segment.Parameters[i].Position.HasFlag(Position.Query)) if (segment.Parameters[i].Position.HasFlag(Position.Query))
if (!startedQueryString) { if (!startedQueryString) {
@@ -378,10 +380,10 @@ namespace Beam {
if (parameterValues[pvC] is not null) if (parameterValues[pvC] is not null)
link.Append(parameterValues[pvC++]); link.Append(parameterValues[pvC++]);
else if (!segment.Parameters[i].Position.HasFlag(Position.Optional)) else if (!segment.Parameters[i].Position.HasFlag(Position.Optional))
throw new ArgumentException(S.M.RequiredArgumentMissing); throw new ArgumentException(string.Format(link_builder_argument_missing, pvC, segment.Parameters[i].Name));
if (segment.Parameters[i].Position.HasFlag(Position.Query | Position.After)) if (segment.Parameters[i].Position.HasFlag(Position.Query | Position.After))
throw new ArgumentException(S.M.QueryFlagIncompatibleWithAfterFlag); throw new ArgumentException(string.Format(link_builder_incompatible_flag, nameof(Position.Query), nameof(Position.After)));
if (segment.Parameters[i].Position.HasFlag(Position.After)) if (segment.Parameters[i].Position.HasFlag(Position.After))
link.Append(segment.Parameters[i].Name); link.Append(segment.Parameters[i].Name);
@@ -393,7 +395,7 @@ namespace Beam {
link.Append(segment.Suffix); link.Append(segment.Suffix);
} }
return new SourceLink(link.ToString()); return link.ToString();
} }
#endregion #endregion
} }
@@ -1,16 +1,10 @@
 using System.Text.Json.Serialization;
using aeqw89.DataKeys; using aeqw89.DataKeys;
using System; using Beam.Abstractions;
using System.Collections.Generic; using Beam.Models;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
namespace Beam.Models { namespace Beam.Data {
public class ResourceDictionary : IKeyed<ResourceDictionary> { public class ResourceDictionary : IKeyed<ResourceDictionary>, IResourceDictionary {
public required DataKey<ResourceDictionary> Key { get; set; } public required DataKey<ResourceDictionary> Key { get; set; }
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+19
View File
@@ -0,0 +1,19 @@
using aeqw89.DataKeys;
using Beam.Abstractions;
using Beam.Dynamic;
namespace Beam.Data {
/// <summary>
/// Concrete <see cref="ITemplate"/> that stores its factory and builder as the concrete
/// <see cref="StateChangerFactory"/> and <see cref="LinkBuilder"/> types.
/// </summary>
public record class Template : ITemplate {
    /// <summary>Key identifying this template.</summary>
    public required DataKey<ITemplate> Key { get; set; }

    /// <summary>The concrete state changer factory.</summary>
    public required StateChangerFactory Factory { get; set; }

    /// <summary>
    /// Interface view of <see cref="Factory"/>. The setter casts to <see cref="StateChangerFactory"/>,
    /// so assigning any other implementation fails at the cast.
    /// </summary>
    IStateChangerFactory ITemplate.Factory {
        get { return Factory; }
        set { Factory = (StateChangerFactory)value; }
    }

    /// <summary>The concrete link builder.</summary>
    public required LinkBuilder Builder { get; set; }

    /// <summary>
    /// Interface view of <see cref="Builder"/>. The setter casts to <see cref="LinkBuilder"/>,
    /// so assigning any other implementation fails at the cast.
    /// </summary>
    ILinkBuilder ITemplate.Builder {
        get { return Builder; }
        set { Builder = (LinkBuilder)value; }
    }
}
}
@@ -1,13 +1,7 @@
using aeqw89.PersistentData; using aeqw89.DataKeys;
using aeqw89.DataKeys;
using Beam.Dynamic; using Beam.Dynamic;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Models { namespace Beam.Data {
/// <summary> /// <summary>
/// Represents a specific resource accessible online; e.g. a book's contents. /// Represents a specific resource accessible online; e.g. a book's contents.
/// </summary> /// </summary>
+21
View File
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Beam.Abstractions\Beam.Abstractions.csproj" />
<ProjectReference Include="..\Beam.Exceptions\Beam.Exceptions.csproj" />
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
<PackageReference Include="System.IO.Hashing" Version="10.0.0" />
<PackageReference Include="System.Linq.Async" Version="6.0.3" />
</ItemGroup>
</Project>
@@ -1,42 +1,34 @@
using HtmlAgilityPack; using Beam.Abstractions;
using System; using Beam.Models;
using System.Collections.Concurrent; using HtmlAgilityPack;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace Beam { namespace Beam.Downloaders {
//public delegate T HtmlTransformer<out T>(HtmlDocument doc); //public delegate T HtmlTransformer<out T>(HtmlDocument doc);
public delegate Task<U> AsyncTransformer<in T, U>(T elem);
//public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc); //public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);
//public delegate Task<T> AsyncBinaryTransformer<T>(byte[] bin); //public delegate Task<T> AsyncBinaryTransformer<T>(byte[] bin);
public class DownloadContext<RawType> : IDisposable { public class DownloadContext {
private bool disposedValue; private bool disposedValue;
public DownloadContextBuilder<RawType> CreateBuilder()
=> DownloadContextBuilder<RawType>.FromContext(this);
public HttpClient Client { get; } public HttpClient Client { get; }
public HtmlWeb Web { get; } public HtmlWeb Web { get; }
public IProgress<DownloadReport>? DownloadReporter { get; set; } public IProgress<IDownloadReport>? DownloadReporter { get; set; }
public IProgress<RetryReport>? RetryReporter { get; set; } public IProgress<IRetryReport>? RetryReporter { get; set; }
public AsyncDownloadFailurePredicate<RawType>?[]? AsyncFailurePredicates { get; } public AsyncDownloadFailurePredicate<ByteDocument>?[]? AsyncFailurePredicates { get; }
public TimeSpan TimeOut { get; set; } public TimeSpan TimeOut { get; set; }
public IEnumerable<SourceLink> Links { get; } public IEnumerable<string> Links { get; }
public CancellationToken CancellationToken { get; } public CancellationToken CancellationToken { get; }
public DocumentCache Cache { get; private set; } = []; public DocumentCache Cache { get; private set; } = [];
public ILogger? DownloadLogger { get; set; } public ILogger? DownloadLogger { get; set; }
public DownloadContext(HtmlWeb web, public DownloadContext(HtmlWeb web,
HttpClient client, HttpClient client,
IEnumerable<SourceLink> links, IEnumerable<string> links,
CancellationToken cancellationToken = default, CancellationToken cancellationToken = default,
IProgress<DownloadReport>? downloadReporter = null, IProgress<IDownloadReport>? downloadReporter = null,
IProgress<RetryReport>? retryReporter = null, IProgress<IRetryReport>? retryReporter = null,
AsyncDownloadFailurePredicate<RawType>?[]? asyncFailurePredicates = null, AsyncDownloadFailurePredicate<ByteDocument>?[]? asyncFailurePredicates = null,
TimeSpan? timeOut = null, TimeSpan? timeOut = null,
ILogger? downloadLogger = null) { ILogger? downloadLogger = null) {
ArgumentNullException.ThrowIfNull(web, nameof(web)); ArgumentNullException.ThrowIfNull(web, nameof(web));
@@ -1,21 +1,18 @@
using HtmlAgilityPack; using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam { namespace Beam.Downloaders {
public class DownloadContextBuilder<RawType> { public class DownloadContextBuilder {
private HtmlWeb _web; private HtmlWeb _web;
private HttpClient _client; private HttpClient _client;
private IProgress<DownloadReport>? _downloadReporter; private IProgress<IDownloadReport>? _downloadReporter;
private IProgress<RetryReport>? _retryReporter; private IProgress<IRetryReport>? _retryReporter;
private AsyncDownloadFailurePredicate<RawType>?[] _asyncFailurePredicates = []; private AsyncDownloadFailurePredicate<ByteDocument>?[] _asyncFailurePredicates = [];
private TimeSpan _timeOut; private TimeSpan _timeOut;
private IEnumerable<SourceLink> _links; private IEnumerable<string> _links;
private CancellationToken _cancellationToken; private CancellationToken _cancellationToken;
private DocumentCache _cache; private DocumentCache _cache;
private ILogger? _downloadLogger; private ILogger? _downloadLogger;
@@ -29,60 +26,60 @@ namespace Beam {
_links = []; _links = [];
} }
public DownloadContextBuilder<RawType> WithWeb(HtmlWeb web) { public DownloadContextBuilder WithWeb(HtmlWeb web) {
_web = web; _web = web;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithClient(HttpClient client) { public DownloadContextBuilder WithClient(HttpClient client) {
_client = client; _client = client;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithDownloadReporter(IProgress<DownloadReport> downloadReporter) { public DownloadContextBuilder WithDownloadReporter(IProgress<IDownloadReport> downloadReporter) {
_downloadReporter = downloadReporter; _downloadReporter = downloadReporter;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithRetryReporter(IProgress<RetryReport> retryReporter) { public DownloadContextBuilder WithRetryReporter(IProgress<IRetryReport> retryReporter) {
_retryReporter = retryReporter; _retryReporter = retryReporter;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate<RawType>[] predicates) { public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate<ByteDocument>[] predicates) {
_asyncFailurePredicates = predicates; _asyncFailurePredicates = predicates;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithTimeOut(TimeSpan timeOut) { public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) {
_timeOut = timeOut; _timeOut = timeOut;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithLinks(IEnumerable<SourceLink> links) { public DownloadContextBuilder WithLinks(IEnumerable<string> links) {
_links = links; _links = links;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithCancellationToken(CancellationToken cancellationToken) { public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) {
_cancellationToken = cancellationToken; _cancellationToken = cancellationToken;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithCache(DocumentCache cache) { public DownloadContextBuilder WithCache(DocumentCache cache) {
_cache = cache; _cache = cache;
return this; return this;
} }
public DownloadContextBuilder<RawType> WithDownloadLogger(ILogger downloadLogger) { public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) {
_downloadLogger = downloadLogger; _downloadLogger = downloadLogger;
return this; return this;
} }
public DownloadContext<RawType> Build() { public DownloadContext Build() {
// Construct the DownloadContext<T> using the collected values. // Construct the DownloadContext<T> using the collected values.
var context = new DownloadContext<RawType>( var context = new DownloadContext(
web: _web, web: _web,
client: _client, client: _client,
links: _links, links: _links,
@@ -103,15 +100,15 @@ namespace Beam {
return context; return context;
} }
public static DownloadContextBuilder<RawType> FromContext(DownloadContext<RawType> existing) { public static DownloadContextBuilder FromContext(DownloadContext existing) {
if (existing == null) throw new ArgumentNullException(nameof(existing)); if (existing == null) throw new ArgumentNullException(nameof(existing));
return new DownloadContextBuilder<RawType>(existing.Client, existing.Web) return new DownloadContextBuilder(existing.Client, existing.Web)
.WithLinks(existing.Links) .WithLinks(existing.Links)
.WithCancellationToken(existing.CancellationToken) .WithCancellationToken(existing.CancellationToken)
.WithDownloadReporter(existing.DownloadReporter!) .WithDownloadReporter(existing.DownloadReporter!)
.WithRetryReporter(existing.RetryReporter!) .WithRetryReporter(existing.RetryReporter!)
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<RawType>>()) .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<ByteDocument>>())
.WithTimeOut(existing.TimeOut) .WithTimeOut(existing.TimeOut)
.WithDownloadLogger(existing.DownloadLogger!) .WithDownloadLogger(existing.DownloadLogger!)
.WithCache(existing.Cache); .WithCache(existing.Cache);
@@ -0,0 +1,9 @@
using Beam.Models;
namespace Beam.Downloaders;
/// <summary>
/// Immutable settings describing which failure predicates are applied to a downloaded
/// document and whether they are evaluated concurrently.
/// </summary>
/// <typeparam name="RawType">The raw document type handed to each predicate.</typeparam>
public record FailurePredicateOptions<RawType>
{
    /// <summary>
    /// The predicates to evaluate. Both the array and its individual entries may be null.
    /// </summary>
    public required AsyncDownloadFailurePredicate<RawType>?[]? AsyncDownloadFailurePredicates { get; init; }

    /// <summary>
    /// Whether the predicates should be evaluated in parallel. Defaults to <c>false</c>.
    /// </summary>
    public bool ProcessInParallel { get; init; }

    /// <summary>
    /// Optional upper bound on concurrent predicate evaluations; <c>null</c> leaves the
    /// choice to the consumer.
    /// </summary>
    public int? ParallelThreads { get; init; }
}
@@ -0,0 +1,56 @@
using Beam.Models;
namespace Beam.Downloaders;
/// <summary>
/// Fluent builder that accumulates failure predicates and parallelism settings and
/// produces a <see cref="FailurePredicateOptions{TRaw}"/>.
/// </summary>
/// <typeparam name="TRaw">The raw document type handed to each predicate.</typeparam>
public sealed class FailurePredicateOptionsBuilder<TRaw>
{
    private readonly System.Collections.Generic.List<AsyncDownloadFailurePredicate<TRaw>?> _predicates = new();
    private bool _processInParallel;
    private int? _parallelThreads;

    /// <summary>Appends a single predicate (null entries are stored as-is).</summary>
    public FailurePredicateOptionsBuilder<TRaw> WithPredicate(AsyncDownloadFailurePredicate<TRaw>? predicate)
    {
        _predicates.Add(predicate);
        return this;
    }

    /// <summary>Appends every predicate of <paramref name="predicates"/> to the current list.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="predicates"/> is null.</exception>
    public FailurePredicateOptionsBuilder<TRaw> WithPredicates(System.Collections.Generic.IEnumerable<AsyncDownloadFailurePredicate<TRaw>?> predicates)
    {
        if (predicates is null)
        {
            throw new System.ArgumentNullException(nameof(predicates));
        }

        _predicates.AddRange(predicates);
        return this;
    }

    /// <summary>
    /// Replaces the current list with <paramref name="predicates"/>. Unlike the
    /// <c>IEnumerable</c> overload, this one clears what was collected so far; a null
    /// array simply leaves the list empty.
    /// </summary>
    public FailurePredicateOptionsBuilder<TRaw> WithPredicates(params AsyncDownloadFailurePredicate<TRaw>?[] predicates)
    {
        _predicates.Clear();
        if (predicates is not null)
        {
            _predicates.AddRange(predicates);
        }

        return this;
    }

    /// <summary>Sets whether predicates are evaluated in parallel.</summary>
    public FailurePredicateOptionsBuilder<TRaw> WithProcessInParallel(bool value = true)
    {
        _processInParallel = value;
        return this;
    }

    /// <summary>Sets the optional parallelism bound; null clears it.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="threads"/> is zero or negative.</exception>
    public FailurePredicateOptionsBuilder<TRaw> WithParallelThreads(int? threads)
    {
        if (threads is <= 0)
        {
            throw new System.ArgumentOutOfRangeException(nameof(threads));
        }

        _parallelThreads = threads;
        return this;
    }

    /// <summary>Creates the options object from a snapshot of the collected values.</summary>
    public FailurePredicateOptions<TRaw> Build()
    {
        AsyncDownloadFailurePredicate<TRaw>?[] snapshot = _predicates.Count > 0 ? _predicates.ToArray() : [];
        return new FailurePredicateOptions<TRaw>
        {
            AsyncDownloadFailurePredicates = snapshot,
            ProcessInParallel = _processInParallel,
            ParallelThreads = _parallelThreads
        };
    }
}
+7
View File
@@ -0,0 +1,7 @@
namespace Beam.Downloaders;
/// <summary>
/// Immutable settings controlling how a download is split into fragments.
/// </summary>
public record FragmentOptions
{
    /// <summary>
    /// Size of one fragment. Must be supplied by the creator.
    /// NOTE(review): the builder's parameter for this value is named <c>bytes</c>; confirm the unit against the consumer.
    /// </summary>
    public required int FragmentSize { get; init; }

    /// <summary>Whether fragments should be downloaded in parallel. Defaults to <c>false</c>.</summary>
    public bool DownloadInParallel { get; init; }

    /// <summary>Optional upper bound on concurrent fragment downloads.</summary>
    public int? ParallelThreads { get; init; }
}
@@ -0,0 +1,36 @@
namespace Beam.Downloaders;
/// <summary>
/// Fluent builder for <see cref="FragmentOptions"/>. A fragment size must be supplied
/// before <see cref="Build"/> is called.
/// </summary>
public sealed class FragmentOptionsBuilder {
    private int? _fragmentSize;
    private bool _downloadInParallel;
    private int? _parallelThreads;

    /// <summary>Sets the fragment size.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="bytes"/> is zero or negative.</exception>
    public FragmentOptionsBuilder WithFragmentSize(int bytes) {
        if (bytes < 1) {
            throw new System.ArgumentOutOfRangeException(nameof(bytes));
        }

        _fragmentSize = bytes;
        return this;
    }

    /// <summary>Sets whether fragments are downloaded in parallel.</summary>
    public FragmentOptionsBuilder WithDownloadInParallel(bool value = true) {
        _downloadInParallel = value;
        return this;
    }

    /// <summary>Sets the optional parallelism bound; null clears it.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="threads"/> is zero or negative.</exception>
    public FragmentOptionsBuilder WithParallelThreads(int? threads) {
        if (threads is <= 0) {
            throw new System.ArgumentOutOfRangeException(nameof(threads));
        }

        _parallelThreads = threads;
        return this;
    }

    /// <summary>Creates the options object.</summary>
    /// <exception cref="System.InvalidOperationException">No fragment size was supplied.</exception>
    public FragmentOptions Build() {
        if (_fragmentSize is not int size) {
            throw new System.InvalidOperationException("FragmentSize must be provided.");
        }

        return new FragmentOptions {
            FragmentSize = size,
            DownloadInParallel = _downloadInParallel,
            ParallelThreads = _parallelThreads
        };
    }
}
@@ -1,18 +1,19 @@
using HtmlAgilityPack; using Beam.Abstractions;
using Beam.Models;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace Beam { namespace Beam.Downloaders {
public class SequentialDownloader<RawType, OutType> : IAsyncEnumerator<OutType> { public class SequentialDownloader<OutType> : IAsyncEnumerator<OutType> {
public OutType Current { get; protected set; } public OutType Current { get; protected set; }
public DownloadContext<RawType> Context { get; } public DownloadContext Context { get; }
public ILogger? Logger { get; set; } public ILogger? Logger { get; set; }
public int LastOrder { get; set; } = 0; public int LastOrder { get; set; } = 0;
protected IEnumerator<SourceLink> LinksEnumerator; protected IEnumerator<string> LinksEnumerator;
public Func<IUnitDownloader<OutType>> GetUnitDownloader { get; set; } public Func<IUnitDownloader<OutType>> GetUnitDownloader { get; set; }
public SequentialDownloader(DownloadContext<RawType> context, Func<DownloadContext<RawType>, IUnitDownloader<OutType>> getUnitDownloader, ILogger? logger = null) { public SequentialDownloader(DownloadContext context, Func<DownloadContext, IUnitDownloader<OutType>> getUnitDownloader, ILogger? logger = null) {
Context = context; Context = context;
Logger = logger; Logger = logger;
LinksEnumerator = Context.Links.GetEnumerator(); LinksEnumerator = Context.Links.GetEnumerator();
@@ -41,22 +42,29 @@ namespace Beam {
List<Ordered<string>> links = []; List<Ordered<string>> links = [];
//Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri); //Logger?.LogInformation("MoveNextAsync() \n\t -> Links.Current = {} ", LinksEnumerator.Current.Link.AbsoluteUri);
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.AbsoluteUri, LastOrder++)); links.Add(new Ordered<string>(LinksEnumerator.Current, LastOrder++));
while (links.Count < idealLinkCount && LinksEnumerator.MoveNext()) {
if (string.IsNullOrWhiteSpace(LinksEnumerator.Current)) {
return false;
}
links.Add(new Ordered<string>(LinksEnumerator.Current, LastOrder++));
}
while (LinksEnumerator.MoveNext() && LinksEnumerator.Current != SourceLink.InvalidLink && links.Count < idealLinkCount)
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.AbsoluteUri, LastOrder++));
//Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count); //Logger?.LogInformation("MoveNextAsync() \n\t -> links.Count = {} ", links.Count);
if (links.Count == 0) { if (links.Count == 0) {
Logger?.LogInformation("Out of links!"); Logger?.LogInformation("Out of links!");
return false; return false;
} }
if (links.Any((x) => x.Data == SourceLink.InvalidLink.Link.AbsoluteUri)) if (links.Any((x) => string.IsNullOrWhiteSpace(x.Data)))
return false; return false;
var (result, downloadedT) = await unit.TryDownload( var (result, downloadedT) = await unit.TryDownload(
links.ToArray(), links.ToArray(),
Context.CancellationToken, Context.CancellationToken,
downProgress: Context.DownloadReporter,
tryProgress: Context.RetryReporter); tryProgress: Context.RetryReporter);
if (!result) { if (!result) {
@@ -1,11 +1,12 @@
using Microsoft.Extensions.Logging; using Beam.Abstractions;
using System.Collections.Concurrent; using Beam.Models;
using Microsoft.Extensions.Logging;
namespace Beam { namespace Beam.Downloaders {
public class SequentialFragmentDownloader<RawType, OutType> : SequentialDownloader<RawType, Fragment<Ordered<OutType>>> { public class SequentialFragmentDownloader<OutType> : SequentialDownloader<Fragment<Ordered<OutType>>> {
public SequentialFragmentDownloader( public SequentialFragmentDownloader(
DownloadContext<RawType> context, DownloadContext context,
Func<DownloadContext<RawType>, IUnitDownloader<Fragment<Ordered<OutType>>>> getUnitDownloader, Func<DownloadContext, IUnitDownloader<Fragment<Ordered<OutType>>>> getUnitDownloader,
ILogger? logger = null) ILogger? logger = null)
: base(context, getUnitDownloader, logger) {} : base(context, getUnitDownloader, logger) {}
} }
+48
View File
@@ -0,0 +1,48 @@
using Beam.Models;
namespace Beam.Downloaders;
/// <summary>
/// Settings describing which skip predicates are consulted before a link is downloaded
/// and whether they are evaluated concurrently.
/// </summary>
/// <typeparam name="OutType">The output type a predicate can supply in place of a download.</typeparam>
public class SkipPredicateOptions<OutType>
{
    /// <summary>The predicates to consult. Both the array and its individual entries may be null.</summary>
    public required SkipPredicate<OutType>?[]? SkipPredicates { get; init; }

    /// <summary>Whether the predicates should be evaluated in parallel. Defaults to <c>false</c>.</summary>
    public bool ProcessInParallel { get; init; }

    /// <summary>Optional upper bound on concurrent predicate evaluations.</summary>
    public int? ParallelThreads { get; init; }
}
/// <summary>
/// Fluent builder for <see cref="SkipPredicateOptions{OutType}"/>.
/// </summary>
/// <typeparam name="OutType">The output type a predicate can supply in place of a download.</typeparam>
public class SkipPredicateOptionsBuilder<OutType> {
    private readonly List<SkipPredicate<OutType>?> _skipPredicates = [];
    private bool _processInParallel;
    private int? _parallelThreads;

    /// <summary>
    /// Adds a single predicate. By default it is appended; pass <paramref name="replace"/>
    /// as <c>true</c> to discard previously collected predicates first.
    /// </summary>
    public SkipPredicateOptionsBuilder<OutType> WithSkipPredicate(SkipPredicate<OutType> predicate, bool replace=false) {
        if (replace)
            _skipPredicates.Clear();
        _skipPredicates.Add(predicate);
        return this;
    }

    /// <summary>
    /// Adds several predicates. Note the default differs from <see cref="WithSkipPredicate"/>:
    /// here the existing list is replaced unless <paramref name="replace"/> is <c>false</c>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="predicates"/> is null.</exception>
    public SkipPredicateOptionsBuilder<OutType> WithSkipPredicates(SkipPredicate<OutType>[] predicates,
        bool replace = true) {
        // Validate before clearing so a bad argument cannot wipe the collected predicates.
        ArgumentNullException.ThrowIfNull(predicates);
        if (replace)
            _skipPredicates.Clear();
        _skipPredicates.AddRange(predicates);
        return this;
    }

    /// <summary>Sets whether predicates are evaluated in parallel.</summary>
    public SkipPredicateOptionsBuilder<OutType> ProcessInParallel(bool processInParallel = true) {
        _processInParallel = processInParallel;
        return this;
    }

    /// <summary>Sets the upper bound on concurrent predicate evaluations.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="parallelThreads"/> is zero or negative.</exception>
    public SkipPredicateOptionsBuilder<OutType> WithParallelThreads(int parallelThreads) {
        // Same validation as the failure-predicate and fragment builders.
        if (parallelThreads <= 0)
            throw new ArgumentOutOfRangeException(nameof(parallelThreads));
        _parallelThreads = parallelThreads;
        return this;
    }

    /// <summary>Creates the options object from a snapshot of the collected values.</summary>
    public SkipPredicateOptions<OutType> Build() {
        return new SkipPredicateOptions<OutType>() {
            SkipPredicates = _skipPredicates.ToArray(),
            ParallelThreads = _parallelThreads,
            ProcessInParallel = _processInParallel
        };
    }
}
+200
View File
@@ -0,0 +1,200 @@
using System.Diagnostics.CodeAnalysis;
using System.Text;
using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;
using File = System.IO.File;
namespace Beam.Downloaders {
/// <summary>
/// Manages a single download with failure detection and exponential-backoff retries. This class is safe to instantiate per request.
/// </summary>
/// <typeparam name="OutType">The type produced by transforming the downloaded document.</typeparam>
/// <param name="options">The client, transformer, predicate and storage settings used by this downloader.</param>
public class UnitDownloader<OutType>(UnitDownloaderOptions<OutType> options) : IUnitDownloader<OutType> {
public UnitDownloaderOptions<OutType> Options { get; } = options;
public HttpClient Client => Options.Client;
public virtual AsyncTransformer<ByteDocument, OutType> Transformer => Options.AsyncTransformer;
public virtual AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicates =>
Options?.FailurePredicateOptions?.AsyncDownloadFailurePredicates;
public int LinksPerDownload { get; } = 1;
/// <summary>
/// Downloads <paramref name="url"/> and writes the body to <paramref name="destinationStream"/>.
/// </summary>
/// <remarks>
/// When <c>FollowRedirects</c> is set the response is fetched with <c>GetAsync</c>, copied in one go and a
/// single report is emitted (destination length, zero remaining). Otherwise the body is streamed in
/// chunks of <paramref name="bufferSize"/> bytes and one report is emitted per chunk, carrying the size of
/// that chunk (not a running total) and the remaining byte count when the stream exposes its length.
/// NOTE(review): whether HTTP redirects are actually followed is decided by the handler behind
/// <see cref="Client"/> (e.g. <c>HttpClientHandler.AllowAutoRedirect</c>), not by which Get* method is
/// called here - confirm the option does what callers expect.
/// NOTE(review): the <c>GetAsync</c> branch does not check the status code, whereas <c>GetStreamAsync</c>
/// throws on non-success responses.
/// </remarks>
/// <param name="url">The address to download.</param>
/// <param name="bufferSize">Chunk size, in bytes, for the streaming branch.</param>
/// <param name="destinationStream">The stream that receives the body.</param>
/// <param name="progress">Receives download reports.</param>
/// <param name="ct">Cancels the request, reads and writes.</param>
protected virtual async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress,
    CancellationToken ct) {
    if (options.FollowRedirects) {
        // Dispose the response so its connection/content buffers are released promptly.
        using var response = await Client.GetAsync(url, ct);
        await response.Content.CopyToAsync(destinationStream, ct);
        progress?.Report(new DownloadReport() {
            BytesDownloaded = destinationStream.Length,
            BytesRemaining = 0
        });
        return;
    }

    await using var stream = await Client.GetStreamAsync(url, ct);
    byte[] buffer = new byte[bufferSize];
    long downloaded = 0;

    // Probe the length once: network streams usually throw here, and doing so per chunk
    // would raise and swallow an exception on every iteration.
    long? totalLength;
    try {
        totalLength = stream.Length;
    }
    catch (Exception) {
        totalLength = null;
    }

    int read;
    while ((read = await stream.ReadAsync(buffer, ct)) > 0) {
        downloaded += read;
        await destinationStream.WriteAsync(buffer.AsMemory(0, read), ct);
        progress?.Report(new DownloadReport() {
            BytesDownloaded = read,
            BytesRemaining = totalLength - downloaded
        });
        ct.ThrowIfCancellationRequested();
    }
}
/// <summary>
/// Downloads <paramref name="url"/> into the file at <paramref name="path"/>, replacing any previous content.
/// </summary>
/// <param name="url">The address to download.</param>
/// <param name="bufferSize">Chunk size, in bytes, forwarded to <c>DownloadToStream</c>.</param>
/// <param name="path">Destination file; its directory must already exist.</param>
/// <param name="progress">Receives download reports.</param>
/// <param name="ct">Cancels the download.</param>
/// <exception cref="InvalidOperationException">The directory of <paramref name="path"/> does not exist.</exception>
protected virtual async Task DownloadToFile(string url, int bufferSize, string path,
    IProgress<IDownloadReport> progress, CancellationToken ct) {
    if (!Directory.Exists(Path.GetDirectoryName(path)))
        throw new InvalidOperationException(
            string.Format(Exceptions.Exceptions.unit_download_directory_nonexistant, path));

    // FileMode.Create truncates an existing file. File.OpenWrite would keep the old bytes past the
    // end of a shorter download, e.g. when a retry overwrites a previous failed attempt.
    await using var file = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None);
    await DownloadToStream(url, bufferSize, file, progress, ct);
}
/// <summary>
/// Downloads <paramref name="url"/> into an in-memory buffer and wraps the bytes in a <see cref="ByteDocument"/>.
/// </summary>
/// <param name="url">The address to download; also stored on the returned document.</param>
/// <param name="bufferSize">Chunk size, in bytes, forwarded to <c>DownloadToStream</c>.</param>
/// <param name="progress">Receives download reports.</param>
/// <param name="ct">Cancels the download.</param>
/// <exception cref="Exception">The memory stream's buffer could not be obtained.</exception>
protected virtual async Task<ByteDocument> DownloadToMemory(string url, int bufferSize,
    IProgress<IDownloadReport> progress, CancellationToken ct) {
    await using var memory = new MemoryStream();
    await DownloadToStream(url, bufferSize, memory, progress, ct);

    // The segment refers to the stream's backing array, so no extra copy is made.
    bool bufferExposed = memory.TryGetBuffer(out var segment);
    if (bufferExposed)
        return new ByteDocument(url, segment);

    throw new Exception(Exceptions.Exceptions.unit_download_invalid_memory_stream);
}
/// <summary>
/// Evaluates the configured failure predicates against <paramref name="doc"/>.
/// </summary>
/// <remarks>
/// Null predicates are ignored. In sequential mode evaluation stops at the first predicate that
/// reports a failure. In parallel mode up to <c>ParallelThreads</c> (4 when unset) predicates run
/// concurrently and iterations that start after a failure was recorded return immediately.
/// </remarks>
/// <param name="doc">The downloaded document to inspect.</param>
/// <param name="ct">Cancels the parallel evaluation.</param>
/// <returns><c>true</c> when at least one predicate flags the document; otherwise <c>false</c>.</returns>
protected virtual async Task<bool> IsFailure(ByteDocument doc, CancellationToken ct) {
    var predicates = FailurePredicates;
    if (predicates is null)
        return false;

    var predicateOptions = Options?.FailurePredicateOptions;
    if (!(predicateOptions?.ProcessInParallel ?? false)) {
        foreach (var pred in predicates) {
            if (pred is not null && await pred(doc))
                return true;
        }
        return false;
    }

    // An int flag works with Interlocked/Volatile on every runtime and gives the early-exit
    // check a proper memory barrier.
    int failed = 0;
    await Parallel.ForEachAsync(predicates, new ParallelOptions() {
            MaxDegreeOfParallelism = predicateOptions?.ParallelThreads ?? 4,
            CancellationToken = ct
        },
        async (predicate, token) => {
            if (token.IsCancellationRequested || predicate is null)
                return;
            if (Volatile.Read(ref failed) != 0)
                return;
            if (await predicate(doc))
                Interlocked.Exchange(ref failed, 1);
        }
    );
    return Volatile.Read(ref failed) != 0;
}
/// <summary>
/// Performs one download of <paramref name="link"/>, to disk when a download folder is configured
/// and to memory otherwise.
/// </summary>
/// <returns>
/// For in-memory downloads, the downloaded bytes. For file downloads, a document whose bytes are the
/// UTF-8 encoding of the file path that was written.
/// </returns>
protected virtual async Task<ByteDocument> _Download(string link, IProgress<IDownloadReport> progress, CancellationToken ct) {
    var folder = Options.DownloadFolder;
    if (folder is null)
        return await DownloadToMemory(link, Options.BufferSize, progress, ct);

    var fileName = options.GetFileNameForDownload(link, []);
    var target = Path.Combine(folder, fileName);
    await DownloadToFile(link, Options.BufferSize, target, progress, ct);
    return new ByteDocument(link, Encoding.UTF8.GetBytes(target));
}
/// <summary>
/// Runs the failure predicates and, when none of them flags the document, the transformer.
/// </summary>
/// <param name="download">The downloaded document.</param>
/// <param name="ct">Cancellation token forwarded to the failure check.</param>
/// <returns>
/// <c>(true, value)</c> when the document passed the failure check and was transformed;
/// <c>(false, default)</c> when it was flagged as a failure or any step threw.
/// </returns>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled while processing.</exception>
protected virtual async Task<(bool, OutType?)> Transform(ByteDocument download, CancellationToken ct) {
    try {
        if (FailurePredicates is null || !(await IsFailure(download, ct)))
            return (true, await Transformer(download));
        return (false, default);
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested) {
        // A requested cancellation must not be mistaken for a retryable failure.
        throw;
    }
    catch (Exception) {
        // Deliberate best effort: any other error counts as a failed attempt so the caller can retry.
        return (false, default);
    }
}
/// <summary>
/// Downloads and transforms the first link of <paramref name="link"/>, retrying with exponential back-off.
/// </summary>
/// <remarks>
/// Only <c>link[0]</c> is used. Skip predicates are consulted first; when one matches, its value is
/// returned without downloading. After the n-th failed attempt a retry report is emitted and, if
/// attempts remain, the method waits 2^n seconds before trying again.
/// </remarks>
/// <param name="link">The links to download; an empty array yields <c>(false, default)</c>.</param>
/// <param name="ct">Cancels downloads and back-off waits.</param>
/// <param name="maximumRetryCount">Maximum number of attempts.</param>
/// <param name="downProgress">Receives download reports; a no-op reporter is used when null.</param>
/// <param name="tryProgress">Receives a report after every failed attempt.</param>
/// <returns><c>(true, value)</c> on success or skip; <c>(false, lastValue)</c> when every attempt failed.</returns>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public async Task<(bool, OutType?)> TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<IDownloadReport>? downProgress = null, IProgress<IRetryReport>? tryProgress = null) {
    if (link.Length == 0)
        return (false, default);

    downProgress ??= new Progress<IDownloadReport>();
    var url = link[0].Data;

    if (ShouldSkip(url, out var defaultType))
        return (true, defaultType);

    OutType? ot = default;
    int tryCount = 0;

    while (tryCount < maximumRetryCount) {
        ct.ThrowIfCancellationRequested();
        var rt = await _Download(url, downProgress, ct);
        (var success, ot) = await Transform(rt, ct);

        if (success && ot != null)
            return (true, ot);

        ++tryCount;
        tryProgress?.Report(new RetryReport(tryCount, url));

        // No point in backing off when no attempt is left. The wait observes the token, and a
        // TimeSpan avoids the int overflow of a millisecond product for large retry counts.
        if (tryCount < maximumRetryCount)
            await Task.Delay(TimeSpan.FromSeconds(Math.Pow(2, tryCount)), ct);
    }

    return (false, ot);
}
/// <summary>
/// Asks the configured skip predicates whether <paramref name="link"/> can be served without downloading.
/// </summary>
/// <remarks>
/// Null predicates are ignored. In sequential mode the first matching predicate wins. In parallel mode
/// up to <c>SkipPredicateOptions.ParallelThreads</c> (4 when unset) predicates run concurrently and the
/// first match to be recorded wins.
/// </remarks>
/// <param name="link">The link about to be downloaded.</param>
/// <param name="outType">The value supplied by the matching predicate when the method returns <c>true</c>.</param>
/// <returns><c>true</c> when a predicate asked to skip the download.</returns>
private bool ShouldSkip(string link, [NotNullWhen(true)] out OutType? outType) {
    outType = default;

    var skipOptions = Options.SkipPredicateOptions;
    var predicates = skipOptions?.SkipPredicates;
    if (skipOptions is null || predicates is null)
        return false;

    if (!skipOptions.ProcessInParallel) {
        foreach (var pred in predicates) {
            if (pred is not null && pred(link, out outType))
                return true;
        }
        return false;
    }

    // A lock keeps the flag and the value consistent and works for every OutType; an Interlocked
    // exchange on an unconstrained generic is not supported for all types.
    var gate = new object();
    var shouldSkip = false;
    OutType? firstMatch = default;

    Parallel.ForEach(predicates, new ParallelOptions() {
            // Use the skip options' own bound, not the failure-predicate one.
            MaxDegreeOfParallelism = skipOptions.ParallelThreads ?? 4
        },
        (predicate, parallelLoopState) => {
            if (parallelLoopState.ShouldExitCurrentIteration || predicate is null)
                return;
            if (!predicate(link, out var candidate))
                return;

            lock (gate) {
                if (!shouldSkip) {
                    shouldSkip = true;
                    firstMatch = candidate;
                }
            }
            parallelLoopState.Break();
        }
    );

    outType = firstMatch;
    return shouldSkip;
}
}
}
+40
View File
@@ -0,0 +1,40 @@
using System.Diagnostics.CodeAnalysis;
using System.Security.Cryptography;
using System.Text;
using Beam.Models;
namespace Beam.Downloaders;
/// <summary>
/// Immutable configuration consumed by the unit downloaders.
/// </summary>
/// <typeparam name="OutType">The type produced by the transformer.</typeparam>
public record UnitDownloaderOptions<OutType>
{
    /// <summary>The HTTP client used for requests; a new client is created when none is supplied.</summary>
    public HttpClient Client { get; init; } = new HttpClient();

    /// <summary>The download target kind. Defaults to <c>DownloadTarget.URL</c>.</summary>
    public DownloadTarget Target { get; init; } = DownloadTarget.URL;

    /// <summary>Optional predicates that can short-circuit a download.</summary>
    public SkipPredicateOptions<OutType>? SkipPredicateOptions { get; init; }

    /// <summary>Optional predicates that mark a downloaded document as failed.</summary>
    public FailurePredicateOptions<ByteDocument>? FailurePredicateOptions { get; init; }

    /// <summary>Optional fragment settings.</summary>
    public FragmentOptions? FragmentOptions { get; init; }

    /// <summary>Converts a downloaded document into <typeparamref name="OutType"/>. Required.</summary>
    public required AsyncTransformer<ByteDocument, OutType> AsyncTransformer { get; init; }

    /// <summary>
    /// The location where the download is stored.
    /// </summary>
    /// <remarks>
    /// If not defined, <c>UnitDownloader.TryDownload()</c> downloads to memory.
    /// </remarks>
    public string? DownloadFolder { get; init; }

    /// <summary>Chunk size in bytes used while streaming; 80 KiB by default.</summary>
    public int BufferSize { get; init; } = 80 * 1024;

    /// <summary>Redirect-handling switch read by the downloader. Defaults to <c>true</c>.</summary>
    public bool FollowRedirects { get; init; } = true;

    /// <summary>
    /// Derives a file name from <paramref name="url"/> and <paramref name="additionalData"/>: the XxHash64
    /// of the UTF-8 URL bytes followed by the extra bytes, Base64-encoded with '+' and '/' replaced by
    /// '-' and '_' and the padding removed.
    /// </summary>
    public string GetFileNameForDownload(string url, byte[] additionalData)
    {
        byte[] payload = [..Encoding.UTF8.GetBytes(url), ..additionalData];
        byte[] digest = System.IO.Hashing.XxHash64.Hash(payload);
        string encoded = Convert.ToBase64String(digest);
        // Base64 only ever carries '=' as trailing padding, so trimming the end is equivalent
        // to blanking every '=' and trimming whitespace.
        return encoded.Replace('+', '-').Replace('/', '_').TrimEnd('=');
    }
}
// ---------- UnitDownloaderOptions Builder ----------
// ---------- FailurePredicateOptions Builder ----------
// ---------- FragmentOptions Builder ----------
@@ -0,0 +1,114 @@
using Beam.Models;
namespace Beam.Downloaders;
/// <summary>
/// Fluent builder for <see cref="UnitDownloaderOptions{OutType}"/>.
/// </summary>
/// <remarks>
/// Failure and skip predicates can be supplied either as a ready-made options object (the
/// <c>With...Options</c> methods) or through a nested builder (the <c>With...Predicates</c> methods).
/// A non-null options object takes precedence over the nested builder.
/// </remarks>
/// <typeparam name="OutType">The type produced by the transformer.</typeparam>
public sealed class UnitDownloaderOptionsBuilder<OutType> {
    private readonly FailurePredicateOptionsBuilder<ByteDocument> _failureOptionsBuilder = new();
    private readonly SkipPredicateOptionsBuilder<OutType> _skipPredicateOptionsBuilder = new();

    private DownloadTarget _target = DownloadTarget.URL;
    private HttpClient _client = new HttpClient();
    private FailurePredicateOptions<ByteDocument>? _failurePredicateOptionsOverride = null;
    private SkipPredicateOptions<OutType>? _skipPredicateOptionsOverride = null;
    private FragmentOptions? _fragmentOptions;
    private AsyncTransformer<ByteDocument, OutType>? _asyncTransformer;
    private string? _downloadFolder = null;
    private int _bufferSize = 80 * 1024;
    private bool _followRedirects = true;

    /// <summary>Sets the download target kind.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithTarget(DownloadTarget target) {
        _target = target;
        return this;
    }

    /// <summary>Sets the redirect-handling switch.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithFollowRedirects(bool followRedirects) {
        _followRedirects = followRedirects;
        return this;
    }

    /// <summary>Sets the HTTP client.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="client"/> is null.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithClient(HttpClient client)
    {
        _client = client ?? throw new System.ArgumentNullException(nameof(client));
        return this;
    }

    /// <summary>
    /// Supplies a complete failure-predicate options object; pass null to fall back to the nested builder.
    /// </summary>
    public UnitDownloaderOptionsBuilder<OutType> WithFailurePredicateOptions(FailurePredicateOptions<ByteDocument>? options)
    {
        _failurePredicateOptionsOverride = options;
        return this;
    }

    /// <summary>Configures failure predicates through the nested builder.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="configure"/> is null.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithFailurePredicates(System.Action<FailurePredicateOptionsBuilder<ByteDocument>> configure)
    {
        if (configure == null) throw new System.ArgumentNullException(nameof(configure));
        configure(_failureOptionsBuilder);
        return this;
    }

    /// <summary>Supplies a complete fragment options object, or null for none.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithFragmentOptions(FragmentOptions? options)
    {
        _fragmentOptions = options;
        return this;
    }

    /// <summary>Configures skip predicates through the nested builder.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="configure"/> is null.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithSkipPredicates(Action<SkipPredicateOptionsBuilder<OutType>> configure) {
        if (configure == null) throw new ArgumentNullException(nameof(configure));
        configure(_skipPredicateOptionsBuilder);
        return this;
    }

    /// <summary>Supplies a complete skip-predicate options object.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithSkipPredicateOptions(
        SkipPredicateOptions<OutType> skipPredicateOptions) {
        _skipPredicateOptionsOverride = skipPredicateOptions;
        return this;
    }

    /// <summary>Configures and builds fragment options through a temporary builder.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="configure"/> is null.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithFragments(System.Action<FragmentOptionsBuilder> configure)
    {
        if (configure == null) throw new System.ArgumentNullException(nameof(configure));
        var b = new FragmentOptionsBuilder();
        configure(b);
        _fragmentOptions = b.Build();
        return this;
    }

    /// <summary>Sets the transformer; it is mandatory for <see cref="Build"/>.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="transformer"/> is null.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithAsyncTransformer(AsyncTransformer<ByteDocument, OutType> transformer)
    {
        _asyncTransformer = transformer ?? throw new System.ArgumentNullException(nameof(transformer));
        return this;
    }

    /// <summary>Sets the folder downloads are written to; null keeps downloads in memory.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithDownloadFolder(string? downloadFolder)
    {
        _downloadFolder = downloadFolder;
        return this;
    }

    /// <summary>Sets the streaming chunk size in bytes.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="bytes"/> is zero or negative.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithBufferSize(int bytes)
    {
        if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
        _bufferSize = bytes;
        return this;
    }

    /// <summary>Creates the options object from the collected values.</summary>
    /// <exception cref="System.InvalidOperationException">No transformer was supplied.</exception>
    public UnitDownloaderOptions<OutType> Build()
    {
        if (_asyncTransformer == null)
            throw new System.InvalidOperationException("AsyncTransformer must be provided.");

        // Resolve into locals instead of writing back to the override fields: caching the built
        // snapshot would make later WithFailurePredicates/WithSkipPredicates calls on this same
        // builder invisible to subsequent Build() calls.
        var failureOptions = _failurePredicateOptionsOverride ?? _failureOptionsBuilder.Build();
        var skipOptions = _skipPredicateOptionsOverride ?? _skipPredicateOptionsBuilder.Build();

        return new UnitDownloaderOptions<OutType>
        {
            Target = _target,
            Client = _client,
            FailurePredicateOptions = failureOptions,
            SkipPredicateOptions = skipOptions,
            FollowRedirects = _followRedirects,
            FragmentOptions = _fragmentOptions,
            AsyncTransformer = _asyncTransformer,
            DownloadFolder = _downloadFolder,
            BufferSize = _bufferSize
        };
    }
}
@@ -0,0 +1,46 @@
using Beam.Abstractions;
using Beam.Exceptions;
using Beam.Models;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Downloaders {
/// <summary>
/// Downloads a group of links concurrently through an inner <see cref="IUnitDownloader{OutType}"/> and
/// collects the ordered results in a single <see cref="Fragment{T}"/>.
/// </summary>
/// <typeparam name="OutType">The type produced for each individual link.</typeparam>
/// <param name="options">Options used to create the default inner downloader.</param>
/// <param name="internalDownloader">Optional inner downloader; a <see cref="UnitDownloader{OutType}"/> is created when null.</param>
public class UnitFragmentDownloader<OutType>(UnitDownloaderOptions<OutType> options,
    IUnitDownloader<OutType>? internalDownloader = null) : IUnitDownloader<Fragment<Ordered<OutType>>> {
    public UnitDownloaderOptions<OutType> Options { get; } = options;
    public int LinksPerDownload { get; set; }
    private IUnitDownloader<OutType> UnitDownloader { get; } = internalDownloader ?? new UnitDownloader<OutType>(options);

    /// <summary>
    /// Downloads every entry of <paramref name="link"/> and stores each result, tagged with the entry's
    /// order, in a new fragment. The fragment is marked complete only when every download succeeded.
    /// </summary>
    /// <returns>A success flag and the fragment (possibly partially filled on failure).</returns>
    /// <exception cref="AssertionException">The fragment's updater could not be acquired.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    // NOTE(review): Options.FragmentOptions (DownloadInParallel / ParallelThreads) is not consulted here;
    // downloads always run in parallel with the default degree - confirm that is intended.
    async Task<(bool, Fragment<Ordered<OutType>>?)> IUnitDownloader<Fragment<Ordered<OutType>>>.TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<IDownloadReport>? downProgress, IProgress<IRetryReport>? tryProgress) {
        var fragment = new Fragment<Ordered<OutType>>(link.Length);

        if (!Fragment<Ordered<OutType>>.TryAcquireUpdater(fragment, out var updater))
            throw new AssertionException(Exceptions.Exceptions.fragment_locked);

        // int flag: usable with Interlocked/Volatile on every runtime.
        int failed = 0;
        try {
            // Handing ct to the loop stops new iterations from being scheduled once it is cancelled.
            await Parallel.ForEachAsync(link, new ParallelOptions { CancellationToken = ct }, async (x, pct) => {
                pct.ThrowIfCancellationRequested();
                if (Volatile.Read(ref failed) != 0)
                    return;

                var (result, downloadedT) = await UnitDownloader.TryDownload([x], ct, maximumRetryCount, downProgress, tryProgress);
                if (!result || downloadedT == null) {
                    Interlocked.Exchange(ref failed, 1);
                    return;
                }

                updater(new Ordered<OutType>(downloadedT, x.Order));
            });

            if (failed == 0)
                Fragment<Ordered<OutType>>.SetComplete(fragment, true);
        }
        finally {
            // Release the updater even when a download throws or the operation is cancelled.
            Fragment<Ordered<OutType>>.TryReleaseUpdater(fragment, updater);
        }

        return (failed == 0, fragment);
    }
}
}
@@ -1,53 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Dynamic {
/// <summary>
/// Collects the <c>href</c> values found beneath the node selected by <see cref="Content"/>, either as
/// raw strings or as <see cref="SourceLink"/> instances resolved against <see cref="RelativeTo"/>.
/// </summary>
public class AnchorCollectionDataProvider : IDataProvider<string[]>, IDataProvider<SourceLink[]> {
    /// <summary>Binding that selects the container node; nothing is returned when it is null.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Optional base address that relative hrefs are appended to.</summary>
    public string? RelativeTo { get; set; }

    /// <summary>
    /// Joins <paramref name="base"/> and <paramref name="relative"/> with exactly one '/' between them.
    /// The href is returned unchanged when there is no base or when it already is an absolute,
    /// non-file URI (e.g. <c>https://...</c>), so absolute links are not prefixed with the base.
    /// </summary>
    private string GetAbsolute(string? @base, string relative) {
        if (@base is null)
            return relative;

        // The IsFile guard keeps rooted paths such as "/a/b" (parsed as file URIs on Unix) on the join path.
        if (Uri.TryCreate(relative, UriKind.Absolute, out var parsed) && !parsed.IsFile)
            return relative;

        if (@base.EndsWith('/'))
            @base = @base[..^1];
        if (relative.StartsWith('/'))
            relative = relative[1..];
        return @base + '/' + relative;
    }

    /// <summary>
    /// Returns the non-blank <c>href</c> attribute values of every descendant of the selected node,
    /// or an empty array when no binding is set or nothing is selected.
    /// </summary>
    public string[] Get(HtmlDocument document) {
        var node = Content?.Select(document);
        if (node is null)
            return [];

        return node.Descendants()
            .Select(child => child.GetAttributeValue("href", ""))
            .Where(href => !string.IsNullOrWhiteSpace(href))
            .ToArray();
    }

    /// <summary>
    /// Returns the collected hrefs as <see cref="SourceLink"/> instances, skipping values that cannot
    /// be parsed as a URI.
    /// </summary>
    SourceLink[] IDataProvider<SourceLink[]>.Get(HtmlDocument document) {
        var links = Get(document);
        if (links.Length == 0)
            return [];

        List<SourceLink> slinks = [];
        foreach (var link in links) {
            // Resolve once and reuse the result for both validation and construction.
            var absolute = GetAbsolute(RelativeTo, link);
            if (Uri.TryCreate(absolute, UriKind.RelativeOrAbsolute, out _))
                slinks.Add(new SourceLink(absolute));
        }
        return slinks.ToArray();
    }
}
}
-31
View File
@@ -1,31 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Dynamic {
/// <summary>
/// Reads the <c>href</c> attribute of the node selected by <see cref="Content"/>, either as a raw
/// string or as a <see cref="SourceLink"/>.
/// </summary>
public class AnchorDataProvider : IDataProvider<SourceLink>, IDataProvider<string> {
    /// <summary>Binding that selects the anchor node.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the selected node's <c>href</c> value, or an empty string when there is no binding,
    /// no node is selected, or the attribute is missing.
    /// </summary>
    public string Get(HtmlDocument document) {
        if (Content is null)
            return "";
        return Content.Select(document)?.GetAttributeValue("href", "") ?? "";
    }

    /// <summary>
    /// Returns the href as a <see cref="SourceLink"/>, or <see cref="SourceLink.InvalidLink"/> when the
    /// href is blank or cannot be parsed as a URI.
    /// </summary>
    SourceLink IDataProvider<SourceLink>.Get(HtmlDocument document) {
        var href = Get(document);

        // Get never returns null; a missing href comes back as "", which would otherwise parse
        // as a valid (empty) relative URI and yield a bogus link.
        if (string.IsNullOrWhiteSpace(href))
            return SourceLink.InvalidLink;
        if (!Uri.TryCreate(href, UriKind.RelativeOrAbsolute, out _))
            return SourceLink.InvalidLink;
        return new SourceLink(href);
    }
}
}
@@ -1,13 +1,7 @@
using aeqw89.DataKeys; using aeqw89.DataKeys;
using HtmlAgilityPack; using Beam.Abstractions;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli { namespace Beam.Dynamic {
/// <summary> /// <summary>
/// <para> /// <para>
/// A collection of specific useful methods and constants that facilitate the use of the application; allows other parts of the application to depend on architecture-specific arbitrary choices without compromising the Single-Responsibility principle or increasing redundant code. /// A collection of specific useful methods and constants that facilitate the use of the application; allows other parts of the application to depend on architecture-specific arbitrary choices without compromising the Single-Responsibility principle or increasing redundant code.
+5 -5
View File
@@ -6,14 +6,14 @@
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" /> <PackageReference Include="aeqw89.DataKeys" Version="2.1.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" /> <PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" /> <PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" /> <PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Beam\Beam.csproj"> <ProjectReference Include="..\Beam.Abstractions\Beam.Abstractions.csproj" />
<PrivateAssets>all</PrivateAssets> <ProjectReference Include="..\Beam.Exceptions\Beam.Exceptions.csproj" />
</ProjectReference> <ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
</ItemGroup> </ItemGroup>
</Project> </Project>
@@ -0,0 +1,2 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=dataproviders/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
+12
View File
@@ -0,0 +1,12 @@
using System.Collections;
using System.Diagnostics.CodeAnalysis;
using aeqw89.DataKeys;
using aeqw89.PersistentData;
using Beam.Abstractions;
namespace Beam.Dynamic;
/// <summary>
/// Holds the table of data providers that make up a set of bindings.
/// </summary>
public class BindingsCollection
{
    /// <summary>Creates the collection, starting from an empty table when none is given.</summary>
    /// <param name="providers">The provider table to expose, or null for an empty one.</param>
    public BindingsCollection(Table<IDataProvider>? providers = null)
    {
        Providers = providers ?? [];
    }

    /// <summary>The provider table supplied at construction (never null).</summary>
    public Table<IDataProvider> Providers { get; }
}
@@ -3,33 +3,35 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam { namespace Beam.Dynamic {
public static class CommonStateChangers { public static class CommonStateChangers {
public static IStateChangeBehaviour LastAsNumber => new NumberedStateChanger((x, i) => { public static IStateChangeBehaviour LastAsNumber => new NumberedStateChanger((x, i) => {
object last = x[^1]; object last = x.GetState()[^1];
if (!int.TryParse(last.ToString(), out var number)) if (!int.TryParse(last.ToString(), out var number))
throw new InvalidOperationException(S.M.StateChangeError); throw new InvalidOperationException(Exceptions.Exceptions.state_change_error); // TODO use more specific exception
x[^1] = (number + i).ToString(); x.GetState()[^1] = (number + i).ToString();
}); });
public static IStateChangeBehaviour Constant => new ConstantStateChanger(); public static IStateChangeBehaviour Constant => new ConstantStateChanger();
public static IStateChangeBehaviour NthAsNumber(Index n, bool keepSuffix = true) public static IStateChangeBehaviour NthAsNumber(Index n, bool keepSuffix = true)
=> new NumberedStateChanger((x, i) => { => new NumberedStateChanger((x, i) => {
string? nth = x[n]?.ToString(); string? nth = x.GetState()[n]?.ToString();
string[] xState = x.GetState();
if (nth is null) if (nth is null)
throw new InvalidOperationException(S.M.StateChangeError); throw new InvalidOperationException(Exceptions.Exceptions.state_change_error); // TODO use more specific exception
if (!int.TryParse(nth, out var number)) if (!int.TryParse(nth, out var number))
if (keepSuffix) { if (keepSuffix) {
string[] split = nth.Split('.'); string[] split = nth.Split('.');
if (!int.TryParse(split[0], out number)) if (!int.TryParse(split[0], out number))
throw new InvalidOperationException(S.M.StateChangeError); throw new InvalidOperationException(Exceptions.Exceptions.state_change_error); // TODO use more specific exception
x[n] = (number + i) + split[1..].Aggregate((x, y) => $"{x}.{y}"); xState[n] = (number + i) + split[1..].Aggregate((x, y) => $"{x}.{y}");
return; return;
} else } else
throw new InvalidOperationException(S.M.StateChangeError); throw new InvalidOperationException(Exceptions.Exceptions.state_change_error); // TODO use more specific exception
x[n] = (number + i).ToString(); xState[n] = (number + i).ToString();
}); });
+45
View File
@@ -0,0 +1,45 @@
using aeqw89.DataKeys;
using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Factory methods that build transformers from a parsed HTML document to Beam model objects,
/// reading each field through the supplied <see cref="DataBindings"/>.
/// </summary>
public static class CommonTransformers {
    /// <summary>
    /// Builds a transformer producing <see cref="ArticleData"/>. Missing bindings yield empty arrays or empty strings.
    /// </summary>
    /// <param name="binding">Bindings used to read the document; may be null.</param>
    public static AsyncTransformer<HtmlDocument, ArticleData> ArticleDataTransformer(DataBindings? binding) {
        return document => {
            var article = new ArticleData() {
                Authors = binding?.Authors?.Get(document)?.Select(StringCleaner.Clean)?.ToArray() ?? [],
                Name = StringCleaner.Clean(binding?.Title?.Get(document) ?? ""),
                Categories = binding?.Tags?.Get(document)?.Select(StringCleaner.Clean)?.ToArray() ?? [],
                Description = StringCleaner.Clean(binding?.Description?.Get(document) ?? "")
            };
            return Task.FromResult(article);
        };
    }

    /// <summary>
    /// Builds a transformer producing <see cref="TableOfContentsData"/>: the article fields plus the
    /// table-of-contents links and the page drop-down links.
    /// </summary>
    /// <param name="binding">Bindings used to read the document; may be null.</param>
    public static AsyncTransformer<HtmlDocument, TableOfContentsData> TableOfContentsTransformer(DataBindings? binding) {
        return document => {
            var contents = new TableOfContentsData() {
                Authors = binding?.Authors?.Get(document)?.Select(StringCleaner.Clean)?.ToArray() ?? [],
                Name = StringCleaner.Clean(binding?.Title?.Get(document) ?? ""),
                Categories = binding?.Tags?.Get(document)?.Select(StringCleaner.Clean)?.ToArray() ?? [],
                Description = StringCleaner.Clean(binding?.Description?.Get(document) ?? ""),
                ContentLinks = binding?.TableOfContents?.Get(document) ?? [],
                PagesLinks = binding?.PagesDropDown?.Get(document) ?? []
            };
            return Task.FromResult(contents);
        };
    }

    /// <summary>
    /// Builds a transformer producing a <see cref="StringDocument"/> with a random file name, the cleaned
    /// resolved content, and metadata keyed by the default architecture's chapter key (and book key when
    /// <paramref name="metaData"/> is supplied).
    /// </summary>
    /// <param name="binding">Bindings resolved against the document; may be null.</param>
    /// <param name="metaData">Optional metadata stored under the book key.</param>
    public static AsyncTransformer<HtmlDocument, StringDocument> DocumentTransformer(
        DataBindings? binding,
        IDocumentMetaData? metaData = null) {
        return document => {
            var resolved = binding?.Resolve(document);
            var chapter = new ArticleData() {
                Name = StringCleaner.Clean(resolved?.Title),
            };

            var meta = new Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData>();
            meta.Add(IArchitecture.Default.ChapterKey, chapter);
            if (metaData is not null) {
                meta.Add(IArchitecture.Default.BookKey, metaData);
            }

            var result = new StringDocument(Path.GetRandomFileName(), StringCleaner.Clean(resolved?.Content)) {
                MetaData = meta
            };
            return Task.FromResult(result);
        };
    }
}
+9
View File
@@ -0,0 +1,9 @@
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// A state-change behaviour that does nothing: the state is left exactly as it was.
/// </summary>
public class ConstantStateChanger : IStateChangeBehaviour {
    /// <summary>Ignores both arguments and performs no work.</summary>
    /// <param name="state">State that would be changed; not touched.</param>
    /// <param name="stimulus">Stimulus value; not read.</param>
    public void Apply(IState state, object stimulus) {
        // Intentionally empty.
    }
}
}
-15
View File
@@ -1,15 +0,0 @@
using HtmlAgilityPack;
namespace Beam.Dynamic {
/// <summary>
/// Reads the inner text of the bound node and splits it into an array on <see cref="ArrayDelimiters"/>.
/// </summary>
public class ContentsArrayDataProvider : ContentsDataProvider, IDataProvider<string[]> {
    /// <summary>Separators used to split the text; defaults to a single semicolon.</summary>
    public string[] ArrayDelimiters { get; set; } = [";"];

    /// <summary>
    /// Returns the non-empty segments of the bound node's inner text, or an empty array when there is
    /// no binding, no matching node, or no text.
    /// </summary>
    string[] IDataProvider<string[]>.Get(HtmlDocument document) {
        string? text = Content?.Select(document)?.InnerText;
        if (text is null) {
            return [];
        }
        return text.Split(ArrayDelimiters, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
-20
View File
@@ -1,20 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Dynamic {
/// <summary>
/// Reads the inner text of the node chosen by <see cref="Content"/>.
/// </summary>
public class ContentsDataProvider : IDataProvider<string> {
    /// <summary>Binding that locates the node to read; may be null.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the bound node's inner text, or an empty string when there is no binding, no matching
    /// node, or no text.
    /// </summary>
    public string Get(HtmlDocument document) => Content?.Select(document)?.InnerText ?? "";
}
}
+183 -182
View File
@@ -1,191 +1,192 @@
using HtmlAgilityPack; using System.Text.Json.Serialization;
using System.Text.Json.Serialization; using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic { namespace Beam.Dynamic;
public record class DataBindings {
#region ---------------------- Common Bindings ----------------------
[JsonIgnore]
public IDataProvider<string>? Title {
get => Get<string>(nameof(Title));
set => Providers[nameof(Title)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Authors {
get => Get<string[]>(nameof(Authors));
set => Providers[nameof(Authors)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Description {
get => Get<string>(nameof(Description));
set => Providers[nameof(Description)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Content {
get => Get<string>(nameof(Content));
set => Providers[nameof(Content)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Language {
get => Get<string[]>(nameof(Language));
set => Providers[nameof(Language)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Tags {
get => Get<string[]>(nameof(Tags));
set => Providers[nameof(Tags)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Publisher {
get => Get<string>(nameof(Publisher));
set => Providers[nameof(Publisher)] = value;
}
[JsonIgnore]
public IDataProvider<DateTimeOffset>? PublicationDate {
get => Get<DateTimeOffset>(nameof(PublicationDate));
set => Providers[nameof(PublicationDate)] = value;
}
[JsonIgnore]
public IDataProvider<string>? ISBN {
get => Get<string>(nameof(ISBN));
set => Providers[nameof(ISBN)] = value;
}
[JsonIgnore]
public IDataProvider<int>? PageCount {
get => Get<int>(nameof(PageCount));
set => Providers[nameof(PageCount)] = value;
}
[JsonIgnore]
public IDataProvider<SourceLink>? CoverImage {
get => Get<SourceLink>(nameof(CoverImage));
set => Providers[nameof(CoverImage)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Series {
get => Get<string[]>(nameof(Series));
set => Providers[nameof(Series)] = value;
}
[JsonIgnore]
public IDataProvider<int>? Edition {
get => Get<int>(nameof(Edition));
set => Providers[nameof(Edition)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Contributors {
get => Get<string[]>(nameof(Contributors));
set => Providers[nameof(Contributors)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Subjects {
get => Get<string[]>(nameof(Subjects));
set => Providers[nameof(Subjects)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Rights {
get => Get<string>(nameof(Rights));
set => Providers[nameof(Rights)] = value;
}
[JsonIgnore]
public IDataProvider<SourceLink[]>? TableOfContents {
get => Get<SourceLink[]>(nameof(TableOfContents));
set => Providers[nameof(TableOfContents)] = value;
}
[JsonIgnore]
public IDataProvider<SourceLink[]>? PagesDropDown {
get => Get<SourceLink[]>(nameof(PagesDropDown));
set => Providers[nameof(PagesDropDown)] = value;
}
[JsonIgnore]
public IDataProvider<SourceLink>? NextPageButton {
get => Get<SourceLink>(nameof(NextPageButton));
set => Providers[nameof(NextPageButton)] = value;
}
[JsonIgnore]
public IDataProvider<SourceLink>? PreviousPageButton {
get => Get<SourceLink>(nameof(PreviousPageButton));
set => Providers[nameof(PreviousPageButton)] = value;
}
#endregion
public Dictionary<string, IDataProvider?> Providers { get; set; } = []; public record class DataBindings : IDataBindings {
#region ---------------------- Common Bindings ----------------------
[JsonIgnore]
public IDataProvider<string>? Title {
get => Get<string>(nameof(Title));
set => Providers[nameof(Title)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Authors {
get => Get<string[]>(nameof(Authors));
set => Providers[nameof(Authors)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Description {
get => Get<string>(nameof(Description));
set => Providers[nameof(Description)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Content {
get => Get<string>(nameof(Content));
set => Providers[nameof(Content)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Language {
get => Get<string[]>(nameof(Language));
set => Providers[nameof(Language)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Tags {
get => Get<string[]>(nameof(Tags));
set => Providers[nameof(Tags)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Publisher {
get => Get<string>(nameof(Publisher));
set => Providers[nameof(Publisher)] = value;
}
[JsonIgnore]
public IDataProvider<DateTimeOffset>? PublicationDate {
get => Get<DateTimeOffset>(nameof(PublicationDate));
set => Providers[nameof(PublicationDate)] = value;
}
[JsonIgnore]
public IDataProvider<string>? ISBN {
get => Get<string>(nameof(ISBN));
set => Providers[nameof(ISBN)] = value;
}
[JsonIgnore]
public IDataProvider<int>? PageCount {
get => Get<int>(nameof(PageCount));
set => Providers[nameof(PageCount)] = value;
}
[JsonIgnore]
public IDataProvider<string>? CoverImage {
get => Get<string>(nameof(CoverImage));
set => Providers[nameof(CoverImage)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Series {
get => Get<string[]>(nameof(Series));
set => Providers[nameof(Series)] = value;
}
[JsonIgnore]
public IDataProvider<int>? Edition {
get => Get<int>(nameof(Edition));
set => Providers[nameof(Edition)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Contributors {
get => Get<string[]>(nameof(Contributors));
set => Providers[nameof(Contributors)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? Subjects {
get => Get<string[]>(nameof(Subjects));
set => Providers[nameof(Subjects)] = value;
}
[JsonIgnore]
public IDataProvider<string>? Rights {
get => Get<string>(nameof(Rights));
set => Providers[nameof(Rights)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? TableOfContents {
get => Get<string[]>(nameof(TableOfContents));
set => Providers[nameof(TableOfContents)] = value;
}
[JsonIgnore]
public IDataProvider<string[]>? PagesDropDown {
get => Get<string[]>(nameof(PagesDropDown));
set => Providers[nameof(PagesDropDown)] = value;
}
[JsonIgnore]
public IDataProvider<string>? NextPageButton {
get => Get<string>(nameof(NextPageButton));
set => Providers[nameof(NextPageButton)] = value;
}
[JsonIgnore]
public IDataProvider<string>? PreviousPageButton {
get => Get<string>(nameof(PreviousPageButton));
set => Providers[nameof(PreviousPageButton)] = value;
}
#endregion
private IDataProvider<T>? Get<T>(string key) { public Dictionary<string, IDataProvider?> Providers { get; set; } = [];
if (Providers.TryGetValue(key, out var k) && k is IDataProvider<T> ks)
return ks;
return default;
}
public virtual ResolvedBindings Resolve(HtmlDocument doc) { private IDataProvider<T>? Get<T>(string key) {
// explicit fields already handled below if (Providers.TryGetValue(key, out var k) && k is IDataProvider<T> ks)
var mappedKeys = new HashSet<string> { return ks;
nameof(Title), nameof(Authors), nameof(Description), nameof(Content), return default;
nameof(Language), nameof(Tags), nameof(Publisher), nameof(PublicationDate),
nameof(ISBN), nameof(PageCount), nameof(CoverImage), nameof(Series),
nameof(Edition), nameof(Contributors), nameof(Subjects), nameof(Rights),
nameof(TableOfContents), nameof(PagesDropDown), nameof(NextPageButton),
nameof(PreviousPageButton)
};
var additional = new Dictionary<string, object?>();
foreach (var (key, provider) in Providers) {
if (!mappedKeys.Contains(key) && provider is not null) {
// dynamic call so any IDataProvider<T> works
additional[key] = ((dynamic)provider).Get(doc);
}
}
return new ResolvedBindings {
Title = Title?.Get(doc),
Authors = Authors?.Get(doc) ?? [],
Description = Description?.Get(doc),
Content = Content?.Get(doc),
Language = Language?.Get(doc),
Tags = Tags?.Get(doc) ?? [],
Publisher = Publisher?.Get(doc),
PublicationDate = PublicationDate?.Get(doc),
ISBN = ISBN?.Get(doc),
PageCount = PageCount?.Get(doc),
CoverImage = CoverImage?.Get(doc),
Series = Series?.Get(doc) ?? [],
Edition = Edition?.Get(doc),
Contributors = Contributors?.Get(doc) ?? [],
Subjects = Subjects?.Get(doc) ?? [],
Rights = Rights?.Get(doc),
TableOfContents = TableOfContents?.Get(doc) ?? [],
PagesDropDown = PagesDropDown?.Get(doc),
NextPageButton = NextPageButton?.Get(doc),
PreviousPageButton = PreviousPageButton?.Get(doc),
Additional = additional
};
}
} }
public record class ResolvedBindings { public virtual ResolvedBindings Resolve(HtmlDocument doc) {
public string? Title { get; set; } // explicit fields already handled below
public string[]? Authors { get; set; } var mappedKeys = new HashSet<string> {
public string? Description { get; set; } nameof(Title), nameof(Authors), nameof(Description), nameof(Content),
public string? Content { get; set; } nameof(Language), nameof(Tags), nameof(Publisher), nameof(PublicationDate),
public string[]? Language { get; set; } nameof(ISBN), nameof(PageCount), nameof(CoverImage), nameof(Series),
public string[]? Tags { get; set; } nameof(Edition), nameof(Contributors), nameof(Subjects), nameof(Rights),
public string? Publisher { get; set; } nameof(TableOfContents), nameof(PagesDropDown), nameof(NextPageButton),
public DateTimeOffset? PublicationDate { get; set; } nameof(PreviousPageButton)
public string? ISBN { get; set; } };
public int? PageCount { get; set; }
public SourceLink? CoverImage { get; set; }
public string[]? Series { get; set; }
public int? Edition { get; set; }
public string[]? Contributors { get; set; }
public string[]? Subjects { get; set; }
public string? Rights { get; set; }
public SourceLink[]? TableOfContents { get; set; }
public SourceLink[]? PagesDropDown { get; set; }
public SourceLink? NextPageButton { get; set; }
public SourceLink? PreviousPageButton { get; set; }
/// <summary> var additional = new Dictionary<string, object?>();
/// Values resolved from any providers whose keys arent represented
/// by the named properties above. foreach (var (key, provider) in Providers) {
/// </summary> if (!mappedKeys.Contains(key) && provider is not null) {
public Dictionary<string, object?> Additional { get; set; } = []; // dynamic call so any IDataProvider<T> works
additional[key] = ((dynamic)provider).Get(doc);
}
}
return new ResolvedBindings {
Title = Title?.Get(doc),
Authors = Authors?.Get(doc) ?? [],
Description = Description?.Get(doc),
Content = Content?.Get(doc),
Language = Language?.Get(doc),
Tags = Tags?.Get(doc) ?? [],
Publisher = Publisher?.Get(doc),
PublicationDate = PublicationDate?.Get(doc),
ISBN = ISBN?.Get(doc),
PageCount = PageCount?.Get(doc),
CoverImage = CoverImage?.Get(doc),
Series = Series?.Get(doc) ?? [],
Edition = Edition?.Get(doc),
Contributors = Contributors?.Get(doc) ?? [],
Subjects = Subjects?.Get(doc) ?? [],
Rights = Rights?.Get(doc),
TableOfContents = TableOfContents?.Get(doc) ?? [],
PagesDropDown = PagesDropDown?.Get(doc),
NextPageButton = NextPageButton?.Get(doc),
PreviousPageButton = PreviousPageButton?.Get(doc),
Additional = additional
};
} }
} }
/// <summary>
/// Plain values produced by <c>DataBindings.Resolve</c>: one property per named binding, plus
/// <see cref="Additional"/> for providers registered under any other key.
/// </summary>
public record class ResolvedBindings {
public string? Title { get; set; }
public string[]? Authors { get; set; }
public string? Description { get; set; }
public string? Content { get; set; }
public string[]? Language { get; set; }
public string[]? Tags { get; set; }
public string? Publisher { get; set; }
public DateTimeOffset? PublicationDate { get; set; }
public string? ISBN { get; set; }
public int? PageCount { get; set; }
public string? CoverImage { get; set; }
public string[]? Series { get; set; }
public int? Edition { get; set; }
public string[]? Contributors { get; set; }
public string[]? Subjects { get; set; }
public string? Rights { get; set; }
public string[]? TableOfContents { get; set; }
public string[]? PagesDropDown { get; set; }
public string? NextPageButton { get; set; }
public string? PreviousPageButton { get; set; }
/// <summary>
/// Values resolved from any providers whose keys aren't represented
/// by the named properties above.
/// </summary>
public Dictionary<string, object?> Additional { get; set; } = [];
}
@@ -0,0 +1,38 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using Beam.Abstractions;
namespace Beam.Dynamic;
// [JsonDerivedType(typeof(ParagraphedContentDataProvider), "paragraphed")]
// [JsonDerivedType(typeof(ListContentDataProvider), "list")]
// [JsonDerivedType(typeof(ContentsArrayDataProvider), "array")]
// [JsonDerivedType(typeof(ContentsDataProvider), "single")]
// [JsonDerivedType(typeof(DropDownDataProvider), "dropdown")]
// [JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list")]
// [JsonDerivedType(typeof(AnchorDataProvider), "anchor")]
/// <summary>
/// JSON type-info resolver that attaches polymorphism metadata to <see cref="IDataProvider"/>, mapping each
/// known provider class to a string type discriminator.
/// </summary>
public class DataProviderJsonTypeInfoResolver : DefaultJsonTypeInfoResolver {
    /// <summary>
    /// Returns the default type info, with polymorphism options added when <paramref name="type"/> resolves
    /// to <see cref="IDataProvider"/> itself. All other types are returned unchanged.
    /// </summary>
    public override JsonTypeInfo GetTypeInfo(Type type, JsonSerializerOptions options) {
        JsonTypeInfo info = base.GetTypeInfo(type, options);
        if (info.Type != typeof(IDataProvider)) {
            return info;
        }

        var polymorphism = new JsonPolymorphismOptions {
            UnknownDerivedTypeHandling = JsonUnknownDerivedTypeHandling.FallBackToNearestAncestor
        };

        // Registration order is kept identical to the discriminator list this resolver has always used.
        var derived = polymorphism.DerivedTypes;
        derived.Add(new JsonDerivedType(typeof(ParagraphedContentDataProvider), "paragraphed"));
        derived.Add(new JsonDerivedType(typeof(ListContentDataProvider), "list"));
        derived.Add(new JsonDerivedType(typeof(ContentsArrayDataProvider), "array"));
        derived.Add(new JsonDerivedType(typeof(ContentsDataProvider), "single"));
        derived.Add(new JsonDerivedType(typeof(DropDownDataProvider), "dropdown"));
        derived.Add(new JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list"));
        derived.Add(new JsonDerivedType(typeof(AnchorDataProvider), "anchor"));

        info.PolymorphismOptions = polymorphism;
        return info;
    }
}
@@ -0,0 +1,46 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Collects the absolute URLs of every <c>href</c> attribute found beneath a selected node.
/// </summary>
public class AnchorCollectionDataProvider : IComposableDataProvider<string[]> {
    /// <summary>Binding that locates the container node; may be null.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base address used to resolve relative links; when null only absolute links are returned.</summary>
    public Uri? RelativeTo { get; set; }

    /// <summary>
    /// Selects the container node from <paramref name="document"/> and returns its links,
    /// or an empty array when nothing is selected.
    /// </summary>
    public string[] Get(HtmlDocument document) {
        var node = Select(document);
        return node is null ? [] : Get(node);
    }

    /// <summary>
    /// Returns the absolute URL of every descendant of <paramref name="node"/> that carries a non-blank
    /// <c>href</c> attribute which can be resolved against <see cref="RelativeTo"/>.
    /// </summary>
    public string[] Get(HtmlNode node) {
        List<string> links = [];
        foreach (var child in node.Descendants()) {
            var href = child.GetAttributeValue("href", "");

            // Descendants() also yields text nodes and elements without an href. Resolving their
            // empty default against a non-null RelativeTo produces the base URL itself, which
            // previously polluted the result with one copy of the base URL per such node.
            if (string.IsNullOrWhiteSpace(href))
                continue;

            if (Uri.TryCreate(RelativeTo, href, out var uri))
                links.Add(uri.AbsoluteUri);
        }
        return links.ToArray();
    }

    /// <summary>Selects the container node through <see cref="Content"/>; null when there is no binding or no match.</summary>
    public HtmlNode? Select(HtmlDocument doc) {
        return Content?.Select(doc);
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) {
        return node;
    }

    /// <summary>Not implemented.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        throw new NotImplementedException();
    }
}
}
@@ -0,0 +1,32 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Reads the <c>href</c> attribute of a selected node and returns it as an absolute URL.
/// </summary>
public class AnchorDataProvider : IComposableDataProvider<string> {
    /// <summary>Binding that locates the anchor node; may be null.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base address used to resolve a relative link; when null only absolute links are returned.</summary>
    public Uri? RelativeTo { get; set; }

    /// <summary>
    /// Selects the anchor node from <paramref name="document"/> and returns its link,
    /// or an empty string when nothing is selected.
    /// </summary>
    public string Get(HtmlDocument document) {
        var node = Select(document);
        return node is null ? "" : Get(node);
    }

    /// <summary>
    /// Returns the absolute URL of the node's <c>href</c>, or an empty string when the attribute is
    /// missing, blank, or cannot be resolved against <see cref="RelativeTo"/>.
    /// </summary>
    public virtual string Get(HtmlNode node) {
        var href = node.GetAttributeValue("href", "");

        // A missing href must not be resolved: an empty reference against a non-null RelativeTo
        // yields the base URL, which would be reported as if the node linked to it.
        if (string.IsNullOrWhiteSpace(href))
            return "";

        if (Uri.TryCreate(RelativeTo, href, out var uri))
            return uri.AbsoluteUri;
        return "";
    }

    /// <summary>Selects the anchor node through <see cref="Content"/>; null when there is no binding or no match.</summary>
    public HtmlNode? Select(HtmlDocument doc) {
        return Content?.Select(doc);
    }

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) {
        return node;
    }
}
}
@@ -0,0 +1,83 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Allows composition of different data providers to adapt to different types of data.
/// </summary>
/// <typeparam name="T"></typeparam>
public class ComposeDataProviders<T> : IComposableDataProvider<T> {
    /// <summary>Selectors applied in order; each one receives the node chosen by the previous one.</summary>
    public required IComposableDataProvider<object>[] SelectWith { get; init; }

    /// <summary>Provider that extracts the final value from the selected node.</summary>
    public required IComposableDataProvider<T> GetWith { get; init; }

    private ComposeDataProviders() {}

    /// <summary>Creates a composition with a single selector.</summary>
    public static ComposeDataProviders<T> Create(IComposableDataProvider<object> selectWith, IComposableDataProvider<T> getWith) {
        return new ComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = [selectWith]
        };
    }

    /// <summary>Creates a composition with a chain of selectors, applied in array order.</summary>
    public static ComposeDataProviders<T> Create(IComposableDataProvider<object>[] selectWiths, IComposableDataProvider<T> getWith) {
        return new ComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = selectWiths
        };
    }

    /// <summary>
    /// Composes the data providers, first selecting a node with <see cref="SelectWith"/>, then getting the data with <see cref="GetWith"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <see cref="SelectWith"/> is empty or the selection yields no node.
    /// </exception>
    /// <param name="document">Document to select from.</param>
    /// <returns>The value <see cref="GetWith"/> reads from the selected node.</returns>
    public T Get(HtmlDocument document) {
        var selected = Select(document);
        if (selected is null)
            throw new InvalidOperationException("Selection operation failed.");
        return GetWith.Get(selected);
    }

    /// <summary>
    /// Uses the <see cref="GetWith"/> data provider to get the data from the supplied node.
    /// </summary>
    /// <param name="node">Node to read from; no selection is applied.</param>
    /// <returns>The value <see cref="GetWith"/> reads from <paramref name="node"/>.</returns>
    public T Get(HtmlNode node) {
        return GetWith.Get(node);
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain starting from the supplied document.
    /// </summary>
    /// <param name="doc">Document handed to the first selector.</param>
    /// <returns>The node chosen by the last selector, or null as soon as any selector yields nothing.</returns>
    /// <exception cref="InvalidOperationException">Thrown when <see cref="SelectWith"/> is empty.</exception>
    public HtmlNode? Select(HtmlDocument doc) {
        return ApplyRemainingSelectors(FirstSelector().Select(doc));
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain starting from the supplied node.
    /// </summary>
    /// <param name="node">Node handed to the first selector.</param>
    /// <returns>The node chosen by the last selector, or null as soon as any selector yields nothing.</returns>
    /// <exception cref="InvalidOperationException">Thrown when <see cref="SelectWith"/> is empty.</exception>
    public HtmlNode? Select(HtmlNode node) {
        return ApplyRemainingSelectors(FirstSelector().Select(node));
    }

    // Returns the head of the chain, failing with a descriptive error instead of an index fault.
    private IComposableDataProvider<object> FirstSelector() {
        if (SelectWith is [var first, ..])
            return first;
        throw new InvalidOperationException("SelectWith must contain at least one data provider.");
    }

    // Feeds the node through every selector after the first, stopping at the first null.
    private HtmlNode? ApplyRemainingSelectors(HtmlNode? selected) {
        foreach (var provider in SelectWith.Skip(1)) {
            if (selected is null)
                return null;
            selected = provider.Select(selected);
        }
        return selected;
    }
}
@@ -0,0 +1,18 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic {
/// <summary>
/// Splits a node's inner text into an array on <see cref="ArrayDelimiters"/>.
/// </summary>
public class ContentsArrayDataProvider : ContentsDataProvider, IComposableDataProvider<string[]> {
    /// <summary>Separators used to split the text; defaults to a single semicolon.</summary>
    public string[] ArrayDelimiters { get; set; } = [";"];

    /// <summary>
    /// Selects a node from <paramref name="document"/> and splits its text, or returns an empty array
    /// when nothing is selected.
    /// </summary>
    string[] IDataProvider<string[]>.Get(HtmlDocument document) {
        if (Select(document) is { } selected) {
            return Get(selected);
        }
        return [];
    }

    /// <summary>
    /// Returns the non-empty segments of the node's inner text, or an empty array when the text is null.
    /// </summary>
    public new string[] Get(HtmlNode node) {
        string? text = node.InnerText;
        if (text is null) {
            return [];
        }
        return text.Split(ArrayDelimiters, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
@@ -0,0 +1,30 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic {
/// <summary>
/// Reads the inner text of a node, located either through <see cref="Content"/> or supplied directly.
/// </summary>
public class ContentsDataProvider : IComposableDataProvider<string> {
    /// <summary>Binding that locates the node to read; may be null.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Returns the selected node's inner text, or an empty string when nothing is selected.</summary>
    public string Get(HtmlDocument document) {
        if (Select(document) is { } selected) {
            return Get(selected);
        }
        return "";
    }

    /// <summary>Returns the inner text of <paramref name="node"/>.</summary>
    public string Get(HtmlNode node) => node.InnerText;

    /// <summary>Selects a node through <see cref="Content"/>; null when there is no binding or no match.</summary>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    public HtmlNode? Select(HtmlNode node) => node;
}
}
@@ -0,0 +1,59 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.Marshalling;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic;
/// <summary>
/// Reads the <c>value</c> attributes of the <c>option</c> children of a selected node as absolute URLs.
/// Available both as an array and, through the string interface, as a JSON-serialized array.
/// </summary>
public class DropDownDataProvider : IComposableDataProvider<string>, IComposableDataProvider<string[]> {
    /// <summary>Binding that locates the drop-down node; may be null.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base address used when resolving option values.</summary>
    public Uri? RelativeTo { get; set; }

    /// <summary>
    /// Returns the option links of the selected node, or an empty array when there is no binding or no match.
    /// </summary>
    public string[] Get(HtmlDocument document) {
        // Select() already yields null when Content is null, so one check covers both cases.
        if (Select(document) is { } dropDown) {
            return Get(dropDown);
        }
        return [];
    }

    /// <summary>
    /// Returns the JSON form of the option links of the selected node, or an empty string when nothing is selected.
    /// </summary>
    string IDataProvider<string>.Get(HtmlDocument document) {
        var dropDown = Select(document);
        if (dropDown is null) {
            return "";
        }
        return ((IComposableDataProvider<string>)this).Get(dropDown);
    }

    /// <summary>
    /// Returns the absolute URL of each direct <c>option</c> child whose <c>value</c> resolves against
    /// <see cref="RelativeTo"/>; options that do not resolve are skipped.
    /// </summary>
    public string[] Get(HtmlNode node) {
        var links = new List<string>();
        foreach (var child in node.ChildNodes) {
            if (child.Name != "option") {
                continue;
            }
            var childValue = child.GetAttributeValue("value", null);
            if (Uri.TryCreate(RelativeTo, childValue, out var uri)) {
                links.Add(uri.AbsoluteUri);
            }
        }
        return links.ToArray();
    }

    /// <summary>Returns the option links of <paramref name="node"/> serialized as a JSON array.</summary>
    string IComposableDataProvider<string>.Get(HtmlNode node) => JsonSerializer.Serialize(Get(node));

    /// <summary>Selects the drop-down node through <see cref="Content"/>; null when there is no binding or no match.</summary>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    HtmlNode? IComposableDataProvider<string[]>.Select(HtmlNode node) => node;

    /// <summary>Returns <paramref name="node"/> unchanged.</summary>
    HtmlNode? IComposableDataProvider<string>.Select(HtmlNode node) => node;
}
@@ -1,18 +1,21 @@
using HtmlAgilityPack; using HtmlAgilityPack;
using System.Text; using System.Text;
using Beam.Abstractions;
namespace Beam.Dynamic { namespace Beam.Dynamic {
public class ListContentDataProvider : IDataProvider<string> { public class ListContentDataProvider : IComposableDataProvider<string> {
public IBinding? Content { get; set; } public IBinding? Content { get; set; }
public string Get(HtmlDocument document) { public string Get(HtmlDocument document) {
if (Content is null) if (Content is null)
return ""; return "";
var node = Content.Select(document); var node = Select(document);
if (node is null) return node is null ? "" : Get(node);
return ""; }
public string Get(HtmlNode node) {
StringBuilder content = new(); StringBuilder content = new();
foreach(var childNode in node.ChildNodes.SkipLast(1)) { foreach(var childNode in node.ChildNodes.SkipLast(1)) {
if (childNode.Name != "li") if (childNode.Name != "li")
@@ -23,5 +26,12 @@ namespace Beam.Dynamic {
content.Append(node.ChildNodes.Last().InnerText.Trim()); content.Append(node.ChildNodes.Last().InnerText.Trim());
return content.ToString(); return content.ToString();
} }
public HtmlNode? Select(HtmlDocument doc) {
return Content?.Select(doc);
}
public HtmlNode? Select(HtmlNode node) {
return node;
}
} }
} }
@@ -0,0 +1,20 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Array-producing variant of <see cref="AnchorDataProvider"/> that can read links from several nodes at once.
/// </summary>
public class ManyAnchorsDataProvider : AnchorDataProvider, IManySelectionComposableDataProvider<string[]> {
    /// <summary>Returns the single link read from <paramref name="document"/>, wrapped in a one-element array.</summary>
    public new string[] Get(HtmlDocument document) {
        string link = base.Get(document);
        return [link];
    }

    /// <summary>Returns one link per supplied node, in the same order.</summary>
    public string[] ManyGet(HtmlNode[] node) {
        var links = new string[node.Length];
        for (int i = 0; i < node.Length; i++) {
            links[i] = base.Get(node[i]);
        }
        return links;
    }

    /// <summary>Returns the selected node as a one-element array, or null when nothing is selected.</summary>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        var selected = Select(doc);
        if (selected == null) {
            return null;
        }
        return [selected];
    }

    /// <summary>Applies the single-node selection to each input node and drops the ones that yield null.</summary>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return node.Select(x => Select(x)).OfType<HtmlNode>().ToArray();
    }
}
@@ -0,0 +1,80 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Composes multi-node data providers: a chain of selectors narrows a set of nodes, then a final
/// provider extracts the value from that set.
/// </summary>
/// <typeparam name="T">Type of the value produced by <see cref="GetWith"/>.</typeparam>
public class ManyComposeDataProviders<T> : IManySelectionComposableDataProvider<T> {
    /// <summary>Selectors applied in order; each one receives the nodes chosen by the previous one.</summary>
    public required IManySelectionComposableDataProvider<object>[] SelectWith { get; init; }

    /// <summary>Provider that extracts the final value from the selected nodes.</summary>
    public required IManySelectionComposableDataProvider<T> GetWith { get; init; }

    private ManyComposeDataProviders() {}

    /// <summary>Creates a composition with a single selector.</summary>
    public static ManyComposeDataProviders<T> Create(IManySelectionComposableDataProvider<object> selectWith, IManySelectionComposableDataProvider<T> getWith) {
        return new ManyComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = [selectWith]
        };
    }

    /// <summary>Creates a composition with a chain of selectors, applied in array order.</summary>
    public static ManyComposeDataProviders<T> Create(IManySelectionComposableDataProvider<object>[] selectWiths, IManySelectionComposableDataProvider<T> getWith) {
        return new ManyComposeDataProviders<T>() {
            GetWith = getWith,
            SelectWith = selectWiths
        };
    }

    /// <summary>
    /// Composes the data providers, first selecting nodes with <see cref="SelectWith"/>, then getting the data with <see cref="GetWith"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <see cref="SelectWith"/> is empty or the selection yields null.
    /// </exception>
    /// <param name="document">Document to select from.</param>
    /// <returns>The value <see cref="GetWith"/> reads from the selected nodes.</returns>
    public T Get(HtmlDocument document) {
        var selected = SelectMany(document);
        if (selected is null)
            throw new InvalidOperationException("Selection operation failed.");
        return GetWith.ManyGet(selected);
    }

    /// <summary>
    /// Uses the <see cref="GetWith"/> data provider to get the data from the supplied nodes.
    /// </summary>
    /// <param name="node">Nodes to read from; no selection is applied.</param>
    /// <returns>The value <see cref="GetWith"/> reads from <paramref name="node"/>.</returns>
    public T ManyGet(HtmlNode[] node) {
        return GetWith.ManyGet(node);
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain starting from the supplied document.
    /// </summary>
    /// <param name="doc">Document handed to the first selector.</param>
    /// <returns>The nodes chosen by the last selector, or null as soon as any selector yields null.</returns>
    /// <exception cref="InvalidOperationException">Thrown when <see cref="SelectWith"/> is empty.</exception>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        return ApplyRemainingSelectors(FirstSelector().SelectMany(doc));
    }

    /// <summary>
    /// Runs the <see cref="SelectWith"/> chain starting from the supplied nodes.
    /// </summary>
    /// <param name="node">Nodes handed to the first selector.</param>
    /// <returns>The nodes chosen by the last selector, or null as soon as any selector yields null.</returns>
    /// <exception cref="InvalidOperationException">Thrown when <see cref="SelectWith"/> is empty.</exception>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return ApplyRemainingSelectors(FirstSelector().SelectMany(node));
    }

    // Returns the head of the chain, failing with a descriptive error instead of an index fault.
    private IManySelectionComposableDataProvider<object> FirstSelector() {
        if (SelectWith is [var first, ..])
            return first;
        throw new InvalidOperationException("SelectWith must contain at least one data provider.");
    }

    // Feeds the nodes through every selector after the first, stopping at the first null.
    private HtmlNode[]? ApplyRemainingSelectors(HtmlNode[]? selected) {
        foreach (var provider in SelectWith.Skip(1)) {
            if (selected is null)
                return null;
            selected = provider.SelectMany(selected);
        }
        return selected;
    }
}
@@ -4,9 +4,10 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic { namespace Beam.Dynamic {
public class ParagraphedContentDataProvider : IDataProvider<string> { public class ParagraphedContentDataProvider : IComposableDataProvider<string> {
public IBinding? Content { get; set; } public IBinding? Content { get; set; }
public string Get(HtmlDocument document) { public string Get(HtmlDocument document) {
@@ -14,9 +15,10 @@ namespace Beam.Dynamic {
return ""; return "";
var node = Content.Select(document); var node = Content.Select(document);
if (node is null) return node is null ? "" : Get(node);
return ""; }
public string Get(HtmlNode node) {
StringBuilder content = new(); StringBuilder content = new();
foreach(var childNode in node.ChildNodes) { foreach(var childNode in node.ChildNodes) {
if (childNode.Name != "p") if (childNode.Name != "p")
@@ -26,5 +28,12 @@ namespace Beam.Dynamic {
return content.ToString(); return content.ToString();
} }
public HtmlNode? Select(HtmlDocument doc) {
return Content?.Select(doc);
}
public HtmlNode? Select(HtmlNode node) {
return node;
}
} }
} }
@@ -0,0 +1,45 @@
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Direction in which <c>RelationalDataProvider</c> steps away from its starting node.
/// </summary>
public enum RelationType {
// Step to the node's ParentNode.
Parent,
// Step to the node's FirstChild.
Child,
// Step to the node's NextSibling.
NextSibling,
// Step to the node's PreviousSibling.
PreviousSibling,
}
/// <summary>
/// Selects a node related to a starting node by walking <see cref="Distance"/> steps in the direction
/// given by <see cref="RelationType"/>.
/// </summary>
public class RelationalDataProvider : IComposableDataProvider<HtmlNode?> {
    /// <summary>Direction of each step; defaults to <see cref="Dynamic.RelationType.Parent"/>.</summary>
    public RelationType RelationType { get; set; } = RelationType.Parent;

    /// <summary>Number of steps to take; defaults to 1. Zero or a negative value returns the starting node.</summary>
    public int Distance { get; set; } = 1;

    /// <summary>Binding that locates the starting node; when null or unmatched the document node is used.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Same as <see cref="Select(HtmlDocument)"/>.</summary>
    public HtmlNode? Get(HtmlDocument document) {
        return Select(document);
    }

    /// <summary>Same as <see cref="Select(HtmlNode)"/>.</summary>
    public HtmlNode? Get(HtmlNode node) {
        return Select(node);
    }

    /// <summary>
    /// Walks from the node chosen by <see cref="Content"/>, falling back to the document node.
    /// </summary>
    public HtmlNode? Select(HtmlDocument doc) {
        return Select(Content?.Select(doc) ?? doc.DocumentNode);
    }

    /// <summary>
    /// Walks from <paramref name="node"/> and returns the node reached, or null when the walk runs
    /// out of nodes before <see cref="Distance"/> steps have been taken.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// Thrown when a step is required and <see cref="RelationType"/> is not a defined value.
    /// </exception>
    public HtmlNode? Select(HtmlNode node) {
        return _Select(node, Distance);
    }

    // Takes up to `distance` steps. The walk stops early with null when a step leads nowhere
    // (past the root, no child, no sibling); previously the next step dereferenced that null.
    // A negative distance takes no steps instead of walking until the chain is exhausted.
    private HtmlNode? _Select(HtmlNode node, int distance = 0) {
        HtmlNode? current = node;
        for (int remaining = distance; remaining > 0 && current is not null; remaining--) {
            current = RelationType switch {
                RelationType.Parent => current.ParentNode,
                RelationType.Child => current.FirstChild,
                RelationType.NextSibling => current.NextSibling,
                RelationType.PreviousSibling => current.PreviousSibling,
                _ => throw new NotSupportedException()
            };
        }
        return current;
    }
}
@@ -0,0 +1,155 @@
using System.Text.RegularExpressions;
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Traversal order option exposed by <c>SelectDataProvider.SearchStrategyType</c>.
/// </summary>
public enum SearchStrategy {
// Default value of SelectDataProvider.SearchStrategyType.
DepthFirst,
BreadthFirst,
}
/// <summary>
/// Fallback choice used by <c>SearchStringOptions.DefaultSelection</c>.
/// </summary>
// NOTE(review): the exact meaning of each member is defined by the search code that consumes it,
// which is not visible here; presumably each names which candidate node to fall back to. Confirm
// against SelectDataProvider before relying on it.
public enum SearchStringDefaultSelection {
// Default value of SearchStringOptions.DefaultSelection.
First,
Last,
GreatestChildren,
Any,
None,
}
/// <summary>
/// Options describing how a search string is matched; held by <c>SelectDataProvider.SearchStringOptions</c>.
/// </summary>
public class SearchStringOptions {
// Whether the search string is to be treated as a regular expression.
public required bool SearchStringIsRegex { get; set; }
// Whether to search in the node body. NOTE(review): "body" presumably means the node's text; confirm against the search code.
public required bool SearchInBody { get; set; }
// Attribute to search in, or null. NOTE(review): presumably an attribute name; confirm against the search code.
public required string? SearchInAttribute { get; set; }
/// <summary>
/// Only used when both <see cref="SearchInBody"/> and <see cref="SearchInAttribute"/> are false/null, or no match is found for the search criteria.
/// </summary>
public SearchStringDefaultSelection DefaultSelection { get; set; } = SearchStringDefaultSelection.First;
/// <summary>
/// Only used when <see cref="SearchStringIsRegex"/> is false.
/// </summary>
public IEqualityComparer<string> UseComparer { get; set; } = StringComparer.CurrentCulture;
}
/// <summary>
/// Searches the tree below a starting node for nodes whose inner text and/or a named attribute
/// matches <see cref="SearchString"/>, visiting nodes in the order given by <see cref="SearchStrategyType"/>.
/// </summary>
public class SelectDataProvider : IComposableDataProvider<HtmlNode?>, IManySelectionComposableDataProvider<HtmlNode?> {
    /// <summary>Traversal order. Defaults to depth-first.</summary>
    public SearchStrategy SearchStrategyType { get; set; } = SearchStrategy.DepthFirst;

    /// <summary>Matching options. Defaults to a literal comparison against the node body.</summary>
    public SearchStringOptions SearchStringOptions { get; set; } = new SearchStringOptions() {
        SearchStringIsRegex = false,
        SearchInBody = true,
        SearchInAttribute = null
    };

    /// <summary>Text or regex pattern to look for; null is treated as the empty string.</summary>
    public string? SearchString { get; set; }

    /// <summary>Optional binding that picks the starting node; when absent the document node is used.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// Returns the first node that matches the search criteria.
    /// </summary>
    /// <param name="document">Document to search.</param>
    /// <returns>The first selected node, or null when nothing was selected.</returns>
    public HtmlNode? Get(HtmlDocument document) {
        return Select(document);
    }

    /// <summary>
    /// Returns the first node at or below <paramref name="node"/> that matches the search criteria.
    /// </summary>
    /// <param name="node">Node the search starts from.</param>
    /// <returns>The first selected node, or null when nothing was selected.</returns>
    public HtmlNode? Get(HtmlNode node) {
        return Select(node);
    }

    /// <summary>Not supported by this provider.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public HtmlNode? Get(HtmlNode[] node) {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Visits <paramref name="node"/> and everything below it, collecting each node that matches.
    /// A node is collected at most once even when both its body and its attribute match.
    /// When nothing matches, the fallback chosen by <see cref="SearchStringOptions.DefaultSelection"/>
    /// is returned instead (unless it is <see cref="SearchStringDefaultSelection.None"/>).
    /// </summary>
    /// <param name="node">Node the search starts from; it is itself a candidate.</param>
    /// <returns>Matching nodes in visiting order; possibly empty.</returns>
    /// <exception cref="NotSupportedException"><see cref="SearchStrategyType"/> holds an undefined value.</exception>
    public HtmlNode[]? _Select(HtmlNode node) {
        var options = SearchStringOptions;
        var pattern = SearchString ?? "";

        // The linked list doubles as the work queue: it grows while we walk it.
        LinkedList<HtmlNode> searchSet = new();
        searchSet.AddLast(node);
        HashSet<HtmlNode> visited = [node];

        void EnqueueBreadthFirst(LinkedListNode<HtmlNode> at) {
            foreach (var child in at.Value.ChildNodes) {
                if (visited.Add(child))
                    searchSet.AddLast(child);
            }
        }

        void EnqueueDepthFirst(LinkedListNode<HtmlNode> at) {
            // Inserting in reverse right behind the current entry keeps the children in document order.
            foreach (var child in at.Value.ChildNodes.Reverse()) {
                if (visited.Add(child))
                    searchSet.AddAfter(at, child);
            }
        }

        Action<LinkedListNode<HtmlNode>> enqueueStrategy = SearchStrategyType switch {
            SearchStrategy.BreadthFirst => EnqueueBreadthFirst,
            SearchStrategy.DepthFirst => EnqueueDepthFirst,
            _ => throw new NotSupportedException()
        };

        // Regex mode uses only the regex; the comparer is reserved for literal mode.
        bool Matches(HtmlNode candidate) {
            if (options.SearchInBody) {
                var bodyMatches = options.SearchStringIsRegex
                    ? Regex.IsMatch(candidate.InnerText ?? "", pattern)
                    : options.UseComparer.Equals(candidate.InnerText, pattern);
                if (bodyMatches)
                    return true;
            }
            if (options.SearchInAttribute is { } attributeName) {
                string? attributeValue = candidate.GetAttributeValue(attributeName, null);
                return options.SearchStringIsRegex
                    ? attributeValue is not null && Regex.IsMatch(attributeValue, pattern)
                    : options.UseComparer.Equals(attributeValue, pattern);
            }
            return false;
        }

        var bestCandidate = node;
        List<HtmlNode> selected = [];
        for (var cursor = searchSet.First; cursor is not null; cursor = cursor.Next) {
            var current = cursor.Value;
            if (Matches(current))
                selected.Add(current);

            switch (options.DefaultSelection) {
                case SearchStringDefaultSelection.GreatestChildren:
                    if (current.ChildNodes.Count > bestCandidate.ChildNodes.Count)
                        bestCandidate = current;
                    break;
                case SearchStringDefaultSelection.Last:
                    bestCandidate = current;
                    break;
                default:
                    // First / Any / None keep the starting node as the candidate.
                    break;
            }

            enqueueStrategy(cursor);
        }

        if (selected.Count == 0 && options.DefaultSelection != SearchStringDefaultSelection.None)
            selected.Add(bestCandidate);
        return selected.ToArray();
    }

    /// <summary>
    /// Searches from the node chosen by <see cref="Content"/> (or the document node when the binding is
    /// missing or selects nothing) and returns the first selected node.
    /// </summary>
    public HtmlNode? Select(HtmlDocument document) {
        return Select(Content?.Select(document) ?? document.DocumentNode);
    }

    /// <summary>Searches from <paramref name="node"/> and returns the first selected node, or null.</summary>
    public HtmlNode? Select(HtmlNode node) {
        return _Select(node)?.FirstOrDefault();
    }

    /// <summary>Not supported by this provider.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public HtmlNode? ManyGet(HtmlNode[] node) {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Searches from the node chosen by <see cref="Content"/> (or the document node) and returns every selected node.
    /// </summary>
    public HtmlNode[]? SelectMany(HtmlDocument doc) {
        return _Select(Content?.Select(doc) ?? doc.DocumentNode);
    }

    /// <summary>Runs the search from each given node and concatenates the results in input order.</summary>
    public HtmlNode[]? SelectMany(HtmlNode[] node) {
        return node.SelectMany(x => _Select(x) ?? []).ToArray();
    }
}
@@ -0,0 +1,154 @@
using System.Text.Json;
using Beam.Abstractions;
using HtmlAgilityPack;
namespace Beam.Dynamic;
/// <summary>
/// Reads the <c>tr</c> rows found below a node into a jagged string array, one entry per row and
/// column. It can also hand the same data back as a JSON string.
/// </summary>
public class TableDataProvider
    : IComposableDataProvider<string>,
      IComposableDataProvider<string[][]>
{
    /// <summary>Binding that locates the node whose rows are read.</summary>
    public IBinding? Content { get; set; }

    /// <summary>
    /// One provider per column. Each provider is executed per row.
    /// Missing columns are filled with defaults that return the td/th text at that column index.
    /// </summary>
    public IDataProvider<string>[]? ColumnProviders { get; set; }

    /// <summary>Reads the table selected by <see cref="Content"/>; empty when nothing is selected.</summary>
    public string[][] Get(HtmlDocument document)
    {
        // Select already yields null when Content is unset, so one check covers both cases.
        HtmlNode? tableRoot = Select(document);
        if (tableRoot is null)
        {
            return [];
        }
        return Get(tableRoot);
    }

    /// <summary>JSON form of the table selected by <see cref="Content"/>; empty string when nothing is selected.</summary>
    string IDataProvider<string>.Get(HtmlDocument document)
    {
        HtmlNode? tableRoot = Select(document);
        if (tableRoot is null)
        {
            return "";
        }
        return ((IComposableDataProvider<string>)this).Get(tableRoot);
    }

    /// <summary>
    /// Builds the cell matrix for every <c>tr</c> descendant of <paramref name="node"/>.
    /// The column count is the larger of the configured provider count and the widest row.
    /// </summary>
    public string[][] Get(HtmlNode node)
    {
        var tableRows = node.Descendants("tr").ToList();
        if (tableRows.Count == 0)
        {
            return [];
        }

        int widestRow = 0;
        foreach (var row in tableRows)
        {
            int cellCount = row.ChildNodes.Count(IsCell);
            if (cellCount > widestRow)
            {
                widestRow = cellCount;
            }
        }

        int columnCount = Math.Max(ColumnProviders?.Length ?? 0, widestRow);
        if (columnCount == 0)
        {
            return [];
        }

        var providers = BuildEffectiveProviders(columnCount);
        var table = new string[tableRows.Count][];
        int rowIndex = 0;
        foreach (var row in tableRows)
        {
            var cells = new string[columnCount];
            for (int column = 0; column < columnCount; column++)
            {
                var provider = providers[column];
                // Composable providers run against the row; others only understand whole documents.
                string? text = provider is IComposableDataProvider<string> rowAware
                    ? rowAware.Get(row)
                    : provider.Get(row.OwnerDocument);
                cells[column] = text ?? "";
            }
            table[rowIndex++] = cells;
        }
        return table;
    }

    /// <summary>Serializes the matrix produced by <see cref="Get(HtmlNode)"/> as JSON.</summary>
    string IComposableDataProvider<string>.Get(HtmlNode node)
    {
        var matrix = Get(node);
        return JsonSerializer.Serialize(matrix);
    }

    /// <summary>Resolves the table node through <see cref="Content"/>; null when no binding is set.</summary>
    public HtmlNode? Select(HtmlDocument doc) => Content?.Select(doc);

    HtmlNode? IComposableDataProvider<string[][]>.Select(HtmlNode node) => node;

    HtmlNode? IComposableDataProvider<string>.Select(HtmlNode node) => node;

    /// <summary>True for table cell elements (<c>td</c> or <c>th</c>).</summary>
    private static bool IsCell(HtmlNode candidate) => candidate.Name == "td" || candidate.Name == "th";

    /// <summary>
    /// Produces exactly <paramref name="columnCount"/> providers: the configured one where present
    /// and non-null, otherwise a default that reads the cell text at that index.
    /// </summary>
    private IDataProvider<string>[] BuildEffectiveProviders(int columnCount)
    {
        var configured = ColumnProviders;
        int configuredCount = configured?.Length ?? 0;

        var effective = new IDataProvider<string>[columnCount];
        for (int column = 0; column < columnCount; column++)
        {
            IDataProvider<string>? custom = column < configuredCount ? configured![column] : null;
            effective[column] = custom ?? new ColumnCellContentsProvider(column);
        }
        return effective;
    }

    /// <summary>
    /// Default column provider: for a given row, returns text of td/th at ColumnIndex.
    /// </summary>
    private sealed class ColumnCellContentsProvider : IComposableDataProvider<string>
    {
        public ColumnCellContentsProvider(int columnIndex) => ColumnIndex = columnIndex;

        public int ColumnIndex { get; }

        public string Get(HtmlDocument document)
        {
            HtmlNode? start = Select(document);
            if (start is null)
            {
                return "";
            }
            return Get(start);
        }

        public string Get(HtmlNode rowNode)
        {
            if (ColumnIndex < 0)
            {
                return "";
            }
            // ElementAtOrDefault yields null when the row has fewer cells than ColumnIndex + 1.
            HtmlNode? cell = rowNode.ChildNodes.Where(IsCell).ElementAtOrDefault(ColumnIndex);
            return cell is null ? "" : cell.InnerText;
        }

        public HtmlNode? Select(HtmlDocument doc) => doc.DocumentNode;

        public HtmlNode? Select(HtmlNode node) => node;
    }
}
-55
View File
@@ -1,55 +0,0 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.Marshalling;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
namespace Beam.Dynamic {
/// <summary>
/// Turns the <c>option</c> children of a selected node into links, optionally resolving each
/// option value against <see cref="RelativeTo"/>.
/// </summary>
public class DropDownDataProvider
    : IDataProvider<string>,
    IDataProvider<string[]>,
    IDataProvider<SourceLink[]> {
    /// <summary>Binding that locates the node holding the options.</summary>
    public IBinding? Content { get; set; }

    /// <summary>Base address prepended to every option value; null leaves values untouched.</summary>
    public string? RelativeTo { get; set; }

    /// <summary>
    /// Joins <paramref name="base"/> and <paramref name="relative"/> with exactly one slash between
    /// them. Returns <paramref name="relative"/> unchanged when no base is given.
    /// </summary>
    private static string GetAbsolute(string? @base, string relative) {
        if (@base is null)
            return relative;
        if (@base.EndsWith('/'))
            @base = @base[..^1];
        if (relative.StartsWith('/'))
            relative = relative[1..];
        return @base + '/' + relative;
    }

    /// <summary>
    /// Collects one link per direct <c>option</c> child whose resolved value is an absolute URI.
    /// Options without a <c>value</c> attribute and values that do not form an absolute URI are skipped.
    /// </summary>
    /// <returns>The links in document order; empty when there is no binding or it selects nothing.</returns>
    public SourceLink[] Get(HtmlDocument document) {
        var node = Content?.Select(document);
        if (node is null)
            return [];

        List<SourceLink> links = [];
        foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) {
            string? childValue = child.GetAttributeValue("value", null);
            // An option without a value cannot become a link; skipping it also avoids
            // dereferencing null while joining it with the base address.
            if (childValue is null)
                continue;

            var absolute = GetAbsolute(RelativeTo, childValue);
            if (!Uri.TryCreate(absolute, UriKind.Absolute, out _))
                continue;
            links.Add(new SourceLink(absolute));
        }
        return links.ToArray();
    }

    /// <summary>The links from <see cref="Get(HtmlDocument)"/> as absolute URI strings.</summary>
    string[] IDataProvider<string[]>.Get(HtmlDocument document) {
        return this.Get(document).Select(x => x.Link.AbsoluteUri).ToArray();
    }

    /// <summary>The links from <see cref="Get(HtmlDocument)"/> serialized as JSON.</summary>
    string IDataProvider<string>.Get(HtmlDocument document) {
        return JsonSerializer.Serialize(this.Get(document));
    }
}
}
-21
View File
@@ -1,21 +0,0 @@
using HtmlAgilityPack;
using System.Text.Json.Serialization;
namespace Beam.Dynamic {
/// <summary>
/// Non-generic root of the data provider family; the attributes register the concrete
/// providers and their JSON type discriminators.
/// </summary>
[JsonDerivedType(typeof(ParagraphedContentDataProvider), "paragraphed")]
[JsonDerivedType(typeof(ListContentDataProvider), "list")]
[JsonDerivedType(typeof(ContentsArrayDataProvider), "array")]
[JsonDerivedType(typeof(ContentsDataProvider), "single")]
[JsonDerivedType(typeof(DropDownDataProvider), "dropdown")]
[JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list")]
[JsonDerivedType(typeof(AnchorDataProvider), "anchor")]
public interface IDataProvider {
    /// <summary>
    /// Text form of whatever the provider yields for <paramref name="document"/>.
    /// Yields an empty string when the provider is not usable as <c>IDataProvider&lt;object&gt;</c>,
    /// when it produces null, or when the value's <c>ToString()</c> is null.
    /// </summary>
    public string GetString(HtmlDocument document) {
        if (this is not IDataProvider<object> objectProvider) {
            return "";
        }
        object? produced = objectProvider.Get(document);
        return produced?.ToString() ?? "";
    }
}
/// <summary>
/// A data provider that extracts a value of type <typeparamref name="T"/> from an HTML document.
/// </summary>
/// <typeparam name="T">Type of the extracted value (covariant).</typeparam>
public interface IDataProvider<out T> : IDataProvider {
/// <summary>Extracts the value from <paramref name="document"/>.</summary>
public T Get(HtmlDocument document);
//public HtmlNode? GetNode(HtmlDocument document);
}
}
@@ -1,9 +1,7 @@
using aeqw89.DataKeys; using aeqw89.DataKeys;
using Beam.Dynamic; using Beam.Abstractions;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Temporary.Cli { namespace Beam.Dynamic {
public partial interface IArchitecture { public partial interface IArchitecture {
private class MainArchitecture : IArchitecture { private class MainArchitecture : IArchitecture {
public MainArchitecture() { } public MainArchitecture() { }
@@ -1,15 +1,17 @@
namespace Beam { using Beam.Abstractions;
namespace Beam.Dynamic {
public class NumberedStateChanger(NumberedStateChanger.MoveState moveState) : IStateChangeBehaviour { public class NumberedStateChanger(NumberedStateChanger.MoveState moveState) : IStateChangeBehaviour {
public delegate void MoveState(State state, int amount); public delegate void MoveState(IState state, int amount);
public MoveState MoveStateDlgte { get; set; } = moveState; public MoveState MoveStateDlgte { get; set; } = moveState;
public virtual void Apply(State state, object stimulus) { public virtual void Apply(IState state, object stimulus) {
if (stimulus is not int amount) if (stimulus is not int amount)
throw new ArgumentException(S.M.StimulusMustBeInt, nameof(stimulus)); throw new ArgumentException(string.Format(Exceptions.Exceptions.num_state_changer_stimulus_must_be_int, stimulus.GetType().Name), nameof(stimulus));
Apply(state, amount); Apply(state, amount);
} }
public virtual void Apply(State state, int amount) { public virtual void Apply(IState state, int amount) {
MoveStateDlgte(state, amount); MoveStateDlgte(state, amount);
} }
+2 -1
View File
@@ -4,9 +4,10 @@ using System.Linq;
using System.Text; using System.Text;
using System.Text.Json.Serialization; using System.Text.Json.Serialization;
using System.Threading.Tasks; using System.Threading.Tasks;
using Beam.Abstractions;
namespace Beam.Dynamic { namespace Beam.Dynamic {
public class StateChangerFactory { public class StateChangerFactory : IStateChangerFactory {
[JsonIgnore] [JsonIgnore]
public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey](); public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey]();
@@ -10,7 +10,7 @@ using System.Threading.Tasks;
using System.Web; using System.Web;
namespace Beam.Dynamic { namespace Beam.Dynamic {
public static partial class OnlineCleaner { public static partial class StringCleaner {
[GeneratedRegex("&#x?[\\d\\w]{1,4};")] [GeneratedRegex("&#x?[\\d\\w]{1,4};")]
public static partial Regex MochaBlendUnicodeEscapeSequence(); public static partial Regex MochaBlendUnicodeEscapeSequence();
+14
View File
@@ -0,0 +1,14 @@
namespace Beam.Exceptions;
/// <summary>
/// The kind of exception that should never happen: it signals that an assumption the code
/// relies on has been violated.
/// </summary>
[Serializable]
public class AssertionException : Exception {
    /// <summary>Creates the exception with the default message.</summary>
    public AssertionException() { }

    /// <summary>Creates the exception with <paramref name="message"/>.</summary>
    public AssertionException(string message) : base(message) { }

    /// <summary>Creates the exception with <paramref name="message"/> and the exception that caused it.</summary>
    public AssertionException(string message, Exception inner) : base(message, inner) { }

    /// <summary>
    /// Legacy formatter-serialization constructor. The base constructor it calls is obsolete,
    /// so this one carries the same diagnostic to keep the build free of SYSLIB0051 warnings
    /// while remaining available to existing derived types.
    /// </summary>
    [Obsolete("This API supports obsolete formatter-based serialization. It should not be called or extended by application code.", DiagnosticId = "SYSLIB0051")]
    protected AssertionException(
        System.Runtime.Serialization.SerializationInfo info,
        System.Runtime.Serialization.StreamingContext context) : base(info, context) { }
}
+24
View File
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<EmbeddedResource Update="Exceptions.resx">
<Generator>PublicResXFileCodeGenerator</Generator>
<LastGenOutput>Exceptions.Designer.cs</LastGenOutput>
</EmbeddedResource>
</ItemGroup>
<ItemGroup>
<Compile Update="Exceptions.Designer.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>Exceptions.resx</DependentUpon>
</Compile>
</ItemGroup>
</Project>
+188
View File
@@ -0,0 +1,188 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace Beam.Exceptions {
using System;
/// <summary>
/// A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit your .ResX file then rerun ResGen
// with the /str option, or rebuild your VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
public class Exceptions {
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Exceptions() {
}
/// <summary>
/// Returns the cached ResourceManager instance used by this class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
public static global::System.Resources.ResourceManager ResourceManager {
get {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Beam.Exceptions.Exceptions", typeof(Exceptions).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
/// <summary>
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
public static global::System.Globalization.CultureInfo Culture {
get {
return resourceCulture;
}
set {
resourceCulture = value;
}
}
/// <summary>
/// Looks up a localized string similar to The state of the builder is invalid..
/// </summary>
public static string fluent_invalid_state {
get {
return ResourceManager.GetString("fluent_invalid_state", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to A type conversion from &apos;{0}&apos; to &apos;{1}&apos; has failed, indicating an invalid state..
/// </summary>
public static string fluent_type_conversion_failure {
get {
return ResourceManager.GetString("fluent_type_conversion_failure", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The builder state &apos;{0}&apos; points to an unsupported unit downloader..
/// </summary>
public static string fluent_unsupported_pattern {
get {
return ResourceManager.GetString("fluent_unsupported_pattern", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The transformer type &apos;{0}&apos; is unsupported by this fluent pathway. Consider manually instantiating DownloadContext..
/// </summary>
public static string fluent_unsupported_transformer {
get {
return ResourceManager.GetString("fluent_unsupported_transformer", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to A fragment is locked when it should be free; failed to obtain updater..
/// </summary>
public static string fragment_locked {
get {
return ResourceManager.GetString("fragment_locked", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The argument at index &apos;{0}&apos; with name &apos;{1}&apos; is not marked optional and is missing..
/// </summary>
public static string link_builder_argument_missing {
get {
return ResourceManager.GetString("link_builder_argument_missing", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The flag &apos;{0}&apos; is incompatible with the flag(s) &apos;{1}&apos;.
/// </summary>
public static string link_builder_incompatible_flag {
get {
return ResourceManager.GetString("link_builder_incompatible_flag", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The query flag is only allowed on the last segment; found on segment index &apos;{0}&apos;.
/// </summary>
public static string link_builder_query_only_at_last {
get {
return ResourceManager.GetString("link_builder_query_only_at_last", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The stimulus must be an integer; got &apos;{0}&apos;.
/// </summary>
public static string num_state_changer_stimulus_must_be_int {
get {
return ResourceManager.GetString("num_state_changer_stimulus_must_be_int", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to There must be at least one state in resource definition..
/// </summary>
public static string resource_definition_invalid_states_count {
get {
return ResourceManager.GetString("resource_definition_invalid_states_count", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Encountered an error while changing state.
/// </summary>
public static string state_change_error {
get {
return ResourceManager.GetString("state_change_error", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Could not open a filestream to a non-existant directory &apos;{0}&apos;..
/// </summary>
public static string unit_download_directory_nonexistant {
get {
return ResourceManager.GetString("unit_download_directory_nonexistant", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The memory stream was created with an invisible inner byte array..
/// </summary>
public static string unit_download_invalid_memory_stream {
get {
return ResourceManager.GetString("unit_download_invalid_memory_stream", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to The base unit downloader class only supports RawType&apos;s of string and ByteDocument..
/// </summary>
public static string unit_downloader_limited_support {
get {
return ResourceManager.GetString("unit_downloader_limited_support", resourceCulture);
}
}
}
}
+63
View File
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>1.3</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<data name="link_builder_argument_missing" xml:space="preserve">
<value>The argument at index '{0}' with name '{1}' is not marked optional and is missing.</value>
</data>
<data name="link_builder_incompatible_flag" xml:space="preserve">
<value>The flag '{0}' is incompatible with the flag(s) '{1}'</value>
</data>
<data name="link_builder_query_only_at_last" xml:space="preserve">
<value>The query flag is only allowed on the last segment; found on segment index '{0}'</value>
</data>
<data name="num_state_changer_stimulus_must_be_int" xml:space="preserve">
<value>The stimulus must be an integer; got '{0}'</value>
</data>
<data name="state_change_error" xml:space="preserve">
<value>Encountered an error while changing state</value>
</data>
<data name="fragment_locked" xml:space="preserve">
<value>A fragment is locked when it should be free; failed to obtain updater.</value>
</data>
<data name="fluent_unsupported_transformer" xml:space="preserve">
<value>The transformer type '{0}' is unsupported by this fluent pathway. Consider manually instantiating DownloadContext.</value>
</data>
<data name="fluent_invalid_state" xml:space="preserve">
<value>The state of the builder is invalid.</value>
</data>
<data name="fluent_type_conversion_failure" xml:space="preserve">
<value>A type conversion from '{0}' to '{1}' has failed, indicating an invalid state.</value>
</data>
<data name="fluent_unsupported_pattern" xml:space="preserve">
<value>The builder state '{0}' points to an unsupported unit downloader.</value>
</data>
<data name="resource_definition_invalid_states_count" xml:space="preserve">
<value>There must be at least one state in resource definition.</value>
</data>
<data name="unit_download_directory_nonexistant" xml:space="preserve">
<value>Could not open a filestream to a non-existent directory '{0}'.</value>
</data>
<data name="unit_download_invalid_memory_stream" xml:space="preserve">
<value>The memory stream was created with an invisible inner byte array.</value>
</data>
<data name="unit_downloader_limited_support" xml:space="preserve">
<value>The base unit downloader class only supports RawType's of string and ByteDocument.</value>
</data>
</root>
+11
View File
@@ -0,0 +1,11 @@
namespace Beam.Exceptions;
/// <summary>
/// Argument exception raised for mapping failures.
/// </summary>
[Serializable]
public class MapException : ArgumentException {
    /// <summary>Creates the exception with the default message.</summary>
    public MapException() { }

    /// <summary>Creates the exception with <paramref name="message"/>.</summary>
    public MapException(string message) : base(message) { }

    /// <summary>Creates the exception with <paramref name="message"/> and the exception that caused it.</summary>
    public MapException(string message, Exception inner) : base(message, inner) { }

    /// <summary>
    /// Legacy formatter-serialization constructor. The base constructor it calls is obsolete,
    /// so this one carries the same diagnostic to keep the build free of SYSLIB0051 warnings
    /// while remaining available to existing derived types.
    /// </summary>
    [Obsolete("This API supports obsolete formatter-based serialization. It should not be called or extended by application code.", DiagnosticId = "SYSLIB0051")]
    protected MapException(
        System.Runtime.Serialization.SerializationInfo info,
        System.Runtime.Serialization.StreamingContext context) : base(info, context) { }
}
+2 -4
View File
@@ -6,11 +6,9 @@
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Beam\Beam.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
</ItemGroup> </ItemGroup>
</Project> </Project>
+5 -2
View File
@@ -1,8 +1,11 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using Beam.Abstractions;
using Beam.Models;
namespace Beam.Exports { namespace Beam.Exports {
public class PlainTextExporter : IExporter, IAsyncExporter { public class PlainTextExporter : IExporter, IAsyncExporter {
@@ -24,14 +27,14 @@ namespace Beam.Exports {
var text = Convert(); var text = Convert();
if (!Directory.Exists(Path.GetDirectoryName(path))) if (!Directory.Exists(Path.GetDirectoryName(path)))
throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path)); throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path));
File.WriteAllText(path, text, Encoding.Unicode); System.IO.File.WriteAllText(path, text, Encoding.Unicode);
} }
public virtual async Task WriteAsync(string path) { public virtual async Task WriteAsync(string path) {
var text = await ConvertAsync(); var text = await ConvertAsync();
if (!Directory.Exists(path)) if (!Directory.Exists(path))
throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path)); throw new ArgumentException(S.M.FileDirectoryDoesNotExist, nameof(path));
await File.WriteAllTextAsync(path, text); await System.IO.File.WriteAllTextAsync(path, text);
} }
} }
} }
+2
View File
@@ -1,4 +1,6 @@
using System.Text; using System.Text;
using Beam.Abstractions;
using Beam.Models;
namespace Beam.Exports { namespace Beam.Exports {
public class HtmlExporter : PlainTextExporter { public class HtmlExporter : PlainTextExporter {
+9 -9
View File
@@ -6,17 +6,19 @@
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" /> <PackageReference Include="aeqw89.DataKeys" Version="2.1.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" /> <PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7" /> <PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" /> <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7" /> <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Beam.Data\Beam.Data.csproj" />
<ProjectReference Include="..\Beam.Downloaders\Beam.Downloaders.csproj" />
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj"> <ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\Beam.Exceptions\Beam.Exceptions.csproj" />
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" /> <ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj"> <ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
@@ -24,8 +26,6 @@
<ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj"> <ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\Beam\Beam.csproj"> <ProjectReference Include="..\Beam\Beam.csproj" />
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
</ItemGroup> </ItemGroup>
</Project> </Project>
+189
View File
@@ -0,0 +1,189 @@
using Beam.Models;
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
using Beam;
using Beam.Abstractions;
using Beam.Downloaders;
namespace Beam.Fluent;
internal sealed class ContextStage<OutType> : IContextStage<OutType> {
private readonly DownloadContextBuilder _ctxBuilder;
private readonly AsyncTransformer<ByteDocument, OutType> _transformer;
private FragmentMode _fragmentMode = FragmentMode.Single;
private Channel _channel = Channel.Plain;
private readonly ContentKind _contentKind;
private int _parallelism = 4;
private UnitDownloaderOptionsBuilder<OutType> _optionsBuilder = new();
// ──────────────── playwright ────────────────
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
// ────────────────────────────────────────────
// ──────────────── stealth ───────────────────
private StealthAsyncManipulator? _stealthManipulator = null;
private StealthConfig? _stealthConfig = null;
// ────────────────────────────────────────────
/// <summary>
/// Binds the stage to a context builder and a transformer, derives the content kind from the
/// transformer's runtime type, and registers the transformer on the unit-downloader options builder.
/// </summary>
/// <param name="ctxBuilder">Builder for the download context this stage configures.</param>
/// <param name="transformer">Transformer applied to downloaded documents.</param>
/// <exception cref="ArgumentException">The transformer matches neither supported transformer type.</exception>
public ContextStage(DownloadContextBuilder ctxBuilder,
    AsyncTransformer<ByteDocument, OutType> transformer) {
    _ctxBuilder = ctxBuilder;
    _transformer = transformer;

    // The string-document check must come first, exactly as the type tests are ordered.
    if (transformer is AsyncTransformer<StringDocument, OutType>) {
        _contentKind = ContentKind.File;
    }
    else if (transformer is AsyncTransformer<ByteDocument, OutType>) {
        _contentKind = ContentKind.Binary;
    }
    else {
        var transformerName = transformer.GetType().AsUniqueName();
        throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
            transformerName));
    }

    _optionsBuilder.WithAsyncTransformer(_transformer);
}
/// <summary>
/// Lets the caller adjust the underlying download context builder directly.
/// </summary>
/// <param name="configure">Callback that receives the context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage<OutType> Configure(Action<DownloadContextBuilder> configure) {
    configure.Invoke(_ctxBuilder);
    return this;
}
/// <summary>
/// Lets the caller adjust the unit-downloader options builder directly.
/// </summary>
/// <param name="configure">Callback that receives the options builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage<OutType> ConfigureUnitDownloaderOptions(
    Action<UnitDownloaderOptionsBuilder<OutType>> configure) {
    configure.Invoke(_optionsBuilder);
    return this;
}
/// <summary>
/// Stores the requested degree of parallelism, raising any value below one to one.
/// </summary>
/// <param name="degree">Requested degree of parallelism.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage<OutType> WithParallelism(int degree) {
    _parallelism = degree < 1 ? 1 : degree;
    return this;
}
/// <summary>
/// Forwards <paramref name="timeout"/> to the context builder's <c>WithTimeOut</c>.
/// </summary>
/// <param name="timeout">Timeout handed to the context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage< OutType> WithTimeout(TimeSpan timeout) {
_ctxBuilder.WithTimeOut(timeout);
return this;
}
/// <summary>
/// Forwards <paramref name="reporter"/> to the context builder's <c>WithRetryReporter</c>.
/// </summary>
/// <param name="reporter">Progress sink for retry reports.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage< OutType> WithRetryReporter(IProgress<IRetryReport> reporter) {
_ctxBuilder.WithRetryReporter(reporter);
return this;
}
/// <summary>
/// Switches to fragmented downloading so multiple links are fetched in parallel.
/// This strategy is mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>:
/// any Playwright manipulator is dropped and a Playwright channel falls back to the plain channel.
/// </summary>
/// <returns>This stage, for chaining.</returns>
public IContextStage<OutType> UseFragments() {
    // Clearing unconditionally is equivalent to clearing only when set.
    _playwrightManipulator = null;
    _channel = _channel == Channel.Playwright ? Channel.Plain : _channel;
    _fragmentMode = FragmentMode.Fragmented;
    return this;
}
/// <summary>
/// Downloads through a puppet browser driven by <paramref name="manipulator"/>.
/// This strategy is mutually exclusive with <see cref="UseFragments"/>: fragmented mode is
/// reset to single mode, and any stealth manipulator is dropped.
/// </summary>
/// <param name="manipulator">The page manipulator</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage<OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
    _fragmentMode = _fragmentMode == FragmentMode.Fragmented ? FragmentMode.Single : _fragmentMode;
    _stealthManipulator = null;
    (_channel, _playwrightManipulator) = (Channel.Playwright, manipulator);
    return this;
}
/// <summary>
/// Downloads through the stealth channel using <paramref name="manipulator"/> and
/// <paramref name="config"/>. Any Playwright manipulator is dropped; the fragment mode is left as is.
/// </summary>
/// <param name="manipulator">The stealth page manipulator.</param>
/// <param name="config">Configuration for the stealth channel.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage<OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
    _playwrightManipulator = null;
    (_channel, _stealthManipulator, _stealthConfig) = (Channel.Stealth, manipulator, config);
    return this;
}
/// <summary>
/// Builds the unit downloader that matches the configured channel, fragment mode and content kind.
/// The context's failure predicates (when present) and its client are applied to the options builder first.
/// </summary>
/// <param name="context">Download context supplying the client and optional failure predicates.</param>
/// <returns>The downloader instance, typed as <see cref="object"/> because the concrete type varies by configuration.</returns>
/// <exception cref="InvalidOperationException">
/// The configured combination has no downloader, or a channel was selected without its manipulator/config.
/// </exception>
private object ConstructUnitDownloader(DownloadContext context) {
    // Fails fast when a channel was selected without the collaborator it needs.
    static T EnsureExists<T>(T? o) where T : class
        => o ?? throw new InvalidOperationException(Exceptions.Exceptions.fluent_invalid_state);

    if (context.AsyncFailurePredicates is not null)
        _optionsBuilder
            .WithFailurePredicates(x => x.WithPredicates(context.AsyncFailurePredicates));

    var options = _optionsBuilder
        .WithClient(context.Client)
        .Build();

    return (_channel, _fragmentMode, _contentKind) switch {
        // ──────────────── fragmented ────────────────
        (Channel.Plain, FragmentMode.Fragmented, _)
            => new UnitFragmentDownloader<OutType>(options),
        // ──────────────── single ────────────────
        (Channel.Plain, FragmentMode.Single, _)
            => new UnitDownloader<OutType>(options),
        // ──────────────── single playwright ────────────────
        (Channel.Playwright, FragmentMode.Single, _)
            => new PlaywrightUnitDownloader<OutType>(options, EnsureExists(_playwrightManipulator)),
        // ──────────────── single stealth ────────────────
        (Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
            => new StealthUnitDownloader<OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
        // ──────────────── fragment stealth ────────────────
        (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
            => new StealthFragmentDownloader<OutType>(options,
                EnsureExists(_stealthConfig),
                EnsureExists(_stealthManipulator)),
        _ => throw new InvalidOperationException(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
            $"({_channel}, {_fragmentMode}, {_contentKind})")),
    };
}
/// <summary>
/// Creates the sequential downloader for the current fragment mode and exposes it as an
/// enumerator of ordered results.
/// </summary>
/// <param name="context">The built download context; a copy of it is handed to the downloader.</param>
/// <returns>An async enumerator yielding the ordered download results.</returns>
/// <exception cref="Exception">The fragment mode is not one of the supported values.</exception>
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext context) {
    // The downloader receives its own copy of the context rather than the instance passed in.
    var snapshot = DownloadContextBuilder.FromContext(context).Build();

    switch (_fragmentMode) {
        case FragmentMode.Fragmented: {
            var fragmented = new SequentialFragmentDownloader<OutType>(
                snapshot,
                ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger);
            return fragmented.UnwrapFragmented();
        }
        case FragmentMode.Single: {
            var single = new SequentialDownloader<OutType>(
                snapshot,
                ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
                context.DownloadLogger);
            return single.WrapOrdered();
        }
        default:
            throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
                $"{_fragmentMode}"));
    }
}
/// <summary>
/// Builds the download context and wraps the resulting downloader in a
/// <see cref="DownloadEnumerable{OutType}"/>.
/// </summary>
/// <returns>The enumerable over the ordered download results.</returns>
public DownloadEnumerable<OutType> Build()
    => new DownloadEnumerable<OutType>(ConstructDownloader(_ctxBuilder.Build()));
}
+17
View File
@@ -0,0 +1,17 @@
namespace Beam.Fluent;
/// <summary>
/// Selects between the single-unit and the fragmented download pipeline.
/// </summary>
public enum FragmentMode {
    /// <summary>Use the non-fragmented (single) downloaders.</summary>
    Single = 0,

    /// <summary>Use the fragment downloaders.</summary>
    Fragmented = 1,
}
/// <summary>
/// Selects the transport used to fetch content.
/// </summary>
public enum Channel {
    /// <summary>No browser automation; the plain downloaders are used.</summary>
    Plain = 0,

    /// <summary>The stealth downloaders are used.</summary>
    Stealth = 1,

    /// <summary>The Playwright downloader is used.</summary>
    Playwright = 2,
}
/// <summary>
/// Classifies the content a download produces.
/// NOTE(review): presumably distinguishes textual documents from raw bytes — confirm
/// against the code that assigns this value.
/// </summary>
public enum ContentKind {
    /// <summary>File content.</summary>
    File = 0,

    /// <summary>Binary content.</summary>
    Binary = 1,
}
+20
View File
@@ -0,0 +1,20 @@
using Beam.Abstractions;
using Beam.Downloaders;
using Beam.Models;
using Beam.Playwright;
using Beam.Stealth;
namespace Beam.Fluent;
/// <summary>
/// Fluent stage that configures how downloads are performed and finally builds the
/// download enumerable. Every configuration member returns the stage for chaining.
/// </summary>
/// <typeparam name="OutType">The type of the items the download produces.</typeparam>
public interface IContextStage<OutType> {
    /// <summary>Lets the caller adjust the download context through <paramref name="configure"/>.</summary>
    /// <param name="configure">Callback receiving the <see cref="DownloadContextBuilder"/>.</param>
    IContextStage<OutType> Configure(Action<DownloadContextBuilder> configure);

    /// <summary>Sets the requested degree of parallelism.</summary>
    /// <param name="degree">The requested degree of parallelism.</param>
    IContextStage<OutType> WithParallelism(int degree);

    /// <summary>Sets the download timeout.</summary>
    /// <param name="timeout">The timeout to apply.</param>
    IContextStage<OutType> WithTimeout(TimeSpan timeout);

    /// <summary>Registers a reporter that receives retry reports.</summary>
    /// <param name="reporter">The progress sink for <see cref="IRetryReport"/> instances.</param>
    IContextStage<OutType> WithRetryReporter(IProgress<IRetryReport> reporter);

    /// <summary>
    /// Selects the fragmented download strategy. Mutually exclusive with
    /// <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>.
    /// </summary>
    IContextStage<OutType> UseFragments();

    /// <summary>
    /// Selects the Playwright download strategy. Mutually exclusive with
    /// <see cref="UseFragments"/>.
    /// </summary>
    /// <param name="manipulator">The page manipulator.</param>
    IContextStage<OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator);

    /// <summary>Selects the stealth download strategy.</summary>
    /// <param name="manipulator">The stealth manipulator.</param>
    /// <param name="config">The stealth configuration.</param>
    IContextStage<OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);

    /// <summary>Lets the caller adjust the unit-downloader options through <paramref name="configure"/>.</summary>
    /// <param name="configure">Callback receiving the options builder.</param>
    IContextStage<OutType> ConfigureUnitDownloaderOptions(Action<UnitDownloaderOptionsBuilder<OutType>> configure);

    /// <summary>Finishes configuration and creates the download enumerable.</summary>
    /// <returns>The enumerable over the download results.</returns>
    DownloadEnumerable<OutType> Build();
}
+12
View File
@@ -0,0 +1,12 @@
using System.Collections.Concurrent;
namespace Beam.Fluent;
/// <summary>
/// Fluent stage describing what happens to downloaded items: where they are saved and
/// how the caller waits for or consumes them.
/// </summary>
/// <typeparam name="OutType">The type of the downloaded items.</typeparam>
public interface IDownloadStage<OutType> {
    /// <summary>Requests that downloaded items be saved under <paramref name="dir"/>.</summary>
    /// <param name="dir">The target directory.</param>
    IDownloadStage<OutType> SaveToDirectory(string dir);

    /// <summary>Requests that downloaded items be saved to the given files.</summary>
    /// <param name="files">The target file paths.</param>
    IDownloadStage<OutType> SaveToFiles(IEnumerable<string> files);

    /// <summary>Requests that downloaded items be collected into <paramref name="bag"/>.</summary>
    /// <param name="bag">The collection receiving the items.</param>
    IDownloadStage<OutType> SaveToMemory(ConcurrentBag<OutType> bag);

    /// <summary>Synchronous counterpart of <see cref="WaitForDownloadAsync"/>.</summary>
    void WaitForDownload();

    /// <summary>Returns a task representing the wait for the download.</summary>
    Task WaitForDownloadAsync();

    /// <summary>Exposes the download as a <see cref="DownloadEnumerable{OutType}"/>.</summary>
    DownloadEnumerable<OutType> AsAsyncEnumerable();
}
+8
View File
@@ -0,0 +1,8 @@
using Beam.Dynamic;
using Beam.Models;
namespace Beam.Fluent;
/// <summary>
/// Fluent stage at which the transformer is supplied; leads on to the context stage.
/// </summary>
/// <typeparam name="OutType">The type produced by the transformer.</typeparam>
public interface ITransformStage<OutType> {
    /// <summary>
    /// Supplies the asynchronous transformer from <see cref="ByteDocument"/> to
    /// <typeparamref name="OutType"/>.
    /// </summary>
    /// <param name="factory">The transformer to use.</param>
    /// <returns>The context stage for further configuration.</returns>
    IContextStage<OutType> WithTransformer(AsyncTransformer<ByteDocument, OutType> factory);
}
-186
View File
@@ -1,186 +0,0 @@
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
private sealed class ContextStage : IContextStage {
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
private readonly AsyncTransformer<RawType, OutType> _transformer;
private int _parallelism = 4;
private bool _useFragments = false;
private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null;
private StealthAsyncManipulator? _useStealthManipulator = null;
private StealthConfig? _stealthConfig = null;
/// <summary>Creates the stage from the context builder and the transformer.</summary>
/// <param name="ctxBuilder">Builder used to produce the download context.</param>
/// <param name="transformer">Transformer used to select and feed the unit downloader.</param>
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer)
    => (_ctxBuilder, _transformer) = (ctxBuilder, transformer);
/// <summary>Runs <paramref name="configure"/> against the stage's context builder.</summary>
/// <param name="configure">Callback that receives the context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
    configure.Invoke(_ctxBuilder);
    return this;
}
/// <summary>Sets the degree of parallelism; values below 1 are raised to 1.</summary>
/// <param name="degree">The requested degree of parallelism.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithParallelism(int degree) {
    _parallelism = degree < 1 ? 1 : degree;
    return this;
}
/// <summary>Forwards the timeout to the context builder.</summary>
/// <param name="timeout">The timeout to apply.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithTimeout(TimeSpan timeout)
{
    _ctxBuilder.WithTimeOut(timeout);

    return this;
}
/// <summary>Forwards the retry reporter to the context builder.</summary>
/// <param name="reporter">The progress sink for retry reports.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithRetryReporter(IProgress<RetryReport> reporter)
{
    _ctxBuilder.WithRetryReporter(reporter);

    return this;
}
/// <summary>
/// Switches to the fragmented strategy, which downloads multiple links in parallel.
/// This strategy is mutually exclusive with
/// <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>, so any Playwright manipulator
/// set earlier is discarded.
/// </summary>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseFragments() {
    _usePlaywrightManipulator = null;
    _useFragments = true;
    return this;
}
/// <summary>
/// Uses a puppet browser to download the links. This strategy is mutually exclusive
/// with <see cref="UseFragments"/>: fragment mode is switched off and any stealth
/// manipulator set earlier is discarded.
/// </summary>
/// <param name="manipulator">The page manipulator.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
    _useStealthManipulator = null;
    _useFragments = false;
    _usePlaywrightManipulator = manipulator;
    return this;
}
/// <summary>
/// Uses the stealth strategy. Any Playwright manipulator set earlier is discarded; the
/// fragment setting is left untouched.
/// </summary>
/// <param name="manipulator">The stealth manipulator.</param>
/// <param name="config">The stealth configuration.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
    _usePlaywrightManipulator = null;
    _stealthConfig = config;
    _useStealthManipulator = manipulator;
    return this;
}
/// <summary>
/// Picks the unit downloader from the fragment flag, the stealth and Playwright
/// manipulators, and the runtime types of the transformer and the failure predicates.
/// Arms are evaluated top to bottom, so their order is significant.
/// </summary>
/// <param name="context">Context supplying the web/HTTP clients, logger and failure predicates.</param>
/// <returns>
/// The downloader typed as <see cref="object"/>; the caller casts it to the
/// unit-downloader interface it expects.
/// </returns>
/// <exception cref="Exception">
/// No arm matches the current combination, or a stealth arm is selected while no stealth
/// config was supplied.
/// </exception>
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
    // Every stealth arm needs the config; fail with one shared message when it is absent.
    StealthConfig RequireStealthConfig()
        => _stealthConfig ?? throw new Exception("Stealth config is null");

    return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch {
        // ──────────────── fragmented HTML ────────────────
        (true, null, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new UnitFragmentDownloader<OutType>(
                context.Web,
                asyncHtmlTransformer,
                documentFailurePredicates,
                _parallelism,
                context.DownloadLogger),
        // ──────────────── fragmented binary ────────────────
        (true, null, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new UnitFragmentDownloaderBinary<OutType>(
                context.Client,
                asyncBinaryTransformer,
                responseFailurePredicates,
                _parallelism,
                context.DownloadLogger),
        // ──────────────── single HTML ────────────────
        (false, null, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new UnitDownloader<OutType>(
                context.Web,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single binary ────────────────
        (false, null, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new UnitDownloaderBinary<OutType>(
                context.Client,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── single playwright binary ────────────────
        (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new PlaywrightUnitDownloader<OutType>(
                context.Client,
                manipulator,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── single playwright HTML ────────────────
        (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new PlaywrightUnitPageDownloader<OutType>(
                context.Web,
                manipulator,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single stealth HTML ────────────────
        (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new StealthUnitPageDownloader<OutType>(
                context.Web,
                RequireStealthConfig(),
                manipulator,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single stealth binary ────────────────
        (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new StealthUnitDownloader<OutType>(
                context.Client,
                RequireStealthConfig(),
                manipulator,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── fragment stealth HTML ────────────────
        // The predicates only take part in the match; they are not passed to the downloader.
        (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] downloadFailurePredicates)
            => new StealthFragmentPageDownloader<OutType>(
                context.Web,
                RequireStealthConfig(),
                manipulator,
                asyncHtmlTransformer),
        // ──────────────── fragment stealth binary ────────────────
        // The predicates only take part in the match; they are not passed to the downloader.
        (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new StealthFragmentDownloader<OutType>(
                context.Client,
                RequireStealthConfig(),
                manipulator,
                asyncBinaryTransformer),
        _ => throw new Exception($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
    };
}
/// <summary>
/// Creates the sequential downloader for the current fragment setting and exposes it as
/// an enumerator of ordered results.
/// </summary>
/// <param name="context">The built download context; a rebuilt copy is handed to the downloader.</param>
/// <returns>An async enumerator yielding the ordered download results.</returns>
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
    var snapshot = context.CreateBuilder().Build();

    if (_useFragments) {
        var fragmented = new SequentialFragmentDownloader<RawType, OutType>(
            snapshot,
            ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
            context.DownloadLogger);
        return fragmented.UnwrapFragmented();
    }

    var single = new SequentialDownloader<RawType, OutType>(
        snapshot,
        ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
        context.DownloadLogger);
    return single.WrapOrdered();
}
/// <summary>
/// Builds the download context and wraps the resulting downloader in a
/// <see cref="DownloadEnumerable{OutType}"/>.
/// </summary>
/// <returns>The enumerable over the ordered download results.</returns>
public DownloadEnumerable<OutType> Build()
    => new DownloadEnumerable<OutType>(ConstructDownloader(_ctxBuilder.Build()));
}
}
}
@@ -1,43 +0,0 @@
using System.Collections.Concurrent;
using System.Text.Json;
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Download stage that wraps a download stream and can attach a save-to-directory
        /// step to it. Saving to files or memory and waiting for completion are not
        /// implemented yet.
        /// </summary>
        private sealed class DownloadStage(DownloadEnumerable<OutType> download) : IDownloadStage {
            // Current stream; SaveToDirectory replaces it with a wrapping stream.
            private IAsyncEnumerable<Ordered<OutType>> _download = download;

            /// <summary>Exposes the current stream as a <see cref="DownloadEnumerable{OutType}"/>.</summary>
            public DownloadEnumerable<OutType> AsAsyncEnumerable() {
                return new DownloadEnumerable<OutType>(_download.GetAsyncEnumerator());
            }

            /// <summary>
            /// Passes every item through unchanged while writing it as JSON into
            /// <paramref name="dir"/>, which is created if needed.
            /// </summary>
            /// <param name="dir">Directory receiving one JSON file per item.</param>
            private async IAsyncEnumerable<Ordered<OutType>> _SaveToDirectory(string dir) {
                Directory.CreateDirectory(dir);
                await foreach (var item in _download) {
                    // A random name avoids collisions; the item's order is kept in the file name.
                    var path = Path.Combine(dir, $"{Path.GetRandomFileName()}.{item.Order}.json");
                    // Serialize the downloaded item itself. The previous code serialized the
                    // directory path, so each file contained only that path string.
                    // NOTE(review): the whole ordered wrapper is written — confirm whether
                    // only the wrapped payload is wanted.
                    await System.IO.File.WriteAllTextAsync(path, JsonSerializer.Serialize(item));
                    yield return item;
                }
            }

            /// <summary>Attaches the save-to-directory step to the stream.</summary>
            /// <param name="dir">Directory receiving one JSON file per item.</param>
            /// <returns>This stage, for chaining.</returns>
            public IDownloadStage SaveToDirectory(string dir) {
                _download = _SaveToDirectory(dir);
                return this;
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public IDownloadStage SaveToFiles(IEnumerable<string> files) {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag) {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public void WaitForDownload() {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public Task WaitForDownloadAsync() {
                throw new NotImplementedException();
            }
        }
    }
}
@@ -1,7 +0,0 @@
namespace Beam.Fluent
{
    public static partial class DownloadBuilder<RawType, OutType>
    {
        /// <summary>
        /// Fluent stage at which the source links are supplied.
        /// </summary>
        public interface IAlternativeLinkStage
        {
            /// <summary>Supplies the links to download.</summary>
            /// <param name="links">The source links.</param>
            /// <returns>The stage at which the transformer is chosen.</returns>
            IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
        }
    }
}
@@ -1,10 +0,0 @@
namespace Beam.Fluent
{
    public static partial class DownloadBuilder<RawType, OutType>
    {
        /// <summary>
        /// Fluent stage at which the transformer from <typeparamref name="RawType"/> to
        /// <typeparamref name="OutType"/> is supplied.
        /// </summary>
        public interface IAlternativeTransformStage
        {
            /// <summary>Supplies an asynchronous transformer.</summary>
            /// <param name="transformer">The asynchronous transformer.</param>
            /// <returns>The context stage for further configuration.</returns>
            IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);

            /// <summary>
            /// Supplies a synchronous transformer; its result is wrapped in a completed
            /// task and passed to the asynchronous overload.
            /// </summary>
            /// <param name="transformer">The synchronous transformer.</param>
            /// <returns>The context stage for further configuration.</returns>
            IContextStage WithTransformer(Func<RawType, OutType> transformer)
                => WithTransformer(raw => Task.FromResult(transformer(raw)));
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More