a7d148a96f
Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
230 lines
11 KiB
C#
230 lines
11 KiB
C#
using aeqw89.PersistentData;
|
|
using aeqw89.DataKeys;
|
|
using Beam.Dynamic;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Logging;
|
|
using System.Text.Json;
|
|
using System.Text.Json.Serialization;
|
|
using System.Text.Json.Serialization.Metadata;
|
|
using Beam.Temporary.Cli.Templates.Classic;
|
|
using Beam.Exports;
|
|
using System.Diagnostics;
|
|
using Beam.Models;
|
|
using Beam.Stealth;
|
|
|
|
namespace Beam.Temporary.Cli {
|
|
/// <summary>
/// Entry point of the temporary Beam command-line harness. It seeds the shared
/// Beam data context, then performs a single stealth page download as a smoke test.
/// </summary>
internal class Program {

    /// <summary>
    /// JSON options handed to the persistent data context; the required converters and
    /// indented output are configured at the start of <see cref="Main"/>.
    /// </summary>
    public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();

    /// <summary>
    /// The Beam data context in use. Starts empty and is replaced in <see cref="Main"/>
    /// with the instance owned by the shared data dictionary context.
    /// </summary>
    public static BeamDataContext BeamData { get; set; } = [];

    /// <summary>Architecture used by the (currently disabled) document experiments below.</summary>
    public static IArchitecture Architecture = IArchitecture.Default;

    /// <summary>Path passed to the data dictionary context as its backing store.</summary>
    const string BeamDataPath = "data/.dat";

    /// <summary>
    /// Rebuilds the shared data context from the static novel definitions and classic
    /// templates, force-saves it, downloads one test page through the stealth downloader,
    /// logs and prints the outcome, then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not read.</param>
    static async Task Main(string[] args) {
        ConversionOptions.Converters.AddPersistentDataRequiredConverters();
        ConversionOptions.WriteIndented = true;

        // Declared first so it is disposed LAST (using declarations unwind in reverse
        // order): the data context and stealth config can still log while they are torn
        // down, and the console provider gets a chance to flush before the process exits.
        using var lf = LoggerFactory.Create((x) => x
            .AddConsole()
            .SetMinimumLevel(LogLevel.Trace)
        );

        ILogger logger = lf.CreateLogger("Program");

        await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
            BeamDataPath,
            false,
            DataKind.Shared,
            logger,
            ConversionOptions
        );

        BeamData = sharedContext.Data;

        // Start from a clean slate and re-register every known definition on each run.
        BeamData.Clear();
        NovelStatics.Define_YeBiQuge(BeamData);
        NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
        NovelStatics.Define_KuaiShu5(BeamData);
        NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
        ClassicTemplates.Register(BeamData);

        await sharedContext.ForceSave();
        BeamData = sharedContext.Data; // need to refresh instance after forced save!

        // NOTE(review): the meaning of the positional arguments (true, null, 2 minutes)
        // is defined by StealthConfig.Create, which is not visible here - confirm there.
        using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
        var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
            return Task.CompletedTask;
        }, x => Task.FromResult(x));

        var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
        if (success) {
            logger.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
        } else {
            logger.LogError("Failed to download!");
        }

        // Prints an empty line when no document came back.
        Console.WriteLine(result?.DocumentNode.OuterHtml);

        // ------------------------------------------------------------------
        // Disabled experiments, kept for reference.
        // ------------------------------------------------------------------

        //var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");

        //var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
        //    .WithLink()
        //    .WithTransformer(CommonTransformers.TableOfContentsTransformer)
        //    .Configure((x) => x
        //        .WithDownloadLogger(logger)
        //        .WithRetryReporter(new Progress<RetryReport>())
        //        .WithTimeOut(TimeSpan.FromSeconds(15)))
        //    .Build()
        //    .FirstAsync();

        //if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
        //    Debugger.Break();

        //var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
        //    .WithLinks(metadata2.Data.PagesLinks)
        //    .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
        //    .Configure(x => x
        //        .WithDownloadLogger(logger)
        //        .WithRetryReporter(new Progress<RetryReport>())
        //        .WithTimeOut(TimeSpan.FromSeconds(15)))
        //    .Build();

        //var links = (await pageLinks
        //    .ToListAsync())
        //    .Where(x => x?.Data?.ContentLinks is not null)
        //    .SelectMany(x => x.Data.ContentLinks!)
        //    .DistinctBy(x => x.Link.AbsoluteUri);

        //var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
        //    .WithLinks(links)
        //    .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
        //    .Configure(x => x
        //        .WithDownloadLogger(logger)
        //        .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
        //        .WithTimeOut(TimeSpan.FromSeconds(15)))
        //    .WithParallelism(4)
        //    .UseFragments()
        //    .Build();

        //HashSet<Ordered<StringDocument>> downloaded = [];
        //try {
        //    await foreach (var download in downloader) {
        //        logger?.LogInformation("Downloaded chapter with order={}", download.Order);
        //        try {
        //            downloaded.Add(download);
        //        } catch (Exception e) {
        //            logger?.LogError(e, "Unknown error occurred");
        //        }
        //    }
        //} catch (Exception e) {
        //    logger?.LogError(e, "Uncaught error detected!");
        //} finally {
        //    logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
        //    try {
        //        string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
        //        System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
        //    } catch (Exception e) {
        //        logger?.LogInformation(e, "Failed to serialize chapters");
        //    }
        //}

        //var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
        //    .WithRange(1..5)
        //    .WithLinkGenerator()
        //    .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
        //    .Configure((x) => x
        //        .WithDownloadLogger(logger)
        //        .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
        //        .WithTimeOut(TimeSpan.FromSeconds(15))
        //    )
        //    .Build();

        //List<Task<Ordered<IDocument>>> translationTasks = [];
        //List<Ordered<IDocument>> documents = [];

        //await foreach (var download in downloader2.Take(10)) {
        //    if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
        //        continue;
        //    if (meta is not ArticleData articleMetaData)
        //        continue;
        //    if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
        //        continue;
        //    // NOTE(review): the check below tests `meta`, not `bookmeta` - likely a typo to fix before re-enabling.
        //    if (meta is not ArticleData bookMetaData)
        //        continue;
        //    //Console.WriteLine($"Title: {data.Name}");
        //    //Console.WriteLine($"Description: {data.Description}");
        //    //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
        //    //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
        //    Console.WriteLine($"Chapter title: {articleMetaData.Name}");
        //    Console.WriteLine($"Book title: {bookMetaData.Name}");
        //    //Console.WriteLine($"Content: {download}");

        //    //translationTasks.Add(Task.Run(async () => {
        //    //    logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
        //    //    var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
        //    //    logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
        //    //    return ret;
        //    //}));
        //}

        //documents = (await Task.WhenAll(translationTasks)).ToList();

        //string testDir = Path.Combine("txt", Path.GetRandomFileName());
        //Directory.CreateDirectory(testDir);

        //int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
        //foreach (var document in documents.OrderBy((x) => x.Order)) {
        //    document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
        //    Dictionary<string, string> linkButtons = new();
        //    if (document.Order != 0)
        //        linkButtons.Add("Previous", $"{document.Order - 1}.html");
        //    if (document.Order != len)
        //        linkButtons.Add("Next", $"{document.Order + 1}.html");
        //    new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
        //}

        // Keep the console window open until the user presses a key.
        Console.ReadKey();

        //foreach (var download in documents.OrderBy((x) => x.Order)) {
        //    if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
        //        Console.WriteLine($"{download.Order}:{meta.Name}");
        //}

        //string[] templates = new DataKey<File>[] {
        //    HtmlBook.Keys.ContentPage,
        //    HtmlBook.Keys.NoContentPage,
        //    HtmlBook.Keys.TitlePage,
        //    HtmlBook.Keys.StylesPage,
        //}.Select(
        //    (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
        //).ToArray();

        //HtmlBook book = new(
        //    bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
        //    new CssData(),
        //    new ArticleData(),
        //    new HtmlBookTemplates() {
        //        ContentPageTemplate = templates[0],
        //        NoContentTemplate = templates[1],
        //        TitlePageTemplate = templates[2],
        //        CssTemplate = templates[3],
        //    },
        //    documents: documents.Select((x) => x.Data).ToList()
        //);

        //book.Update();
        //Console.WriteLine("One variable!");
    }
}
|
|
}
|