From 5d48a11021b265f5e0d6f0845cc9e2d0b706f857 Mon Sep 17 00:00:00 2001 From: Bilal Durrani Date: Mon, 12 Nov 2018 18:38:57 -0500 Subject: [PATCH] Searching example (#751) --- .../Page.WaitForSelectorAsync.Searching.md | 28 ++++++++ docfx_project/examples/toc.yml | 2 + samples/Samples.sln | 68 ++++++++++--------- samples/searching/Program.cs | 50 ++++++++++++++ samples/searching/README.md | 4 ++ samples/searching/searching.csproj | 13 ++++ 6 files changed, 134 insertions(+), 31 deletions(-) create mode 100644 docfx_project/examples/Page.WaitForSelectorAsync.Searching.md create mode 100644 samples/searching/Program.cs create mode 100644 samples/searching/README.md create mode 100644 samples/searching/searching.csproj diff --git a/docfx_project/examples/Page.WaitForSelectorAsync.Searching.md b/docfx_project/examples/Page.WaitForSelectorAsync.Searching.md new file mode 100644 index 000000000..20e5dc979 --- /dev/null +++ b/docfx_project/examples/Page.WaitForSelectorAsync.Searching.md @@ -0,0 +1,28 @@ +# How to wait for a selector + +_Contributors: [Bilal Durrani](https://bilaldurrani.io/)_ + +## Problem + +You need to wait for a selector to exist before operating on it. + +## Solution + +Use `Page.WaitForSelectorAsync()` to delay execution until the selector is available. + +```cs +using (var browser = await Puppeteer.LaunchAsync(options)) +using (var page = await browser.NewPageAsync()) +{ + await page.GoToAsync("https://developers.google.com/web/"); + // Type into search box. + await page.TypeAsync("#searchbox input", "Headless Chrome"); + + // Wait for suggest overlay to appear and click "show all results". 
+ var allResultsSelector = ".devsite-suggest-all-results"; + await page.WaitForSelectorAsync(allResultsSelector); + await page.ClickAsync(allResultsSelector); + + // continue the operation +} +``` diff --git a/docfx_project/examples/toc.yml b/docfx_project/examples/toc.yml index 3049fa2e4..57949d52d 100644 --- a/docfx_project/examples/toc.yml +++ b/docfx_project/examples/toc.yml @@ -6,4 +6,6 @@ href: Page.EvaluateExpressionAsync.GetAllLinks.md - name: How to map complex javascript objects to .NET objects href: Page.EvaluateFunctionAsync.ComplexJSObjects.md + - name: How to wait for a selector + href: Page.WaitForSelectorAsync.Searching.md diff --git a/samples/Samples.sln b/samples/Samples.sln index 3b936fbe2..d791cbe61 100644 --- a/samples/Samples.sln +++ b/samples/Samples.sln @@ -1,31 +1,37 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28010.2046 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "get-all-links", "get-all-links\get-all-links.csproj", "{8A25F29D-EC44-4AA9-86D6-C86A476B85CC}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "complex-js-objects", "complex-js-objects\complex-js-objects.csproj", "{47A6DEE8-5581-472A-91E2-9FC8A2924D70}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Debug|Any CPU.Build.0 = Debug|Any CPU - {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Release|Any CPU.ActiveCfg = Release|Any CPU - {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Release|Any CPU.Build.0 = Release|Any CPU - {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Debug|Any CPU.Build.0 
= Debug|Any CPU - {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Release|Any CPU.ActiveCfg = Release|Any CPU - {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {9813FF07-4856-4880-BB3A-E34F69F783BA} - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28010.2046 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "get-all-links", "get-all-links\get-all-links.csproj", "{8A25F29D-EC44-4AA9-86D6-C86A476B85CC}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "complex-js-objects", "complex-js-objects\complex-js-objects.csproj", "{47A6DEE8-5581-472A-91E2-9FC8A2924D70}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "searching", "searching\searching.csproj", "{DEB4FB4A-3353-46AE-965D-F9D9D2092157}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {8A25F29D-EC44-4AA9-86D6-C86A476B85CC}.Release|Any CPU.Build.0 = Release|Any CPU + {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Debug|Any CPU.Build.0 = Debug|Any CPU + {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Release|Any CPU.ActiveCfg = Release|Any CPU + {47A6DEE8-5581-472A-91E2-9FC8A2924D70}.Release|Any CPU.Build.0 = Release|Any CPU + 
{DEB4FB4A-3353-46AE-965D-F9D9D2092157}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {DEB4FB4A-3353-46AE-965D-F9D9D2092157}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DEB4FB4A-3353-46AE-965D-F9D9D2092157}.Release|Any CPU.ActiveCfg = Release|Any CPU + {DEB4FB4A-3353-46AE-965D-F9D9D2092157}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {9813FF07-4856-4880-BB3A-E34F69F783BA} + EndGlobalSection +EndGlobal diff --git a/samples/searching/Program.cs b/samples/searching/Program.cs new file mode 100644 index 000000000..1e252e882 --- /dev/null +++ b/samples/searching/Program.cs @@ -0,0 +1,50 @@ +using System; +using System.Threading.Tasks; +using PuppeteerSharp; + +namespace Example.Searching +{ + class Program + { + public static async Task Main(string[] args) + { + var options = new LaunchOptions { Headless = true }; + Console.WriteLine("Downloading chromium"); + + await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision); + Console.WriteLine("Navigating to developers.google.com"); + + using (var browser = await Puppeteer.LaunchAsync(options)) + using (var page = await browser.NewPageAsync()) + { + await page.GoToAsync("https://developers.google.com/web/"); + // Type into search box. + await page.TypeAsync("#searchbox input", "Headless Chrome"); + + // Wait for suggest overlay to appear and click "show all results". + var allResultsSelector = ".devsite-suggest-all-results"; + await page.WaitForSelectorAsync(allResultsSelector); + await page.ClickAsync(allResultsSelector); + + // Wait for the results page to load and display the results. 
+ var resultsSelector = ".gsc-results .gsc-thumbnail-inside a.gs-title"; + await page.WaitForSelectorAsync(resultsSelector); + var links = await page.EvaluateFunctionAsync(@"(resultsSelector) => { + const anchors = Array.from(document.querySelectorAll(resultsSelector)); + return anchors.map(anchor => { + const title = anchor.textContent.split('|')[0].trim(); + return `${title} - ${anchor.href}`; + }); +}", resultsSelector); + + foreach (var link in links) + { + Console.WriteLine(link); + } + + Console.WriteLine("Press any key to continue..."); + Console.ReadLine(); + } + } + } +} diff --git a/samples/searching/README.md b/samples/searching/README.md new file mode 100644 index 000000000..3c9e27ed0 --- /dev/null +++ b/samples/searching/README.md @@ -0,0 +1,4 @@ +# Scraping search results from a website + +This example searches developers.google.com/web for articles tagged +"Headless Chrome" and scrapes results from the results page. \ No newline at end of file diff --git a/samples/searching/searching.csproj b/samples/searching/searching.csproj new file mode 100644 index 000000000..d143d73a4 --- /dev/null +++ b/samples/searching/searching.csproj @@ -0,0 +1,13 @@ + + + + Exe + netcoreapp2.1 + 7.1 + + + + + + +