Skip to content

Commit

Permalink
Merge pull request stereobooster#353 from ilanbm/link_alternate_scrap
Browse files Browse the repository at this point in the history
Fix stereobooster#293 Crawl `<link rel="alternate">`
  • Loading branch information
stereobooster authored Apr 1, 2019
2 parents d21328d + 0dda17e commit f1a68a6
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/puppeteer_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ const enableLogging = opt => {
const getLinks = async opt => {
const { page } = opt;
const anchors = await page.evaluate(() =>
- Array.from(document.querySelectorAll("a")).map(anchor => {
+ Array.from(document.querySelectorAll("a,link[rel='alternate']")).map(anchor => {
if (anchor.href.baseVal) {
const a = document.createElement("a");
a.href = anchor.href.baseVal;
Expand Down
1 change: 1 addition & 0 deletions tests/examples/many-pages/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

<head>
<meta charset="utf-8">
+ <link rel="alternate" href='/5' />
</head>

<body>
Expand Down
5 changes: 3 additions & 2 deletions tests/run.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,14 @@ describe("many pages", () => {
} = mockFs();
beforeAll(() => snapRun(fs, { source }));
test("crawls all links and saves as index.html in separate folders", () => {
- expect(filesCreated()).toEqual(6);
+ expect(filesCreated()).toEqual(7);
expect(names()).toEqual(
expect.arrayContaining([
`/${source}/1/index.html`, // without slash in the end
`/${source}/2/index.html`, // with slash in the end
`/${source}/3/index.html`, // ignores hash
- `/${source}/4/index.html` // ignores query
+ `/${source}/4/index.html`, // ignores query
+ `/${source}/5/index.html`, // link rel="alternate"
])
);
});
Expand Down

0 comments on commit f1a68a6

Please sign in to comment.