Skip to content

Commit

Permalink
refactor: facebook page (DIYgod#4293)
Browse files Browse the repository at this point in the history
  • Loading branch information
aa900031 authored Apr 2, 2020
1 parent 0984970 commit 2392452
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 38 deletions.
21 changes: 0 additions & 21 deletions lib/routes/facebook/article.js

This file was deleted.

138 changes: 121 additions & 17 deletions lib/routes/facebook/page.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,135 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const getArticle = require('./article');

/**
 * Retrieve the HTML of a page on mbasic.facebook.com through the supplied cache.
 *
 * @param {string} linkPath - Path (plus optional query string) appended to `https://mbasic.facebook.com`.
 * @param {string} cacheKey - Key handed to `cache.tryGet`.
 * @param {object} cache - Object exposing `tryGet(key, producer)`.
 * @returns {Promise<string>} Whatever `cache.tryGet` resolves to; the producer yields the response body.
 */
const fetchPageHtml = async (linkPath, cacheKey, cache) => {
    const target = `https://mbasic.facebook.com${linkPath}`;

    // Producer handed to the cache; presumably only invoked on a cache miss — confirm against the cache implementation.
    const download = async () => {
        const response = await got.get(target);
        return response.data;
    };

    return cache.tryGet(cacheKey, download);
};

/**
 * Load a story page from mbasic.facebook.com and extract its title, text and linked photos.
 *
 * The `story_fbid` and `id` query parameters of `linkPath` are used both for the cache key
 * and to build the returned `www.facebook.com` story URL.
 *
 * @param {string} linkPath - Story path including its query string (e.g. `/story.php?story_fbid=…&id=…`).
 * @param {object} cache - Cache object forwarded to `fetchPageHtml` / `parsePhotoPage`.
 * @returns {Promise<{url: string, title: string, content: string, images: object[]}>}
 *   `images` holds one `parsePhotoPage` result per linked anchor that has an `href`.
 */
const parseStoryPage = async (linkPath, cache) => {
    const { searchParams: q } = new URL('https://mbasic.facebook.com' + linkPath);
    const storyFbId = q.get('story_fbid');
    const storyId = q.get('id');
    const cacheKey = `story/${storyFbId}/${storyId}`;

    const html = await fetchPageHtml(linkPath, cacheKey, cache);
    const $ = cheerio.load(html);

    const title = $('h3 strong a').first().text();
    const url = `https://www.facebook.com/story.php?story_fbid=${storyFbId}&id=${storyId}`;
    const $story = $('#m_story_permalink_view').first();
    const $content = $story.find('div > div > div > div.cc');

    // Turn <br> into newlines before reading text so line breaks survive `.text()`.
    const $paragraphs = $content.find('p');
    let content;
    if ($paragraphs.length === 0) {
        $content.find('br').replaceWith('\n');
        content = $content.text();
    } else {
        $paragraphs.find('br').replaceWith('\n');
        content = $paragraphs
            .toArray()
            .map((p) => $(p).text())
            .join('\n');
    }

    // `.attr('href')` is undefined for anchors without an href; drop those so we never
    // request a bogus `https://mbasic.facebook.comundefined` URL.
    const imageLinks = $story
        .find('div > div > div > div > div > a')
        .toArray()
        .map((a) => $(a).attr('href'))
        .filter(Boolean);
    const images = await Promise.all(imageLinks.map((imageLink) => parsePhotoPage(imageLink, cache)));

    return {
        url,
        title,
        content,
        images,
    };
};

/**
 * Load a photo page from mbasic.facebook.com and pull out its title, caption and image link.
 *
 * @param {string} linkPath - Photo path (query string allowed); only its pathname is used
 *   for the cache key and for the returned `www.facebook.com` URL.
 * @param {object} cache - Cache object forwarded to `fetchPageHtml`.
 * @returns {Promise<{title: string, url: string, content: string, image: (string|undefined)}>}
 *   `image` is the `href` of the matched attachment anchor, or undefined when nothing matches.
 */
const parsePhotoPage = async (linkPath, cache) => {
    const photoPath = new URL('https://mbasic.facebook.com' + linkPath).pathname;
    const markup = await fetchPageHtml(linkPath, `photos${photoPath}`, cache);
    const $ = cheerio.load(markup);

    const heading = $('#MPhotoContent div.msg > a > strong').first().text();

    // Swap <br> for newlines so the caption keeps its line breaks once flattened to text.
    const $caption = $('#MPhotoContent div.msg > div');
    $caption.find('br').replaceWith('\n');
    const caption = $caption.text();

    const imageHref = $('#MPhotoContent div.desc.attachment > span > div > span > a[target=_blank].sec').attr('href');

    return {
        title: heading,
        url: `https://www.facebook.com${photoPath}`,
        content: caption,
        image: imageHref,
    };
};

module.exports = async (ctx) => {
const { id } = ctx.params;
const { data: html } = await got.get(`https://mbasic.facebook.com/${encodeURIComponent(id)}/`);
const pageId = encodeURIComponent(id);
const linkPath = `/${pageId}`;

const html = await fetchPageHtml(linkPath, pageId, ctx.cache);
const $ = cheerio.load(html);

const itemLinks = $('div[role=article] > div:nth-child(2) > div:nth-child(2)')
.toArray()
.map((div) =>
$(div)
.find('a')
.last()
.attr('href')
);
const items = await Promise.all(
itemLinks.map(async (itemLink) => {
if (new RegExp(`^/.+/photos/`).test(itemLink)) {
const data = await parsePhotoPage(itemLink, ctx.cache);
return {
title: data.title,
link: data.url,
description: `<img src="${data.image}"><br>${data.content.replace(/\n/g, '<br>')}`,
};
}
if (new RegExp(`^/story.php`).test(itemLink)) {
const data = await parseStoryPage(itemLink, ctx.cache);
const isSingleImageStory = data.images.length === 1;
const isEmptyImageList = data.images.length === 0;

let desc = '';
desc += data.images.map((image) => `<img src="${image.image}"><br>${image.content.replace(/\n/g, '<br>')}`).join('<br>');
if (!isSingleImageStory) {
!isEmptyImageList && (desc += '<br>');
desc += data.content.replace(/\n/g, '<br>');
}

return {
title: data.title,
link: data.url,
description: desc,
};
}
})
);

ctx.state.data = {
title: $('#m-timeline-cover-section h1 span').text(),
link: `https://www.facebook.com/${encodeURIComponent(id)}/`,
link: `https://www.facebook.com/${pageId}`,
description: $('#sub_profile_pic_content>div>div:nth-child(3) div>span')
.find('br')
.replaceWith('\n')
.text(),
item: await Promise.all(
$('div[role=article]>div:nth-child(2)>div:nth-child(2)>span+a')
.toArray()
.map((x) => $(x).attr('href'))
.map(async (url) => {
const d = await getArticle('https://mbasic.facebook.com' + url);
return {
title: d.title,
description: d.imgs.map((u) => `<img src="${u}">`).join('<br>') + '<br>' + d.content.replace(/\n/g, '<br>'),
link: d.url,
};
})
),
item: items.filter((item) => !!item),
};
return $;
};

0 comments on commit 2392452

Please sign in to comment.