Skip to content

Commit

Permalink
fix(route): bloomberg api 404 error (#12834)
Browse files Browse the repository at this point in the history
* fix bloomberg api 404 error

* fix(route): AP News时区调整 (#12842)

* feat(route): add 爱思想专题 (#12845)

* chore: bring back lint-staged (#12844)

* chore: bring back lint-staged

* fix: gha label

* fix: yml format

* newsletter  ok

* forget to remove debug

* fix(route): qidian chapter (#12850)

* fix(route): qidian chapter

* fix: chapter and forum author

refs:
#414
#501
#732

* fix(route): agefans (#12851)

* feat(route): add V2rayShare (#12843)

* add V2rayShare

* style: auto format

* add V2rayShare

* style: auto format

* Update docs/other.md

* Update lib/v2/v2rayshare/maintainer.js

* Update index.js

* style: auto format

* Update lib/v2/v2rayshare/index.js

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* chore(deps): bump fluxninja/openai-pr-reviewer from 0 to 1 (#12852)

* chore(deps): bump fluxninja/openai-pr-reviewer from 0 to 1

Bumps [fluxninja/openai-pr-reviewer](https://github.com/fluxninja/openai-pr-reviewer) from 0 to 1.
- [Release notes](https://github.com/fluxninja/openai-pr-reviewer/releases)
- [Commits](coderabbitai/ai-pr-reviewer@v0...v1)

---
updated-dependencies:
- dependency-name: fluxninja/openai-pr-reviewer
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* fix: don't relabel for `dependabot[bot]`

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* fix: twitter keyword

* feat(route): 支持上海市文旅局审批公告 (#12862)

* feat(route): 支持上海市文旅局审批公告

* fix(route): wgj

* fix(route): currentUrl

* fix(route): search params

* feat: increase the timeout time for the fulltext_mode test

* feat: support reverse proxy

* style: auto format

* fix: typo

* test: fix empty request error

* feat: bypass reverse proxy for requests with cookies

* test: fix empty request error and remove node 16 support

* fix: reverse proxy fails for some requests

* feat: exclude some requests for reverse proxy

* feat: expand the scope of retries and set proxy strategy

* fix: logger.debug

* fix(route/twitter): mixed media incomplete (#12863)

* fix(route/twitter): media t.co links not purged (#12864)

* Add fix for bloomberg newsletter

* Fix deepscan error

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Andvari <31068367+dzx-dzx@users.noreply.github.com>
Co-authored-by: Ethan Shen <42264778+nczitzk@users.noreply.github.com>
Co-authored-by: 钛白 <86600901+77taibai@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: DIYgod <i@diygod.me>
Co-authored-by: GideonSenku <39037656+GideonSenku@users.noreply.github.com>
Co-authored-by: Rongrong <i@rong.moe>
Co-authored-by: Chenfei Xu <chenfxu@ebay.com>
  • Loading branch information
10 people authored Jul 24, 2023
1 parent a832e97 commit 1f640b0
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 4 deletions.
181 changes: 181 additions & 0 deletions lib/v2/bloomberg/react-renderer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
const art = require('art-template');
const path = require('path');
const { processVideo } = require('./utils');

/**
 * Renderers for the node types of a Bloomberg rich-text document, keyed by `node.type`.
 *
 * Each renderer is invoked as `(node, nextNode, obj)`:
 * - `node`     - the document node to render;
 * - `nextNode` - callback that turns an array of child nodes into an HTML string;
 * - `obj`      - position info passed by the caller (no renderer in this table reads it).
 *
 * Renderers return an HTML string; '' means "render nothing".
 */
const nodeRenderers = {
    paragraph: (node, nextNode) => `<p>${nextNode(node.content)}</p>`,

    // Wraps the text in <em> and/or <strong>; <strong> is always the outer tag.
    text: (node) => {
        const { attributes: attr, value: val } = node;
        let html = val;
        if (attr?.emphasis) {
            html = `<em>${html}</em>`;
        }
        if (attr?.strong) {
            html = `<strong>${html}</strong>`;
        }
        return html;
    },

    'inline-newsletter': (node, nextNode) => `<div>${nextNode(node.content)}</div>`,

    heading: (node, nextNode) => {
        const level = node.data?.level;
        // Only level-2 and level-3 headings are rendered (both as <h3>); other levels produce no output.
        return level === 2 || level === 3 ? `<h3>${nextNode(node.content)}</h3>` : '';
    },

    // Links to the web destination when present, otherwise maps a `bbg://news/stories/…` URI
    // to the public terminal URL; falls back to the bare link text.
    link: (node, nextNode) => {
        const { destination, title } = node.data ?? {};
        const inner = String(nextNode(node.content));
        // Omit the attribute entirely instead of emitting title="undefined".
        const titleAttr = title ? ` title="${title}"` : '';

        if (destination?.web) {
            return `<a href="${destination.web}"${titleAttr} target="_blank">${inner}</a>`;
        }

        const bbg = destination?.bbg;
        if (typeof bbg === 'string' && bbg.startsWith('bbg://news/stories')) {
            const storyId = bbg.split('bbg://news/stories/').pop();
            return `<a href="https://www.bloomberg.com/news/terminal/${storyId}"${titleAttr} target="_blank">${inner}</a>`;
        }
        return inner;
    },

    // Entities are rendered as links where a target can be derived (story, security);
    // everything else (including `person`) is rendered as its plain content.
    entity: (node, nextNode) => {
        const inner = String(nextNode(node.content));
        const data = node.data ?? {};

        if (node.subType === 'story') {
            const web = data.link?.destination?.web;
            if (web) {
                return `<a href="${web}" target="_blank">${inner}</a>`;
            }
            const suid = data.story?.identifiers?.suid;
            if (suid) {
                return `<a href="https://www.bloomberg.com/news/terminal/${suid}" target="_blank">${inner}</a>`;
            }
            return inner;
        }

        if (node.subType === 'security') {
            const parsekey = data.security?.identifiers?.parsekey;
            if (typeof parsekey === 'string') {
                // The first two space-separated tokens of the parsekey form the quote path "<first>:<second>".
                const [first, second] = parsekey.split(' ');
                if (first && second) {
                    return `<a href="https://www.bloomberg.com/quote/${first}:${second}" target="_blank">${inner}</a>`;
                }
            }
        }
        return inner;
    },

    br: () => `<br/>`,
    hr: () => `<br/>`,
    ad: () => '',
    blockquote: (node, nextNode) => `<blockquote>${nextNode(node.content)}</blockquote>`,
    quote: (node, nextNode) => `<blockquote>${nextNode(node.content)}</blockquote>`,
    aside: (node, nextNode) => `<aside>${nextNode(node.content)}</aside>`,

    list: (node, nextNode) => {
        if (node.subType === 'unordered') {
            return `<ul>${nextNode(node.content)}</ul>`;
        }
        if (node.subType === 'ordered') {
            return `<ol>${nextNode(node.content)}</ol>`;
        }
        // Unknown list sub-types produce no output.
        return '';
    },

    listItem: (node, nextNode) => `<li>${nextNode(node.content)}</li>`,

    // Charts, photos, videos and audio; anything unrecognised renders as ''.
    media: (node) => {
        const subType = node.subType;
        const data = node.data ?? {};

        if (subType === 'chart' && data.attachment) {
            const { attachment, chart: chartData } = data;

            if (attachment.creator === 'TOASTER') {
                const chart = {
                    source: attachment.source,
                    footnote: attachment.footnote,
                    url: attachment.url,
                    title: attachment.title,
                    subtitle: attachment.subtitle,
                    chartId: `toaster-chart-${chartData?.id || ''}`,
                    chartAlt: chartData?.alt || '',
                    fallback: chartData?.fallback || '',
                };
                return art(path.join(__dirname, 'templates/chart_media.art'), { chart });
            }

            const image = {
                alt: attachment.footnote || '',
                // Missing parts are skipped so the caption never contains the text "undefined".
                caption: `${attachment.title ?? ''}${attachment.subtitle ?? ''}`,
                credit: attachment.source || '',
                src: chartData?.fallback || '',
            };
            return art(path.join(__dirname, 'templates/image_figure.art'), image);
        }

        if (subType === 'photo') {
            let img = '';
            if (data.attachment) {
                const image = {
                    src: data.photo?.src,
                    alt: data.photo?.alt,
                    caption: data.photo?.caption,
                    credit: data.photo?.credit,
                };
                img = art(path.join(__dirname, 'templates/image_figure.art'), image);
            }
            const href = data.link?.destination?.web;
            return href ? `<a href="${href}" target="_blank">${img}</a>` : img;
        }

        if (subType === 'video') {
            // NOTE(review): `processVideo` is imported from './utils', which itself requires this
            // module (circular require), and utils.js awaits its result elsewhere. Confirm the
            // binding is defined at call time and that it yields a string rather than a promise.
            return processVideo(data.attachment?.id);
        }

        if (subType === 'audio' && data.attachment) {
            const { title, url, image } = data.attachment;
            if (title && url) {
                const audio = {
                    src: url,
                    img: image,
                    caption: title,
                    credit: '',
                };
                return art(path.join(__dirname, 'templates/audio_media.art'), audio);
            }
        }
        return '';
    },
};

/**
 * Render a single document node by dispatching on `node.type`.
 *
 * @param {object} node - Document node; `node.type` selects the renderer in `nodeRenderers`.
 * @param {object} obj - Position info forwarded unchanged as the renderer's third argument.
 * @returns {string} The renderer's output, or a `<node>TYPE</node>` placeholder when no
 *   renderer is registered for the type.
 */
const nodeToHtmlString = (node, obj) => {
    const nextNode = (nodes) => nodeListToHtmlString(nodes);
    const type = node.type;
    // Own-property check: a plain `nodeRenderers[type]` lookup would also match inherited
    // members such as `toString` or `constructor` and call them as if they were renderers.
    const hasRenderer = Boolean(type) && Object.prototype.hasOwnProperty.call(nodeRenderers, type) && typeof nodeRenderers[type] === 'function';
    if (!hasRenderer) {
        return `<node>${type}</node>`;
    }
    return nodeRenderers[type](node, nextNode, obj);
};

/**
 * Render a list of sibling nodes and concatenate the results.
 *
 * Each node is rendered with its index and the types of its previous/next siblings.
 *
 * @param {object[]} [nodes] - Sibling nodes; anything that is not an array (e.g. a node
 *   without `content`) renders as ''.
 * @returns {string} Concatenated HTML of all nodes.
 */
const nodeListToHtmlString = (nodes) => {
    // Nodes without children reach this function with `undefined`; render nothing instead of throwing.
    if (!Array.isArray(nodes)) {
        return '';
    }
    const rendered = nodes.map((current, position) => {
        const siblings = {
            index: position,
            prev: nodes[position - 1]?.type,
            next: nodes[position + 1]?.type,
        };
        return nodeToHtmlString(current, siblings);
    });
    // join('') renders undefined/null entries as empty strings.
    return rendered.join('');
};

/**
 * Render a whole document to an HTML string.
 *
 * @param {object} document - Document whose `content` holds the top-level nodes.
 * @returns {string} Rendered HTML, or '' when the document or its `content` is missing.
 */
const documentToHtmlString = (document) => {
    const topLevelNodes = document?.content;
    return topLevelNodes ? nodeListToHtmlString(topLevelNodes) : '';
};

// Public API: only the document-level entry point is exported; the renderer table and
// the per-node helpers stay private to this module.
module.exports = {
documentToHtmlString,
};
21 changes: 21 additions & 0 deletions lib/v2/bloomberg/templates/chart_media.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<figure>
{{if chart.title}}
{{chart.title}}
{{/if}}
{{if chart.subtitle}}
<p>{{chart.subtitle}}</p>
{{/if}}
<noscript>
<img src="{{ chart.fallback }}" alt="{{ chart.chartAlt }}" loading="lazy" style="display:block; margin-left:auto; margin-right:auto; width:100%;" />
</noscript>
<iframe id="{{chart.chartId}}" title="{{chart.title}}" referrerpolicy="no-referrer" width=100% height=150vh frameborder=0 marginheight=0 marginwidth=0
loading="lazy" scrolling="no" style="border:0; margin:0; padding:0; width:100%; height:150vh;" src="{{ chart.url }}" ></iframe>
{{if chart.source}}
<figcaption>
<div class="source">{{@ chart.source }}</div>
{{if chart.footnote}}
<p>{{chart.footnote}}</p>
{{/if}}
</figcaption>
{{/if}}
</figure>
41 changes: 37 additions & 4 deletions lib/v2/bloomberg/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ const { parseDate } = require('@/utils/parse-date');
const got = require('@/utils/got');
const { art } = require('@/utils/render');

const { documentToHtmlString } = require('./react-renderer');

const rootUrl = 'https://www.bloomberg.com/feeds';
const sel = 'script[data-component-props="ArticleBody"], script[data-component-props="FeatureBody"]';
const apiEndpoints = {
Expand All @@ -27,7 +29,7 @@ const apiEndpoints = {
},
newsletters: {
url: 'https://www.bloomberg.com/news/newsletters/',
sel,
sel: 'script#__NEXT_DATA__',
},
'photo-essays': {
url: 'https://www.bloomberg.com/javelin/api/photo-essay_transporter/',
Expand Down Expand Up @@ -104,9 +106,9 @@ const parseArticle = (item, ctx) =>
}
}

// Blocked by PX3, return the default
// Blocked by PX3, or the API responded with 404: return the default item
const redirectUrls = res.redirectUrls.map(String);
if (redirectUrls.some((r) => new URL(r).pathname === '/tosv2.html')) {
if (redirectUrls.some((r) => new URL(r).pathname === '/tosv2.html') || res.statusCode === 404) {
return {
title: item.title,
link: item.link,
Expand All @@ -123,6 +125,8 @@ const parseArticle = (item, ctx) =>
return parsePhotoEssaysPage(res, api, item);
case 'features/': // single features page
return parseFeaturePage(res, api, item);
case 'newsletters':
return parseNewsletterPage(res, api, item);
default:
return parseOtherPage(res, api, item);
}
Expand All @@ -131,6 +135,27 @@ const parseArticle = (item, ctx) =>
return item;
});

/**
 * Build an RSS item from a Bloomberg newsletter page.
 *
 * The story is read from the JSON embedded in the element matched by `api.sel`
 * (`props.pageProps.story`).
 *
 * @param {object} res - HTTP response; `res.data` is the page HTML.
 * @param {object} api - Endpoint config; `api.sel` selects the element holding the JSON.
 * @param {object} item - Feed item used as fallback for title, link and pubDate.
 * @returns {Promise<object>} The RSS item.
 */
const parseNewsletterPage = async (res, api, item) => {
    const newsletter_json = JSON.parse(cheerio.load(res.data)(api.sel).html()).props.pageProps;
    const story_json = newsletter_json.story;
    // Prefer the lede image, otherwise the first image attachment (if any).
    const media_img = story_json.ledeImageUrl || Object.values(story_json.imageAttachments ?? {})[0]?.baseUrl;
    // Always a string: a missing author list yields '' rather than a (truthy) empty array.
    const author = (story_json.authors ?? []).map((a) => a.name).join(', ');

    const rss_item = {
        title: story_json.headline || item.title,
        link: story_json.url || item.link,
        guid: `bloomberg:${story_json.id}`,
        description: processHeadline(story_json) + (await processLedeMedia(story_json)) + documentToHtmlString(story_json.body || ''),
        pubDate: parseDate(story_json.publishedAt) || item.pubDate,
        author,
        category: story_json.mostRelevantTags ?? [],
        media: {
            content: { url: media_img },
            thumbnails: { url: media_img },
        },
    };
    return rss_item;
};

const parseAudioPage = async (res, api, item) => {
const audio_json = JSON.parse(cheerio.load(res.data)(api.sel).html()).props.pageProps;
const episode = audio_json.episode;
Expand Down Expand Up @@ -258,7 +283,7 @@ const parseOtherPage = async function (res, api, item) {
};

/**
 * Build the headline HTML for a story: the dek (falling back to the summary) followed by
 * the abstract entries as an unordered list.
 *
 * @param {object} story_json - Story data; reads `dek`, `summary` and `abstract`.
 * @returns {string} Headline HTML, or '' when none of the fields are present.
 */
const processHeadline = (story_json) => {
    // Single declaration: the pre-change `dek` line left over from the diff is dropped.
    const dek = story_json.dek || story_json.summary || '';
    const abs = story_json.abstract?.map((a) => `<li>${a}</li>`).join('');
    return abs ? `${dek}<ul>${abs}</ul>` : dek;
};
Expand All @@ -276,6 +301,13 @@ const processLedeMedia = async (story_json) => {
video: kind === 'video' && (await processVideo(story_json.ledeAttachment.bmmrId)),
};
return art(path.join(__dirname, 'templates/lede_media.art'), { media });
} else if (story_json.lede) {
const lede = story_json.lede;
const image = {
src: lede.url,
alt: lede.alt || lede.title,
};
return art(path.join(__dirname, 'templates/image_figure.art'), image);
} else if (story_json.imageAttachments) {
const attachment = Object.values(story_json.imageAttachments)[0];
if (attachment) {
Expand Down Expand Up @@ -404,4 +436,5 @@ module.exports = {
asyncPoolAll,
parseNewsList,
parseArticle,
processVideo,
};

0 comments on commit 1f640b0

Please sign in to comment.