From 02fd8b4eda45cc33c530441f8ed3dad30266c86c Mon Sep 17 00:00:00 2001 From: knarfeh Date: Tue, 21 Jun 2016 20:38:45 +0800 Subject: [PATCH] Fix bug of talkpython.fm --- README.md | 48 +++++++++--------- README_cn.md => README_en.md | 50 +++++++++---------- notes/TODOlist.md | 1 + src/container/initialbook.py | 2 +- .../content/talkpython_article.py | 22 +++++--- src/main.py | 6 +-- src/url_parser.py | 2 +- 7 files changed, 69 insertions(+), 62 deletions(-) rename README_cn.md => README_en.md (60%) diff --git a/README.md b/README.md index 948839d..8712741 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,14 @@ # EE-Book -[![Join the chat at https://gitter.im/knarfeh/EE-Book](https://badges.gitter.im/knarfeh/EE-Book.svg)](https://gitter.im/knarfeh/EE-Book?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[中文](./README.md) | [English](./README_en.md) -[中文](./README_cn.md) | [English](./README.md) +[EE-Book](https://github.com/knarfeh/EE-Book) 是一个命令行程序,它可以从网络上爬取内容制作成EPub格式电子书。 -*Note*: Due to personal reason, there will be no big changes in the next two months. --2016.04.19 +[网页版](http://ee-book.org) -[EE-Book](https://github.com/knarfeh/EE-Book) is a command-line utility to downlaod text from the Web, and make it a e-book. GUI is under developing. +## 支持的网站 (正在更新中!) -## Supported Sites (UPDATING!) - -| website | home page | supported type | +| 名称 | 主页 | 支持类型 | | :------ | ---------------------------------------- | ---------------------------------------- | | 知乎 | [www.zhihu.com](http://www.zhihu.com) | **问题:** `zhihu.com/question/{question_id}`
**答案:** `zhihu.com/question/{question_id}/answer/{answer_id}`
**话题:** `zhihu.com/topic/{topic_id}`
**用户的全部回答:** `zhihu.com/people/{people_id}` or `zhihu.com/people/{people_id}/answers`
**收藏夹:** `zhihu.com/collection/{collection_id}`
**专栏:** `zhuanlan.zhihu.com/{zhuanlan_id}` | | 简书 | [www.jianshu.com](http://www.jianshu.com) | **用户的所有文章:** `jianshu.com/users/{people_id}/latest_articles`
**专题:** `jianshu.com/collection/{collection_id}`
**文集:** `jianshu.com/notebooks/{notebooks_id}/latest` or `jianshu.com/notebooks/{notebooks_id}/top` | @@ -18,56 +16,57 @@ | 新浪博客 | [blog.sina.com.cn](http://blog.sina.com.cn/) | **用户的所有文章:** `blog.csdn.net/{people_id}` | | 博客园 | [www.cnblogs.com/](http://www.cnblogs.com/) | **用户的所有文章:** `cnblogs.com/{people_id}/` | | 易百教程 | [www.yiibai.com](http://www.yiibai.com/) | **某个教程的文章:** `yiibai.com/{tutorial_kind}`| -| Talk Python To Me | [www.talkpython.fm](https://www.talkpython.fm)| **transcripts of [「Talk Python To Me」](https://www.talkpython.fm):** `https://talkpython.fm/episodes/all`| +| Talk Python To Me | [www.talkpython.fm](https://www.talkpython.fm)| **[「Talk Python To Me」](https://www.talkpython.fm)的文稿:** `https://talkpython.fm/episodes/all/`| -## Usage +## 用法 -get help info: +获得帮助信息: ```bash $ python ee-book -h ``` -for example: +举个例子: ```bash $ python ee-book -u jianshu.com/users/b1dd2b2c87a8/latest_articles ``` -after a while, you will get the e-book: +稍等片刻, 你就可以得到电子书了: ![directory](http://7xi5vu.com1.z0.glb.clouddn.com/2016-03-09directory.png) ![scheme](http://7xi5vu.com1.z0.glb.clouddn.com/2016-03-09Scheme.png) -## Contributing -...would be awesome! 
+## 贡献代码 +...当然欢迎 -### requirements +### 解决依赖 * [Beautiful Soup 4](http://www.crummy.com/software/BeautifulSoup/) * [lxml](http://lxml.de/) - * [PyQt4](https://www.riverbankcomputing.com/software/pyqt/download) + * ~ [PyQt4](https://www.riverbankcomputing.com/software/pyqt/download) ~ + +### 搭建 EE-Book 的开发环境 -### Setting up a development environment for EE-Book ```bash $ pip install -r requirements.txt ``` -[install pyqt4](https://riverbankcomputing.com/software/pyqt/download/) +[安装 pyqt4](https://riverbankcomputing.com/software/pyqt/download/) -### [TODO List](./doc/TODOlist.md) +### [TODO List](./notes/TODOlist.md) -## Relevant Infomation +## 相关信息 -* previous [README](https://github.com/knarfeh/EE-Book/blob/c4d870ff8cca6bbac97f04c9da727397cee8d519/README.md) +* 之前版本的 [README](https://github.com/knarfeh/EE-Book/blob/c4d870ff8cca6bbac97f04c9da727397cee8d519/README.md) -* An [article](http://knarfeh.github.io/2016/03/17/EE-Book/) posted on [v2ex](https://v2ex.com/) +* 发在[v2ex](https://v2ex.com/)的一篇[文章](http://knarfeh.github.io/2016/03/17/EE-Book/) -## Thanks +## 感谢 * [知乎助手](https://github.com/YaoZeyuan/ZhihuHelp) * [calibre](https://github.com/kovidgoyal/calibre) @@ -75,4 +74,5 @@ $ pip install -r requirements.txt ## License -EE-Book is licensed under the terms of [MIT license](./LICENSE). \ No newline at end of file +[MIT license](./LICENSE). 
+ diff --git a/README_cn.md b/README_en.md similarity index 60% rename from README_cn.md rename to README_en.md index 7ac31aa..307afec 100644 --- a/README_cn.md +++ b/README_en.md @@ -1,14 +1,16 @@ # EE-Book -[中文](./README_cn.md) | [English](./README.md) +[![Join the chat at https://gitter.im/knarfeh/EE-Book](https://badges.gitter.im/knarfeh/EE-Book.svg)](https://gitter.im/knarfeh/EE-Book?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -*注意*: 由于个人原因,这一两个月只做一些小的修补 --2016.04.19 +[中文](./README.md) | [English](./README_en.md) + +[EE-Book](https://github.com/knarfeh/EE-Book) is a command-line utility to download text from the Web, and make it an e-book. -[EE-Book](https://github.com/knarfeh/EE-Book) 是一个命令行程序,它可以从网络上爬取内容制作成EPub格式电子书。图形界面正在开发中。 +[Web version](http://ee-book.org) -## 支持的网站 (正在更新中!) +## Supported Sites (UPDATING!) -| 名称 | 主页 | 支持类型 | +| website | home page | supported type | | :------ | ---------------------------------------- | ---------------------------------------- | | 知乎 | [www.zhihu.com](http://www.zhihu.com) | **问题:** `zhihu.com/question/{question_id}`
**答案:** `zhihu.com/question/{question_id}/answer/{answer_id}`
**话题:** `zhihu.com/topic/{topic_id}`
**用户的全部回答:** `zhihu.com/people/{people_id}` or `zhihu.com/people/{people_id}/answers`
**收藏夹:** `zhihu.com/collection/{collection_id}`
**专栏:** `zhuanlan.zhihu.com/{zhuanlan_id}` | | 简书 | [www.jianshu.com](http://www.jianshu.com) | **用户的所有文章:** `jianshu.com/users/{people_id}/latest_articles`
**专题:** `jianshu.com/collection/{collection_id}`
**文集:** `jianshu.com/notebooks/{notebooks_id}/latest` or `jianshu.com/notebooks/{notebooks_id}/top` | @@ -16,63 +18,61 @@ | 新浪博客 | [blog.sina.com.cn](http://blog.sina.com.cn/) | **用户的所有文章:** `blog.csdn.net/{people_id}` | | 博客园 | [www.cnblogs.com/](http://www.cnblogs.com/) | **用户的所有文章:** `cnblogs.com/{people_id}/` | | 易百教程 | [www.yiibai.com](http://www.yiibai.com/) | **某个教程的文章:** `yiibai.com/{tutorial_kind}`| -| Talk Python To Me | [www.talkpython.fm](https://www.talkpython.fm)| **[「Talk Python To Me」](https://www.talkpython.fm)的文稿:** `https://talkpython.fm/episodes/all`| +| Talk Python To Me | [www.talkpython.fm](https://www.talkpython.fm)| **transcripts of [「Talk Python To Me」](https://www.talkpython.fm):** `https://talkpython.fm/episodes/all/`| -## 用法 +## Usage -获得帮助信息: +get help info: ```bash $ python ee-book -h ``` -举个例子: +for example: ```bash $ python ee-book -u jianshu.com/users/b1dd2b2c87a8/latest_articles ``` -稍等片刻, 你就可以得到电子书了: +after a while, you will get the e-book: ![directory](http://7xi5vu.com1.z0.glb.clouddn.com/2016-03-09directory.png) ![scheme](http://7xi5vu.com1.z0.glb.clouddn.com/2016-03-09Scheme.png) +## Contributing -## 贡献代码 -...当然欢迎! +...will be welcomed. 
-### 解决依赖 +### requirements * [Beautiful Soup 4](http://www.crummy.com/software/BeautifulSoup/) * [lxml](http://lxml.de/) - * [PyQt4](https://www.riverbankcomputing.com/software/pyqt/download) - -### 建立 EE-Book 的开发环境 + * ~ [PyQt4](https://www.riverbankcomputing.com/software/pyqt/download) ~ +### Setting up a development environment for EE-Book ```bash $ pip install -r requirements.txt ``` -[安装 pyqt4](https://riverbankcomputing.com/software/pyqt/download/) +[install pyqt4](https://riverbankcomputing.com/software/pyqt/download/) -### [TODO List](./doc/TODOlist.md) +### [TODO List](./notes/TODOlist.md) -## 相关信息 +## Relevant Information -* 之前版本的 [README](https://github.com/knarfeh/EE-Book/blob/c4d870ff8cca6bbac97f04c9da727397cee8d519/README.md) +* previous [README](https://github.com/knarfeh/EE-Book/blob/c4d870ff8cca6bbac97f04c9da727397cee8d519/README.md) -* 发在[v2ex](https://v2ex.com/)的一篇[文章](http://knarfeh.github.io/2016/03/17/EE-Book/) +* An [article](http://knarfeh.github.io/2016/03/17/EE-Book/) posted on [v2ex](https://v2ex.com/) -## 感谢 +## Thanks * [知乎助手](https://github.com/YaoZeyuan/ZhihuHelp) * [calibre](https://github.com/kovidgoyal/calibre) * [you-get](https://github.com/soimort/you-get) -## 软件版权许可证 - -EE-Book 遵循 [MIT license](./LICENSE). +## License +[MIT license](./LICENSE). 
\ No newline at end of file diff --git a/notes/TODOlist.md b/notes/TODOlist.md index 3371b99..119388f 100644 --- a/notes/TODOlist.md +++ b/notes/TODOlist.md @@ -25,6 +25,7 @@ * 豆瓣日记 * medium * stackoverflow +* gank.io log: diff --git a/src/container/initialbook.py b/src/container/initialbook.py index 2da5cfa..313cc17 100644 --- a/src/container/initialbook.py +++ b/src/container/initialbook.py @@ -37,7 +37,7 @@ def __init__(self): def __init__(self): self.kind = '' - self.author_id = 0 + self.author_id = 0 self.sql = InitialBook.Sql() self.epub = InitialBook.Epub() self.info = {} diff --git a/src/lib/talkpython_parser/content/talkpython_article.py b/src/lib/talkpython_parser/content/talkpython_article.py index 9cda376..6e94991 100644 --- a/src/lib/talkpython_parser/content/talkpython_article.py +++ b/src/lib/talkpython_parser/content/talkpython_article.py @@ -34,8 +34,7 @@ def parse_author_id(self): self.info['author_id'] = 'https://talkpython.fm/episodes/all/' def parse_article_id(self): - - from src.tools.extra_tools import ExtraTools + from ....tools.extra_tools import ExtraTools article_id = ExtraTools.md5(self.info['title']) self.info['article_id'] = article_id @@ -43,14 +42,21 @@ def parse_author_name(self): self.info['author_name'] = self.info['author_id'] def parse_article_title(self): - title = self.dom.select('div.col-md-12 div h1')[0].get_text().replace(' ', '').replace('\n', '').split('#') - title = title[1] - title = title.replace(' ', '_').replace(':', '') - self.info['title'] = title + try: + title = self.dom.select('div.col-md-12 div h1')[0].get_text().replace(' ', '').replace('\n', '').split('#') + + title = title[1] + title = title.replace(' ', '_').replace(':', '') + self.info['title'] = title + except IndexError: + self.info['title'] = u"NO TITLE!!!!!!!!!" 
def parse_answer_content(self): - content = self.dom.select('div.transcript-main div.large-content-text')[0] - self.info['content'] = str(content) + try: + content = self.dom.select('div.transcript-main div.large-content-text')[0] + self.info['content'] = str(content) + except IndexError: + self.info['content'] = u"NO CONTENT!!!!" def parse_publish_date(self): # self.info['publish_date'] = u'TODO' diff --git a/src/main.py b/src/main.py index 21ed585..1fa054c 100644 --- a/src/main.py +++ b/src/main.py @@ -103,9 +103,9 @@ def create_book(command, counter): Debug.logger.info(u"Ready to make No.{} e-book".format(counter)) Debug.logger.info(u"Analyzes {} ".format(command)) task_package = UrlParser.get_task(command) # 分析命令 - if not task_package.is_work_list_empty(): - worker_factory(task_package.work_list) # 执行抓取程序 - Debug.logger.info(u"Complete fetching from web") + # if not task_package.is_work_list_empty(): + # worker_factory(task_package.work_list) # 执行抓取程序 + # Debug.logger.info(u"Complete fetching from web") file_name_set = None if not task_package.is_book_list_empty(): diff --git a/src/url_parser.py b/src/url_parser.py index 8389a3d..fe71814 100644 --- a/src/url_parser.py +++ b/src/url_parser.py @@ -288,7 +288,7 @@ def parse_generic(command): :param command: :return: """ - from src.tools.type import Type + from .tools.type import Type task = SingleTask() for command_type in Type.type_list: result = getattr(Match, command_type)(command)