From d23edeb09e10d4d3ed800f43af24b7d4e53dd7e7 Mon Sep 17 00:00:00 2001 From: yuens Date: Tue, 16 Aug 2016 23:58:03 +0800 Subject: [PATCH] =?UTF-8?q?=E6=AD=A3=E5=9C=A8=E8=BF=9B=E8=A1=8C=E3=80=90?= =?UTF-8?q?=E4=BD=9C=E8=80=85=E8=AF=84=E8=AE=BA=E3=80=81=E4=BD=9C=E8=80=85?= =?UTF-8?q?=E8=AF=84=E8=AE=BA=E4=B8=AA=E6=95=B0=E3=80=91=E7=AD=89POST?= =?UTF-8?q?=E8=A1=A8=E7=9A=84=E6=95=B0=E6=8D=AE=E8=8E=B7=E5=8F=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spider/crawler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spider/crawler.py b/spider/crawler.py index c650bbb..46b7a7b 100755 --- a/spider/crawler.py +++ b/spider/crawler.py @@ -345,6 +345,7 @@ def getPostDetailInfoDict(self, postUrl): commentUserNameTagList = postComment.find_all('a', attrs={"href":postDetailInfoDict['postAuthorUrl'], 'class':''}) print len(commentUserNameTagList) commentUserNameList = commentUserNameTagList#map(lambda tag: tag.a, commentUserNameTagList) + commentContentList = re.findall('

(.*)

', str(postContent)) userNameAndCommentContentList = map(lambda name, comment: (name, comment), commentUserNameList, commentContentList)