diff --git a/spider/crawler.py b/spider/crawler.py index c650bbb..46b7a7b 100755 --- a/spider/crawler.py +++ b/spider/crawler.py @@ -345,6 +345,7 @@ def getPostDetailInfoDict(self, postUrl): commentUserNameTagList = postComment.find_all('a', attrs={"href":postDetailInfoDict['postAuthorUrl'], 'class':''}) print len(commentUserNameTagList) commentUserNameList = commentUserNameTagList#map(lambda tag: tag.a, commentUserNameTagList) + commentContentList = re.findall('

(.*)

', str(postContent)) userNameAndCommentContentList = map(lambda name, comment: (name, comment), commentUserNameList, commentContentList)