-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
20 changed files
with
23,176 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import pymysql.cursors | ||
from tools import HttpApi | ||
import json | ||
import logging | ||
import time | ||
# Open the MySQL connection shared by every insert/query helper in this script.
# NOTE(review): the credentials are hard-coded; consider loading them from
# configuration instead.
_DB_SETTINGS = dict(
    host='localhost',
    user='root',
    password='123456',
    db='finance',
    charset='utf8mb4',
)
connection = pymysql.connect(**_DB_SETTINGS)
|
||
|
||
def insert(args):
    """Insert one row of financial indicators into ``finance_rp``.

    Args:
        args: Sequence of 35 values in the column order of the statement
            below (``stock_code`` first, then ``date``, then the indicators).

    Any failure is logged together with its traceback and is not re-raised,
    so a batch crawl keeps going after a bad row. The shared module-level
    connection is left open.
    """
    sql = "INSERT INTO finance_rp (stock_code, date, jbmgsy, kfmgsy, xsmgsy, mgjzc,mggjj, mgwfply,mgjyxjl, yyzsr, mlr, gsjlr,kfjlr ,yyzsrtbzz, gsjlrtbzz, kfjlrtbzz, yyzsrgdhbzz, gsjlrgdhbzz, kfjlrgdhbzz, jqjzcsyl, tbjzcsyl, tbzzcsyl, mll, jll, sjsl, yskyysr, xsxjlyysr, jyxjlyysr, zzczzy, yszkzzts, chzzts, zcfzl, ldzczfz, ldbl, sdbl) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, args)
        # The connection is not autocommit by default, so the insert must be
        # committed explicitly to be saved.
        connection.commit()
    except Exception:
        # Catch Exception rather than everything so Ctrl-C and SystemExit
        # still stop the crawl; logging.exception records the traceback.
        logging.exception("error insert")
|
||
|
||
def query(idarg):
    """Fetch one ``finance_rp`` row by its ``id``, print it and return it.

    Args:
        idarg: Value compared against the ``id`` column.

    Returns:
        The row returned by ``cursor.fetchone()`` (``None`` when no row
        matches).

    The shared module-level connection is left open, like the other helpers
    in this file, so later queries and inserts keep working.
    """
    # Identifiers must be backtick-quoted: in MySQL single quotes denote
    # string literals, so 'finance_rp' is not a valid table reference and
    # 'id'=%s would compare a constant string instead of the column.
    sql = "SELECT * FROM `finance_rp` WHERE `id`=%s"
    with connection.cursor() as cursor:
        cursor.execute(sql, (idarg,))
        result = cursor.fetchone()
    print(result)
    return result
|
||
def queryStocks():
    """Return all ``(code, name)`` rows of the ``stocks`` table.

    The shared module-level connection is not closed.
    """
    statement = "SELECT code,name FROM stocks"
    with connection.cursor() as cur:
        cur.execute(statement)
        rows = cur.fetchall()
    return rows
|
||
|
||
# Indicator fields copied from every API record, in ``finance_rp`` column
# order (``stock_code`` is prepended separately).
_FINANCE_FIELDS = (
    'date', 'jbmgsy', 'kfmgsy', 'xsmgsy', 'mgjzc', 'mggjj', 'mgwfply', 'mgjyxjl',
    'yyzsr', 'mlr', 'gsjlr', 'kfjlr', 'yyzsrtbzz', 'gsjlrtbzz', 'kfjlrtbzz',
    'yyzsrgdhbzz', 'gsjlrgdhbzz', 'kfjlrgdhbzz', 'jqjzcsyl', 'tbjzcsyl',
    'tbzzcsyl', 'mll', 'jll', 'sjsl', 'yskyysr', 'xsxjlyysr', 'jyxjlyysr',
    'zzczzy', 'yszkzzts', 'chzzts', 'zcfzl', 'ldzczfz', 'ldbl', 'sdbl',
)


def _crawFinanceRows(code, report_type):
    """Download the indicator records of one stock and insert each as a row.

    Args:
        code: Stock code string; its first digit selects the market prefix.
        report_type: Value placed in the ``type`` query parameter.
    """
    # Codes whose first digit is 0-3 are requested with the "SZ" prefix,
    # every other code with "SH".
    market = "SZ" if int(code[0:1]) <= 3 else "SH"
    url = (u'http://emweb.securities.eastmoney.com/NewFinanceAnalysis/'
           u'MainTargetAjax?ctype=4&type=' + report_type + u'&code=' + market + code)
    content = HttpApi.httpGet2(url)
    if content is None:
        # httpGet2 can return None; skip this stock rather than fail while
        # iterating over it.
        logging.error("null content:" + code)
        return
    for data in content:
        line = [str(code)] + [str(data[key]) for key in _FINANCE_FIELDS]
        insert(tuple(line))


def crawFinance2DbYear(code):
    """Crawl the by-year indicators (``type=1``) of one stock into the database.

    Args:
        code: Stock code string, e.g. ``"000651"``.
    """
    _crawFinanceRows(code, '1')


def crawFinance2Db(code):
    """Crawl the ``type=0`` indicators of one stock into the database.

    Identical to ``crawFinance2DbYear`` except for the ``type`` query value.
    NOTE(review): presumably ``type=0`` selects report-period rather than
    by-year data - confirm against the API.

    Args:
        code: Stock code string, e.g. ``"000651"``.
    """
    _crawFinanceRows(code, '0')
|
||
if __name__ == '__main__':
    # Load the (code, name) pairs known to the database, e.g.
    # (('000001', '平安银行'), ('000002', '万 科A')).
    stocks = queryStocks()
    codes = [row[0] for row in stocks]

    # Crawl the indicators of each stock in turn, pausing briefly before
    # every request.
    count = 0
    for count, stockname in enumerate(stocks, start=1):
        time.sleep(0.1)
        crawFinance2Db(stockname[0])
        logging.debug("-------------crawFinance2Db-end------------"+stockname[0]+" count:"+str(count))

    print(codes)
    logging.debug("--------------end------------")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# 导入必要模块 | ||
import pandas as pd | ||
from sqlalchemy import create_engine | ||
from sqlalchemy import VARCHAR | ||
import json as json | ||
from pandas.core.frame import DataFrame | ||
from tools import HttpApi | ||
import json | ||
import logging | ||
import time | ||
import pymysql.cursors | ||
|
||
# SQLAlchemy engine (pymysql driver) handed to the pandas to_sql() exports:
# user root, port 3306, database finance.
# NOTE(review): this URL and the pymysql connection below use different root
# passwords for the same local server - confirm which one is current.
engine = create_engine('mysql+pymysql://root:123456@localhost:3306/finance')

# Raw pymysql connection shared by the insert/query helpers.
_CONNECT_KWARGS = {
    'host': 'localhost',
    'user': 'root',
    'password': 'dai693122',
    'db': 'finance',
    'charset': 'utf8mb4',
}
connection = pymysql.connect(**_CONNECT_KWARGS)
|
||
def makeTopNode(jnode, allNodes, industryName):
    """Flatten a two-level industry tree into rows appended to ``allNodes``.

    Each entry of ``jnode`` is interpreted by its length:

    * 3 elements: a top-level node with its name at index 0 and its code at
      index 2.
    * 4 elements: a top-level node with its name at index 0, its children at
      index 1 and its code at index 3. Every child that is a 3-element list
      (name at index 0, code at index 2) becomes a level-1 row.

    Every row has the form
    ``[code, name, level, parent_code, parent_name, industryName]``;
    top-level rows use ``"-1"`` for both parent fields.

    Entries shorter than 3 elements cannot carry a code, so they are skipped
    with a warning. Entries longer than 4 elements and children of any other
    shape are ignored.

    Args:
        jnode: Iterable of node entries as described above.
        allNodes: List that receives the generated rows (mutated in place).
        industryName: Classification label stored in the last column.
    """
    top_level = 0
    for node in jnode:
        size = len(node)
        if size < 3:
            logging.warning("skip malformed industry node: %r", node)
            continue
        if size == 3:
            code, name, children = node[2], node[0], ()
        elif size == 4:
            code, name, children = node[3], node[0], node[1]
        else:
            continue
        allNodes.append([code, name, top_level, "-1", "-1", industryName])
        for child in children:
            if isinstance(child, list) and len(child) == 3:
                allNodes.append(
                    [child[2], child[0], top_level + 1, code, name, industryName])
|
||
# Location of the saved Sina configuration dump (machine-specific).
_SINA_CONFIG_PATH = "/Users/admin/Desktop/doc/finance/multifactor/data/industry/sina_config_data.txt"

_INDUSTRY_COLUMNS = ['indcode', 'indname', 'level', 'par_indcode', 'par_indname', 'classname']


def _saveIndustryClass(classIndex, className):
    """Append one classification subtree of the Sina dump to ``industry``.

    Args:
        classIndex: Position of the classification inside
            ``ldict[1][0][1]`` of the parsed dump.
        className: Label stored in the ``classname`` column of every row.
    """
    # assumes the dump is UTF-8 encoded - TODO confirm
    with open(_SINA_CONFIG_PATH, 'r', encoding='utf-8') as f:
        # The dump escapes single quotes as \', which JSON does not allow.
        configstr = f.read().replace("\\'", "'")
    ldict = json.loads(configstr)
    allNodes = []
    makeTopNode(ldict[1][0][1][classIndex][1], allNodes, className)
    # Passing the column names to the constructor also works when no node
    # was produced; assigning ``columns`` afterwards fails on an empty frame.
    pdind = DataFrame(allNodes, columns=_INDUSTRY_COLUMNS)
    pdind.to_sql('industry', engine, if_exists='append')


def save2DB2():
    """Store the "申万二级" (Shenwan level-2) classification in ``industry``."""
    _saveIndustryClass(2, "申万二级")


def save2DB():
    """Store the "热门概念" (hot concepts) classification in ``industry``."""
    _saveIndustryClass(3, "热门概念")
|
||
|
||
import traceback


def insert(args):
    """Insert one ``(code, name, industry, industry_code, pe)`` row into ``stocks``.

    Args:
        args: Sequence of the five column values, in that order.

    A failure prints its traceback and is not re-raised, so a batch crawl
    keeps going after a bad row. The shared module-level connection is left
    open.
    """
    sql = "INSERT INTO stocks (code, name, industry, industry_code, pe) VALUES (%s, %s, %s, %s, %s)"
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, args)
        # The connection is not autocommit by default, so the insert must be
        # committed explicitly to be saved.
        connection.commit()
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it is
        # called directly rather than wrapped in print().
        traceback.print_exc()
|
||
|
||
|
||
def queryIndustrys():
    """Return all ``(indcode, indname)`` rows of ``industry``, ordered by code.

    The shared module-level connection is not closed.
    """
    statement = "select indcode,indname from industry ORDER BY indcode asc "
    with connection.cursor() as cur:
        cur.execute(statement)
        rows = cur.fetchall()
    return rows
|
||
|
||
|
||
|
||
def crawIndustry2DBForStock(indcode, indname):
    """Fetch the stocks listed under one industry node and insert them.

    Args:
        indcode: Node code sent as the ``node`` query parameter and stored
            as the row's industry code.
        indname: Industry name stored with every inserted stock.

    Logs an error and returns when the HTTP helper yields ``None``.
    """
    url = (u'http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=1&num=1000&sort=symbol&asc=1&node='
           + indcode + '&symbol=&_s_r_a=page')
    records = HttpApi.httpGet2(url)
    if records is None:
        logging.error("null content:"+indcode)
        return
    for record in records:
        # The fifth column (pe) is stored as an empty string.
        row = (str(record['code']), str(record['name']), indname, indcode, "")
        insert(row)
|
||
|
||
def crawIndustryStocks2DB(start_code="chgn_730016", delay=2):
    """Crawl the member stocks of the stored industries into the database.

    Industries are taken from ``queryIndustrys()`` in its returned order.

    Args:
        start_code: Industry code at which crawling begins; every industry
            returned before it is skipped. Pass ``None`` to crawl all
            industries.
        delay: Seconds to sleep before each industry request.
    """
    started = start_code is None
    count = 0
    for ind in queryIndustrys():
        if not started:
            if ind[0] != start_code:
                continue
            started = True
        time.sleep(delay)
        crawIndustry2DBForStock(ind[0], ind[1])
        count += 1
        logging.debug("-------------crawIndustryStocks2DB-end------------"+ind[0]+" count:"+str(count))
    if not started:
        # Report an unknown resume point so an empty run is not silent.
        logging.warning("start code %s not found; nothing was crawled", start_code)
|
||
|
||
def main():
    """Script entry point: run the industry-to-stock crawl."""
    crawIndustryStocks2DB()


if __name__ == '__main__':
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import os

import tushare as ts

print(ts.__version__)

# Directory that receives the CSV exports. Named output_dir so the builtin
# dir() is not shadowed.
output_dir = "/data/finance/"
# to_csv() does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# Export the result of each tushare call to its own file.
stocks = ts.get_stock_basics()
stocks.to_csv(os.path.join(output_dir, "stocks"))

industry = ts.get_industry_classified()
industry.to_csv(os.path.join(output_dir, "industry"))

concepts = ts.get_concept_classified()
concepts.to_csv(os.path.join(output_dir, "concepts"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import pymysql.cursors | ||
# from tools import HttpApi | ||
import json | ||
import logging | ||
import time | ||
# Open the MySQL connection shared by the query helpers in this script.
# NOTE(review): the credentials are hard-coded; consider loading them from
# configuration instead.
_DB_SETTINGS = dict(
    host='localhost',
    user='root',
    password='123456',
    db='finance',
    charset='utf8mb4',
)
connection = pymysql.connect(**_DB_SETTINGS)
|
||
|
||
import requests | ||
# import logging | ||
import json | ||
import demjson | ||
|
||
from selenium import webdriver | ||
# Start the Chrome session used by crawKForcode; the driver binary path is
# machine-specific.
_CHROMEDRIVER_PATH = "/Users/admin/Desktop/soft/chromedriver"
browser = webdriver.Chrome(_CHROMEDRIVER_PATH)
|
||
def crawKForcode(code, filename):
    """Fetch the knowledge-graph page of one stock and save it to a file.

    The page is loaded through the shared ``browser`` session, its backslash
    escapes are expanded, and the resulting text is printed and written to
    ``filename``.

    Args:
        code: Stock code placed in the ``codes`` query parameter.
        filename: Path of the text file to (over)write.
    """
    print("-------------begin------"+code+" ---"+filename)
    url = u'https://www.iwencai.com/diag/block-detail?pid=11666&codes='+code+'&codeType=stock&info={"view":{"nolazy":1,"parseArr":{"_v":"new","dateRange":[],"staying":[],"queryCompare":[],"comparesOfIndex":[]},"asyncParams":{"tid":137}}}'
    browser.get(url)
    source = browser.page_source
    # Expand backslash escapes such as \uXXXX without damaging text that is
    # already non-ASCII: characters above U+00FF are first turned into
    # escapes by backslashreplace, the rest map 1:1 through latin-1, and
    # unicode_escape then decodes everything back. Going through UTF-8
    # instead would decode multi-byte characters as latin-1 mojibake.
    source = source.encode('latin-1', 'backslashreplace').decode('unicode_escape')
    print(source)
    # Explicit encoding so the write does not depend on the platform locale.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(source)
    print("-------------end------")
|
||
|
||
def queryStocks():
    """Return all ``(code, name)`` rows of the ``stocks`` table.

    The shared module-level connection is not closed.
    """
    statement = "SELECT code,name FROM stocks"
    with connection.cursor() as cur:
        cur.execute(statement)
        rows = cur.fetchall()
    return rows
|
||
def queryDStocks():
    """Return the distinct stock codes of ``stocks`` in ascending order.

    Each result row holds a single ``code`` value. The shared module-level
    connection is not closed.
    """
    statement = "SELECT DISTINCT(code) FROM stocks ORDER BY code asc"
    with connection.cursor() as cur:
        cur.execute(statement)
        rows = cur.fetchall()
    return rows
|
||
if __name__ == '__main__':
    import os

    # One row per distinct stock code, ordered by code.
    stocks = queryDStocks()
    codes = [row[0] for row in stocks]
    print(len(codes))

    # Named output_dir so the builtin dir() is not shadowed; created up front
    # because open() does not create missing directories.
    output_dir = "/data/knowledge/"
    os.makedirs(output_dir, exist_ok=True)

    # Save one page per stock, pausing before every request.
    count = 0
    for stockname in stocks:
        time.sleep(0.5)
        crawKForcode(stockname[0], output_dir+stockname[0]+".txt")
        count += 1
        logging.debug("-------------crawKnowledge2Db-end------------"+stockname[0]+" count:"+str(count))

    print(codes)
    logging.debug("--------------end------------")
Oops, something went wrong.