In [1]:
import sys
from flask import Flask, render_template, abort, url_for, session
from flask_sqlalchemy import SQLAlchemy
from flask_script import Manager, Shell
from flask import Flask, render_template, abort, url_for, session
from flask_sqlalchemy import SQLAlchemy
from flask_script import Manager, Shell
from webapp import app, db, config_app, register_blueprints, celery
from webapp.services import db_service as ds,data_service as dts,holder_service as hs
import pandas as pd
from sqlalchemy import *
from pandas.tseries.offsets import QuarterEnd,DateOffset
reload(sys)
sys.setdefaultencoding('utf8')
config_app(app, 'scriptfan.cfg')
ctx = app.app_context()
ctx.push()
manager=Manager(app)
app.log_format = '%(asctime)s %(funcName)s [%(levelname)s] %(message)s'
app.debug = True
In [ ]:
df3 = ds.get_global_data()
df = pd.merge(bdf, df3, how='left', on='code')
df['holder_amt'] = df['t_cap'] * df['rate'] / 100
In [ ]:
code = '300070'
data = hs.getStockHolderFromNet(code)
dts.updateStockHolder(data)
In [6]:
tdf = pd.read_sql_query("select code,trade_date,close,volume,t_cap,m_cap\
from stock_trade_data order by trade_date desc limit 6000", db.engine) #上市股票不足3000家,取两倍数值
tdf.groupby([tdf['code']]).first()
Out[6]:
In [4]:
code = '002235'
df1 = pd.read_sql_query("select * from stock_finance_data where code=%(name)s order by report_type",
db.engine, params={'name': code},index_col=['report_type'])
df2 = pd.read_sql_query("select report_type,jy_net,tz_in_gdtz,tz_out_gdtz,xj_net,qm_xj_ye as xjye\
from xueqiu_finance_cash where code=%(name)s order by report_type",
db.engine, params={'name': code}, index_col=['report_type'])
df3 = pd.read_sql_query("select report_type,ldzc_yszk,ldzc_yfkx as yszk,ldzc_ch as ch\
from xueqiu_finance_asset where code=%(name)s order by report_type",
db.engine, params={'name': code}, index_col=['report_type'])
pd.concat([df1,df2,df3],join='inner',axis=1)
Out[4]:
In [5]:
code = '002235'
df1 = pd.read_sql_query("select * from stock_finance_data where code=%(name)s",
db.engine, params={'name': code},index_col=['report_type'])
df2 = pd.read_sql_query("select report_type,jy_net,tz_in_gdtz,tz_out_gdtz,xj_net,qm_xj_ye as xjye\
from xueqiu_finance_cash where code=%(name)s",
db.engine, params={'name': code}, index_col=['report_type'])
df3 = pd.read_sql_query("select report_type,ldzc_yszk,ldzc_yfkx as yszk,ldzc_ch as ch\
from xueqiu_finance_asset where code=%(name)s",
db.engine, params={'name': code}, index_col=['report_type'])
#pd.concat([df1,df2,df3],join='inner',axis=1)
df1.join([df2,df3])
Out[5]:
In [ ]:
code = '603617'
import re
import urllib2
from bs4 import BeautifulSoup
url = "http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CirculateStockHolder/stockid/" + code + ".phtml"
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
req = urllib2.Request(url=url, headers=headers)
feeddata = urllib2.urlopen(req).read()
#soup = BeautifulSoup(feeddata, "html5lib")
#paper_name = soup.html.body.find(id="CirculateShareholderTable").tbody.find_all('tr')
In [ ]:
code = '300133'
df1 = pd.read_sql_query("select * from stock_finance_data where code=%(name)s",
db.engine, params={'name': code})
In [ ]:
report_date = []
holder_name = []
holder_code = []
amount = []
rate = []
holder_type = []
holder_parent = []
rdate = ''
i = 0
for e in paper_name:
t = e.find_all('td')
s = e.find_all('strong')
if len(s) > 0:
if s[0].string == '截止日期':
rdate = t[1].string
i += 1
if i ==1:
latest_val = rdate
if t[0].div:
if t[0].div.string:
if t[0].div.string.isdigit():
hname = t[1].div.text
rateStr = t[3].div.string
if rateStr:
rateArray = re.findall("^[0-9]*\.?[0-9]{0,2}", rateStr)
rate.append(rateArray[0])
else:
rate.append('0')
In [ ]:
# 获取交易数据
global_tdf = dbs.get_global_trade_data()
# 获取财务数据
global_fdf = dbs.get_global_finance_data()
# 获取财务数据
global_bdf = dbs.get_global_basic_data()
df3 = global_bdf.join([global_fdf, global_tdf], how='left')
df3[['trade_date']] = df3[['trade_date']].apply(pd.to_datetime, errors='ignore') # 转换类型
df = df3.reset_index()
df['pe'] = df['t_cap'] / (df['jlr_ttm'] * 10000)
df['pcf'] = df['t_cap'] / (df['zyysr_ttm'] * 10000)
df['ps'] = df['t_cap'] / (df['jyjxjl_ttm'] * 10000)
df['pb'] = df['t_cap'] / (df['gdqy'] * 10000)
In [ ]:
In [ ]:
code = '002241'
report_date= '2018-06-30'
direction='next'
sql = "select max(report_date) from stock_holder where code=:code";
resultProxy = db.session.execute(text(sql), {'code': code})
_max_date = resultProxy.scalar()
if report_date == '':
if (_max_date == None):
_max_date = dbs.getStock(code).launch_date # 取上市日期
_next_date = pd.to_datetime(_max_date)
else:
_next_date = pd.to_datetime(report_date)
if direction == 'next':
if (_next_date.date()-_max_date).days <= 0:
_next_date = QuarterEnd().rollforward(_next_date + DateOffset(days=1))
elif direction == 'pre':
_next_date = QuarterEnd().rollback(_next_date - DateOffset(days=1))
submit_date = QuarterEnd().rollback(_next_date - DateOffset(days=1))
app.logger.debug('query holder data from ' + submit_date.strftime('%Y-%m-%d') +' to '+ _next_date.strftime('%Y-%m-%d'))
hdf = pd.read_sql_query("select id, code,report_date,holder_type,holder_name,holder_code,rate,amount \
from stock_holder where code=%(name)s and report_date>=%(submit_date)s and report_date<=%(report_date)s \
order by report_date desc", db.engine, \
params={'name': code, 'submit_date': submit_date.strftime('%Y-%m-%d'), 'report_date': _next_date.strftime('%Y-%m-%d')})
In [ ]:
from datetime import datetime
today = datetime.now()
dt_2 = QuarterEnd().rollback(today - DateOffset(years=3))
dt_2
In [ ]:
today = datetime.now()
dt_2 = QuarterEnd().rollback(today - DateOffset(years=3))
submit_date = dt_2.date()
hdf = pd.read_sql_query("select id, code,report_date,holder_type,holder_name,holder_code,rate,amount \
from stock_holder where code=%(name)s and report_date>=%(submit_date)s \
order by report_date asc,rate desc", db.engine, \
params={'name': code, 'submit_date': submit_date.strftime('%Y-%m-%d')})
grouped = hdf.groupby('report_date')
pre_group = pd.DataFrame()
def getValue(x, attri):
d1 = m1_df[m1_df['holder_code'] == x]
v1 = d1.get(attri + '_x')
v2 = d1.get(attri + '_y')
if v1.item() != v1.item(): # 空值判断
return v2.item()
else:
return v1.item()
def countVar(x):
d1 = m1_df[m1_df['holder_code'] == x]
v1 = d1.get('rate_x')
v2 = d1.get('rate_y')
if v1.item() != v1.item(): # 空值判断
return '-'
elif v2.item() != v2.item():
return '+'
elif v1.item() == v2.item():
return '0'
else:
return format(v1.item() - v2.item(), ',')
result=[]
for name, group in grouped:
if not pre_group.empty:
m1_df = pd.merge(group,pre_group,on='holder_code',how='outer')
m2_df = {
'name': m1_df['holder_code'].apply(getValue, args=('holder_name',)),
'code': m1_df['holder_code'],
'amount': m1_df['holder_code'].apply(getValue, args=('amount',)),
'rate': m1_df['holder_code'].apply(getValue, args=('rate',)),
'var': m1_df['holder_code'].apply(countVar)
}
result.append({'report_date': name, 'data': m2_df})
else:
pre_group = group
return result
In [ ]:
grouped.first()
In [ ]:
pre_group = pd.DataFrame()
my_group = pd.DataFrame()
for name, group in grouped:
if not pre_group.empty:
my_group = group
break
else:
pre_group = group
In [ ]:
my_group
In [ ]:
m1_df = pd.merge(my_group,pre_group,on='holder_code',how='outer')
m1_df
In [ ]:
m2_df = pd.DataFrame({
'name': m1_df['holder_code'].apply(getValue, args=('holder_name',)),
'code': m1_df['holder_code'].apply(getValue, args=('holder_code',)),
'report_date': m1_df['holder_code'].apply(getValue, args=('report_date',)),
'amount': m1_df['holder_code'].apply(getValue, args=('amount',)),
'rate': m1_df['holder_code'].apply(getValue, args=('rate',)),
'var': m1_df['holder_code'].apply(countVar)
})
In [ ]:
m2_df = pd.DataFrame({
'name': m1_df['holder_code'].apply(getValue, args=('holder_name',)),
'code': m1_df['holder_code'].apply(getValue, args=('holder_code',)),
'report_date': m1_df['holder_code'].apply(getValue, args=('report_date',)),
'amount': m1_df['holder_code'].apply(getValue, args=('amount',)),
'rate': m1_df['holder_code'].apply(getValue, args=('rate',)),
'var': m1_df['holder_code'].apply(countVar)
})
In [ ]:
def fixHolderName(x):
d1 = hdf[hdf['id'] == x]
v1 = d1.get('holder_code').item()
v2 = d1.get('holder_name').item()
if v1 == None: # 空值判断
return v2
else:
return v1
hdf['holder_name_new'] = hdf['id'].apply(fixHolderName)
t2_df = hdf[:10]
t3_df = hdf[10:20]
m1_df = pd.merge(t2_df, t3_df, how='outer', on='holder_name_new')
def getReportDate(x, attri):
d1 = m1_df[m1_df['holder_name_new'] == x]
v1 = d1.get(attri + '_x')
v2 = d1.get(attri + '_y')
if v1 == None: # 空值判断
return v2
else:
return v1
def getValue(x, attri):
d1 = m1_df[m1_df['holder_name_new'] == x]
v1 = d1.get(attri + '_x')
v2 = d1.get(attri + '_y')
if v1.item() != v1.item(): # 空值判断
return v2.item()
else:
return v1.item()
def countVar(x):
d1 = m1_df[m1_df['holder_name_new'] == x]
v1 = d1.get('rate_x')
v2 = d1.get('rate_y')
if v1.item() != v1.item(): # 空值判断
return '减持'
elif v2.item() != v2.item():
return '新进'
elif v1.item() == v2.item():
return '不变'
else:
return format(v1.item() - v2.item(), ',')
m2_df = pd.DataFrame({
'name': m1_df['holder_name_new'].apply(getValue, args=('holder_name',)),
'code': m1_df['holder_name_new'].apply(getValue, args=('holder_code',)),
'report_date': m1_df['holder_name_new'].apply(getValue, args=('report_date',)),
'amount': m1_df['holder_name_new'].apply(getValue, args=('amount',)),
'rate': m1_df['holder_name_new'].apply(getValue, args=('rate',)),
'var': m1_df['holder_name_new'].apply(countVar)
})
(_next_date.strftime('%Y-%m-%d'),m2_df)
In [ ]:
from datetime import datetime
today = datetime.now()
dt_2 = QuarterEnd().rollback(today - DateOffset(months=18))
dt_2.date()
In [ ]:
from pandas.tseries.offsets import *
today = datetime.now().date()
d = today - pd.DateOffset(months=18)
QuarterEnd().rollback(d)
In [ ]:
pd.date_range(end=datetime.now().date(), periods=5, freq='Q')
In [ ]:
import re
temp = u"想做/ 兼_职/学生_/ 的 、加,我Q: 1 5. 8 0. !!?? 8 6 。0. 2。 3 有,惊,喜,哦"
#string = re.sub(u"[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()]+", "",temp)
string = re.sub(u"[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+", "",temp)
print string
In [ ]:
import re
temp = u'上海重阳战略投资有限公司–重阳战略同智基金'
#string = re.sub(u"[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()]+", "",temp)
string = re.sub(u"[\–\-\-\:\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+", "",temp)
print string
In [ ]:
mkey = u'重阳'
tdf = pd.read_sql_query(
"select sh.holder_name,sh.holder_code,sh.holder_type,sh.report_date from stock_holder sh " \
"where sh.holder_code like %(mkey)s order by report_date desc", db.engine, \
params={'mkey': '%' + mkey + '%'})
gtdf = tdf.groupby(['holder_code'])
bdf = gtdf.first()
bdf['hold_size'] = gtdf.size()
bdf
In [ ]:
hs.getStockHolderFromNet('002241')
In [ ]:
In [ ]:
import re
str = "1230,23"
#re.findall("^[0-9]*\.?[0-9]{0,2}",str)[0]
#re.findall("^([1-9]*.[0-9]{0,2})",str)
re.match("^\-+",str)
In [ ]:
re.search('^([1-9][0-9]*)+(.[0-9]{1,2})*$', str).group(0)
In [ ]:
re.findall("^[0-9]*\.?[0-9]{0,2}", "")
In [ ]:
import threading,multiprocessing
g_queue = multiprocessing.Queue()
def init_queue():
print("init g_queue start")
while not g_queue.empty():
g_queue.get()
for _index in range(10):
g_queue.put(_index)
print("init g_queue end")
return
In [ ]:
g_queue
In [ ]:
import time
# 定义一个IO密集型任务:利用time.sleep()
def task_io(task_id):
print("IOTask[%s] start" % task_id)
while not g_queue.empty():
time.sleep(1)
try:
data = g_queue.get(block=True, timeout=1)
print("IOTask[%s] get data: %s" % (task_id, data))
except Exception as excep:
print("IOTask[%s] error: %s" % (task_id, str(excep)))
print("IOTask[%s] end" % task_id)
return
print("========== 直接执行IO密集型任务 ==========")
init_queue()
time_0 = time.time()
task_io(0)
print("结束", time.time() - time_0, "\n")
print("========== 多线程执行IO密集型任务 ==========")
init_queue()
time_0 = time.time()
thread_list = [threading.Thread(target=task_io, args=(i,)) for i in range(5)]
for t in thread_list:
t.start()
for t in thread_list:
if t.is_alive():
t.join()
print("结束", time.time() - time_0, "\n")
In [ ]:
import time
def simple_generator_function():
i = 0
while i < 50:
i = i +1
time.sleep(5)
yield i
i = 0
for value in simple_generator_function():
print(value)
In [ ]:
#encoding:UTF-8
def yield_test(n):
for i in range(n):
print(">>>>>>>")
yield call(i)
print("i=",i)
def call(i):
return i*2
for i in yield_test(5):
print(i,",")
In [ ]: