三、代码实现
#coding=utf-8
import tushare as ts
import talib as ta
import numpy as np
import pandas as pd
import os,time,sys,re,datetime
import csv
import scipy
import re,urllib2
import xlwt
from BeautifulSoup import BeautifulSoup
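# Get the stock list.
# Columns: code = stock code; name = name; industry = industry; area = region;
# pe = price-to-earnings ratio; outstanding = tradable shares;
# totals = total shares (in 10,000s); totalAssets = total assets (in 10,000s);
# liquidAssets = current assets; fixedAssets = fixed assets;
# reserved = capital reserve; reservedPerShare = capital reserve per share;
# eps = earnings per share; bvps = net assets per share;
# pb = price-to-book ratio; timeToMarket = listing date.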
def Get_Stock_List():
    """Return the first two rows of the tushare basic stock table.

    The table comes from ``ts.get_stock_basics()``; only the slice ``[0:2]``
    is handed back, so callers see at most two stocks.
    NOTE(review): the two-row limit looks like a quick-test setting --
    confirm before relying on full-market coverage.
    """
    basics = ts.get_stock_basics()
    first_rows = basics[0:2]
    return first_rows
# Take the leading digit of each financial-statement cell.
def Get_First(inputtext):
    """Return the first non-zero digit of the number written in a cell.

    Args:
        inputtext: Cell text such as ``'1,234.56'`` or ``'-0.05'``.
            Thousands separators (``,``), surrounding whitespace and the
            sign are ignored.

    Returns:
        The leading non-zero digit (1-9) of the number; ``0`` when the cell
        is empty, is the ``'--'`` placeholder, or holds a value without any
        non-zero digit (e.g. ``'0.00'``); ``None`` when the text is not a
        string or cannot be parsed as a number.
    """
    try:
        text = inputtext.replace(',', '').strip()
        if not text or text == '--':
            return 0
        # Validation only: the digit is read from the decimal text itself,
        # so the result does not depend on float rounding or on how floats
        # are printed, and arbitrarily small magnitudes keep their digit.
        float(text)
    except (AttributeError, TypeError, ValueError):
        return None
    for char in text:
        if char in '123456789':
            return int(char)
        if char in 'eE':
            # Digits after an exponent marker are not significant digits.
            break
    return 0
# Count how often each leading digit occurs, accumulating into the tally list.
def Get_Count(numcount,inputtext):
    """Add the leading digit of one cell to the running tally.

    Args:
        numcount: List of nine counters; index ``d - 1`` holds the number of
            cells seen so far whose leading digit is ``d``. Updated in place.
        inputtext: Cell text passed on to ``Get_First``.

    Returns:
        The same ``numcount`` list, after the update.
    """
    digit = Get_First(inputtext)
    # Anything other than 1..9 (0 or None from Get_First) leaves the tally
    # untouched, exactly like falling through every branch of an if/elif chain.
    if digit in (1, 2, 3, 4, 5, 6, 7, 8, 9):
        numcount[digit - 1] += 1
    return numcount
# Scrape one statement page and tally the leading digit of every table cell.
def Get_Num(url,code,numcount):
    """Download a statement page and count leading digits of its cells.

    Args:
        url: Address of the page to fetch.
        code: Stock code; not used by this function, kept for the callers'
            existing call signature.
        numcount: List of nine leading-digit counters, updated through
            ``Get_Count``.

    Returns:
        A tuple ``(numcount, lencell)``. ``lencell`` is the number of ``<td>``
        cells in the last table row that had any (the original comment calls
        this the number of years in the statement), or ``0`` when the page
        could not be fetched, the expected table is missing, or no row
        contains cells. A tuple is returned in every case so callers can
        always unpack the result.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; "
                             "rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6"}
    lencell = 0
    req = urllib2.Request(url, headers = headers)
    try:
        response = urllib2.urlopen(req)
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()
    except Exception as exc:
        # Best effort: one unreachable page must not abort the whole run.
        # %r keeps the message ASCII-safe whatever the URL/error contains.
        sys.stderr.write("Get_Num: failed to fetch %r: %r\n" % (url, exc))
        return (numcount, lencell)
    soup = BeautifulSoup(content)
    table = soup.find(
        "table", {"class": "table_bg001 border_box limit_sale scr_table"})
    if table is None:
        # Page came back without the statement table (layout change, error
        # page, unknown code): nothing to count.
        return (numcount, lencell)
    for row in table.findAll("tr"):
        cells = row.findAll("td")
        if len(cells) > 0:
            lencell = len(cells)
            for cell in cells:
                numcount = Get_Count(numcount, cell.text)
    return (numcount, lencell)
def Benfude(df_Code,count):
    """Write leading-digit counts and frequencies for every stock to the sheet.

    For each code in ``df_Code.index`` the balance sheet, income statement
    and cash-flow statement pages are scraped through ``Get_Num``, and one
    spreadsheet row is filled: code, name, the nine digit counts
    (columns 2-10), their sum (column 11), the nine frequencies rounded to
    three decimals (columns 12-20) and the cell count reported for the last
    page that returned a result (column 21). The workbook is saved to
    ``NumCount.xls`` after every stock.

    Relies on the module-level ``ws`` (worksheet) and ``wb`` (workbook).

    Args:
        df_Code: Table indexed by stock code with a ``'name'`` column.
        count: Row index at which to start writing; incremented per stock.
    """
    # Page-name prefixes: balance sheet, income statement, cash-flow statement.
    statement_pages = ('zcfzb', 'lrb', 'xjllb')
    for Code in df_Code.index:
        print(u"股票代码:" + Code)
        Name = df_Code.loc[Code,'name']
        print(Name)
        ws.write(count, 0, Code)
        ws.write(count, 1, Name)
        # Counters for leading digits 1..9.
        NumCount = [0] * 9
        LenCell = 0
        for page in statement_pages:
            url = ('http://quotes.money.163.com/f10/' + page + '_' + Code
                   + '.html?type=year')
            result = Get_Num(url, Code, NumCount)
            if result is None:
                # The page could not be processed; keep what we have so far.
                continue
            (NumCount, LenCell) = result
        print(NumCount)
        for digit in range(1, 10):
            ws.write(count, digit + 1, NumCount[digit - 1])
        SumCount = sum(NumCount)
        ws.write(count, 11, SumCount)
        # Frequency of each digit; built as a separate list so the raw counts
        # are not overwritten, and guarded against an empty tally.
        if SumCount:
            NumberCount = [round(n / float(SumCount), 3) for n in NumCount]
        else:
            NumberCount = [0.0] * 9
        for digit in range(1, 10):
            ws.write(count, digit + 11, NumberCount[digit - 1])
        print(NumberCount)
        ws.write(count, 21, LenCell)
        # Saved per stock so progress survives an interrupted run.
        wb.save('NumCount.xls')
        count = count + 1
# Main script
df = Get_Stock_List()
count = 1
if __name__ == '__main__':
    # Create the workbook and write the header row: code, name, the nine
    # digit-count columns, their sum, the nine frequency columns, and the
    # year count.
    wb = xlwt.Workbook()
    ws = wb.add_sheet(u'统计表')
    header_titles = [
        u'股票代码', u'股票名称',
        u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9',
        u'求和',
        u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9',
        u'年数',
    ]
    for column, title in enumerate(header_titles):
        ws.write(0, column, title)
    Benfude(df,count)
加载中,请稍候......