'''onedata.k(stockcode, period)'''
### 从AKshare取数据时:
# dfo, dff, dft,dfd,dfw,dfm=onedata.k("300027","all")   ---!!!
# dfd,dfw,dfm = onedata.k("300027","daily")             ---!!!
# dfw= onedata.k("300027","weekly")                     --- weekly / monthly
# dfo= onedata.k("300027","1")                          --- 1 / 5 / 30


'''onedata.stocklist()'''
# stocklist = onedata.stocklist()                       --- Get all stocklist

'''onedata.hq()'''

'''onedata.bsinfo()'''


import akshare as ak
import pandas as pd
import numpy as np
import requests,re,json,datetime
class Onedata:
    def __init__(self, kdatasource=0):
        '''Store the data-source selector and the list of period identifiers.

        Args:
            kdatasource: integer selector read by ``k()`` and ``stocklist()``.
                In ``k()``, 0 routes to the AKshare helpers and 1 routes to
                the web (gtimg.cn) helpers.
                NOTE(review): the original inline note listed
                "0 AKshare, 1 Tushare, 2 web", which does not match what
                ``k()`` does for 1 -- confirm the intended numbering.
        '''
        minute_periods = ['1', '5', '30']
        calendar_periods = ['daily', 'weekly', 'monthly']
        # Period identifiers; not read by any other method in this class.
        self.periodlist = minute_periods + calendar_periods
        self.kdatasource = kdatasource

    def status():
        # 报告[每个]数据源是否可用
        pass

    def stocklist(self):
        # 得到所有的股票清单
        # 行情数据比股票清单数据快,可以通过读行情取股票清单
        if self.kdatasource==0:    ###stocklist_source
            # 所有股票清单, 只包含 stockcode & stockname 两个字段
            df = self.hq()
            df = df[['stockcode','stockname']]

            '''
            # 第一种方法, 直接读全市场股票代码, 只包含两个字段
            ### df = ak.stock_info_a_code_name()
            ### df.columns=['stockcode','stockname']

            # 第二种方法, 分开三读三个市场的股票清单, 慢,需要各自处理格式
            sz=ak.stock_info_sz_name_code()  # 深证证券交易所股票代码和简称
            print("sz",sz.columns)
            sh=ak.stock_info_sh_name_code()  # 上海证券交易所股票代码和简称
            print("sh",sh.columns)
            bj=ak.stock_info_bj_name_code()  # 北京证券交易所股票代码和简称
            print("bj",bj.columns)
            df = pd.concat([sh, sz, bj], ignore_index=True)
            #df = pd.concat([sz,sh,bj],ignore_index=True,axis=1)
            #df.columns=['stockcode','stockname','size','Csize','IPOday','block','city','listdate']
            #df=df[['stockcode','stockname']]
            '''
        return df

    def k(self, stockcode="300027.SZ", period="daily"):
        '''Fetch K-line (candlestick) data for one stock.

        Args:
            stockcode: stock code string. With ``kdatasource == 0`` only the
                first six characters are passed on. With ``kdatasource == 1``
                the code is passed unchanged to the web helpers, which build
                the request from its first six and last two characters
                (e.g. "300027.SZ").
            period: '1', '5', '30', 'daily', 'weekly', 'monthly' or 'all'.

        Returns:
            Whatever the selected helper returns:
              * kdatasource 0, 'daily': tuple (daily, weekly, monthly).
              * kdatasource 0, other single period: one DataFrame.
              * kdatasource 1, any single period: one DataFrame.
              * 'all' (either source): tuple
                (dfo, dff, dft, dfd, dfw, dfm) = 1-min, 5-min, 30-min,
                daily, weekly, monthly.

        Raises:
            ValueError: ``period`` is not one of the supported values.
            NotImplementedError: ``kdatasource`` is neither 0 nor 1. The
                original code tested the non-existent ``self.source`` here,
                so every fall-through ended in AttributeError.
        '''
        # Open question from the original author: some sources do not
        # provide the total turnover amount for a bar.
        period_dwm = ('daily', 'weekly', 'monthly')
        period_oft = ('1', '5', '30')

        if self.kdatasource == 0:
            code = stockcode[:6]
            if period in period_dwm:
                return self.__K_DWM(code, period)
            if period in period_oft:
                return self.__K_OFT(code, period)
            if period == "all":
                # One daily request also yields the weekly and monthly frames.
                dfd, dfw, dfm = self.__K_DWM(code, "daily")
                dfo = self.__K_OFT(code, "1")
                dff = self.__K_OFT(code, "5")
                dft = self.__K_OFT(code, "30")
                return dfo, dff, dft, dfd, dfw, dfm
            raise ValueError(f"unsupported period: {period!r}")

        if self.kdatasource == 1:
            if period in period_dwm:
                return self.__web_K_DWM(stockcode, period)
            if period in period_oft:
                return self.__web_K_OFT(stockcode, period)
            if period == "all":
                dfd = self.__web_K_DWM(stockcode, "daily")
                dfw = self.__web_K_DWM(stockcode, "weekly")
                dfm = self.__web_K_DWM(stockcode, "monthly")
                dfo = self.__web_K_OFT(stockcode, "1")
                dff = self.__web_K_OFT(stockcode, "5")
                dft = self.__web_K_OFT(stockcode, "30")
                return dfo, dff, dft, dfd, dfw, dfm
            raise ValueError(f"unsupported period: {period!r}")

        raise NotImplementedError(
            f"k() is not implemented for kdatasource={self.kdatasource!r}"
        )

    def hq(self):
        '''Return the real-time quote table for all stocks.

        Calls ``ak.stock_zh_a_spot_em()``, keeps five of its columns under
        short English names, sorts by the raw code and then converts each
        code with ``__format_code``.

        Original author's notes: this table has no level-5 bid/ask data; the
        East Money endpoint is faster than the Sina one
        (``ak.stock_zh_a_spot()`` was found to be very slow).

        Returns:
            pandas.DataFrame with columns ``stockcode``, ``stockname``,
            ``c`` (latest price), ``val`` (price change) and ``pct``
            (percentage change), with a fresh 0..n-1 index.
        '''
        renamed = {
            '代码': 'stockcode',
            '名称': 'stockname',
            '最新价': 'c',
            '涨跌额': 'val',
            '涨跌幅': 'pct',
        }
        spot = ak.stock_zh_a_spot_em()
        spot = spot.loc[:, list(renamed)].rename(columns=renamed)
        # Sorting happens on the raw code, before the suffix is appended.
        spot = spot.sort_values(by='stockcode', ignore_index=True)
        spot['stockcode'] = spot['stockcode'].map(self.__format_code)
        return spot

    def bsinfo(self):
        '''Return the table produced by ``ak.stock_zh_a_spot_em()``.

        NOTE(review): the original comment describes this method as the
        level-5 ask/bid quotes of a single stock, but the code takes no stock
        code and makes the same AKshare call that ``hq()`` uses. Confirm the
        intended data source before relying on it.
        '''
        spot_table = ak.stock_zh_a_spot_em()
        return spot_table

    def __K_DWM(self, stockcode, period, start_date='20060101', end_date=None):
        '''Fetch forward-adjusted daily / weekly / monthly bars from AKshare.

        Args:
            stockcode: code passed to ``ak.stock_zh_a_hist`` as ``symbol``.
            period: 'daily', 'weekly' or 'monthly', passed through unchanged.
            start_date: first date to request, 'YYYYMMDD'.
            end_date: last date to request, 'YYYYMMDD'. ``None`` (default)
                means today. The original code hard-coded '20230118', so no
                bar after that date could ever be returned.

        Returns:
            * period == 'daily': tuple ``(daily, weekly, monthly)`` where the
              weekly (weeks ending Friday) and monthly frames are resampled
              from the daily frame instead of being requested again.
            * otherwise: a single DataFrame for the requested period.
            Every frame is indexed by ``date`` and has the columns
            ``open, close, high, low, vol``; ``vol`` is the source volume
            divided by 1000.
        '''
        if end_date is None:
            end_date = datetime.date.today().strftime('%Y%m%d')

        raw = ak.stock_zh_a_hist(symbol=stockcode, period=period,
                                 start_date=start_date, end_date=end_date,
                                 adjust='qfq')
        # .copy() so the column assignments below act on an independent frame
        # (avoids pandas' SettingWithCopyWarning). Turnover amount ('成交额')
        # is deliberately not kept, as in the original.
        df = raw[['日期', '开盘', '收盘', '最高', '最低', '成交量']].copy()
        df.columns = ['date', 'open', 'close', 'high', 'low', 'vol']
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date')
        df['vol'] = (df['vol'] / 1000).round(3)  # volume / 1000

        if period != "daily":
            return df

        def resample_bars(rule):
            # Build open/close/high/low/vol bars for one resampling rule and
            # drop buckets without any trading day (their prices are NaN).
            bars = pd.concat(
                [
                    df['open'].resample(rule).first().round(2),
                    df['close'].resample(rule).last().round(2),
                    df['high'].resample(rule).max().round(2),
                    df['low'].resample(rule).min().round(2),
                    df['vol'].resample(rule).sum().round(2),
                ],
                axis=1,
            )
            return bars.dropna(axis=0, how='any')

        # NOTE(review): recent pandas releases deprecate the 'M' alias in
        # favour of 'ME'; kept as 'M' so older pandas versions keep working.
        df_m = resample_bars('M')
        df_w = resample_bars('W-FRI')
        return df, df_w, df_m

    def __web_K_DWM(self,stockcode,period):
        my_k=0
        urlstockcode = stockcode[-2:].lower() + stockcode[:6]
        # http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?param=sz300027,month,1990-12-01,,10240,qfq
        #url = rf'http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?param={urlstockcode},day,1990-12-01,,10240,bfq'
        if period=="daily":     tmpperiod="day"
        if period=="weekly":    tmpperiod="week"
        if period=="monthly":   tmpperiod="month"
        url = rf'http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?param={urlstockcode},{tmpperiod},1990-12-01,,2000,qfq'
        originaltxt = self.__gethtml(url)
        goodtxt = re.sub(',{(.+?)}]', ']', originaltxt)

        my_json = json.loads(goodtxt)
        if not(urlstockcode=="sz399001" or urlstockcode=="sz399006" or urlstockcode=="sh000001"):
            my_k = my_json['data'][urlstockcode]['qfq'+tmpperiod]
        else:
            my_k = my_json['data'][urlstockcode][tmpperiod]
        
        arrayk = np.array(my_k)
        df = pd.DataFrame(arrayk, columns=['date', 'open', 'close', 'high', 'low', 'vol'])  # ???ul1
        df[['open', 'close', 'high', 'low']] = df[['open', 'close', 'high', 'low']].apply(pd.to_numeric).round(2)
        df['vol'] = df['vol'].apply(pd.to_numeric).astype(int)  # vol 转换为 整型
        df['date'] = pd.to_datetime(df['date'])
        df.set_index(["date"], inplace=True)  # set date as index
        df['vol'] = df['vol'].map(lambda x: x / 1000).round(3)  # 成交量/1000,避免溢出？？？有没有问题
        #df['amt'] = df['vol']*df['close']
        #df['amt'] = df['amt'].round(3)  # 成交量/1000,避免溢出？？？有没有问题
        return df

    def __web_K_OFT(self,stockcode,period):
        urlstockcode = stockcode[-2:].lower() + stockcode[:6]
        if period=="1":     tmpperiod="m1"
        if period=="5":     tmpperiod="m5"
        if period=="30":    tmpperiod="m30"
        
        url = rf'http://ifzq.gtimg.cn/appstock/app/kline/mkline?param={urlstockcode},{tmpperiod},,320&_var=m1_today&r=0.260880015116'
            # vol后面多了两列 {} 和 类似成交量的一个数字, 用 un1 和 un2替代后，再删除
        originaltxt = self.__gethtml(url)
        goodtxt = re.sub(',{}(.+?)]', ']', originaltxt)
        goodtxt = goodtxt[9:]

        my_json = json.loads(goodtxt)
        my_k = my_json['data'][urlstockcode][tmpperiod]
        arrayk = np.array(my_k)
        df = pd.DataFrame(arrayk, columns=['date', 'open', 'close', 'high', 'low', 'vol'])  # ???ul1
        df[['open', 'close', 'high', 'low']] = df[['open', 'close', 'high', 'low']].apply(
            pd.to_numeric).round(2)
        df['vol'] = df['vol'].apply(pd.to_numeric).astype(int)  # vol 转换为 整型
        df['date'] = pd.to_datetime(df['date'])
        df.set_index(["date"], inplace=True)  # set date as index
        df['vol'] = df['vol'].map(lambda x: x / 1000).round(3)  # 成交量/1000,避免溢出？？？有没有问题
        #df['amt'] = df['vol']*df['close']
        #df['amt'] = df['amt'].round(3)  # 成交量/1000,避免溢出？？？有没有问题
        self.__wash_K_div(df,stockcode)

        return df

    def __wash_K_div(self,kdata,stockcode):
        #使用除权数据清洗K线
        #divpd['date'] = pd.to_datetime(divpd['date'])
        #divpd = divpd.set_index('date')
        #divpd = divpd.sort_index(ascending=False)
        #divpd中有没有数据
        divpd= self.__get_div(stockcode)
        for key, val in divpd.iterrows():
            date = key - datetime.timedelta(minutes=1)
            if date < kdata.index[0]: break
            for field in kdata.columns.values:
                if field != 'volume' and field != 'amount':  # open	close	high	low	    vol
                    kdata.loc[:date, field] -= val.bonus / 10
                    kdata.loc[:date, field] += val.price * (val.rationed / 10)
                    kdata.loc[:date:, field] /= 1 + val.present / 10 + val.rationed / 10
        return kdata

    def __get_div(self,stockcode):
        #从csv/divdata.csv读取当前分析股票的除权数据
        divpd = pd.read_csv(r"./csv/divdata.csv")
        divpd = divpd[divpd['code'] == stockcode]
        divpd['date'] = pd.to_datetime(divpd['date'])
        divpd = divpd.set_index('date')
        divpd = divpd.sort_index(ascending=False)
        return divpd

    def __gethtml(self, url):
        '''Download ``url`` as text, trying up to 10 times.

        Each attempt uses a 3-second request timeout; there is no pause
        between attempts. Only ``requests`` exceptions trigger a retry.

        The original intended to report downloads that needed several
        attempts, but that check sat after the loop, where the failure
        counter is always 10, so it could never fire. It is now evaluated
        when a retried attempt succeeds.

        NOTE(review): the original docstring also mentioned writing failures
        to ``htmlerror.txt``; that has never been implemented here and
        messages are only printed.

        Args:
            url: address to fetch.

        Returns:
            The response body as ``str``, or ``None`` when all 10 attempts
            failed.
        '''
        max_tries = 10
        for failures in range(max_tries):
            try:
                html = requests.get(url, timeout=3).text
            except requests.exceptions.RequestException:
                continue
            if failures > 1:
                # self.string2txt(1, str(failures) + url)
                print(f"{failures} 次通过{url} 没有得到数据")
            return html

        # Every attempt failed.
        print(f"尝试过{max_tries}次后，依然没有通过{url}得到数据")
        return None

    def __K_OFT(self, stockcode, period, start_date=None, end_date=None):
        '''Fetch the most recent 320 unadjusted minute bars from AKshare.

        Args:
            stockcode: code passed to ``ak.stock_zh_a_hist_min_em`` as
                ``symbol``.
            period: '1', '5' or '30', passed through unchanged.
            start_date: start of the requested window; anything
                ``pandas.to_datetime`` accepts. Defaults to 365 days before
                ``end_date``.
            end_date: end of the requested window. Defaults to the current
                time. The original hard-coded the window 2023-01-01 ..
                2024-01-16, so newer bars could never be returned.

        Returns:
            pandas.DataFrame with at most 320 rows, indexed by ``date``, with
            the columns ``open, close, high, low, vol``; ``vol`` is the
            source volume divided by 1000.
        '''
        stamp_format = "%Y-%m-%d %H:%M:%S"
        end_ts = pd.Timestamp.now() if end_date is None else pd.to_datetime(end_date)
        if start_date is None:
            start_ts = end_ts - pd.Timedelta(days=365)
        else:
            start_ts = pd.to_datetime(start_date)

        raw = ak.stock_zh_a_hist_min_em(symbol=stockcode,
                                        start_date=start_ts.strftime(stamp_format),
                                        end_date=end_ts.strftime(stamp_format),
                                        period=period, adjust='')
        # Keep the latest 320 bars; .copy() so the assignments below act on
        # an independent frame. Turnover amount ('成交额') is not kept.
        df = raw.iloc[-320:][['时间', '开盘', '收盘', '最高', '最低', '成交量']].copy()
        df.columns = ['date', 'open', 'close', 'high', 'low', 'vol']
        # volume / 1000 (original note: meant to avoid overflow; the author
        # was unsure whether this causes problems)
        df['vol'] = (df['vol'] / 1000).round(3)
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date')
        # TODO (from the original author): 1-minute data can contain bars
        # whose open price is zero; these still need to be repaired.
        return df

    def __format_code(self, scode):
        '''Turn a 6-character stock code into the 9-character suffixed form.

        A value that already has length 9 is returned unchanged. Otherwise
        the first character selects the suffix:
        '6' -> '.SH', '8' or '4' -> '.BJ', '0' or '3' -> '.SZ'.

        Returns:
            The suffixed code, or ``None`` when the first character matches
            none of the prefixes above (including the empty string).
        '''
        if len(scode) == 9:
            return scode

        lead = scode[0:1]
        if lead == "6":
            suffix = ".SH"
        elif lead in ("8", "4"):
            suffix = ".BJ"
        elif lead in ("0", "3"):
            suffix = ".SZ"
        else:
            # Unknown prefix: same result as the original's implicit return.
            return None
        return scode + suffix

'''
    作者: LH
    日期: 2023-01-18
    需求分析: 
        1,得到所有量化交易的基础数据
        2,多个数据源并存,保证数据基础的可靠性
        3,手动或自动切换数据源
        4,如果一个源失效,就要寻找替补
        5,接口的一致性
            -> onedata.status()
                报告[每个]数据源是否可用
            -> onedata.stocklist
                得到所有股票清单
            -> onedata.hq (stockcode)
                得到当前行情
            -> onedata.finance(stockcode)
                得到所有股票的财务数据 [pe]
            -> onedata.k (stockcode, period)
                得到K线数据
'''