# Reproduction script: initialise qlib against a local data directory and load
# three expression features for the 'csi300' instrument pool.
import warnings

import qlib
from qlib.constant import REG_CN
# Import QlibDataLoader (this line was previously bare text without a leading
# '#', which Python parses as an undefined name and fails with NameError).
from qlib.data.dataset.loader import QlibDataLoader

# Local provider directory holding the qlib bin data.
data_uri = '/verify/crosssection'
qlib.init(provider_uri=data_uri, region=REG_CN)

# Silence all Python warnings from this point on.
warnings.filterwarnings("ignore")

# CSI 300 stock pool code; per the author, the instruments folder contains a
# matching sh000300.txt.
market = 'csi300'

# Feature expressions and the column names assigned to them, in order.
# NOTE(review): the third expression ranks $low but is named "closerank" --
# confirm whether $close was intended.
close_ma = ["Power($close - 100, 2)", "EMA($close, 30)", "CSRank($low)"]
ma_names = ["pwclose", 'EMA30', "closerank"]

qdl_ma = QlibDataLoader(config=(close_ma, ma_names))
qdl_ma.load(instruments=market, start_time='20210101', end_time='20230110')
运行以上代码报错如下:
[359024:MainThread](2024-07-10 00:49:19,496) INFO - qlib.Initialization - [config.py:456] - default_conf: client.
[359024:MainThread](2024-07-10 00:49:19,499) INFO - qlib.Initialization - [__init__.py:87] - qlib successfully initialized based on client settings.
[359024:MainThread](2024-07-10 00:49:19,500) INFO - qlib.Initialization - [__init__.py:89] - data_path={'__DEFAULT_FREQ': PosixPath('/verify/crosssection')}
[359024:MainThread](2024-07-10 00:49:19,518) INFO - qlib.data - [data.py:671] - shared memory created
[359024:MainThread](2024-07-10 00:49:19,524) INFO - qlib.data - [data.py:674] - Using shared memory for cross-section data cache
[359024:MainThread](2024-07-10 00:49:19,525) INFO - qlib.data - [data.py:675] - num cs_levels: 2
[359024:MainThread](2024-07-10 00:49:19,525) INFO - qlib.data - [data.py:680] - cs level start: 1 with {'$low': $low}
[359024:MainThread](2024-07-10 00:49:19,900) INFO - qlib.data - [data.py:715] - cs level finished: 1
[359024:MainThread](2024-07-10 00:49:19,900) INFO - qlib.data - [data.py:717] - Start to calculate the final data
[361984:MainThread](2024-07-10 00:49:20,674) ERROR - qlib.data - [base.py:170] - Loading data error: instrument=SH600015, expression=CSRank($low), start_index=3647, end_index=4137, args=('day',). error info: ('CSRank($low)', 'SH600015', 'day')
Traceback (most recent call last):
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/base.py", line 168, in _load_from_source
_series = self._load_internal(instrument, load_start_index, load_end_index, *args)
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/ops.py", line 2101, in _load_internal
return H["fs"][cache_key][0]
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/cache.py", line 166, in __getitem__
return self.od[key]
File "<string>", line 2, in __getitem__
File "/home/liujia/anaconda3/envs/qlib_cross/lib/python3.8/multiprocessing/managers.py", line 850, in _callmethod
raise convert_to_error(kind, result)
KeyError: ('CSRank($low)', 'SH600015', 'day')
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/liujia/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/_utils.py", line 72, in __call__
return self.func(**kwargs)
File "/home/liujia/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py", line 598, in __call__
return [func(*args, **kwargs)
File "/home/liujia/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py", line 598, in <listcomp>
return [func(*args, **kwargs)
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/data.py", line 849, in inst_calculator
obj[field] = ExpressionD.expression(
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/data.py", line 1182, in expression
series = expression.load(instrument, query_start, query_end, freq)
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/base.py", line 232, in load
series = self._load_from_source(instrument, start_index, end_index, *args)
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/base.py", line 168, in _load_from_source
_series = self._load_internal(instrument, load_start_index, load_end_index, *args)
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/ops.py", line 2101, in _load_internal
return H["fs"][cache_key][0]
File "/home/liujia/project/qlib_crosssection/qlib/qlib/data/cache.py", line 166, in __getitem__
return self.od[key]
File "<string>", line 2, in __getitem__
File "/home/liujia/anaconda3/envs/qlib_cross/lib/python3.8/multiprocessing/managers.py", line 850, in _callmethod
raise convert_to_error(kind, result)
KeyError: ('CSRank($low)', 'SH600015', 'day')
"""
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[3], line 16
14 ma_names = ["pwclose",'EMA30',"closerank"]
15 qdl_ma = QlibDataLoader(config=(close_ma, ma_names))
---> 16 qdl_ma.load(instruments=market, start_time='20210101', end_time='20230110')
File ~/project/qlib_crosssection/qlib/qlib/data/dataset/loader.py:143, in DLWParser.load(self, instruments, start_time, end_time)
141 else:
142 exprs, names = self.fields
--> 143 df = self.load_group_df(instruments, exprs, names, start_time, end_time)
144 return df
File ~/project/qlib_crosssection/qlib/qlib/data/dataset/loader.py:217, in QlibDataLoader.load_group_df(self, instruments, exprs, names, start_time, end_time, gp_name)
213 freq = self.freq[gp_name] if isinstance(self.freq, dict) else self.freq
214 inst_processors = (
215 self.inst_processors if isinstance(self.inst_processors, list) else self.inst_processors.get(gp_name, [])
216 )
--> 217 df = D.features(instruments, exprs, start_time, end_time, freq=freq, inst_processors=inst_processors)
218 df.columns = names
219 if self.swap_level:
File ~/project/qlib_crosssection/qlib/qlib/data/data.py:1514, in BaseProvider.features(instruments, fields, start_time, end_time, freq, disk_cache, inst_processors)
1512 fields = list(fields) # In case of tuple.
1513 try:
-> 1514 return DatasetD.dataset(
1515 instruments, fields, start_time, end_time, freq, inst_processors=inst_processors, disk_cache=disk_cache
1516 )
1517 except TypeError:
1518 return DatasetD.dataset(instruments, fields, start_time, end_time, freq, inst_processors=inst_processors)
File ~/project/qlib_crosssection/qlib/qlib/data/data.py:1238, in LocalDatasetProvider.dataset(self, instruments, fields, start_time, end_time, freq, inst_processors, **_)
1236 start_time = cal[0]
1237 end_time = cal[-1]
-> 1238 data = self.dataset_processor(
1239 instruments, column_names, start_time, end_time, freq, inst_processors=inst_processors
1240 )
1242 return data
File ~/project/qlib_crosssection/qlib/qlib/data/data.py:748, in DatasetProvider.dataset_processor(instruments, column_names, start_time, end_time, freq, inst_processors)
719 shuffler = CSShuffler(cs_level_summary[0])
720 inst_l, task_l = zip(
721 *list(
722 (
(...)
742 )
743 )
745 data = dict(
746 zip(
747 inst_l,
--> 748 ParallelExt(n_jobs=workers, backend=C.joblib_backend, maxtasksperchild=C.maxtasksperchild)(task_l),
749 )
750 )
751 get_module_logger("data").info("end to calculate the final data")
752 if len(cs_levels) > 1 and C["joblib_backend"] == "multiprocessing":
File ~/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py:2007, in Parallel.__call__(self, iterable)
2001 # The first item from the output is blank, but it makes the interpreter
2002 # progress until it enters the Try/Except block of the generator and
2003 # reaches the first `yield` statement. This starts the asynchronous
2004 # dispatch of the tasks to the workers.
2005 next(output)
-> 2007 return output if self.return_generator else list(output)
File ~/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py:1650, in Parallel._get_outputs(self, iterator, pre_dispatch)
1647 yield
1649 with self._backend.retrieval_context():
-> 1650 yield from self._retrieve()
1652 except GeneratorExit:
1653 # The generator has been garbage collected before being fully
1654 # consumed. This aborts the remaining tasks if possible and warn
1655 # the user if necessary.
1656 self._exception = True
File ~/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py:1754, in Parallel._retrieve(self)
1747 while self._wait_retrieval():
1748
1749 # If the callback thread of a worker has signaled that its task
1750 # triggered an exception, or if the retrieval loop has raised an
1751 # exception (e.g. `GeneratorExit`), exit the loop and surface the
1752 # worker traceback.
1753 if self._aborting:
-> 1754 self._raise_error_fast()
1755 break
1757 # If the next job is not ready for retrieval yet, we just wait for
1758 # async callbacks to progress.
File ~/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py:1789, in Parallel._raise_error_fast(self)
1785 # If this error job exists, immediately raise the error by
1786 # calling get_result. This job might not exists if abort has been
1787 # called directly or if the generator is gc'ed.
1788 if error_job is not None:
-> 1789 error_job.get_result(self.timeout)
File ~/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py:745, in BatchCompletionCallBack.get_result(self, timeout)
739 backend = self.parallel._backend
741 if backend.supports_retrieve_callback:
742 # We assume that the result has already been retrieved by the
743 # callback thread, and is stored internally. It's just waiting to
744 # be returned.
--> 745 return self._return_or_raise()
747 # For other backends, the main thread needs to run the retrieval step.
748 try:
File ~/anaconda3/envs/qlib_cross/lib/python3.8/site-packages/joblib/parallel.py:763, in BatchCompletionCallBack._return_or_raise(self)
761 try:
762 if self.status == TASK_ERROR:
--> 763 raise self._result
764 return self._result
765 finally:
KeyError: ('CSRank($low)', 'SH600015', 'day')
使用的是 Tushare 的数据:用原版可以运行;用你的代码重新生成了 bin 数据,但运行时报以上错误。另外,pandas 为 2.0 版本,但与 arctic 冲突。