Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 81bd2ca

Browse files
committed
Merge branch 'microsoft:main' into stale
2 parents 143c257 + aa1f9b4 commit 81bd2ca

File tree

13 files changed

+236
-28
lines changed

13 files changed

+236
-28
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ Your feedbacks about the features are very important.
4949
| Planning-based portfolio optimization | Under review: https://github.com/microsoft/qlib/pull/280 |
5050
| Fund data supporting and analysis | Under review: https://github.com/microsoft/qlib/pull/292 |
5151
| Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 |
52-
| High-frequency trading | Initial opensource version under development |
52+
| High-frequency trading | Under review: https://github.com/microsoft/qlib/pull/408 |
5353
| Meta-Learning-based data selection | Initial opensource version under development |
5454

5555
Recent released features

docs/component/data.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,11 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
182182
qlib.init(provider_uri='~/.qlib/qlib_data/us_data', region=REG_US)
183183
184184
185+
.. note::
186+
187+
PRs for new data sources are highly welcome! Users can contribute their data-crawling code as a PR, like `the examples here <https://github.com/microsoft/qlib/tree/main/scripts>`_. We will then use that code to build a data cache on our server, which other users can use directly.
188+
189+
185190
Data API
186191
========================
187192

qlib/data/data.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,9 @@ def load_calendar(self, freq, future):
522522
# if future calendar not exists, return current calendar
523523
if not os.path.exists(fname):
524524
get_module_logger("data").warning(f"{freq}_future.txt not exists, return current calendar!")
525+
get_module_logger("data").warning(
526+
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
527+
)
525528
fname = self._uri_cal.format(freq)
526529
else:
527530
fname = self._uri_cal.format(freq)

qlib/log.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
class MetaLogger(type):
1616
def __new__(cls, name, bases, dict):
1717
wrapper_dict = logging.Logger.__dict__.copy()
18-
wrapper_dict.update(dict)
19-
wrapper_dict["__doc__"] = logging.Logger.__doc__
20-
return type.__new__(cls, name, bases, wrapper_dict)
18+
for key in wrapper_dict:
19+
if key not in dict and key != "__reduce__":
20+
dict[key] = wrapper_dict[key]
21+
return type.__new__(cls, name, bases, dict)
2122

2223

2324
class QlibLogger(metaclass=MetaLogger):
@@ -39,6 +40,9 @@ def setLevel(self, level):
3940
self.level = level
4041

4142
def __getattr__(self, name):
43+
# During unpickling, python will call __getattr__. Use this line to avoid maximum recursion error.
44+
if name in {"__setstate__"}:
45+
raise AttributeError
4246
return self.logger.__getattribute__(name)
4347

4448

@@ -159,3 +163,10 @@ def filter(self, record):
159163
elif isinstance(self.param, list):
160164
allow = not any([self.match_msg(p, record.msg) for p in self.param])
161165
return allow
166+
167+
168+
def set_global_logger_level(level: int):
    """Set the level of every handler attached to the top-level ``qlib`` logger.

    Nothing happens when the ``qlib`` logger has not been created yet.

    Parameters
    ----------
    level : int
        logging level (e.g. ``logging.INFO``) applied to each handler.
    """
    qlib_logger = logging.root.manager.loggerDict.get("qlib", None)
    # ``loggerDict`` may hold a ``logging.PlaceHolder`` under "qlib" when only
    # child loggers (e.g. "qlib.data") exist so far; a placeholder has no
    # ``handlers`` attribute, so only real loggers are processed.
    if isinstance(qlib_logger, logging.Logger):
        for _handler in qlib_logger.handlers:
            # use the public setter so that the level value is validated
            _handler.setLevel(level)

qlib/workflow/__init__.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ def __repr__(self):
2323
@contextmanager
2424
def start(
2525
self,
26+
*,
27+
experiment_id: Optional[Text] = None,
2628
experiment_name: Optional[Text] = None,
29+
recorder_id: Optional[Text] = None,
2730
recorder_name: Optional[Text] = None,
2831
uri: Optional[Text] = None,
2932
resume: bool = False,
@@ -45,8 +48,12 @@ def start(
4548
4649
Parameters
4750
----------
51+
experiment_id : str
52+
id of the experiment one wants to start.
4853
experiment_name : str
4954
name of the experiment one wants to start.
55+
recorder_id : str
56+
id of the recorder under the experiment one wants to start.
5057
recorder_name : str
5158
name of the recorder under the experiment one wants to start.
5259
uri : str
@@ -57,15 +64,24 @@ def start(
5764
resume : bool
5865
whether to resume the specific recorder with given name under the given experiment.
5966
"""
60-
run = self.start_exp(experiment_name, recorder_name, uri, resume)
67+
run = self.start_exp(
68+
experiment_id=experiment_id,
69+
experiment_name=experiment_name,
70+
recorder_id=recorder_id,
71+
recorder_name=recorder_name,
72+
uri=uri,
73+
resume=resume,
74+
)
6175
try:
6276
yield run
6377
except Exception as e:
6478
self.end_exp(Recorder.STATUS_FA) # end the experiment if something went wrong
6579
raise e
6680
self.end_exp(Recorder.STATUS_FI)
6781

68-
def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=False):
82+
def start_exp(
83+
self, *, experiment_id=None, experiment_name=None, recorder_id=None, recorder_name=None, uri=None, resume=False
84+
):
6985
"""
7086
Lower level method for starting an experiment. When use this method, one should end the experiment manually
7187
and the status of the recorder may not be handled properly. Here is the example code:
@@ -79,8 +95,12 @@ def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=F
7995
8096
Parameters
8197
----------
98+
experiment_id : str
99+
id of the experiment one wants to start.
82100
experiment_name : str
83101
the name of the experiment to be started
102+
recorder_id : str
103+
id of the recorder under the experiment one wants to start.
84104
recorder_name : str
85105
name of the recorder under the experiment one wants to start.
86106
uri : str
@@ -93,7 +113,14 @@ def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=F
93113
-------
94114
An experiment instance being started.
95115
"""
96-
return self.exp_manager.start_exp(experiment_name, recorder_name, uri, resume)
116+
return self.exp_manager.start_exp(
117+
experiment_id=experiment_id,
118+
experiment_name=experiment_name,
119+
recorder_id=recorder_id,
120+
recorder_name=recorder_name,
121+
uri=uri,
122+
resume=resume,
123+
)
97124

98125
def end_exp(self, recorder_status=Recorder.STATUS_FI):
99126
"""

qlib/workflow/exp.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,14 @@ def info(self):
3939
output["recorders"] = list(recorders.keys())
4040
return output
4141

42-
def start(self, recorder_name=None, resume=False):
42+
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
4343
"""
4444
Start the experiment and set it to be active. This method will also start a new recorder.
4545
4646
Parameters
4747
----------
48+
recorder_id : str
49+
the id of the recorder to be created.
4850
recorder_name : str
4951
the name of the recorder to be created.
5052
resume : bool
@@ -238,14 +240,14 @@ def __init__(self, id, name, uri):
238240
def __repr__(self):
239241
return "{name}(id={id}, info={info})".format(name=self.__class__.__name__, id=self.id, info=self.info)
240242

241-
def start(self, recorder_name=None, resume=False):
243+
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
242244
logger.info(f"Experiment {self.id} starts running ...")
243245
# Get or create recorder
244246
if recorder_name is None:
245247
recorder_name = self._default_rec_name
246248
# resume the recorder
247249
if resume:
248-
recorder, _ = self._get_or_create_rec(recorder_name=recorder_name)
250+
recorder, _ = self._get_or_create_rec(recorder_id=recorder_id, recorder_name=recorder_name)
249251
# create a new recorder
250252
else:
251253
recorder = self.create_recorder(recorder_name)

qlib/workflow/expm.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@ def __repr__(self):
3333

3434
def start_exp(
3535
self,
36+
*,
37+
experiment_id: Optional[Text] = None,
3638
experiment_name: Optional[Text] = None,
39+
recorder_id: Optional[Text] = None,
3740
recorder_name: Optional[Text] = None,
3841
uri: Optional[Text] = None,
3942
resume: bool = False,
@@ -45,8 +48,12 @@ def start_exp(
4548
4649
Parameters
4750
----------
51+
experiment_id : str
52+
id of the active experiment.
4853
experiment_name : str
4954
name of the active experiment.
55+
recorder_id : str
56+
id of the recorder to be started.
5057
recorder_name : str
5158
name of the recorder to be started.
5259
uri : str
@@ -298,7 +305,10 @@ def client(self):
298305

299306
def start_exp(
300307
self,
308+
*,
309+
experiment_id: Optional[Text] = None,
301310
experiment_name: Optional[Text] = None,
311+
recorder_id: Optional[Text] = None,
302312
recorder_name: Optional[Text] = None,
303313
uri: Optional[Text] = None,
304314
resume: bool = False,
@@ -308,11 +318,11 @@ def start_exp(
308318
# Create experiment
309319
if experiment_name is None:
310320
experiment_name = self._default_exp_name
311-
experiment, _ = self._get_or_create_exp(experiment_name=experiment_name)
321+
experiment, _ = self._get_or_create_exp(experiment_id=experiment_id, experiment_name=experiment_name)
312322
# Set up active experiment
313323
self.active_experiment = experiment
314324
# Start the experiment
315-
self.active_experiment.start(recorder_name, resume)
325+
self.active_experiment.start(recorder_id=recorder_id, recorder_name=recorder_name, resume=resume)
316326

317327
return self.active_experiment
318328

scripts/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
### Download CN Data
1616

1717
```bash
18+
# daily data
1819
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
20+
21+
# 1min data (Optional for running non-high-frequency strategies)
22+
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
1923
```
2024

2125
### Download US Data
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Get future trading days
2+
3+
> The future calendar generated here is what `D.calendar(future=True)` reads
4+
5+
## Requirements
6+
7+
```bash
8+
pip install -r requirements.txt
9+
```
10+
11+
## Collect Data
12+
13+
```bash
14+
# collect future trading dates and save them to <qlib_dir>/calendars/<freq>_future.txt
15+
python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day
16+
```
17+
18+
## Parameters
19+
20+
- qlib_dir: qlib data directory
21+
- freq: value from [`day`, `1min`], default `day`
22+
23+
24+
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
import sys
5+
from typing import List, Union
6+
from pathlib import Path
7+
8+
import fire
9+
import numpy as np
10+
import pandas as pd
11+
from loguru import logger
12+
13+
# get data from baostock
14+
import baostock as bs
15+
16+
CUR_DIR = Path(__file__).resolve().parent
17+
sys.path.append(str(CUR_DIR.parent.parent))
18+
19+
20+
from data_collector.utils import generate_minutes_calendar_from_daily
21+
22+
23+
def read_calendar_from_qlib(qlib_dir: Path) -> pd.DataFrame:
    """Load the daily calendar stored under ``<qlib_dir>/calendars/day.txt``.

    Parameters
    ----------
    qlib_dir: Path
        qlib data directory

    Returns
    -------
    pd.DataFrame
        the file content parsed without a header row, or an empty DataFrame
        when the calendar file does not exist.
    """
    day_file = qlib_dir / "calendars" / "day.txt"
    if day_file.exists():
        return pd.read_csv(day_file, header=None)
    return pd.DataFrame()
29+
30+
def write_calendar_to_qlib(qlib_dir: Path, date_list: List[str], freq: str = "day"):
    """Write ``date_list`` to ``<qlib_dir>/calendars/<freq>_future.txt``, one entry per line.

    Parameters
    ----------
    qlib_dir: Path
        qlib data directory
    date_list: List[str]
        calendar entries, already formatted as strings
    freq: str
        frequency used to build the file name, by default "day"
    """
    calendar_dir = qlib_dir.joinpath("calendars")
    # The calendars directory may be missing (reading the daily calendar
    # tolerates that case), so create it instead of failing inside savetxt.
    calendar_dir.mkdir(parents=True, exist_ok=True)
    calendar_path = str(calendar_dir.joinpath(f"{freq}_future.txt"))

    np.savetxt(calendar_path, date_list, fmt="%s", encoding="utf-8")
    logger.info(f"write future calendars success: {calendar_path}")
35+
36+
37+
def generate_qlib_calendar(date_list: List[str], freq: str) -> List[str]:
    """Convert a list of daily dates into calendar entries for ``freq``.

    Parameters
    ----------
    date_list: List[str]
        daily trading dates
    freq: str
        value from ["day", "1min"]

    Returns
    -------
    List[str]
        ``date_list`` itself for "day"; for "1min", the minute calendar produced by
        ``generate_minutes_calendar_from_daily`` formatted as "%Y-%m-%d %H:%M:%S".

    Raises
    ------
    ValueError
        if ``freq`` is neither "day" nor "1min".
    """
    if freq == "day":
        return date_list
    if freq == "1min":
        minutes = generate_minutes_calendar_from_daily(date_list, freq=freq).tolist()
        return [pd.Timestamp(_minute).strftime("%Y-%m-%d %H:%M:%S") for _minute in minutes]
    raise ValueError(f"Unsupported freq: {freq}")
46+
47+
48+
def future_calendar_collector(qlib_dir: Union[str, Path], freq: str = "day"):
    """Collect trading dates from baostock and store them as the future calendar.

    The queried range starts on January 1st of the year of the last entry in
    ``<qlib_dir>/calendars/day.txt`` (the current year when that file does not
    exist) and ends on December 31st of the current year. The result is written
    to ``<qlib_dir>/calendars/<freq>_future.txt``.

    Parameters
    ----------
    qlib_dir: str or Path
        qlib data directory
    freq: str
        value from ["day", "1min"], by default day

    Raises
    ------
    FileNotFoundError
        if ``qlib_dir`` does not exist.
    ValueError
        if ``freq`` is not supported (raised while generating the calendar).
    """
    qlib_dir = Path(qlib_dir).expanduser().resolve()
    if not qlib_dir.exists():
        raise FileNotFoundError(str(qlib_dir))

    lg = bs.login()
    if lg.error_code != "0":
        logger.error(f"login error: {lg.error_msg}")
        return
    try:
        # read daily calendar
        daily_calendar = read_calendar_from_qlib(qlib_dir)
        end_year = pd.Timestamp.now().year
        if daily_calendar.empty:
            start_year = end_year
        else:
            start_year = pd.Timestamp(daily_calendar.iloc[-1, 0]).year
        # pass both bounds as "YYYY-MM-DD" strings
        rs = bs.query_trade_dates(start_date=f"{start_year}-01-01", end_date=f"{end_year}-12-31")
        if rs.error_code != "0":
            # do not overwrite the future calendar with an empty file
            logger.error(f"query trade dates error: {rs.error_msg}")
            return
        data_list = []
        while rs.error_code == "0" and rs.next():
            _row_data = rs.get_row_data()
            # presumably each row is (calendar_date, is_trading_day); keep trading days only
            if int(_row_data[1]) == 1:
                data_list.append(_row_data[0])
        date_list = generate_qlib_calendar(sorted(data_list), freq=freq)
        write_calendar_to_qlib(qlib_dir, date_list, freq=freq)
    finally:
        # always release the baostock session, even when a step above fails
        bs.logout()
    logger.info(f"get trading dates success: {start_year}-01-01 to {end_year}-12-31")
84+
85+
86+
if __name__ == "__main__":
87+
fire.Fire(future_calendar_collector)

0 commit comments

Comments
 (0)