Skip to content

Commit

Permalink
fix_macos_CI (#1081)
Browse files Browse the repository at this point in the history
Co-authored-by: Linlang Lv (iSoftStone) <v-linlanglv@microsoft.com>
  • Loading branch information
SunsetWolf and Linlang Lv (iSoftStone) committed Apr 29, 2022
1 parent 3c9c76b commit 5d579d1
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 39 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/test_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@ jobs:
# Test Qlib installed with pip

- name: Check Qlib with flake8
run: |
pip install --upgrade pip
pip install flake8
cd ..
flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 qlib
run: |
pip install --upgrade pip
pip install flake8
flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 qlib
- name: Install Qlib with pip
run: |
Expand Down
38 changes: 24 additions & 14 deletions scripts/data_collector/br_index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ def bench_start_date(self) -> pd.Timestamp:

def get_current_4_month_period(self, current_month: int):
"""
This function is used to calculate what is the current
four month period for the current month. For example,
This function is used to calculate what is the current
four month period for the current month. For example,
If the current month is August (month 8), its four month period
is 2Q.
OBS: In English Q is used to represent *quarter*
which means a three month period. However, in
which means a three month period. However, in
Portuguese we use Q to represent a four month period.
In other words,
Expand Down Expand Up @@ -90,8 +90,8 @@ def get_current_4_month_period(self, current_month: int):

def get_four_month_period(self):
"""
The ibovespa index is updated every four months.
Therefore, we will represent each time period as 2003_1Q
The ibovespa index is updated every four months.
Therefore, we will represent each time period as 2003_1Q
which means 2003 first four month period (Jan, Feb, Mar, Apr)
"""
four_months_period = ["1Q", "2Q", "3Q"]
Expand All @@ -101,14 +101,13 @@ def get_four_month_period(self):
current_month = now.month
for year in [item for item in range(init_year, current_year)]:
for el in four_months_period:
self.years_4_month_periods.append(str(year)+"_"+el)
self.years_4_month_periods.append(str(year) + "_" + el)
# For current year the logic must be a little different
current_4_month_period = self.get_current_4_month_period(current_month)
for i in range(int(current_4_month_period[0])):
self.years_4_month_periods.append(str(current_year) + "_" + str(i+1) + "Q")
self.years_4_month_periods.append(str(current_year) + "_" + str(i + 1) + "Q")
return self.years_4_month_periods


def format_datetime(self, inst_df: pd.DataFrame) -> pd.DataFrame:
"""formatting the datetime in an instrument
Expand Down Expand Up @@ -189,11 +188,19 @@ def get_changes(self):
try:
df_changes_list = []
for i in tqdm(range(len(self.years_4_month_periods) - 1)):
df = pd.read_csv(self.ibov_index_composition.format(self.years_4_month_periods[i]), on_bad_lines="skip")["symbol"]
df_ = pd.read_csv(self.ibov_index_composition.format(self.years_4_month_periods[i + 1]), on_bad_lines="skip")["symbol"]
df = pd.read_csv(
self.ibov_index_composition.format(self.years_4_month_periods[i]), on_bad_lines="skip"
)["symbol"]
df_ = pd.read_csv(
self.ibov_index_composition.format(self.years_4_month_periods[i + 1]), on_bad_lines="skip"
)["symbol"]

## Remove Dataframe
remove_date = self.years_4_month_periods[i].split("_")[0] + "-" + quarter_dict[self.years_4_month_periods[i].split("_")[1]]
remove_date = (
self.years_4_month_periods[i].split("_")[0]
+ "-"
+ quarter_dict[self.years_4_month_periods[i].split("_")[1]]
)
list_remove = list(df[~df.isin(df_)])
df_removed = pd.DataFrame(
{
Expand All @@ -204,7 +211,11 @@ def get_changes(self):
)

## Add Dataframe
add_date = self.years_4_month_periods[i + 1].split("_")[0] + "-" + quarter_dict[self.years_4_month_periods[i + 1].split("_")[1]]
add_date = (
self.years_4_month_periods[i + 1].split("_")[0]
+ "-"
+ quarter_dict[self.years_4_month_periods[i + 1].split("_")[1]]
)
list_add = list(df_[~df_.isin(df)])
df_added = pd.DataFrame(
{"date": len(list_add) * [add_date], "type": len(list_add) * ["add"], "symbol": list_add}
Expand Down Expand Up @@ -272,6 +283,5 @@ def filter_df(self, df: pd.DataFrame) -> pd.DataFrame:
return df.loc[:, ["Código"]].copy()



if __name__ == "__main__":
fire.Fire(partial(get_instruments, market_index="br_index" ))
fire.Fire(partial(get_instruments, market_index="br_index"))
1 change: 0 additions & 1 deletion scripts/data_collector/us_index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,5 @@ def parse_instruments(self):
logger.warning(f"No suitable data source has been found!")



if __name__ == "__main__":
fire.Fire(partial(get_instruments, market_index="us_index"))
7 changes: 4 additions & 3 deletions scripts/data_collector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,14 +559,15 @@ def generate_minutes_calendar_from_daily(

return pd.Index(sorted(set(np.hstack(res))))


def get_instruments(
qlib_dir: str,
index_name: str,
method: str = "parse_instruments",
freq: str = "day",
request_retry: int = 5,
retry_sleep: int = 3,
market_index: str = "cn_index"
market_index: str = "cn_index",
):
"""
Expand All @@ -585,7 +586,7 @@ def get_instruments(
retry_sleep: int
request sleep, by default 3
market_index: str
Where the files to obtain the index are located,
Where the files to obtain the index are located,
for example data_collector.cn_index.collector
Examples
Expand All @@ -605,4 +606,4 @@ def get_instruments(


if __name__ == "__main__":
assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM
assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM
32 changes: 16 additions & 16 deletions scripts/data_collector/yahoo/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,24 +317,24 @@ class YahooCollectorIN1min(YahooCollectorIN):

class YahooCollectorBR(YahooCollector, ABC):
def retry(cls):
""""
The reason to use retry=2 is due to the fact that
Yahoo Finance unfortunately does not keep track of some
Brazilian stocks.
Therefore, the decorator deco_retry with retry argument
set to 5 will keep trying to get the stock data up to 5 times,
which makes downloading Brazilian stocks very slow.
In the future, this may change, but for now
I suggest leaving the retry argument at 1 or 2 in
order to improve download speed.
To achieve this goal an abstract attribute (retry)
was added into YahooCollectorBR base class
"""
The reason to use retry=2 is due to the fact that
Yahoo Finance unfortunately does not keep track of some
Brazilian stocks.
Therefore, the decorator deco_retry with retry argument
set to 5 will keep trying to get the stock data up to 5 times,
which makes downloading Brazilian stocks very slow.
In the future, this may change, but for now
I suggest leaving the retry argument at 1 or 2 in
order to improve download speed.
To achieve this goal an abstract attribute (retry)
was added into YahooCollectorBR base class
"""
raise NotImplementedError

def get_instrument_list(self):
logger.info("get BR stock symbols......")
symbols = get_br_stock_symbols() + [
Expand Down

0 comments on commit 5d579d1

Please sign in to comment.