hahunavth commited on
Commit
c3ece9d
β€’
1 Parent(s): 2b0538d

add cron server

Browse files
Files changed (18) hide show
  1. .gitignore +1 -1
  2. .idea/.gitignore +8 -8
  3. .idea/kaggle-utils.iml +7 -7
  4. .idea/misc.xml +6 -6
  5. .idea/modules.xml +7 -7
  6. .idea/vcs.xml +5 -5
  7. Dockerfile +20 -0
  8. cli.py +50 -50
  9. config/aasist-train.json +0 -8
  10. google_sheet.py +125 -62
  11. kaggle_service.py +582 -582
  12. logger.py +58 -52
  13. main.py +109 -99
  14. requirements.txt +8 -3
  15. run.py +130 -0
  16. run_stt.py +68 -0
  17. test.ipynb +160 -160
  18. version.py +1 -1
.gitignore CHANGED
@@ -1,2 +1,2 @@
1
- tmp
2
  __pycache__
 
1
+ tmp
2
  __pycache__
.idea/.gitignore CHANGED
@@ -1,8 +1,8 @@
1
- # Default ignored files
2
- /shelf/
3
- /workspace.xml
4
- # Editor-based HTTP Client requests
5
- /httpRequests/
6
- # Datasource local storage ignored files
7
- /dataSources/
8
- /dataSources.local.xml
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/kaggle-utils.iml CHANGED
@@ -1,8 +1,8 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module type="PYTHON_MODULE" version="4">
3
- <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$" />
5
- <orderEntry type="inheritedJdk" />
6
- <orderEntry type="sourceFolder" forTests="false" />
7
- </component>
8
  </module>
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
  </module>
.idea/misc.xml CHANGED
@@ -1,7 +1,7 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="Black">
4
- <option name="sdkName" value="vlsp2023-tts-api" />
5
- </component>
6
- <component name="ProjectRootManager" version="2" project-jdk-name="vlsp2023-tts-api" project-jdk-type="Python SDK" />
7
  </project>
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="vlsp2023-tts-api" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="vlsp2023-tts-api" project-jdk-type="Python SDK" />
7
  </project>
.idea/modules.xml CHANGED
@@ -1,8 +1,8 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectModuleManager">
4
- <modules>
5
- <module fileurl="file://$PROJECT_DIR$/.idea/kaggle-utils.iml" filepath="$PROJECT_DIR$/.idea/kaggle-utils.iml" />
6
- </modules>
7
- </component>
8
  </project>
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/kaggle-utils.iml" filepath="$PROJECT_DIR$/.idea/kaggle-utils.iml" />
6
+ </modules>
7
+ </component>
8
  </project>
.idea/vcs.xml CHANGED
@@ -1,6 +1,6 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="VcsDirectoryMappings">
4
- <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
- </component>
6
  </project>
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
  </project>
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ RUN useradd -m -u 1000 user
10
+
11
+ USER user
12
+
13
+ ENV HOME=/home/user \
14
+ PATH=/home/user/.local/bin:$PATH
15
+
16
+ WORKDIR $HOME/app
17
+
18
+ COPY --chown=user . $HOME/app
19
+
20
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
cli.py CHANGED
@@ -1,51 +1,51 @@
1
- import os
2
- import json
3
- from types import SimpleNamespace
4
-
5
- from kaggle_service import KernelRerunService, NbJob
6
- import argparse
7
- from logger import sheet_logger
8
-
9
-
10
- def main(args):
11
- if not os.path.exists(args.config):
12
- print(f"Config folder not found: {os.path.abspath(args.config)}")
13
- exit(1)
14
-
15
- configs = []
16
- if os.path.isdir(args.config):
17
- files = os.listdir(args.config)
18
- for file in files:
19
- with open(os.path.join(args.config, file), "r") as f:
20
- obj = json.loads(f.read())
21
- configs.append(obj)
22
- print(obj)
23
- elif os.path.isfile(args.config):
24
- with open(args.config, "r") as f:
25
- obj = json.loads(f.read())
26
- configs.append(obj)
27
- print(obj)
28
-
29
- service = KernelRerunService()
30
- for config in configs:
31
- service.add_job(NbJob.from_dict(config))
32
-
33
- if args.option == "run":
34
- service.run_all()
35
- elif args.option == "validate":
36
- service.validate_all()
37
- elif args.option == "status":
38
- service.status_all()
39
- else:
40
- print(f"Invalid option: {args.option}")
41
-
42
-
43
- if __name__ == "__main__":
44
- # parser = argparse.ArgumentParser()
45
- # parser.add_argument("option", type=str, default="run", choices=["run", "validate", "status"])
46
- # parser.add_argument("--config", type=str, default="./config")
47
- #
48
- # args = parser.parse_args()
49
- args = SimpleNamespace(option="validate", config='./config')
50
-
51
  main(args)
 
1
+ import os
2
+ import json
3
+ from types import SimpleNamespace
4
+
5
+ from kaggle_service import KernelRerunService, NbJob
6
+ import argparse
7
+ from logger import sheet_logger
8
+
9
+
10
+ def main(args):
11
+ if not os.path.exists(args.config):
12
+ print(f"Config folder not found: {os.path.abspath(args.config)}")
13
+ exit(1)
14
+
15
+ configs = []
16
+ if os.path.isdir(args.config):
17
+ files = os.listdir(args.config)
18
+ for file in files:
19
+ with open(os.path.join(args.config, file), "r") as f:
20
+ obj = json.loads(f.read())
21
+ configs.append(obj)
22
+ print(obj)
23
+ elif os.path.isfile(args.config):
24
+ with open(args.config, "r") as f:
25
+ obj = json.loads(f.read())
26
+ configs.append(obj)
27
+ print(obj)
28
+
29
+ service = KernelRerunService()
30
+ for config in configs:
31
+ service.add_job(NbJob.from_dict(config))
32
+
33
+ if args.option == "run":
34
+ service.run_all()
35
+ elif args.option == "validate":
36
+ service.validate_all()
37
+ elif args.option == "status":
38
+ service.status_all()
39
+ else:
40
+ print(f"Invalid option: {args.option}")
41
+
42
+
43
+ if __name__ == "__main__":
44
+ # parser = argparse.ArgumentParser()
45
+ # parser.add_argument("option", type=str, default="run", choices=["run", "validate", "status"])
46
+ # parser.add_argument("--config", type=str, default="./config")
47
+ #
48
+ # args = parser.parse_args()
49
+ args = SimpleNamespace(option="validate", config='./config')
50
+
51
  main(args)
config/aasist-train.json CHANGED
@@ -1,8 +0,0 @@
1
- {
2
- "accounts": {
3
- "hahunavth": "fb3d65ea4d06f91a83cf571e9a39d40d",
4
- "mrhakk": "26780db435523a697855d5d13355744d",
5
- "havthust": "c54e96568075fcc277bd10ba0e0a52b9"
6
- },
7
- "slug": "hahunavth/vlsp-sv-2023-aasist-train"
8
- }
 
 
 
 
 
 
 
 
 
google_sheet.py CHANGED
@@ -1,63 +1,126 @@
1
- import gspread
2
- from oauth2client.service_account import ServiceAccountCredentials
3
- from typing import Dict
4
-
5
-
6
- class SheetCRUDRepository:
7
- def __init__(self, worksheet):
8
- self.worksheet = worksheet
9
- self.titles = self.worksheet.row_values(1) # Assuming titles are in the first row
10
- assert len(set(self.titles)) == len(self.titles), f"Failed to init {SheetCRUDRepository.__class__}, titles: {self.titles} contain duplicated values!"
11
-
12
- def create(self, data: Dict):
13
- values = [data.get(title, '') for title in self.titles]
14
- self.worksheet.append_row(values)
15
-
16
- def read(self, row_index: int) -> Dict:
17
- """
18
- return {} if empty
19
- """
20
- values = self.worksheet.row_values(row_index)
21
- return {title: value for title, value in zip(self.titles, values)}
22
-
23
- def update(self, row_index: int, data: Dict):
24
- values = [data.get(title, '') for title in self.titles]
25
- self.worksheet.update(f"A{row_index}:Z{row_index}", [values])
26
-
27
- def delete(self, row_index: int):
28
- self.worksheet.delete_row(row_index)
29
-
30
- def find(self, search_dict):
31
- for col_title, value in search_dict.items():
32
- if col_title in self.titles:
33
- col_index = self.titles.index(col_title) + 1 # Adding 1 to match gspread indexing
34
- cell = self.worksheet.find(value, in_column=col_index)
35
- if cell is None:
36
- break
37
- row_number = cell.row
38
- return row_number, self.read(row_number)
39
- return None
40
-
41
- def create_repositories():
42
- scope = [
43
- 'https://www.googleapis.com/auth/spreadsheets',
44
- 'https://www.googleapis.com/auth/drive'
45
- ]
46
- creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
47
- client = gspread.authorize(creds)
48
- # sheet_url = "https://docs.google.com/spreadsheets/d/17OxKF0iP_aJJ0HCgJkwFsH762EUrtcEIYcPmyiiKnaM"
49
- sheet_url = "https://docs.google.com/spreadsheets/d/1KzUYgWwbvYXGfyehOTyZCCQf0udZiwVXxaxpmkXEe3E/edit?usp=sharing"
50
- sheet = client.open_by_url(sheet_url)
51
-
52
- config_repository = SheetCRUDRepository(sheet.get_worksheet(0))
53
- log_repository = SheetCRUDRepository(sheet.get_worksheet(1))
54
- secret_repository = SheetCRUDRepository(sheet.get_worksheet(2))
55
- return config_repository, log_repository, secret_repository
56
-
57
-
58
- conf_repo, log_repo, secret_repo = create_repositories()
59
-
60
-
61
- if __name__ == "__main__":
62
- a = create_repositories()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  print(a)
 
1
+ import gspread
2
+ from oauth2client.service_account import ServiceAccountCredentials
3
+ from typing import Dict
4
+
5
+
6
+ class SheetCRUDRepository:
7
+ def __init__(self, worksheet):
8
+ self.worksheet = worksheet
9
+ self.titles = self.worksheet.row_values(1) # Assuming titles are in the first row
10
+ assert len(set(self.titles)) == len(self.titles), f"Failed to init {SheetCRUDRepository.__class__}, titles: {self.titles} contain duplicated values!"
11
+
12
+ def create(self, data: Dict):
13
+ values = [data.get(title, '') for title in self.titles]
14
+ self.worksheet.append_row(values)
15
+
16
+ def read(self, row_index: int) -> Dict:
17
+ """
18
+ return {} if empty
19
+ """
20
+ values = self.worksheet.row_values(row_index)
21
+ return {title: value for title, value in zip(self.titles, values)}
22
+
23
+ def update(self, row_index: int, data: Dict):
24
+ values = [data.get(title, '') for title in self.titles]
25
+ self.worksheet.update(f"A{row_index}:Z{row_index}", [values])
26
+
27
+ def delete(self, row_index: int):
28
+ self.worksheet.delete_row(row_index)
29
+
30
+ def find(self, search_dict):
31
+ for col_title, value in search_dict.items():
32
+ if col_title in self.titles:
33
+ col_index = self.titles.index(col_title) + 1 # Adding 1 to match gspread indexing
34
+ cell = self.worksheet.find(value, in_column=col_index)
35
+ if cell is None:
36
+ break
37
+ row_number = cell.row
38
+ return row_number, self.read(row_number)
39
+ return None
40
+
41
+
42
+ class Converter:
43
+ @staticmethod
44
+ def parse_one_to_obj(field_name, value):
45
+ if value in ['TRUE', 'FALSE']:
46
+ return field_name, value == 'TRUE'
47
+ if isinstance(value, str):
48
+ if value.startswith('[DURATION]'):
49
+ if 'NONE' in value.upper():
50
+ return field_name, None
51
+ value = value.replace('[DURATION]', '').replace("\n", '').rstrip()
52
+ sign = 1
53
+ if 'before' in value:
54
+ sign = -1
55
+ if 'after' in value:
56
+ sign = 1
57
+ value = value.replace('after', '').replace('before', '').rstrip()
58
+ if 'h' in value:
59
+ value = value.replace('h', '')
60
+ return field_name, {"hours": int(value) * sign}
61
+ if 'm' in value:
62
+ value = value.replace('m', '')
63
+ return field_name, {"minutes": int(value) * sign}
64
+
65
+
66
+
67
+ return field_name, value
68
+
69
+ @staticmethod
70
+ def parse_one_to_row(field_name, value):
71
+ if isinstance(value, str):
72
+ if value in ['TRUE', 'FALSE']:
73
+ return field_name, value == "TRUE"
74
+ if isinstance(value, dict):
75
+ if 'hours' in value or 'minutes' in value:
76
+ # ignore
77
+ return None, None
78
+ return field_name, value
79
+
80
+ @staticmethod
81
+ def convert_to_obj(row):
82
+ if row is None:
83
+ return None
84
+ obj = {}
85
+ for key in row.keys():
86
+ new_key, value = Converter.parse_one_to_obj(key, row[key])
87
+ if new_key is not None:
88
+ obj[new_key] = value
89
+ return obj
90
+
91
+ @staticmethod
92
+ def convert_to_row(obj):
93
+ if obj is None:
94
+ return None
95
+ row = {}
96
+ for key in obj.keys():
97
+ new_key, value = Converter.parse_one_to_row(key, obj[key])
98
+ if new_key is not None:
99
+ row[new_key] = value
100
+ return row
101
+
102
+
103
+ def create_repositories():
104
+ scope = [
105
+ 'https://www.googleapis.com/auth/spreadsheets',
106
+ 'https://www.googleapis.com/auth/drive'
107
+ ]
108
+ creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
109
+ client = gspread.authorize(creds)
110
+ # sheet_url = "https://docs.google.com/spreadsheets/d/17OxKF0iP_aJJ0HCgJkwFsH762EUrtcEIYcPmyiiKnaM"
111
+ sheet_url = "https://docs.google.com/spreadsheets/d/1KzUYgWwbvYXGfyehOTyZCCQf0udZiwVXxaxpmkXEe3E/edit?usp=sharing"
112
+ sheet = client.open_by_url(sheet_url)
113
+
114
+ run_stt_repository = SheetCRUDRepository(sheet.get_worksheet(0))
115
+ config_repository = SheetCRUDRepository(sheet.get_worksheet(1))
116
+ log_repository = SheetCRUDRepository(sheet.get_worksheet(2))
117
+ secret_repository = SheetCRUDRepository(sheet.get_worksheet(3))
118
+ return run_stt_repository, config_repository, log_repository, secret_repository
119
+
120
+
121
+ run_stt_repo, conf_repo, log_repo, secret_repo = create_repositories()
122
+
123
+
124
+ if __name__ == "__main__":
125
+ a = create_repositories()
126
  print(a)
kaggle_service.py CHANGED
@@ -1,583 +1,583 @@
1
- import json
2
- import os
3
- from typing import Callable, List, Union, Dict
4
-
5
- # fake default account to use kaggle.api.kaggle_api_extended
6
- os.environ['KAGGLE_USERNAME']=''
7
- os.environ['KAGGLE_KEY']=''
8
-
9
- from kaggle.api.kaggle_api_extended import KaggleApi
10
- from kaggle.rest import ApiException
11
- import shutil
12
- import time
13
- import threading
14
- import copy
15
- from logger import sheet_logger
16
-
17
-
18
- def get_api():
19
- api = KaggleApi()
20
- api.authenticate()
21
- return api
22
-
23
-
24
- class KaggleApiWrapper(KaggleApi):
25
- """
26
- Override KaggleApi.read_config_environment to use username and secret without environment variables
27
- """
28
-
29
- def __init__(self, username, secret):
30
- super().__init__()
31
- self.username = username
32
- self.secret = secret
33
-
34
- def read_config_environment(self, config_data=None, quiet=False):
35
- config = super().read_config_environment(config_data, quiet)
36
- config['username'] = self.username
37
- config['key'] = self.secret
38
- # only work for pythonanyware
39
- # config['proxy'] = "http://proxy.server:3128"
40
-
41
- return config_data
42
-
43
- def __del__(self):
44
- # todo: fix bug when delete api
45
- pass
46
-
47
- # def get_accelerator_quota_with_http_info(self): # noqa: E501
48
- # """
49
- #
50
- # This method makes a synchronous HTTP request by default. To make an
51
- # asynchronous HTTP request, please pass async_req=True
52
- # >>> thread = api.competitions_list_with_http_info(async_req=True)
53
- # >>> result = thread.get()
54
- #
55
- # :param async_req bool
56
- # :param str group: Filter competitions by a particular group
57
- # :param str category: Filter competitions by a particular category
58
- # :param str sort_by: Sort the results
59
- # :param int page: Page number
60
- # :param str search: Search terms
61
- # :return: Result
62
- # If the method is called asynchronously,
63
- # returns the request thread.
64
- # """
65
- #
66
- # all_params = [] # noqa: E501
67
- # all_params.append('async_req')
68
- # all_params.append('_return_http_data_only')
69
- # all_params.append('_preload_content')
70
- # all_params.append('_request_timeout')
71
- #
72
- # params = locals()
73
- #
74
- # collection_formats = {}
75
- #
76
- # path_params = {}
77
- #
78
- # query_params = []
79
- # # if 'group' in params:
80
- # # query_params.append(('group', params['group'])) # noqa: E501
81
- # # if 'category' in params:
82
- # # query_params.append(('category', params['category'])) # noqa: E501
83
- # # if 'sort_by' in params:
84
- # # query_params.append(('sortBy', params['sort_by'])) # noqa: E501
85
- # # if 'page' in params:
86
- # # query_params.append(('page', params['page'])) # noqa: E501
87
- # # if 'search' in params:
88
- # # query_params.append(('search', params['search'])) # noqa: E501
89
- #
90
- # header_params = {}
91
- #
92
- # form_params = []
93
- # local_var_files = {}
94
- #
95
- # body_params = None
96
- # # HTTP header `Accept`
97
- # header_params['Accept'] = self.api_client.select_header_accept(
98
- # ['application/json']) # noqa: E501
99
- #
100
- # # Authentication setting
101
- # auth_settings = ['basicAuth'] # noqa: E501
102
- #
103
- # return self.api_client.call_api(
104
- # 'i/kernels.KernelsService/GetAcceleratorQuotaStatistics', 'GET',
105
- # # '/competitions/list', 'GET',
106
- # path_params,
107
- # query_params,
108
- # header_params,
109
- # body=body_params,
110
- # post_params=form_params,
111
- # files=local_var_files,
112
- # response_type='Result', # noqa: E501
113
- # auth_settings=auth_settings,
114
- # async_req=params.get('async_req'),
115
- # _return_http_data_only=params.get('_return_http_data_only'),
116
- # _preload_content=params.get('_preload_content', True),
117
- # _request_timeout=params.get('_request_timeout'),
118
- # collection_formats=collection_formats)
119
- #
120
- # if __name__ == "__main__":
121
- # api = KaggleApiWrapper('[email protected]', "c54e96568075fcc277bd10ba0e0a52b9")
122
- # api.authenticate()
123
- # print(api.get_accelerator_quota_with_http_info())
124
-
125
- class ValidateException(Exception):
126
- def __init__(self, message: str):
127
- super(ValidateException, self).__init__(message)
128
-
129
- @staticmethod
130
- def from_api_exception(e: ApiException, kernel_slug: str):
131
- return ValidateException(f"Error: {e.status} {e.reason} with notebook {kernel_slug}")
132
-
133
- @staticmethod
134
- def from_api_exception_list(el: List[ApiException], kernel_slug_list: List[str]):
135
- message = f"Error: \n"
136
- for e, k in zip(el, kernel_slug_list):
137
- message = message + f"\t{e.status} {e.reason} with notebook {k}"
138
- return ValidateException(message)
139
-
140
-
141
- class KaggleNotebook:
142
- def __init__(self, api: KaggleApi, kernel_slug: str, container_path: str = "./tmp", id=None):
143
- """
144
- :param api: KaggleApi
145
- :param kernel_slug: Notebook id, you can find it in the url of the notebook.
146
- For example, `username/notebook-name-123456`
147
- :param container_path: Path to the local folder where the notebook will be downloaded
148
- """
149
- self.api = api
150
- self.kernel_slug = kernel_slug
151
- self.container_path = container_path
152
- self.id = id
153
- if self.id is None:
154
- print(f"Warn: {self.__class__.__name__}.id is None")
155
-
156
- def status(self) -> str or None:
157
- """
158
- :return:
159
- "running"
160
- "cancelAcknowledged"
161
- "queued": waiting for run
162
- "error": when raise exception in notebook
163
- Throw exception when failed
164
- """
165
- res = self.api.kernels_status(self.kernel_slug)
166
- print(f"Status: {res}")
167
- if res is None:
168
- if self.id is not None:
169
- sheet_logger.update_job_status(self.id, notebook_status='None')
170
- return None
171
- if self.id is not None:
172
- sheet_logger.update_job_status(self.id, notebook_status=res['status'])
173
- return res['status']
174
-
175
- def _get_local_nb_path(self) -> str:
176
- return os.path.join(self.container_path, self.kernel_slug)
177
-
178
- def pull(self, path=None) -> str or None:
179
- """
180
-
181
- :param path:
182
- :return:
183
- :raises: ApiException if notebook not found or not share to user
184
- """
185
- self._clean()
186
- path = path or self._get_local_nb_path()
187
- metadata_path = os.path.join(path, "kernel-metadata.json")
188
- res = self.api.kernels_pull(self.kernel_slug, path=path, metadata=True, quiet=False)
189
- if not os.path.exists(metadata_path):
190
- print(f"Warn: Not found {metadata_path}. Clean {path}")
191
- self._clean()
192
- return None
193
- return res
194
-
195
- def push(self, path=None) -> str or None:
196
- status = self.status()
197
- if status in ['queued', 'running']:
198
- print("Warn: Notebook is " + status + ". Skip push notebook!")
199
- return None
200
-
201
- self.api.kernels_push(path or self._get_local_nb_path())
202
- time.sleep(1)
203
- status = self.status()
204
- return status
205
-
206
- def _clean(self) -> None:
207
- if os.path.exists(self._get_local_nb_path()):
208
- shutil.rmtree(self._get_local_nb_path())
209
-
210
- def get_metadata(self, path=None):
211
- path = path or self._get_local_nb_path()
212
- metadata_path = os.path.join(path, "kernel-metadata.json")
213
- if not os.path.exists(metadata_path):
214
- return None
215
- return json.loads(open(metadata_path).read())
216
-
217
- def check_nb_permission(self) -> Union[tuple[bool], tuple[bool, None]]:
218
- status = self.status() # raise ApiException
219
- if status is None:
220
- return False, status
221
- return True, status
222
-
223
-
224
- def check_datasets_permission(self) -> bool:
225
- meta = self.get_metadata()
226
- if meta is None:
227
- print("Warn: cannot get metadata. Pull and try again?")
228
- dataset_sources = meta['dataset_sources']
229
- ex_list = []
230
- slugs = []
231
- for dataset in dataset_sources:
232
- try:
233
- self.api.dataset_status(dataset)
234
- except ApiException as e:
235
- print(f"Error: {e.status} {e.reason} with dataset {dataset} in notebook {self.kernel_slug}")
236
- ex_list.append(e)
237
- slugs.append(self.kernel_slug)
238
- # return False
239
- if len(ex_list) > 0:
240
- raise ValidateException.from_api_exception_list(ex_list, slugs)
241
- return True
242
-
243
-
244
- class AccountTransactionManager:
245
- def __init__(self, acc_secret_dict: dict=None):
246
- """
247
- :param acc_secret_dict: {username: secret}
248
- """
249
- self.acc_secret_dict = acc_secret_dict
250
- if self.acc_secret_dict is None:
251
- self.acc_secret_dict = {}
252
- # self.api_dict = {username: KaggleApiWrapper(username, secret) for username, secret in acc_secret_dict.items()}
253
- # lock for each account to avoid concurrent use api
254
- self.lock_dict = {username: False for username in self.acc_secret_dict.keys()}
255
- self.state_lock = threading.Lock()
256
-
257
- def _get_api(self, username: str) -> KaggleApiWrapper:
258
- # return self.api_dict[username]
259
- return KaggleApiWrapper(username, self.acc_secret_dict[username])
260
-
261
- def _get_lock(self, username: str) -> bool:
262
- return self.lock_dict[username]
263
-
264
- def _set_lock(self, username: str, lock: bool) -> None:
265
- self.lock_dict[username] = lock
266
-
267
- def add_account(self, username, secret):
268
- if username not in self.acc_secret_dict.keys():
269
- self.state_lock.acquire()
270
- self.acc_secret_dict[username] = secret
271
- self.lock_dict[username] = False
272
- self.state_lock.release()
273
-
274
- def remove_account(self, username):
275
- if username in self.acc_secret_dict.keys():
276
- self.state_lock.acquire()
277
- del self.acc_secret_dict[username]
278
- del self.lock_dict[username]
279
- self.state_lock.release()
280
- else:
281
- print(f"Warn: try to remove account not in the list: {username}, list: {self.acc_secret_dict.keys()}")
282
-
283
- def get_unlocked_api_unblocking(self, username_list: List[str]) -> tuple[KaggleApiWrapper, Callable[[], None]]:
284
- """
285
- :param username_list: list of username
286
- :return: (api, release) where release is a function to release api
287
- """
288
- while True:
289
- print("get_unlocked_api_unblocking" + str(username_list))
290
- for username in username_list:
291
- self.state_lock.acquire()
292
- if not self._get_lock(username):
293
- self._set_lock(username, True)
294
- api = self._get_api(username)
295
-
296
- def release():
297
- self.state_lock.acquire()
298
- self._set_lock(username, False)
299
- api.__del__()
300
- self.state_lock.release()
301
-
302
- self.state_lock.release()
303
- return api, release
304
- self.state_lock.release()
305
- time.sleep(1)
306
-
307
-
308
- class NbJob:
309
- def __init__(self, acc_dict: dict, nb_slug: str, rerun_stt: List[str] = None, not_rerun_stt: List[str] = None, id=None):
310
- """
311
-
312
- :param acc_dict:
313
- :param nb_slug:
314
- :param rerun_stt:
315
- :param not_rerun_stt: If notebook status in this list, do not rerun it. (Note: do not add "queued", "running")
316
- """
317
- self.rerun_stt = rerun_stt
318
- if self.rerun_stt is None:
319
- self.rerun_stt = ['complete']
320
- self.not_rerun_stt = not_rerun_stt
321
-
322
- if self.not_rerun_stt is None:
323
- self.not_rerun_stt = ['queued', 'running', 'cancelAcknowledged']
324
- assert "queued" in self.not_rerun_stt
325
- assert "running" in self.not_rerun_stt
326
-
327
- self.acc_dict = acc_dict
328
- self.nb_slug = nb_slug
329
- self.id = id
330
-
331
- def get_acc_dict(self):
332
- return self.acc_dict
333
-
334
- def get_username_list(self):
335
- return list(self.acc_dict.keys())
336
-
337
- def is_valid_with_acc(self, api):
338
- """
339
- :param api:
340
- :return:
341
- :raise: ValidationException
342
- """
343
- notebook = KaggleNotebook(api, self.nb_slug, id=self.id)
344
-
345
- try:
346
- notebook.pull() # raise ApiException
347
- stt, _ = notebook.check_nb_permission() # note: raise ApiException
348
- stt = notebook.check_datasets_permission() # raise ValidationException
349
- except ApiException as e:
350
- raise ValidateException.from_api_exception(e, self.nb_slug)
351
- # if not stt:
352
- # return False
353
- return True
354
-
355
- def is_valid(self):
356
- for username in self.acc_dict.keys():
357
- secrets = self.acc_dict[username]
358
- api = KaggleApiWrapper(username=username, secret=secrets)
359
- api.authenticate()
360
- if not self.is_valid_with_acc(api):
361
- return False
362
- return True
363
-
364
- def acc_check_and_rerun_if_need(self, api: KaggleApi) -> bool:
365
- """
366
- :return:
367
- True if rerun success or notebook is running
368
- False user does not have enough gpu quotas
369
- :raises
370
- Exception if setup error
371
- """
372
- notebook = KaggleNotebook(api, self.nb_slug, "./tmp", id=self.id) # todo: change hardcode container_path here
373
-
374
- notebook.pull()
375
- assert notebook.check_datasets_permission(), f"User {api} does not have permission on datasets of notebook {self.nb_slug}"
376
- success, status1 = notebook.check_nb_permission()
377
- assert success, f"User {api} does not have permission on notebook {self.nb_slug}" # todo: using api.username
378
-
379
- if status1 in self.rerun_stt:
380
- status2 = notebook.push()
381
- time.sleep(10)
382
- status3 = notebook.status()
383
-
384
- # if 3 times same stt -> acc out of quota
385
- if status1 == status2 == status3:
386
- sheet_logger.log(username=api.username, nb=self.nb_slug, log="Try but no effect. Seem account to be out of quota")
387
- return False
388
- if status3 in self.not_rerun_stt:
389
- # sheet_logger.log(username=api.username, nb=self.nb_slug, log=f"Notebook status is {status3} is in ignore status list {self.not_rerun_stt}, do nothing!")
390
- sheet_logger.log(username=api.username, nb=self.nb_slug,
391
- log=f"Schedule notebook successfully. Current status is '{status3}'")
392
- return True
393
- if status3 not in ["queued", "running"]:
394
- # return False # todo: check when user is out of quota
395
- print(f"Error: status is {status3}")
396
-
397
- raise Exception("Setup exception")
398
-
399
- return True
400
-
401
- sheet_logger.log(username=api.username, nb=self.nb_slug, log=f"Notebook status is '{status1}' is not in {self.rerun_stt}, do nothing!")
402
- return True
403
-
404
- @staticmethod
405
- def from_dict(obj: dict, id=None):
406
- return NbJob(acc_dict=obj['accounts'], nb_slug=obj['slug'], rerun_stt=obj.get('rerun_status'), not_rerun_stt=obj.get('not_rerun_stt'), id=id)
407
-
408
-
409
- class KernelRerunService:
410
- def __init__(self):
411
- self.jobs: Dict[str, NbJob] = {}
412
- self.acc_manager = AccountTransactionManager()
413
- self.username2jobid = {}
414
- self.jobid2username = {}
415
-
416
- def add_job(self, nb_job: NbJob):
417
- if nb_job.nb_slug in self.jobs.keys():
418
- print("Warn: nb_job already in job list")
419
- return
420
- self.jobs[nb_job.nb_slug] = nb_job
421
- self.jobid2username[nb_job.nb_slug] = nb_job.get_username_list()
422
- for username in nb_job.get_username_list():
423
- if username not in self.username2jobid.keys():
424
- self.username2jobid[username] = []
425
- self.acc_manager.add_account(username, nb_job.acc_dict[username])
426
- self.username2jobid[username].append(nb_job.nb_slug)
427
-
428
- def remove_job(self, nb_job):
429
- if nb_job.nb_slug not in self.jobs.keys():
430
- print("Warn: try to remove nb_job not in list")
431
- return
432
- username_list = self.jobid2username[nb_job.nb_slug]
433
- username_list = [username for username in username_list if len(self.username2jobid[username]) == 1]
434
-
435
- for username in username_list:
436
- del self.username2jobid[username]
437
- self.acc_manager.remove_account(username)
438
- del self.jobs[nb_job.nb_slug]
439
- del self.jobid2username[nb_job.nb_slug]
440
-
441
- def validate_all(self):
442
- for username in self.acc_manager.acc_secret_dict.keys():
443
- api, release = self.acc_manager.get_unlocked_api_unblocking([username])
444
- api.authenticate()
445
- print(f"Using username: {api.username}")
446
-
447
- for job in self.jobs.values():
448
- ex_msg_list = []
449
- if username in job.get_username_list():
450
- print(f"Validate user: {username}, job: {job.nb_slug}")
451
-
452
- try:
453
- job.is_valid_with_acc(api)
454
- except ValidateException as e:
455
- print(f"Error: not valid")
456
- a = f"Setup error: {username} does not have permission on notebook {job.nb_slug} or related datasets"
457
- if job.id is not None: # if have id, write log
458
- ex_msg_list.append(f"Account {username}\n" + str(e) + "\n")
459
- else: # if not have id, raise
460
- raise Exception(a)
461
- if len(ex_msg_list) > 0:
462
- sheet_logger.update_job_status(job.id, validate_status="\n".join(ex_msg_list))
463
- else:
464
- sheet_logger.update_job_status(job.id, validate_status="success")
465
-
466
- release()
467
- return True
468
-
469
- def status_all(self):
470
- for job in self.jobs.values():
471
- print(f"Job: {job.nb_slug}")
472
- api, release = self.acc_manager.get_unlocked_api_unblocking(job.get_username_list())
473
- api.authenticate()
474
- print(f"Using username: {api.username}")
475
- notebook = KaggleNotebook(api, job.nb_slug, id=job.id)
476
- print(f"Notebook: {notebook.kernel_slug}")
477
- print(notebook.status())
478
- release()
479
-
480
- def run(self, nb_job: NbJob):
481
- username_list = copy.copy(nb_job.get_username_list())
482
- while len(username_list) > 0:
483
- api, release = self.acc_manager.get_unlocked_api_unblocking(username_list)
484
- api.authenticate()
485
- print(f"Using username: {api.username}")
486
-
487
- try:
488
- result = nb_job.acc_check_and_rerun_if_need(api)
489
- if result:
490
- return True
491
- except Exception as e:
492
- print(e)
493
- release()
494
- break
495
-
496
- if api.username in username_list:
497
- username_list.remove(api.username)
498
- release()
499
- else:
500
- release()
501
- raise Exception("")
502
- return False
503
-
504
- def run_all(self):
505
- for job in self.jobs.values():
506
- success = self.run(job)
507
- print(f"Job: {job.nb_slug} {success}")
508
-
509
-
510
- # if __name__ == "__main__":
511
- # service = KernelRerunService()
512
- # files = os.listdir("./config")
513
- # for file in files:
514
- # if '.example' not in file:
515
- # with open(os.path.join("./config", file), "r") as f:
516
- # obj = json.loads(f.read())
517
- # print(obj)
518
- # service.add_job(NbJob.from_dict(obj))
519
- # service.run_all()
520
-
521
- # try:
522
- # acc_secret_dict = {
523
- # "hahunavth": "secret",
524
- # "hahunavth2": "secret",
525
- # "hahunavth3": "secret",
526
- # "hahunavth4": "secret",
527
- # "hahunavth5": "secret",
528
- # }
529
- # acc_manager = AccountTransactionManager(acc_secret_dict)
530
- #
531
- #
532
- # def test1():
533
- # username_list = ["hahunavth", "hahunavth2", "hahunavth3", "hahunavth4", "hahunavth5"]
534
- # while len(username_list) > 0:
535
- # api, release = acc_manager.get_unlocked_api_unblocking(username_list)
536
- # print("test1 is using " + api.username)
537
- # time.sleep(1)
538
- # release()
539
- # if api.username in username_list:
540
- # username_list.remove(api.username)
541
- # else:
542
- # raise Exception("")
543
- # print("test1 release " + api.username)
544
- #
545
- #
546
- # def test2():
547
- # username_list = ["hahunavth2", "hahunavth3", "hahunavth5"]
548
- # while len(username_list) > 0:
549
- # api, release = acc_manager.get_unlocked_api_unblocking(username_list)
550
- # print("test2 is using " + api.username)
551
- # time.sleep(3)
552
- # release()
553
- # if api.username in username_list:
554
- # username_list.remove(api.username)
555
- # else:
556
- # raise Exception("")
557
- # print("test2 release " + api.username)
558
- #
559
- #
560
- # t1 = threading.Thread(target=test1)
561
- # t2 = threading.Thread(target=test2)
562
- # t1.start()
563
- # t2.start()
564
- # t1.join()
565
- # t2.join()
566
- #
567
- # # kgapi = KaggleApiWrapper("hahunavth", "fb3d65ea4d06f91a83cf571e9a39d40d")
568
- # # kgapi.authenticate()
569
- # # # kgapi = get_api()
570
- # # notebook = KaggleNotebook(kgapi, "hahunavth/ess-vlsp2023-denoising", "./tmp")
571
- # # # print(notebook.pull())
572
- # # # print(notebook.check_datasets_permission())
573
- # # print(notebook.check_nb_permission())
574
- # # # print(notebook.status())
575
- # # # notebook.push()
576
- # # # print(notebook.status())
577
- # except ApiException as e:
578
- # print(e.status)
579
- # print(e.reason)
580
- # raise e
581
- # # 403 when nb not exists or not share to acc
582
- # # 404 when push to unknow kenel_slug.username
583
  # # 401 when invalid username, pass
 
1
+ import json
2
+ import os
3
+ from typing import Callable, List, Union, Dict
4
+
5
+ # fake default account to use kaggle.api.kaggle_api_extended
6
+ os.environ['KAGGLE_USERNAME']=''
7
+ os.environ['KAGGLE_KEY']=''
8
+
9
+ from kaggle.api.kaggle_api_extended import KaggleApi
10
+ from kaggle.rest import ApiException
11
+ import shutil
12
+ import time
13
+ import threading
14
+ import copy
15
+ from logger import sheet_logger
16
+
17
+
18
+ def get_api():
19
+ api = KaggleApi()
20
+ api.authenticate()
21
+ return api
22
+
23
+
24
+ class KaggleApiWrapper(KaggleApi):
25
+ """
26
+ Override KaggleApi.read_config_environment to use username and secret without environment variables
27
+ """
28
+
29
+ def __init__(self, username, secret):
30
+ super().__init__()
31
+ self.username = username
32
+ self.secret = secret
33
+
34
+ def read_config_environment(self, config_data=None, quiet=False):
35
+ config = super().read_config_environment(config_data, quiet)
36
+ config['username'] = self.username
37
+ config['key'] = self.secret
38
+ # only work for pythonanyware
39
+ # config['proxy'] = "http://proxy.server:3128"
40
+
41
+ return config_data
42
+
43
+ def __del__(self):
44
+ # todo: fix bug when delete api
45
+ pass
46
+
47
+ # def get_accelerator_quota_with_http_info(self): # noqa: E501
48
+ # """
49
+ #
50
+ # This method makes a synchronous HTTP request by default. To make an
51
+ # asynchronous HTTP request, please pass async_req=True
52
+ # >>> thread = api.competitions_list_with_http_info(async_req=True)
53
+ # >>> result = thread.get()
54
+ #
55
+ # :param async_req bool
56
+ # :param str group: Filter competitions by a particular group
57
+ # :param str category: Filter competitions by a particular category
58
+ # :param str sort_by: Sort the results
59
+ # :param int page: Page number
60
+ # :param str search: Search terms
61
+ # :return: Result
62
+ # If the method is called asynchronously,
63
+ # returns the request thread.
64
+ # """
65
+ #
66
+ # all_params = [] # noqa: E501
67
+ # all_params.append('async_req')
68
+ # all_params.append('_return_http_data_only')
69
+ # all_params.append('_preload_content')
70
+ # all_params.append('_request_timeout')
71
+ #
72
+ # params = locals()
73
+ #
74
+ # collection_formats = {}
75
+ #
76
+ # path_params = {}
77
+ #
78
+ # query_params = []
79
+ # # if 'group' in params:
80
+ # # query_params.append(('group', params['group'])) # noqa: E501
81
+ # # if 'category' in params:
82
+ # # query_params.append(('category', params['category'])) # noqa: E501
83
+ # # if 'sort_by' in params:
84
+ # # query_params.append(('sortBy', params['sort_by'])) # noqa: E501
85
+ # # if 'page' in params:
86
+ # # query_params.append(('page', params['page'])) # noqa: E501
87
+ # # if 'search' in params:
88
+ # # query_params.append(('search', params['search'])) # noqa: E501
89
+ #
90
+ # header_params = {}
91
+ #
92
+ # form_params = []
93
+ # local_var_files = {}
94
+ #
95
+ # body_params = None
96
+ # # HTTP header `Accept`
97
+ # header_params['Accept'] = self.api_client.select_header_accept(
98
+ # ['application/json']) # noqa: E501
99
+ #
100
+ # # Authentication setting
101
+ # auth_settings = ['basicAuth'] # noqa: E501
102
+ #
103
+ # return self.api_client.call_api(
104
+ # 'i/kernels.KernelsService/GetAcceleratorQuotaStatistics', 'GET',
105
+ # # '/competitions/list', 'GET',
106
+ # path_params,
107
+ # query_params,
108
+ # header_params,
109
+ # body=body_params,
110
+ # post_params=form_params,
111
+ # files=local_var_files,
112
+ # response_type='Result', # noqa: E501
113
+ # auth_settings=auth_settings,
114
+ # async_req=params.get('async_req'),
115
+ # _return_http_data_only=params.get('_return_http_data_only'),
116
+ # _preload_content=params.get('_preload_content', True),
117
+ # _request_timeout=params.get('_request_timeout'),
118
+ # collection_formats=collection_formats)
119
+ #
120
+ # if __name__ == "__main__":
121
+ # api = KaggleApiWrapper('[email protected]', "c54e96568075fcc277bd10ba0e0a52b9")
122
+ # api.authenticate()
123
+ # print(api.get_accelerator_quota_with_http_info())
124
+
125
+ class ValidateException(Exception):
126
+ def __init__(self, message: str):
127
+ super(ValidateException, self).__init__(message)
128
+
129
+ @staticmethod
130
+ def from_api_exception(e: ApiException, kernel_slug: str):
131
+ return ValidateException(f"Error: {e.status} {e.reason} with notebook {kernel_slug}")
132
+
133
+ @staticmethod
134
+ def from_api_exception_list(el: List[ApiException], kernel_slug_list: List[str]):
135
+ message = f"Error: \n"
136
+ for e, k in zip(el, kernel_slug_list):
137
+ message = message + f"\t{e.status} {e.reason} with notebook {k}"
138
+ return ValidateException(message)
139
+
140
+
141
+ class KaggleNotebook:
142
+ def __init__(self, api: KaggleApi, kernel_slug: str, container_path: str = "./tmp", id=None):
143
+ """
144
+ :param api: KaggleApi
145
+ :param kernel_slug: Notebook id, you can find it in the url of the notebook.
146
+ For example, `username/notebook-name-123456`
147
+ :param container_path: Path to the local folder where the notebook will be downloaded
148
+ """
149
+ self.api = api
150
+ self.kernel_slug = kernel_slug
151
+ self.container_path = container_path
152
+ self.id = id
153
+ if self.id is None:
154
+ print(f"Warn: {self.__class__.__name__}.id is None")
155
+
156
+ def status(self) -> str or None:
157
+ """
158
+ :return:
159
+ "running"
160
+ "cancelAcknowledged"
161
+ "queued": waiting for run
162
+ "error": when raise exception in notebook
163
+ Throw exception when failed
164
+ """
165
+ res = self.api.kernels_status(self.kernel_slug)
166
+ print(f"Status: {res}")
167
+ if res is None:
168
+ if self.id is not None:
169
+ sheet_logger.update_job_status(self.id, notebook_status='None')
170
+ return None
171
+ if self.id is not None:
172
+ sheet_logger.update_job_status(self.id, notebook_status=res['status'])
173
+ return res['status']
174
+
175
+ def _get_local_nb_path(self) -> str:
176
+ return os.path.join(self.container_path, self.kernel_slug)
177
+
178
+ def pull(self, path=None) -> str or None:
179
+ """
180
+
181
+ :param path:
182
+ :return:
183
+ :raises: ApiException if notebook not found or not share to user
184
+ """
185
+ self._clean()
186
+ path = path or self._get_local_nb_path()
187
+ metadata_path = os.path.join(path, "kernel-metadata.json")
188
+ res = self.api.kernels_pull(self.kernel_slug, path=path, metadata=True, quiet=False)
189
+ if not os.path.exists(metadata_path):
190
+ print(f"Warn: Not found {metadata_path}. Clean {path}")
191
+ self._clean()
192
+ return None
193
+ return res
194
+
195
+ def push(self, path=None) -> str or None:
196
+ status = self.status()
197
+ if status in ['queued', 'running']:
198
+ print("Warn: Notebook is " + status + ". Skip push notebook!")
199
+ return None
200
+
201
+ self.api.kernels_push(path or self._get_local_nb_path())
202
+ time.sleep(1)
203
+ status = self.status()
204
+ return status
205
+
206
+ def _clean(self) -> None:
207
+ if os.path.exists(self._get_local_nb_path()):
208
+ shutil.rmtree(self._get_local_nb_path())
209
+
210
+ def get_metadata(self, path=None):
211
+ path = path or self._get_local_nb_path()
212
+ metadata_path = os.path.join(path, "kernel-metadata.json")
213
+ if not os.path.exists(metadata_path):
214
+ return None
215
+ return json.loads(open(metadata_path).read())
216
+
217
+ def check_nb_permission(self) -> Union[tuple[bool], tuple[bool, None]]:
218
+ status = self.status() # raise ApiException
219
+ if status is None:
220
+ return False, status
221
+ return True, status
222
+
223
+
224
+ def check_datasets_permission(self) -> bool:
225
+ meta = self.get_metadata()
226
+ if meta is None:
227
+ print("Warn: cannot get metadata. Pull and try again?")
228
+ dataset_sources = meta['dataset_sources']
229
+ ex_list = []
230
+ slugs = []
231
+ for dataset in dataset_sources:
232
+ try:
233
+ self.api.dataset_status(dataset)
234
+ except ApiException as e:
235
+ print(f"Error: {e.status} {e.reason} with dataset {dataset} in notebook {self.kernel_slug}")
236
+ ex_list.append(e)
237
+ slugs.append(self.kernel_slug)
238
+ # return False
239
+ if len(ex_list) > 0:
240
+ raise ValidateException.from_api_exception_list(ex_list, slugs)
241
+ return True
242
+
243
+
244
+ class AccountTransactionManager:
245
+ def __init__(self, acc_secret_dict: dict=None):
246
+ """
247
+ :param acc_secret_dict: {username: secret}
248
+ """
249
+ self.acc_secret_dict = acc_secret_dict
250
+ if self.acc_secret_dict is None:
251
+ self.acc_secret_dict = {}
252
+ # self.api_dict = {username: KaggleApiWrapper(username, secret) for username, secret in acc_secret_dict.items()}
253
+ # lock for each account to avoid concurrent use api
254
+ self.lock_dict = {username: False for username in self.acc_secret_dict.keys()}
255
+ self.state_lock = threading.Lock()
256
+
257
+ def _get_api(self, username: str) -> KaggleApiWrapper:
258
+ # return self.api_dict[username]
259
+ return KaggleApiWrapper(username, self.acc_secret_dict[username])
260
+
261
+ def _get_lock(self, username: str) -> bool:
262
+ return self.lock_dict[username]
263
+
264
+ def _set_lock(self, username: str, lock: bool) -> None:
265
+ self.lock_dict[username] = lock
266
+
267
+ def add_account(self, username, secret):
268
+ if username not in self.acc_secret_dict.keys():
269
+ self.state_lock.acquire()
270
+ self.acc_secret_dict[username] = secret
271
+ self.lock_dict[username] = False
272
+ self.state_lock.release()
273
+
274
+ def remove_account(self, username):
275
+ if username in self.acc_secret_dict.keys():
276
+ self.state_lock.acquire()
277
+ del self.acc_secret_dict[username]
278
+ del self.lock_dict[username]
279
+ self.state_lock.release()
280
+ else:
281
+ print(f"Warn: try to remove account not in the list: {username}, list: {self.acc_secret_dict.keys()}")
282
+
283
+ def get_unlocked_api_unblocking(self, username_list: List[str]) -> tuple[KaggleApiWrapper, Callable[[], None]]:
284
+ """
285
+ :param username_list: list of username
286
+ :return: (api, release) where release is a function to release api
287
+ """
288
+ while True:
289
+ print("get_unlocked_api_unblocking" + str(username_list))
290
+ for username in username_list:
291
+ self.state_lock.acquire()
292
+ if not self._get_lock(username):
293
+ self._set_lock(username, True)
294
+ api = self._get_api(username)
295
+
296
+ def release():
297
+ self.state_lock.acquire()
298
+ self._set_lock(username, False)
299
+ api.__del__()
300
+ self.state_lock.release()
301
+
302
+ self.state_lock.release()
303
+ return api, release
304
+ self.state_lock.release()
305
+ time.sleep(1)
306
+
307
+
308
+ class NbJob:
309
+ def __init__(self, acc_dict: dict, nb_slug: str, rerun_stt: List[str] = None, not_rerun_stt: List[str] = None, id=None):
310
+ """
311
+
312
+ :param acc_dict:
313
+ :param nb_slug:
314
+ :param rerun_stt:
315
+ :param not_rerun_stt: If notebook status in this list, do not rerun it. (Note: do not add "queued", "running")
316
+ """
317
+ self.rerun_stt = rerun_stt
318
+ if self.rerun_stt is None:
319
+ self.rerun_stt = ['complete']
320
+ self.not_rerun_stt = not_rerun_stt
321
+
322
+ if self.not_rerun_stt is None:
323
+ self.not_rerun_stt = ['queued', 'running', 'cancelAcknowledged']
324
+ assert "queued" in self.not_rerun_stt
325
+ assert "running" in self.not_rerun_stt
326
+
327
+ self.acc_dict = acc_dict
328
+ self.nb_slug = nb_slug
329
+ self.id = id
330
+
331
+ def get_acc_dict(self):
332
+ return self.acc_dict
333
+
334
+ def get_username_list(self):
335
+ return list(self.acc_dict.keys())
336
+
337
+ def is_valid_with_acc(self, api):
338
+ """
339
+ :param api:
340
+ :return:
341
+ :raise: ValidationException
342
+ """
343
+ notebook = KaggleNotebook(api, self.nb_slug, id=self.id)
344
+
345
+ try:
346
+ notebook.pull() # raise ApiException
347
+ stt, _ = notebook.check_nb_permission() # note: raise ApiException
348
+ stt = notebook.check_datasets_permission() # raise ValidationException
349
+ except ApiException as e:
350
+ raise ValidateException.from_api_exception(e, self.nb_slug)
351
+ # if not stt:
352
+ # return False
353
+ return True
354
+
355
+ def is_valid(self):
356
+ for username in self.acc_dict.keys():
357
+ secrets = self.acc_dict[username]
358
+ api = KaggleApiWrapper(username=username, secret=secrets)
359
+ api.authenticate()
360
+ if not self.is_valid_with_acc(api):
361
+ return False
362
+ return True
363
+
364
+ def acc_check_and_rerun_if_need(self, api: KaggleApi) -> bool:
365
+ """
366
+ :return:
367
+ True if rerun success or notebook is running
368
+ False user does not have enough gpu quotas
369
+ :raises
370
+ Exception if setup error
371
+ """
372
+ notebook = KaggleNotebook(api, self.nb_slug, "./tmp", id=self.id) # todo: change hardcode container_path here
373
+
374
+ notebook.pull()
375
+ assert notebook.check_datasets_permission(), f"User {api} does not have permission on datasets of notebook {self.nb_slug}"
376
+ success, status1 = notebook.check_nb_permission()
377
+ assert success, f"User {api} does not have permission on notebook {self.nb_slug}" # todo: using api.username
378
+
379
+ if status1 in self.rerun_stt:
380
+ status2 = notebook.push()
381
+ time.sleep(10)
382
+ status3 = notebook.status()
383
+
384
+ # if 3 times same stt -> acc out of quota
385
+ if status1 == status2 == status3:
386
+ sheet_logger.log(username=api.username, nb=self.nb_slug, log="Try but no effect. Seem account to be out of quota")
387
+ return False
388
+ if status3 in self.not_rerun_stt:
389
+ # sheet_logger.log(username=api.username, nb=self.nb_slug, log=f"Notebook status is {status3} is in ignore status list {self.not_rerun_stt}, do nothing!")
390
+ sheet_logger.log(username=api.username, nb=self.nb_slug,
391
+ log=f"Schedule notebook successfully. Current status is '{status3}'")
392
+ return True
393
+ if status3 not in ["queued", "running"]:
394
+ # return False # todo: check when user is out of quota
395
+ print(f"Error: status is {status3}")
396
+
397
+ raise Exception("Setup exception")
398
+
399
+ return True
400
+
401
+ sheet_logger.log(username=api.username, nb=self.nb_slug, log=f"Notebook status is '{status1}' is not in {self.rerun_stt}, do nothing!")
402
+ return True
403
+
404
+ @staticmethod
405
+ def from_dict(obj: dict, id=None):
406
+ return NbJob(acc_dict=obj['accounts'], nb_slug=obj['slug'], rerun_stt=obj.get('rerun_status'), not_rerun_stt=obj.get('not_rerun_stt'), id=id)
407
+
408
+
409
+ class KernelRerunService:
410
+ def __init__(self):
411
+ self.jobs: Dict[str, NbJob] = {}
412
+ self.acc_manager = AccountTransactionManager()
413
+ self.username2jobid = {}
414
+ self.jobid2username = {}
415
+
416
+ def add_job(self, nb_job: NbJob):
417
+ if nb_job.nb_slug in self.jobs.keys():
418
+ print("Warn: nb_job already in job list")
419
+ return
420
+ self.jobs[nb_job.nb_slug] = nb_job
421
+ self.jobid2username[nb_job.nb_slug] = nb_job.get_username_list()
422
+ for username in nb_job.get_username_list():
423
+ if username not in self.username2jobid.keys():
424
+ self.username2jobid[username] = []
425
+ self.acc_manager.add_account(username, nb_job.acc_dict[username])
426
+ self.username2jobid[username].append(nb_job.nb_slug)
427
+
428
+ def remove_job(self, nb_job):
429
+ if nb_job.nb_slug not in self.jobs.keys():
430
+ print("Warn: try to remove nb_job not in list")
431
+ return
432
+ username_list = self.jobid2username[nb_job.nb_slug]
433
+ username_list = [username for username in username_list if len(self.username2jobid[username]) == 1]
434
+
435
+ for username in username_list:
436
+ del self.username2jobid[username]
437
+ self.acc_manager.remove_account(username)
438
+ del self.jobs[nb_job.nb_slug]
439
+ del self.jobid2username[nb_job.nb_slug]
440
+
441
+ def validate_all(self):
442
+ for username in self.acc_manager.acc_secret_dict.keys():
443
+ api, release = self.acc_manager.get_unlocked_api_unblocking([username])
444
+ api.authenticate()
445
+ print(f"Using username: {api.username}")
446
+
447
+ for job in self.jobs.values():
448
+ ex_msg_list = []
449
+ if username in job.get_username_list():
450
+ print(f"Validate user: {username}, job: {job.nb_slug}")
451
+
452
+ try:
453
+ job.is_valid_with_acc(api)
454
+ except ValidateException as e:
455
+ print(f"Error: not valid")
456
+ a = f"Setup error: {username} does not have permission on notebook {job.nb_slug} or related datasets"
457
+ if job.id is not None: # if have id, write log
458
+ ex_msg_list.append(f"Account {username}\n" + str(e) + "\n")
459
+ else: # if not have id, raise
460
+ raise Exception(a)
461
+ if len(ex_msg_list) > 0:
462
+ sheet_logger.update_job_status(job.id, validate_status="\n".join(ex_msg_list))
463
+ else:
464
+ sheet_logger.update_job_status(job.id, validate_status="success")
465
+
466
+ release()
467
+ return True
468
+
469
+ def status_all(self):
470
+ for job in self.jobs.values():
471
+ print(f"Job: {job.nb_slug}")
472
+ api, release = self.acc_manager.get_unlocked_api_unblocking(job.get_username_list())
473
+ api.authenticate()
474
+ print(f"Using username: {api.username}")
475
+ notebook = KaggleNotebook(api, job.nb_slug, id=job.id)
476
+ print(f"Notebook: {notebook.kernel_slug}")
477
+ print(notebook.status())
478
+ release()
479
+
480
+ def run(self, nb_job: NbJob):
481
+ username_list = copy.copy(nb_job.get_username_list())
482
+ while len(username_list) > 0:
483
+ api, release = self.acc_manager.get_unlocked_api_unblocking(username_list)
484
+ api.authenticate()
485
+ print(f"Using username: {api.username}")
486
+
487
+ try:
488
+ result = nb_job.acc_check_and_rerun_if_need(api)
489
+ if result:
490
+ return True
491
+ except Exception as e:
492
+ print(e)
493
+ release()
494
+ break
495
+
496
+ if api.username in username_list:
497
+ username_list.remove(api.username)
498
+ release()
499
+ else:
500
+ release()
501
+ raise Exception("")
502
+ return False
503
+
504
+ def run_all(self):
505
+ for job in self.jobs.values():
506
+ success = self.run(job)
507
+ print(f"Job: {job.nb_slug} {success}")
508
+
509
+
510
+ # if __name__ == "__main__":
511
+ # service = KernelRerunService()
512
+ # files = os.listdir("./config")
513
+ # for file in files:
514
+ # if '.example' not in file:
515
+ # with open(os.path.join("./config", file), "r") as f:
516
+ # obj = json.loads(f.read())
517
+ # print(obj)
518
+ # service.add_job(NbJob.from_dict(obj))
519
+ # service.run_all()
520
+
521
+ # try:
522
+ # acc_secret_dict = {
523
+ # "hahunavth": "secret",
524
+ # "hahunavth2": "secret",
525
+ # "hahunavth3": "secret",
526
+ # "hahunavth4": "secret",
527
+ # "hahunavth5": "secret",
528
+ # }
529
+ # acc_manager = AccountTransactionManager(acc_secret_dict)
530
+ #
531
+ #
532
+ # def test1():
533
+ # username_list = ["hahunavth", "hahunavth2", "hahunavth3", "hahunavth4", "hahunavth5"]
534
+ # while len(username_list) > 0:
535
+ # api, release = acc_manager.get_unlocked_api_unblocking(username_list)
536
+ # print("test1 is using " + api.username)
537
+ # time.sleep(1)
538
+ # release()
539
+ # if api.username in username_list:
540
+ # username_list.remove(api.username)
541
+ # else:
542
+ # raise Exception("")
543
+ # print("test1 release " + api.username)
544
+ #
545
+ #
546
+ # def test2():
547
+ # username_list = ["hahunavth2", "hahunavth3", "hahunavth5"]
548
+ # while len(username_list) > 0:
549
+ # api, release = acc_manager.get_unlocked_api_unblocking(username_list)
550
+ # print("test2 is using " + api.username)
551
+ # time.sleep(3)
552
+ # release()
553
+ # if api.username in username_list:
554
+ # username_list.remove(api.username)
555
+ # else:
556
+ # raise Exception("")
557
+ # print("test2 release " + api.username)
558
+ #
559
+ #
560
+ # t1 = threading.Thread(target=test1)
561
+ # t2 = threading.Thread(target=test2)
562
+ # t1.start()
563
+ # t2.start()
564
+ # t1.join()
565
+ # t2.join()
566
+ #
567
+ # # kgapi = KaggleApiWrapper("hahunavth", "fb3d65ea4d06f91a83cf571e9a39d40d")
568
+ # # kgapi.authenticate()
569
+ # # # kgapi = get_api()
570
+ # # notebook = KaggleNotebook(kgapi, "hahunavth/ess-vlsp2023-denoising", "./tmp")
571
+ # # # print(notebook.pull())
572
+ # # # print(notebook.check_datasets_permission())
573
+ # # print(notebook.check_nb_permission())
574
+ # # # print(notebook.status())
575
+ # # # notebook.push()
576
+ # # # print(notebook.status())
577
+ # except ApiException as e:
578
+ # print(e.status)
579
+ # print(e.reason)
580
+ # raise e
581
+ # # 403 when nb not exists or not share to acc
582
+ # # 404 when push to unknow kenel_slug.username
583
  # # 401 when invalid username, pass
logger.py CHANGED
@@ -1,53 +1,59 @@
1
- import platform,socket,re,uuid,json,psutil,logging
2
- from datetime import datetime as dt
3
- from google_sheet import log_repo, conf_repo, SheetCRUDRepository
4
- from version import VERSION
5
-
6
-
7
- def get_sys_info():
8
- try:
9
- info={}
10
- info['platform']=platform.system()
11
- info['platform-release']=platform.release()
12
- info['platform-version']=platform.version()
13
- info['architecture']=platform.machine()
14
- info['hostname']=socket.gethostname()
15
- info['ip-address']=socket.gethostbyname(socket.gethostname())
16
- info['mac-address']=':'.join(re.findall('..', '%012x' % uuid.getnode()))
17
- info['processor']=platform.processor()
18
- info['ram']=str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB"
19
- return json.dumps(info)
20
- except Exception as e:
21
- logging.exception(e)
22
-
23
-
24
- class SheetLogger:
25
- def __init__(self, log_repo: SheetCRUDRepository, config_repo: SheetCRUDRepository):
26
- self.log_repo = log_repo
27
- self.config_repo = config_repo
28
-
29
- def log(self, log='', nb='', username=''):
30
- self.log_repo.create({
31
- "time": str(dt.now()),
32
- "notebook_name": nb,
33
- "kaggle_username": username,
34
- "log": log,
35
- "device": str(get_sys_info()),
36
- "version": VERSION
37
- })
38
-
39
- def update_job_status(self, row, validate_status: str = None, notebook_status: str = None):
40
- data = self.config_repo.read(row)
41
- data.update({"last_updated": str(dt.now())})
42
- if validate_status is not None:
43
- data.update({"validate_status": validate_status})
44
- if notebook_status is not None:
45
- data.update({"notebook_status": notebook_status})
46
- self.config_repo.update(row, data)
47
- # print(self.config_repo.find({"config": "hahunavth/vlsp-sv-2023-s2pecnet-train"}))
48
-
49
-
50
- sheet_logger = SheetLogger(log_repo, conf_repo)
51
-
52
- if __name__ == "__main__":
 
 
 
 
 
 
53
  sheet_logger.update_job_status(5, "abc" , )
 
1
+ import platform,socket,re,uuid,json,psutil,logging
2
+ from datetime import datetime as dt
3
+ from google_sheet import log_repo, conf_repo, SheetCRUDRepository
4
+ from version import VERSION
5
+ import pytz
6
+
7
+
8
+ def get_now():
9
+ current_time = dt.now(pytz.timezone('Asia/Ho_Chi_Minh'))
10
+ return current_time
11
+
12
+
13
+ def get_sys_info():
14
+ try:
15
+ info={}
16
+ info['platform']=platform.system()
17
+ info['platform-release']=platform.release()
18
+ info['platform-version']=platform.version()
19
+ info['architecture']=platform.machine()
20
+ info['hostname']=socket.gethostname()
21
+ info['ip-address']=socket.gethostbyname(socket.gethostname())
22
+ info['mac-address']=':'.join(re.findall('..', '%012x' % uuid.getnode()))
23
+ info['processor']=platform.processor()
24
+ info['ram']=str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB"
25
+ return json.dumps(info)
26
+ except Exception as e:
27
+ logging.exception(e)
28
+
29
+
30
+ class SheetLogger:
31
+ def __init__(self, log_repo: SheetCRUDRepository, config_repo: SheetCRUDRepository):
32
+ self.log_repo = log_repo
33
+ self.config_repo = config_repo
34
+
35
+ def log(self, log='', nb='', username=''):
36
+ self.log_repo.create({
37
+ "time": str(get_now()),
38
+ "notebook_name": nb,
39
+ "kaggle_username": username,
40
+ "log": log,
41
+ "device": str(get_sys_info()),
42
+ "version": VERSION
43
+ })
44
+
45
+ def update_job_status(self, row, validate_status: str = None, notebook_status: str = None):
46
+ data = self.config_repo.read(row)
47
+ data.update({"last_updated": str(get_now())})
48
+ if validate_status is not None:
49
+ data.update({"validate_status": validate_status})
50
+ if notebook_status is not None:
51
+ data.update({"notebook_status": notebook_status})
52
+ self.config_repo.update(row, data)
53
+ # print(self.config_repo.find({"config": "hahunavth/vlsp-sv-2023-s2pecnet-train"}))
54
+
55
+
56
+ sheet_logger = SheetLogger(log_repo, conf_repo)
57
+
58
+ if __name__ == "__main__":
59
  sheet_logger.update_job_status(5, "abc" , )
main.py CHANGED
@@ -1,99 +1,109 @@
1
- from google_sheet import conf_repo, secret_repo
2
- import json
3
-
4
- from kaggle_service import KernelRerunService, NbJob
5
- from logger import sheet_logger
6
-
7
-
8
- if __name__ == "__main__":
9
- sheet_logger.log("========= start ==========")
10
-
11
- # load all account and secret into a dict
12
- secret_dict = {}
13
- try:
14
- for i in range(2, 50): # note: read row 2 - 50
15
- rs = secret_repo.read(i)
16
- if not rs:
17
- break
18
- assert 'username' in rs
19
- assert 'secret' in rs
20
-
21
- username = rs['username'].rstrip()
22
- secret = rs['secret'].rstrip()
23
-
24
- secret_dict[username] = secret
25
- except Exception as e:
26
- sheet_logger.log(log="Get secret table failed!!" + str(e))
27
-
28
- # load config from gg sheet file
29
- configs = []
30
- ids = []
31
- try:
32
- for i in range(2, 50): # note: read row 2 - 50
33
- rs = conf_repo.read(i)
34
- if not rs:
35
- break
36
- if not rs['config']:
37
- reset_keys = []
38
- for key in rs:
39
- if rs[key]:
40
- reset_keys.append(key)
41
- if len(reset_keys) > 0:
42
- conf_repo.update(row_index=i, data={k: '' for k in reset_keys})
43
- break
44
- print(i, rs)
45
- try:
46
- # validation
47
- # assert 'config' in rs, 'require config column'
48
- assert 'usernames' in rs, 'require usernames column'
49
- assert 'enable' in rs, 'require enable column'
50
-
51
- assert rs['enable'].rstrip() in ['enable', 'disable'], rs['enable']
52
- assert 'slug' in rs['config'], 'require config.slug'
53
-
54
- cfg = json.loads(rs['config'])
55
- usernames = set(rs['usernames']
56
- .rstrip().replace('\n', ' ').replace(',', ' ').replace(';', ' ').replace('|', ' ')
57
- .split(' '))
58
- usernames = [u for u in usernames if u]
59
- is_enabled = rs['enable'].rstrip() == 'enable'
60
- if not is_enabled:
61
- sheet_logger.log(log="Disabled, skip!", nb=cfg['slug'])
62
- print("skip ", cfg['slug'])
63
- continue
64
-
65
- assert len(usernames) > 0, 'len usernames == 0'
66
-
67
- # process config
68
- accounts = {u: secret_dict[u] for u in secret_dict if u in usernames}
69
- assert not set(usernames).difference(set(accounts.keys())), set(usernames).difference(set(accounts.keys()))
70
- cfg = {**cfg, "accounts": accounts}
71
-
72
- # save
73
- configs.append(cfg)
74
- ids.append(i)
75
- print(cfg)
76
- except AssertionError:
77
- import traceback
78
- sheet_logger.update_job_status(i, validate_status=str(traceback.format_exc()))
79
- except Exception:
80
- import traceback
81
- sheet_logger.log(log="Get config failed!!" + str(traceback.format_exc()))
82
-
83
- # add config to service
84
- service = KernelRerunService()
85
- for config, idx in zip(configs, ids):
86
- try:
87
- service.add_job(NbJob.from_dict(config, id=idx))
88
- except Exception as e:
89
- sheet_logger.update_job_status(idx, validate_status=str(e))
90
-
91
- # run service
92
- try:
93
- service.validate_all()
94
- # service.status_all()
95
- service.run_all()
96
- except Exception as e:
97
- sheet_logger.log(log=str(e))
98
-
99
- sheet_logger.log("========= end ==========")
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Annotated
2
+
3
+ from apscheduler.schedulers.background import BackgroundScheduler
4
+ from fastapi.encoders import jsonable_encoder
5
+ from fastapi.exceptions import RequestValidationError
6
+ from starlette.middleware.cors import CORSMiddleware
7
+ from fastapi import FastAPI, Header, UploadFile, Depends, HTTPException, status
8
+ import base64
9
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
10
+ from starlette.responses import JSONResponse
11
+ from collections import defaultdict
12
+ from pydantic import BaseModel
13
+ from threading import Lock
14
+
15
+ from logger import get_now
16
+ from run import main as run_main
17
+
18
+ START_AT = get_now()
19
+ app = FastAPI()
20
+
21
+ lock = Lock()
22
+ n_run = 0
23
+ last_run = None
24
+ is_running=False
25
+ def scheduled_job():
26
+
27
+ with lock:
28
+ global is_running
29
+ if is_running:
30
+ return False
31
+ is_running = True
32
+
33
+ print("Job is running!")
34
+ run_main()
35
+
36
+ with lock:
37
+ global n_run
38
+ n_run = n_run + 1
39
+ global last_run
40
+ last_run = get_now()
41
+ is_running = False
42
+ return True
43
+
44
+ # Create a scheduler
45
+ scheduler = BackgroundScheduler()
46
+
47
+ # Add the scheduled job to the scheduler
48
+ scheduler.add_job(scheduled_job, 'interval', minutes=30)
49
+
50
+ # Start the scheduler
51
+ scheduler.start()
52
+
53
+ # You can also stop the scheduler when the FastAPI application shuts down
54
+ @app.on_event("shutdown")
55
+ def shutdown_event():
56
+ scheduler.shutdown()
57
+
58
+
59
+ class BaseResponse(BaseModel):
60
+ status: int = 1
61
+ message: str = ""
62
+ result: object = None
63
+
64
+ @app.exception_handler(HTTPException)
65
+ async def http_exception_handler(request, exc: HTTPException):
66
+ return JSONResponse(
67
+ status_code=status.HTTP_400_BAD_REQUEST,
68
+ content=jsonable_encoder(BaseResponse(status=0, message=exc.detail))
69
+ )
70
+
71
+ @app.exception_handler(RequestValidationError)
72
+ def validation_exception_handler(request, exc: RequestValidationError) -> JSONResponse:
73
+ reformatted_message = defaultdict(list)
74
+ for pydantic_error in exc.errors():
75
+ loc, msg = pydantic_error["loc"], pydantic_error["msg"]
76
+ filtered_loc = loc[1:] if loc[0] in ("body", "query", "path") else loc
77
+ field_string = ".".join(filtered_loc)
78
+ reformatted_message[field_string].append(msg)
79
+
80
+ return JSONResponse(
81
+ status_code=status.HTTP_400_BAD_REQUEST,
82
+ content=jsonable_encoder(BaseResponse(status=0, message="Invalid request", result=reformatted_message))
83
+ )
84
+
85
+ @app.get("/status", response_model=BaseResponse)
86
+ def status():
87
+ return BaseResponse(result={
88
+ "start_at": START_AT,
89
+ "current": get_now(),
90
+ "n_runs": n_run,
91
+ "last_run": last_run,
92
+ })
93
+
94
+ @app.get("/run")
95
+ def run_once():
96
+ print("Running the job once.")
97
+ success = scheduled_job() # Manually trigger the job
98
+ if not success:
99
+ return BaseResponse(message="Job is running, not start a new job")
100
+ return BaseResponse(message="Job executed once.")
101
+
102
+
103
+ app.add_middleware(
104
+ CORSMiddleware,
105
+ allow_origins=["*"],
106
+ allow_credentials=True,
107
+ allow_methods=["*"],
108
+ allow_headers=["*"],
109
+ )
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- kaggle
2
- gspread
3
- oauth2client
 
 
 
 
 
 
1
+ kaggle
2
+ gspread
3
+ oauth2client
4
+ pytz
5
+ apscheduler
6
+ fastapi
7
+ starlette
8
+ pydantic
run.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google_sheet import conf_repo, secret_repo
2
+ import json
3
+
4
+ from kaggle_service import KernelRerunService, NbJob
5
+ from logger import sheet_logger, get_now
6
+ from run_stt import run_stt_service
7
+
8
+
9
def get_secret_dict():
    """Load every Kaggle account secret from the secret sheet.

    Returns a dict mapping username -> secret. Reading stops at the first
    empty row; on any error the failure is logged to the sheet and the
    entries collected so far are returned.
    """
    secrets = {}
    try:
        # Data lives in rows 2..49 (row 1 is the header).
        for row_idx in range(2, 50):
            record = secret_repo.read(row_idx)
            if not record:
                break
            assert 'username' in record
            assert 'secret' in record

            secrets[record['username'].rstrip()] = record['secret'].rstrip()
    except Exception as e:
        sheet_logger.log(log="Get secret table failed!!" + str(e))
    return secrets
29
def get_config_list(secret_dict):
    """Read and validate job configurations from the config sheet.

    Each row must carry a JSON `config` (with a top-level `slug`), a
    `usernames` list and an `enable` flag. Rows failing validation get the
    error written back to their sheet row and are skipped; an empty
    `config` cell marks the end of the table.

    Returns (configs, ids): parsed config dicts (each augmented with an
    `accounts` username->secret map) and their sheet row indices.
    """
    configs = []
    ids = []
    try:
        for i in range(2, 50):  # rows 2..49 (row 1 is the header)
            rs = conf_repo.read(i)
            if not rs:
                break
            if not rs['config']:
                # End-of-table marker: clear any leftover values in the
                # row, then stop scanning.
                reset_keys = [key for key in rs if rs[key]]
                if len(reset_keys) > 0:
                    conf_repo.update(row_index=i, data={k: '' for k in reset_keys})
                break
            print(i, rs)
            try:
                # validation
                assert 'usernames' in rs, 'require usernames column'
                assert 'enable' in rs, 'require enable column'

                assert rs['enable'].rstrip() in ['enable', 'disable'], rs['enable']

                cfg = json.loads(rs['config'])
                # BUG FIX: previously this was `'slug' in rs['config']`, a
                # substring test on the raw JSON text — it false-positives
                # whenever the word "slug" appears anywhere in the string.
                # Check the parsed dict's top-level keys instead.
                assert 'slug' in cfg, 'require config.slug'

                usernames = set(rs['usernames']
                                .rstrip().replace('\n', ' ').replace(',', ' ').replace(';', ' ').replace('|', ' ')
                                .split(' '))
                usernames = [u for u in usernames if u]
                is_enabled = rs['enable'].rstrip() == 'enable'
                if not is_enabled:
                    sheet_logger.log(log="Disabled, skip!", nb=cfg['slug'])
                    print("skip ", cfg['slug'])
                    continue

                assert len(usernames) > 0, 'len usernames == 0'

                # attach the secret for every requested username; every
                # username must have a matching secret-sheet entry
                accounts = {u: secret_dict[u] for u in secret_dict if u in usernames}
                assert not set(usernames).difference(set(accounts.keys())), set(usernames).difference(set(accounts.keys()))
                cfg = {**cfg, "accounts": accounts}

                # save
                configs.append(cfg)
                ids.append(i)
                print(cfg)
            except (AssertionError, ValueError, KeyError):
                # ROBUSTNESS: malformed JSON (ValueError) or a missing
                # column (KeyError) previously escaped to the outer handler
                # and aborted the remaining rows; record the failure on the
                # row and keep processing the rest of the table.
                import traceback
                sheet_logger.update_job_status(i, validate_status=str(traceback.format_exc()))
    except Exception:
        import traceback
        sheet_logger.log(log="Get config failed!!" + str(traceback.format_exc()))
    return configs, ids
85
+
86
def create_service(configs, ids):
    """Build a KernelRerunService from validated configs.

    A row whose config cannot be turned into a job gets the error written
    back to its sheet row instead of aborting the whole batch.
    """
    service = KernelRerunService()
    for cfg, row_idx in zip(configs, ids):
        try:
            service.add_job(NbJob.from_dict(cfg, id=row_idx))
        except Exception as err:
            sheet_logger.update_job_status(row_idx, validate_status=str(err))
    return service
95
+
96
def main():
    """Run one full pass of the rerun pipeline, gated by the run-state sheet."""
    sheet_logger.log("========= start ==========")

    obj = run_stt_service.get_obj()
    if not run_stt_service.is_need_to_run(obj):
        sheet_logger.log(f"Current time is {get_now()} is not after {obj.get('last_run')} {obj.get('auto_run_after_last_run')} or not require new run")
    else:
        # Mark the run as started so concurrent invocations bail out.
        run_stt_service.set_is_running({})

        secret_dict = get_secret_dict()

        # Load job configs from the sheet and build the service.
        configs, ids = get_config_list(secret_dict)
        service = create_service(configs, ids)

        # Execute; any failure is reported to the sheet log.
        try:
            service.validate_all()
            service.run_all()
        except Exception as e:
            sheet_logger.log(log=str(e))

        # Release the running flag.
        run_stt_service.set_run_done()

    sheet_logger.log("========= end ==========")
+
128
+
129
+ if __name__ == "__main__":
130
+ main()
run_stt.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google_sheet import run_stt_repo as _run_stt_repo, Converter
2
+ from logger import get_now
3
+ from dateutil import parser
4
+ import datetime as dt
5
+
6
+
7
class RunSttService:
    """Run-state gate backed by a single sheet row (row 2).

    The row tracks `is_running`, `require_new_run`, `last_run` and
    `auto_run_after_last_run` (timedelta kwargs) so that only one pipeline
    run happens at a time and runs can be auto-triggered after a delay.
    """

    def __init__(self, run_stt_repo):
        self.run_stt_repo = run_stt_repo

    def get_row(self):
        """Return the raw sheet row holding the run state."""
        config = self.run_stt_repo.read(2)
        return config

    def get_obj(self):
        """Return the run state converted to a plain object."""
        return Converter.convert_to_obj(self.get_row())

    def is_need_to_run(self, obj):
        """Decide whether a new run should start now.

        True when a run was explicitly requested, when no previous run is
        recorded, or when more than `auto_run_after_last_run` has elapsed
        since `last_run`. Always False while a run is in progress.
        """
        if obj['is_running']:
            return False
        if obj['require_new_run']:
            return True
        # BUG FIX: check for a missing/empty last_run BEFORE parsing it.
        # The original called parser.parse(obj['last_run']) first, which
        # raises on an empty value, so its `if not last_run` fallback
        # ("no previous run -> run now") was unreachable.
        if not obj['last_run']:
            return True
        last_run = parser.parse(obj['last_run'])
        duration = obj['auto_run_after_last_run']
        if not duration:
            return False
        if get_now() - last_run > dt.timedelta(**duration):
            return True
        return False

    # FIX: default was a mutable list; use an immutable tuple instead.
    def get_const_cfg(self, const_keys=('require_new_run', 'auto_run_after_last_run')):
        """Return the subset of the row that must survive a state update."""
        row = self.get_row()
        return {k: row[k] for k in row if k in const_keys}

    def set_is_running(self, obj):
        """Mark the run as started: last_run=now, is_running=True."""
        obj['last_run'] = str(get_now())
        obj['is_running'] = True
        # Preserve the operator-controlled flags already on the sheet.
        obj = {**obj, **self.get_const_cfg()}
        row = Converter.convert_to_row(obj)
        print(row)
        self.run_stt_repo.update(row_index=2, data=row)

    def set_run_done(self):
        """Mark the run as finished and clear the new-run request flag."""
        obj = {
            'is_running': False,
            "require_new_run": False,
            **self.get_const_cfg(('auto_run_after_last_run', 'last_run'))
        }
        row = Converter.convert_to_row(obj)
        self.run_stt_repo.update(row_index=2, data=row)
57
+
58
# Module-level singleton bound to the shared sheet repo; imported by run.py.
run_stt_service = RunSttService(_run_stt_repo)
59
+
60
+
61
+ # if __name__ == '__main__':
62
+ # run_stt_service = RunSttService(_run_stt_repo)
63
+ # obj = run_stt_service.get_obj()
64
+ # print(run_stt_service.is_need_to_run(obj))
65
+ # run_stt_service.set_is_running({})
66
+ # import time
67
+ # time.sleep(10)
68
+ # run_stt_service.set_run_done()
test.ipynb CHANGED
@@ -1,160 +1,160 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "initial_id",
7
- "metadata": {
8
- "collapsed": true,
9
- "ExecuteTime": {
10
- "end_time": "2023-11-05T00:47:48.131663100Z",
11
- "start_time": "2023-11-05T00:47:35.527891600Z"
12
- }
13
- },
14
- "outputs": [
15
- {
16
- "name": "stdout",
17
- "output_type": "stream",
18
- "text": [
19
- "Collecting kaggle\r\n",
20
- " Downloading kaggle-1.5.16.tar.gz (83 kB)\r\n",
21
- "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m83.6/83.6 kB\u001B[0m \u001B[31m444.7 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m0:01\u001B[0m00:01\u001B[0m\r\n",
22
- "\u001B[?25h Preparing metadata (setup.py) ... \u001B[?25ldone\r\n",
23
- "\u001B[?25hRequirement already satisfied: six>=1.10 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (1.16.0)\r\n",
24
- "Requirement already satisfied: certifi in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2023.7.22)\r\n",
25
- "Requirement already satisfied: python-dateutil in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.8.2)\r\n",
26
- "Requirement already satisfied: requests in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.31.0)\r\n",
27
- "Collecting tqdm (from kaggle)\r\n",
28
- " Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\r\n",
29
- "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m57.6/57.6 kB\u001B[0m \u001B[31m332.3 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m \u001B[36m0:00:01\u001B[0m\r\n",
30
- "\u001B[?25hCollecting python-slugify (from kaggle)\r\n",
31
- " Downloading python_slugify-8.0.1-py2.py3-none-any.whl (9.7 kB)\r\n",
32
- "Requirement already satisfied: urllib3 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.0.7)\r\n",
33
- "Requirement already satisfied: bleach in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (6.1.0)\r\n",
34
- "Requirement already satisfied: webencodings in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from bleach->kaggle) (0.5.1)\r\n",
35
- "Collecting text-unidecode>=1.3 (from python-slugify->kaggle)\r\n",
36
- " Downloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)\r\n",
37
- "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m78.2/78.2 kB\u001B[0m \u001B[31m944.9 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0ma \u001B[36m0:00:01\u001B[0m\r\n",
38
- "\u001B[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from requests->kaggle) (3.3.1)\r\n",
39
- "Requirement already satisfied: idna<4,>=2.5 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from requests->kaggle) (3.4)\r\n",
40
- "Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)\r\n",
41
- "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m78.3/78.3 kB\u001B[0m \u001B[31m748.5 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m \u001B[36m0:00:01\u001B[0m\r\n",
42
- "\u001B[?25hBuilding wheels for collected packages: kaggle\r\n",
43
- " Building wheel for kaggle (setup.py) ... \u001B[?25ldone\r\n",
44
- "\u001B[?25h Created wheel for kaggle: filename=kaggle-1.5.16-py3-none-any.whl size=110683 sha256=fbd426d35c51820adb030ee423c20d997221fd1cdb7093a4b6af3afa94d170f3\r\n",
45
- " Stored in directory: /home/kryo/.cache/pip/wheels/43/4b/fb/736478af5e8004810081a06259f9aa2f7c3329fc5d03c2c412\r\n",
46
- "Successfully built kaggle\r\n",
47
- "Installing collected packages: text-unidecode, tqdm, python-slugify, kaggle\r\n",
48
- "Successfully installed kaggle-1.5.16 python-slugify-8.0.1 text-unidecode-1.3 tqdm-4.66.1\r\n"
49
- ]
50
- }
51
- ],
52
- "source": [
53
- "!pip install kaggle"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": null,
59
- "outputs": [],
60
- "source": [],
61
- "metadata": {
62
- "collapsed": false
63
- },
64
- "id": "55f5e4a1d2ec2ecc"
65
- },
66
- {
67
- "cell_type": "code",
68
- "execution_count": 4,
69
- "outputs": [
70
- {
71
- "name": "stdout",
72
- "output_type": "stream",
73
- "text": [
74
- "\u001B[31mERROR: Could not find a version that satisfies the requirement kaggle_api_extended (from versions: none)\u001B[0m\u001B[31m\r\n",
75
- "\u001B[0m\u001B[31mERROR: No matching distribution found for kaggle_api_extended\u001B[0m\u001B[31m\r\n",
76
- "\u001B[0m"
77
- ]
78
- }
79
- ],
80
- "source": [],
81
- "metadata": {
82
- "collapsed": false,
83
- "ExecuteTime": {
84
- "end_time": "2023-11-05T00:51:45.918622100Z",
85
- "start_time": "2023-11-05T00:51:45.022051100Z"
86
- }
87
- },
88
- "id": "28cea8fd0c172fff"
89
- },
90
- {
91
- "cell_type": "markdown",
92
- "source": [
93
- "## merge files"
94
- ],
95
- "metadata": {
96
- "collapsed": false
97
- },
98
- "id": "fd44ed9dee4275cf"
99
- },
100
- {
101
- "cell_type": "code",
102
- "execution_count": 6,
103
- "outputs": [
104
- {
105
- "name": "stdout",
106
- "output_type": "stream",
107
- "text": [
108
- "Files merged successfully!\n"
109
- ]
110
- }
111
- ],
112
- "source": [
113
- "import shutil\n",
114
- "from pathlib import Path\n",
115
- "try:\n",
116
- " with open(Path('merged_file.py'), 'wb') as destination_file:\n",
117
- " with open(Path('google_sheet.py'), 'rb') as file1:\n",
118
- " shutil.copyfileobj(file1, destination_file)\n",
119
- " with open(Path('logger.py'), 'rb') as file2:\n",
120
- " shutil.copyfileobj(file2, destination_file)\n",
121
- " with open(Path('kaggle_service.py'), 'rb') as file3:\n",
122
- " shutil.copyfileobj(file3, destination_file)\n",
123
- " with open(Path('main.py'), 'rb') as file4:\n",
124
- " shutil.copyfileobj(file4, destination_file)\n",
125
- " print(\"Files merged successfully!\")\n",
126
- "except :\n",
127
- " raise Exception(\"Cannot merge files!\")"
128
- ],
129
- "metadata": {
130
- "collapsed": false,
131
- "ExecuteTime": {
132
- "end_time": "2023-11-05T02:15:34.843461500Z",
133
- "start_time": "2023-11-05T02:15:34.781001200Z"
134
- }
135
- },
136
- "id": "2568f49413e2057c"
137
- }
138
- ],
139
- "metadata": {
140
- "kernelspec": {
141
- "display_name": "Python 3",
142
- "language": "python",
143
- "name": "python3"
144
- },
145
- "language_info": {
146
- "codemirror_mode": {
147
- "name": "ipython",
148
- "version": 2
149
- },
150
- "file_extension": ".py",
151
- "mimetype": "text/x-python",
152
- "name": "python",
153
- "nbconvert_exporter": "python",
154
- "pygments_lexer": "ipython2",
155
- "version": "2.7.6"
156
- }
157
- },
158
- "nbformat": 4,
159
- "nbformat_minor": 5
160
- }
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "initial_id",
7
+ "metadata": {
8
+ "collapsed": true,
9
+ "ExecuteTime": {
10
+ "end_time": "2023-11-05T00:47:48.131663100Z",
11
+ "start_time": "2023-11-05T00:47:35.527891600Z"
12
+ }
13
+ },
14
+ "outputs": [
15
+ {
16
+ "name": "stdout",
17
+ "output_type": "stream",
18
+ "text": [
19
+ "Collecting kaggle\r\n",
20
+ " Downloading kaggle-1.5.16.tar.gz (83 kB)\r\n",
21
+ "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m83.6/83.6 kB\u001B[0m \u001B[31m444.7 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m0:01\u001B[0m00:01\u001B[0m\r\n",
22
+ "\u001B[?25h Preparing metadata (setup.py) ... \u001B[?25ldone\r\n",
23
+ "\u001B[?25hRequirement already satisfied: six>=1.10 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (1.16.0)\r\n",
24
+ "Requirement already satisfied: certifi in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2023.7.22)\r\n",
25
+ "Requirement already satisfied: python-dateutil in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.8.2)\r\n",
26
+ "Requirement already satisfied: requests in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.31.0)\r\n",
27
+ "Collecting tqdm (from kaggle)\r\n",
28
+ " Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\r\n",
29
+ "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m57.6/57.6 kB\u001B[0m \u001B[31m332.3 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m \u001B[36m0:00:01\u001B[0m\r\n",
30
+ "\u001B[?25hCollecting python-slugify (from kaggle)\r\n",
31
+ " Downloading python_slugify-8.0.1-py2.py3-none-any.whl (9.7 kB)\r\n",
32
+ "Requirement already satisfied: urllib3 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.0.7)\r\n",
33
+ "Requirement already satisfied: bleach in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (6.1.0)\r\n",
34
+ "Requirement already satisfied: webencodings in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from bleach->kaggle) (0.5.1)\r\n",
35
+ "Collecting text-unidecode>=1.3 (from python-slugify->kaggle)\r\n",
36
+ " Downloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)\r\n",
37
+ "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m78.2/78.2 kB\u001B[0m \u001B[31m944.9 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0ma \u001B[36m0:00:01\u001B[0m\r\n",
38
+ "\u001B[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from requests->kaggle) (3.3.1)\r\n",
39
+ "Requirement already satisfied: idna<4,>=2.5 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from requests->kaggle) (3.4)\r\n",
40
+ "Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)\r\n",
41
+ "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m78.3/78.3 kB\u001B[0m \u001B[31m748.5 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m \u001B[36m0:00:01\u001B[0m\r\n",
42
+ "\u001B[?25hBuilding wheels for collected packages: kaggle\r\n",
43
+ " Building wheel for kaggle (setup.py) ... \u001B[?25ldone\r\n",
44
+ "\u001B[?25h Created wheel for kaggle: filename=kaggle-1.5.16-py3-none-any.whl size=110683 sha256=fbd426d35c51820adb030ee423c20d997221fd1cdb7093a4b6af3afa94d170f3\r\n",
45
+ " Stored in directory: /home/kryo/.cache/pip/wheels/43/4b/fb/736478af5e8004810081a06259f9aa2f7c3329fc5d03c2c412\r\n",
46
+ "Successfully built kaggle\r\n",
47
+ "Installing collected packages: text-unidecode, tqdm, python-slugify, kaggle\r\n",
48
+ "Successfully installed kaggle-1.5.16 python-slugify-8.0.1 text-unidecode-1.3 tqdm-4.66.1\r\n"
49
+ ]
50
+ }
51
+ ],
52
+ "source": [
53
+ "!pip install kaggle"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "outputs": [],
60
+ "source": [],
61
+ "metadata": {
62
+ "collapsed": false
63
+ },
64
+ "id": "55f5e4a1d2ec2ecc"
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 4,
69
+ "outputs": [
70
+ {
71
+ "name": "stdout",
72
+ "output_type": "stream",
73
+ "text": [
74
+ "\u001B[31mERROR: Could not find a version that satisfies the requirement kaggle_api_extended (from versions: none)\u001B[0m\u001B[31m\r\n",
75
+ "\u001B[0m\u001B[31mERROR: No matching distribution found for kaggle_api_extended\u001B[0m\u001B[31m\r\n",
76
+ "\u001B[0m"
77
+ ]
78
+ }
79
+ ],
80
+ "source": [],
81
+ "metadata": {
82
+ "collapsed": false,
83
+ "ExecuteTime": {
84
+ "end_time": "2023-11-05T00:51:45.918622100Z",
85
+ "start_time": "2023-11-05T00:51:45.022051100Z"
86
+ }
87
+ },
88
+ "id": "28cea8fd0c172fff"
89
+ },
90
+ {
91
+ "cell_type": "markdown",
92
+ "source": [
93
+ "## merge files"
94
+ ],
95
+ "metadata": {
96
+ "collapsed": false
97
+ },
98
+ "id": "fd44ed9dee4275cf"
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 6,
103
+ "outputs": [
104
+ {
105
+ "name": "stdout",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "Files merged successfully!\n"
109
+ ]
110
+ }
111
+ ],
112
+ "source": [
113
+ "import shutil\n",
114
+ "from pathlib import Path\n",
115
+ "try:\n",
116
+ " with open(Path('merged_file.py'), 'wb') as destination_file:\n",
117
+ " with open(Path('google_sheet.py'), 'rb') as file1:\n",
118
+ " shutil.copyfileobj(file1, destination_file)\n",
119
+ " with open(Path('logger.py'), 'rb') as file2:\n",
120
+ " shutil.copyfileobj(file2, destination_file)\n",
121
+ " with open(Path('kaggle_service.py'), 'rb') as file3:\n",
122
+ " shutil.copyfileobj(file3, destination_file)\n",
123
+ " with open(Path('main.py'), 'rb') as file4:\n",
124
+ " shutil.copyfileobj(file4, destination_file)\n",
125
+ " print(\"Files merged successfully!\")\n",
126
+ "except :\n",
127
+ " raise Exception(\"Cannot merge files!\")"
128
+ ],
129
+ "metadata": {
130
+ "collapsed": false,
131
+ "ExecuteTime": {
132
+ "end_time": "2023-11-05T02:15:34.843461500Z",
133
+ "start_time": "2023-11-05T02:15:34.781001200Z"
134
+ }
135
+ },
136
+ "id": "2568f49413e2057c"
137
+ }
138
+ ],
139
+ "metadata": {
140
+ "kernelspec": {
141
+ "display_name": "Python 3",
142
+ "language": "python",
143
+ "name": "python3"
144
+ },
145
+ "language_info": {
146
+ "codemirror_mode": {
147
+ "name": "ipython",
148
+ "version": 2
149
+ },
150
+ "file_extension": ".py",
151
+ "mimetype": "text/x-python",
152
+ "name": "python",
153
+ "nbconvert_exporter": "python",
154
+ "pygments_lexer": "ipython2",
155
+ "version": "2.7.6"
156
+ }
157
+ },
158
+ "nbformat": 4,
159
+ "nbformat_minor": 5
160
+ }
version.py CHANGED
@@ -1 +1 @@
1
- VERSION="v1.0.1"
 
1
+ VERSION="v1.0.1"