Spaces (status: Sleeping)

hahunavth committed
Commit c3ece9d • Parent: 2b0538d

add cron server
Browse files

- .gitignore               +1   -1
- .idea/.gitignore         +8   -8
- .idea/kaggle-utils.iml   +7   -7
- .idea/misc.xml           +6   -6
- .idea/modules.xml        +7   -7
- .idea/vcs.xml            +5   -5
- Dockerfile               +20  -0
- cli.py                   +50  -50
- config/aasist-train.json +0   -8
- google_sheet.py          +125 -62
- kaggle_service.py        +582 -582
- logger.py                +58  -52
- main.py                  +109 -99
- requirements.txt         +8   -3
- run.py                   +130 -0
- run_stt.py               +68  -0
- test.ipynb               +160 -160
- version.py               +1   -1
.gitignore CHANGED
@@ -1,2 +1,2 @@
-tmp
+tmp
 __pycache__

(Here — and in the .idea/* files, cli.py, and kaggle_service.py below — the removed and re-added lines are textually identical, so these hunks most likely reflect whitespace or line-ending changes rather than content changes.)
.idea/.gitignore CHANGED
@@ -1,8 +1,8 @@ (all 8 lines removed and re-added unchanged)
 # Default ignored files
 /shelf/
 /workspace.xml
 # Editor-based HTTP Client requests
 /httpRequests/
 # Datasource local storage ignored files
 /dataSources/
 /dataSources.local.xml
.idea/kaggle-utils.iml CHANGED
@@ -1,8 +1,8 @@ (lines 1-7 removed and re-added unchanged; line 8 is context)
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
.idea/misc.xml CHANGED
@@ -1,7 +1,7 @@ (lines 1-6 removed and re-added unchanged; line 7 is context)
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="Black">
     <option name="sdkName" value="vlsp2023-tts-api" />
   </component>
   <component name="ProjectRootManager" version="2" project-jdk-name="vlsp2023-tts-api" project-jdk-type="Python SDK" />
 </project>
.idea/modules.xml CHANGED
@@ -1,8 +1,8 @@ (lines 1-7 removed and re-added unchanged; line 8 is context)
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="ProjectModuleManager">
     <modules>
       <module fileurl="file://$PROJECT_DIR$/.idea/kaggle-utils.iml" filepath="$PROJECT_DIR$/.idea/kaggle-utils.iml" />
     </modules>
   </component>
 </project>
.idea/vcs.xml CHANGED
@@ -1,6 +1,6 @@ (lines 1-5 removed and re-added unchanged; line 6 is context)
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
     <mapping directory="$PROJECT_DIR$" vcs="Git" />
   </component>
 </project>
Dockerfile ADDED
@@ -0,0 +1,20 @@
+FROM python:3.9
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+RUN useradd -m -u 1000 user
+
+USER user
+
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+COPY --chown=user . $HOME/app
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
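The image follows the usual Hugging Face Spaces Docker pattern: a non-root user with UID 1000 and the app served on port 7860. The CMD expects an ASGI app exposed as `app` in main.py; that file is modified in this commit but not shown in this capture, so the stub below is only a hypothetical stand-in illustrating what the CMD requires, not the actual main.py.

    # hypothetical minimal main.py satisfying `uvicorn main:app`
    from fastapi import FastAPI

    app = FastAPI()

    @app.get("/health")
    def health():
        # illustrative liveness endpoint; the real app's routes differ
        return {"status": "ok"}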
cli.py CHANGED
@@ -1,51 +1,51 @@
All 51 lines are removed and re-added with identical text (shown once below):

import os
import json
from types import SimpleNamespace

from kaggle_service import KernelRerunService, NbJob
import argparse
from logger import sheet_logger


def main(args):
    if not os.path.exists(args.config):
        print(f"Config folder not found: {os.path.abspath(args.config)}")
        exit(1)

    configs = []
    if os.path.isdir(args.config):
        files = os.listdir(args.config)
        for file in files:
            with open(os.path.join(args.config, file), "r") as f:
                obj = json.loads(f.read())
                configs.append(obj)
                print(obj)
    elif os.path.isfile(args.config):
        with open(args.config, "r") as f:
            obj = json.loads(f.read())
            configs.append(obj)
            print(obj)

    service = KernelRerunService()
    for config in configs:
        service.add_job(NbJob.from_dict(config))

    if args.option == "run":
        service.run_all()
    elif args.option == "validate":
        service.validate_all()
    elif args.option == "status":
        service.status_all()
    else:
        print(f"Invalid option: {args.option}")


if __name__ == "__main__":
    # parser = argparse.ArgumentParser()
    # parser.add_argument("option", type=str, default="run", choices=["run", "validate", "status"])
    # parser.add_argument("--config", type=str, default="./config")
    #
    # args = parser.parse_args()
    args = SimpleNamespace(option="validate", config='./config')

    main(args)
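Note that as committed, cli.py bypasses its commented-out argparse block and hard-codes option="validate". A sketch of the entry point with that block re-enabled, using the same choices and defaults as the comments above:

    parser = argparse.ArgumentParser()
    parser.add_argument("option", type=str, default="run",
                        choices=["run", "validate", "status"])
    parser.add_argument("--config", type=str, default="./config")
    args = parser.parse_args()
    main(args)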
config/aasist-train.json CHANGED
@@ -1,8 +0,0 @@
-{
-  "accounts": {
-    "hahunavth": "fb3d65ea4d06f91a83cf571e9a39d40d",
-    "mrhakk": "26780db435523a697855d5d13355744d",
-    "havthust": "c54e96568075fcc277bd10ba0e0a52b9"
-  },
-  "slug": "hahunavth/vlsp-sv-2023-aasist-train"
-}
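The deleted file shows the job-config shape that cli.py feeds to NbJob.from_dict. Judging from from_dict in kaggle_service.py below, two optional keys are also recognized; a sketch of a full config as a Python dict (key names taken from from_dict, values are placeholders):

    config = {
        "accounts": {"username": "kaggle-api-key"},  # placeholder credentials
        "slug": "username/notebook-slug",
        "rerun_status": ["complete"],  # optional: statuses that trigger a rerun
        "not_rerun_stt": ["queued", "running", "cancelAcknowledged"],  # optional
    }
    job = NbJob.from_dict(config)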
google_sheet.py CHANGED
@@ -1,63 +1,126 @@
Old version (62 lines removed): lines 1-39 are textually identical to lines 1-39 of the new version below. The rest of the old file was blank lines 40-60, a dangling `if` on line 61, a blank line 62, and `print(a)` on line 63 (kept as context) — i.e. the old module did not even parse.

New version (125 lines added):

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from typing import Dict


class SheetCRUDRepository:
    def __init__(self, worksheet):
        self.worksheet = worksheet
        self.titles = self.worksheet.row_values(1)  # Assuming titles are in the first row
        assert len(set(self.titles)) == len(self.titles), f"Failed to init {SheetCRUDRepository.__class__}, titles: {self.titles} contain duplicated values!"

    def create(self, data: Dict):
        values = [data.get(title, '') for title in self.titles]
        self.worksheet.append_row(values)

    def read(self, row_index: int) -> Dict:
        """
        return {} if empty
        """
        values = self.worksheet.row_values(row_index)
        return {title: value for title, value in zip(self.titles, values)}

    def update(self, row_index: int, data: Dict):
        values = [data.get(title, '') for title in self.titles]
        self.worksheet.update(f"A{row_index}:Z{row_index}", [values])

    def delete(self, row_index: int):
        self.worksheet.delete_row(row_index)

    def find(self, search_dict):
        for col_title, value in search_dict.items():
            if col_title in self.titles:
                col_index = self.titles.index(col_title) + 1  # Adding 1 to match gspread indexing
                cell = self.worksheet.find(value, in_column=col_index)
                if cell is None:
                    break
                row_number = cell.row
                return row_number, self.read(row_number)
        return None


class Converter:
    @staticmethod
    def parse_one_to_obj(field_name, value):
        if value in ['TRUE', 'FALSE']:
            return field_name, value == 'TRUE'
        if isinstance(value, str):
            if value.startswith('[DURATION]'):
                if 'NONE' in value.upper():
                    return field_name, None
                value = value.replace('[DURATION]', '').replace("\n", '').rstrip()
                sign = 1
                if 'before' in value:
                    sign = -1
                if 'after' in value:
                    sign = 1
                value = value.replace('after', '').replace('before', '').rstrip()
                if 'h' in value:
                    value = value.replace('h', '')
                    return field_name, {"hours": int(value) * sign}
                if 'm' in value:
                    value = value.replace('m', '')
                    return field_name, {"minutes": int(value) * sign}

        return field_name, value

    @staticmethod
    def parse_one_to_row(field_name, value):
        if isinstance(value, str):
            if value in ['TRUE', 'FALSE']:
                return field_name, value == "TRUE"
        if isinstance(value, dict):
            if 'hours' in value or 'minutes' in value:
                # ignore
                return None, None
        return field_name, value

    @staticmethod
    def convert_to_obj(row):
        if row is None:
            return None
        obj = {}
        for key in row.keys():
            new_key, value = Converter.parse_one_to_obj(key, row[key])
            if new_key is not None:
                obj[new_key] = value
        return obj

    @staticmethod
    def convert_to_row(obj):
        if obj is None:
            return None
        row = {}
        for key in obj.keys():
            new_key, value = Converter.parse_one_to_row(key, obj[key])
            if new_key is not None:
                row[new_key] = value
        return row


def create_repositories():
    scope = [
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive'
    ]
    creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
    client = gspread.authorize(creds)
    # sheet_url = "https://docs.google.com/spreadsheets/d/17OxKF0iP_aJJ0HCgJkwFsH762EUrtcEIYcPmyiiKnaM"
    sheet_url = "https://docs.google.com/spreadsheets/d/1KzUYgWwbvYXGfyehOTyZCCQf0udZiwVXxaxpmkXEe3E/edit?usp=sharing"
    sheet = client.open_by_url(sheet_url)

    run_stt_repository = SheetCRUDRepository(sheet.get_worksheet(0))
    config_repository = SheetCRUDRepository(sheet.get_worksheet(1))
    log_repository = SheetCRUDRepository(sheet.get_worksheet(2))
    secret_repository = SheetCRUDRepository(sheet.get_worksheet(3))
    return run_stt_repository, config_repository, log_repository, secret_repository


run_stt_repo, conf_repo, log_repo, secret_repo = create_repositories()


if __name__ == "__main__":
    a = create_repositories()
    print(a)
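Two quick usage sketches for the module above (a minimal sketch; worksheet contents and column titles are illustrative, not taken from the actual sheet):

    # CRUD against a worksheet whose first row holds column titles
    hit = run_stt_repo.find({"id": "1"})  # -> (row_number, row_dict) or None
    if hit is not None:
        row_number, data = hit
        run_stt_repo.update(row_number, {**data, "status": "running"})

    # Duration strings handled by Converter.parse_one_to_obj
    Converter.parse_one_to_obj("start", "[DURATION] 2h before")  # ("start", {"hours": -2})
    Converter.parse_one_to_obj("end", "[DURATION] 30m after")    # ("end", {"minutes": 30})
    Converter.parse_one_to_obj("enabled", "TRUE")                # ("enabled", True)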
kaggle_service.py CHANGED
@@ -1,583 +1,583 @@
All 582 lines are removed and re-added with identical text (line 583 is context), again consistent with a line-ending change; the page capture also cuts off partway through the re-added copy (at its line 570), while the removed copy is complete. The content, shown once:

import json
import os
from typing import Callable, List, Union, Dict

# fake default account to use kaggle.api.kaggle_api_extended
os.environ['KAGGLE_USERNAME'] = ''
os.environ['KAGGLE_KEY'] = ''

from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.rest import ApiException
import shutil
import time
import threading
import copy
from logger import sheet_logger


def get_api():
    api = KaggleApi()
    api.authenticate()
    return api


class KaggleApiWrapper(KaggleApi):
    """
    Override KaggleApi.read_config_environment to use username and secret without environment variables
    """

    def __init__(self, username, secret):
        super().__init__()
        self.username = username
        self.secret = secret

    def read_config_environment(self, config_data=None, quiet=False):
        config = super().read_config_environment(config_data, quiet)
        config['username'] = self.username
        config['key'] = self.secret
        # only work for pythonanyware
        # config['proxy'] = "http://proxy.server:3128"

        return config_data

    def __del__(self):
        # todo: fix bug when delete api
        pass

    # def get_accelerator_quota_with_http_info(self):  # noqa: E501
    #     """
    #
    #     This method makes a synchronous HTTP request by default. To make an
    #     asynchronous HTTP request, please pass async_req=True
    #     >>> thread = api.competitions_list_with_http_info(async_req=True)
    #     >>> result = thread.get()
    #
    #     :param async_req bool
    #     :param str group: Filter competitions by a particular group
    #     :param str category: Filter competitions by a particular category
    #     :param str sort_by: Sort the results
    #     :param int page: Page number
    #     :param str search: Search terms
    #     :return: Result
    #         If the method is called asynchronously,
    #         returns the request thread.
    #     """
    #
    #     all_params = []  # noqa: E501
    #     all_params.append('async_req')
    #     all_params.append('_return_http_data_only')
    #     all_params.append('_preload_content')
    #     all_params.append('_request_timeout')
    #
    #     params = locals()
    #
    #     collection_formats = {}
    #
    #     path_params = {}
    #
    #     query_params = []
    #     # if 'group' in params:
    #     #     query_params.append(('group', params['group']))  # noqa: E501
    #     # if 'category' in params:
    #     #     query_params.append(('category', params['category']))  # noqa: E501
    #     # if 'sort_by' in params:
    #     #     query_params.append(('sortBy', params['sort_by']))  # noqa: E501
    #     # if 'page' in params:
    #     #     query_params.append(('page', params['page']))  # noqa: E501
    #     # if 'search' in params:
    #     #     query_params.append(('search', params['search']))  # noqa: E501
    #
    #     header_params = {}
    #
    #     form_params = []
    #     local_var_files = {}
    #
    #     body_params = None
    #     # HTTP header `Accept`
    #     header_params['Accept'] = self.api_client.select_header_accept(
    #         ['application/json'])  # noqa: E501
    #
    #     # Authentication setting
    #     auth_settings = ['basicAuth']  # noqa: E501
    #
    #     return self.api_client.call_api(
    #         'i/kernels.KernelsService/GetAcceleratorQuotaStatistics', 'GET',
    #         # '/competitions/list', 'GET',
    #         path_params,
    #         query_params,
    #         header_params,
    #         body=body_params,
    #         post_params=form_params,
    #         files=local_var_files,
    #         response_type='Result',  # noqa: E501
    #         auth_settings=auth_settings,
    #         async_req=params.get('async_req'),
    #         _return_http_data_only=params.get('_return_http_data_only'),
    #         _preload_content=params.get('_preload_content', True),
    #         _request_timeout=params.get('_request_timeout'),
    #         collection_formats=collection_formats)

# if __name__ == "__main__":
#     api = KaggleApiWrapper('[email protected]', "c54e96568075fcc277bd10ba0e0a52b9")
#     api.authenticate()
#     print(api.get_accelerator_quota_with_http_info())


class ValidateException(Exception):
    def __init__(self, message: str):
        super(ValidateException, self).__init__(message)

    @staticmethod
    def from_api_exception(e: ApiException, kernel_slug: str):
        return ValidateException(f"Error: {e.status} {e.reason} with notebook {kernel_slug}")

    @staticmethod
    def from_api_exception_list(el: List[ApiException], kernel_slug_list: List[str]):
        message = f"Error: \n"
        for e, k in zip(el, kernel_slug_list):
            message = message + f"\t{e.status} {e.reason} with notebook {k}"
        return ValidateException(message)


class KaggleNotebook:
    def __init__(self, api: KaggleApi, kernel_slug: str, container_path: str = "./tmp", id=None):
        """
        :param api: KaggleApi
        :param kernel_slug: Notebook id, you can find it in the url of the notebook.
            For example, `username/notebook-name-123456`
        :param container_path: Path to the local folder where the notebook will be downloaded
        """
        self.api = api
        self.kernel_slug = kernel_slug
        self.container_path = container_path
        self.id = id
        if self.id is None:
            print(f"Warn: {self.__class__.__name__}.id is None")

    def status(self) -> str or None:
        """
        :return:
            "running"
            "cancelAcknowledged"
            "queued": waiting for run
            "error": when raise exception in notebook
            Throw exception when failed
        """
        res = self.api.kernels_status(self.kernel_slug)
        print(f"Status: {res}")
        if res is None:
            if self.id is not None:
                sheet_logger.update_job_status(self.id, notebook_status='None')
            return None
        if self.id is not None:
            sheet_logger.update_job_status(self.id, notebook_status=res['status'])
        return res['status']

    def _get_local_nb_path(self) -> str:
        return os.path.join(self.container_path, self.kernel_slug)

    def pull(self, path=None) -> str or None:
        """
        :param path:
        :return:
        :raises: ApiException if notebook not found or not share to user
        """
        self._clean()
        path = path or self._get_local_nb_path()
        metadata_path = os.path.join(path, "kernel-metadata.json")
        res = self.api.kernels_pull(self.kernel_slug, path=path, metadata=True, quiet=False)
        if not os.path.exists(metadata_path):
            print(f"Warn: Not found {metadata_path}. Clean {path}")
            self._clean()
            return None
        return res

    def push(self, path=None) -> str or None:
        status = self.status()
        if status in ['queued', 'running']:
            print("Warn: Notebook is " + status + ". Skip push notebook!")
            return None

        self.api.kernels_push(path or self._get_local_nb_path())
        time.sleep(1)
        status = self.status()
        return status

    def _clean(self) -> None:
        if os.path.exists(self._get_local_nb_path()):
            shutil.rmtree(self._get_local_nb_path())

    def get_metadata(self, path=None):
        path = path or self._get_local_nb_path()
        metadata_path = os.path.join(path, "kernel-metadata.json")
        if not os.path.exists(metadata_path):
            return None
        return json.loads(open(metadata_path).read())

    def check_nb_permission(self) -> Union[tuple[bool], tuple[bool, None]]:
        status = self.status()  # raise ApiException
        if status is None:
            return False, status
        return True, status

    def check_datasets_permission(self) -> bool:
        meta = self.get_metadata()
        if meta is None:
            print("Warn: cannot get metadata. Pull and try again?")
        dataset_sources = meta['dataset_sources']
        ex_list = []
        slugs = []
        for dataset in dataset_sources:
            try:
                self.api.dataset_status(dataset)
            except ApiException as e:
                print(f"Error: {e.status} {e.reason} with dataset {dataset} in notebook {self.kernel_slug}")
                ex_list.append(e)
                slugs.append(self.kernel_slug)
                # return False
        if len(ex_list) > 0:
            raise ValidateException.from_api_exception_list(ex_list, slugs)
        return True


class AccountTransactionManager:
    def __init__(self, acc_secret_dict: dict = None):
        """
        :param acc_secret_dict: {username: secret}
        """
        self.acc_secret_dict = acc_secret_dict
        if self.acc_secret_dict is None:
            self.acc_secret_dict = {}
        # self.api_dict = {username: KaggleApiWrapper(username, secret) for username, secret in acc_secret_dict.items()}
        # lock for each account to avoid concurrent use api
        self.lock_dict = {username: False for username in self.acc_secret_dict.keys()}
        self.state_lock = threading.Lock()

    def _get_api(self, username: str) -> KaggleApiWrapper:
        # return self.api_dict[username]
        return KaggleApiWrapper(username, self.acc_secret_dict[username])

    def _get_lock(self, username: str) -> bool:
        return self.lock_dict[username]

    def _set_lock(self, username: str, lock: bool) -> None:
        self.lock_dict[username] = lock

    def add_account(self, username, secret):
        if username not in self.acc_secret_dict.keys():
            self.state_lock.acquire()
            self.acc_secret_dict[username] = secret
            self.lock_dict[username] = False
            self.state_lock.release()

    def remove_account(self, username):
        if username in self.acc_secret_dict.keys():
            self.state_lock.acquire()
            del self.acc_secret_dict[username]
            del self.lock_dict[username]
            self.state_lock.release()
        else:
            print(f"Warn: try to remove account not in the list: {username}, list: {self.acc_secret_dict.keys()}")

    def get_unlocked_api_unblocking(self, username_list: List[str]) -> tuple[KaggleApiWrapper, Callable[[], None]]:
        """
        :param username_list: list of username
        :return: (api, release) where release is a function to release api
        """
        while True:
            print("get_unlocked_api_unblocking" + str(username_list))
            for username in username_list:
                self.state_lock.acquire()
                if not self._get_lock(username):
                    self._set_lock(username, True)
                    api = self._get_api(username)

                    def release():
                        self.state_lock.acquire()
                        self._set_lock(username, False)
                        api.__del__()
                        self.state_lock.release()

                    self.state_lock.release()
                    return api, release
                self.state_lock.release()
            time.sleep(1)


class NbJob:
    def __init__(self, acc_dict: dict, nb_slug: str, rerun_stt: List[str] = None, not_rerun_stt: List[str] = None, id=None):
        """
        :param acc_dict:
        :param nb_slug:
        :param rerun_stt:
        :param not_rerun_stt: If notebook status in this list, do not rerun it. (Note: do not add "queued", "running")
        """
        self.rerun_stt = rerun_stt
        if self.rerun_stt is None:
            self.rerun_stt = ['complete']
        self.not_rerun_stt = not_rerun_stt

        if self.not_rerun_stt is None:
            self.not_rerun_stt = ['queued', 'running', 'cancelAcknowledged']
        assert "queued" in self.not_rerun_stt
        assert "running" in self.not_rerun_stt

        self.acc_dict = acc_dict
        self.nb_slug = nb_slug
        self.id = id

    def get_acc_dict(self):
        return self.acc_dict

    def get_username_list(self):
        return list(self.acc_dict.keys())

    def is_valid_with_acc(self, api):
        """
        :param api:
        :return:
        :raise: ValidationException
        """
        notebook = KaggleNotebook(api, self.nb_slug, id=self.id)

        try:
            notebook.pull()  # raise ApiException
            stt, _ = notebook.check_nb_permission()  # note: raise ApiException
            stt = notebook.check_datasets_permission()  # raise ValidationException
        except ApiException as e:
            raise ValidateException.from_api_exception(e, self.nb_slug)
        # if not stt:
        #     return False
        return True

    def is_valid(self):
        for username in self.acc_dict.keys():
            secrets = self.acc_dict[username]
            api = KaggleApiWrapper(username=username, secret=secrets)
            api.authenticate()
            if not self.is_valid_with_acc(api):
                return False
        return True

    def acc_check_and_rerun_if_need(self, api: KaggleApi) -> bool:
        """
        :return:
            True if rerun success or notebook is running
            False user does not have enough gpu quotas
        :raises
            Exception if setup error
        """
        notebook = KaggleNotebook(api, self.nb_slug, "./tmp", id=self.id)  # todo: change hardcode container_path here

        notebook.pull()
        assert notebook.check_datasets_permission(), f"User {api} does not have permission on datasets of notebook {self.nb_slug}"
        success, status1 = notebook.check_nb_permission()
        assert success, f"User {api} does not have permission on notebook {self.nb_slug}"  # todo: using api.username

        if status1 in self.rerun_stt:
            status2 = notebook.push()
            time.sleep(10)
            status3 = notebook.status()

            # if 3 times same stt -> acc out of quota
            if status1 == status2 == status3:
                sheet_logger.log(username=api.username, nb=self.nb_slug, log="Try but no effect. Seem account to be out of quota")
                return False
            if status3 in self.not_rerun_stt:
                # sheet_logger.log(username=api.username, nb=self.nb_slug, log=f"Notebook status is {status3} is in ignore status list {self.not_rerun_stt}, do nothing!")
                sheet_logger.log(username=api.username, nb=self.nb_slug,
                                 log=f"Schedule notebook successfully. Current status is '{status3}'")
                return True
            if status3 not in ["queued", "running"]:
                # return False  # todo: check when user is out of quota
                print(f"Error: status is {status3}")

                raise Exception("Setup exception")

            return True

        sheet_logger.log(username=api.username, nb=self.nb_slug, log=f"Notebook status is '{status1}' is not in {self.rerun_stt}, do nothing!")
        return True

    @staticmethod
    def from_dict(obj: dict, id=None):
        return NbJob(acc_dict=obj['accounts'], nb_slug=obj['slug'], rerun_stt=obj.get('rerun_status'), not_rerun_stt=obj.get('not_rerun_stt'), id=id)


class KernelRerunService:
    def __init__(self):
        self.jobs: Dict[str, NbJob] = {}
        self.acc_manager = AccountTransactionManager()
        self.username2jobid = {}
        self.jobid2username = {}

    def add_job(self, nb_job: NbJob):
        if nb_job.nb_slug in self.jobs.keys():
            print("Warn: nb_job already in job list")
            return
        self.jobs[nb_job.nb_slug] = nb_job
        self.jobid2username[nb_job.nb_slug] = nb_job.get_username_list()
        for username in nb_job.get_username_list():
            if username not in self.username2jobid.keys():
                self.username2jobid[username] = []
                self.acc_manager.add_account(username, nb_job.acc_dict[username])
            self.username2jobid[username].append(nb_job.nb_slug)

    def remove_job(self, nb_job):
        if nb_job.nb_slug not in self.jobs.keys():
            print("Warn: try to remove nb_job not in list")
            return
        username_list = self.jobid2username[nb_job.nb_slug]
        username_list = [username for username in username_list if len(self.username2jobid[username]) == 1]

        for username in username_list:
            del self.username2jobid[username]
            self.acc_manager.remove_account(username)
        del self.jobs[nb_job.nb_slug]
        del self.jobid2username[nb_job.nb_slug]

    def validate_all(self):
        for username in self.acc_manager.acc_secret_dict.keys():
            api, release = self.acc_manager.get_unlocked_api_unblocking([username])
            api.authenticate()
            print(f"Using username: {api.username}")

            for job in self.jobs.values():
                ex_msg_list = []
                if username in job.get_username_list():
                    print(f"Validate user: {username}, job: {job.nb_slug}")

                    try:
                        job.is_valid_with_acc(api)
                    except ValidateException as e:
                        print(f"Error: not valid")
                        a = f"Setup error: {username} does not have permission on notebook {job.nb_slug} or related datasets"
                        if job.id is not None:  # if have id, write log
                            ex_msg_list.append(f"Account {username}\n" + str(e) + "\n")
                        else:  # if not have id, raise
                            raise Exception(a)
                    if len(ex_msg_list) > 0:
                        sheet_logger.update_job_status(job.id, validate_status="\n".join(ex_msg_list))
                    else:
                        sheet_logger.update_job_status(job.id, validate_status="success")

            release()
        return True

    def status_all(self):
        for job in self.jobs.values():
            print(f"Job: {job.nb_slug}")
            api, release = self.acc_manager.get_unlocked_api_unblocking(job.get_username_list())
            api.authenticate()
            print(f"Using username: {api.username}")
            notebook = KaggleNotebook(api, job.nb_slug, id=job.id)
            print(f"Notebook: {notebook.kernel_slug}")
            print(notebook.status())
            release()

    def run(self, nb_job: NbJob):
        username_list = copy.copy(nb_job.get_username_list())
        while len(username_list) > 0:
            api, release = self.acc_manager.get_unlocked_api_unblocking(username_list)
            api.authenticate()
            print(f"Using username: {api.username}")

            try:
                result = nb_job.acc_check_and_rerun_if_need(api)
                if result:
                    return True
            except Exception as e:
                print(e)
                release()
                break

            if api.username in username_list:
                username_list.remove(api.username)
                release()
            else:
                release()
                raise Exception("")
        return False

    def run_all(self):
        for job in self.jobs.values():
            success = self.run(job)
            print(f"Job: {job.nb_slug} {success}")


# if __name__ == "__main__":
#     service = KernelRerunService()
#     files = os.listdir("./config")
#     for file in files:
#         if '.example' not in file:
#             with open(os.path.join("./config", file), "r") as f:
#                 obj = json.loads(f.read())
#                 print(obj)
#                 service.add_job(NbJob.from_dict(obj))
#     service.run_all()

# try:
#     acc_secret_dict = {
#         "hahunavth": "secret",
#         "hahunavth2": "secret",
#         "hahunavth3": "secret",
#         "hahunavth4": "secret",
#         "hahunavth5": "secret",
#     }
#     acc_manager = AccountTransactionManager(acc_secret_dict)
#
#
#     def test1():
#         username_list = ["hahunavth", "hahunavth2", "hahunavth3", "hahunavth4", "hahunavth5"]
#         while len(username_list) > 0:
#             api, release = acc_manager.get_unlocked_api_unblocking(username_list)
#             print("test1 is using " + api.username)
#             time.sleep(1)
#             release()
#             if api.username in username_list:
#                 username_list.remove(api.username)
#             else:
#                 raise Exception("")
#             print("test1 release " + api.username)
#
#
#     def test2():
#         username_list = ["hahunavth2", "hahunavth3", "hahunavth5"]
#         while len(username_list) > 0:
#             api, release = acc_manager.get_unlocked_api_unblocking(username_list)
#             print("test2 is using " + api.username)
#             time.sleep(3)
#             release()
#             if api.username in username_list:
#                 username_list.remove(api.username)
#             else:
#                 raise Exception("")
#             print("test2 release " + api.username)
#
#
#     t1 = threading.Thread(target=test1)
#     t2 = threading.Thread(target=test2)
#     t1.start()
#     t2.start()
#     t1.join()
#     t2.join()
#
#     # kgapi = KaggleApiWrapper("hahunavth", "fb3d65ea4d06f91a83cf571e9a39d40d")
#     # kgapi.authenticate()
#     # # kgapi = get_api()
#     # notebook = KaggleNotebook(kgapi, "hahunavth/ess-vlsp2023-denoising", "./tmp")
#     # # print(notebook.pull())
#     # # print(notebook.check_datasets_permission())
#     # print(notebook.check_nb_permission())
#     # # print(notebook.status())
#     # # notebook.push()
#     # # print(notebook.status())
# except ApiException as e:
#     print(e.status)
#     print(e.reason)
#     raise e
#     # 403 when nb not exists or not share to acc
#     # 404 when push to unknow kenel_slug.username
#     # 401 when invalid username, pass
|
571 |
+
# # # print(notebook.pull())
|
572 |
+
# # # print(notebook.check_datasets_permission())
|
573 |
+
# # print(notebook.check_nb_permission())
|
574 |
+
# # # print(notebook.status())
|
575 |
+
# # # notebook.push()
|
576 |
+
# # # print(notebook.status())
|
577 |
+
# except ApiException as e:
|
578 |
+
# print(e.status)
|
579 |
+
# print(e.reason)
|
580 |
+
# raise e
|
581 |
+
# # 403 when nb not exists or not share to acc
|
582 |
+
# # 404 when push to unknow kenel_slug.username
|
583 |
# # 401 when invalid username, pass
|
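
For reference, a sketch of the job dict that NbJob.from_dict expects, inferred from the keys it reads above; the slug, account names, secrets, and status lists are placeholders, not values from this repo:

# Hypothetical job definition; only "accounts" and "slug" are required,
# the two status lists fall back to NbJob defaults via .get().
job_cfg = {
    "slug": "someuser/some-notebook",               # notebook to keep alive
    "accounts": {"someuser": "kaggle-api-secret"},  # username -> Kaggle API secret
    "rerun_status": ["complete", "error"],          # assumed example statuses
    "not_rerun_stt": ["queued", "running"],         # assumed example statuses
}

service = KernelRerunService()
service.add_job(NbJob.from_dict(job_cfg, id=2))     # id: config-sheet row used for status updates
service.run_all()
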
logger.py
CHANGED
@@ -1,53 +1,59 @@
+import platform,socket,re,uuid,json,psutil,logging
+from datetime import datetime as dt
+from google_sheet import log_repo, conf_repo, SheetCRUDRepository
+from version import VERSION
+import pytz
+
+
+def get_now():
+    current_time = dt.now(pytz.timezone('Asia/Ho_Chi_Minh'))
+    return current_time
+
+
+def get_sys_info():
+    try:
+        info = {}
+        info['platform'] = platform.system()
+        info['platform-release'] = platform.release()
+        info['platform-version'] = platform.version()
+        info['architecture'] = platform.machine()
+        info['hostname'] = socket.gethostname()
+        info['ip-address'] = socket.gethostbyname(socket.gethostname())
+        info['mac-address'] = ':'.join(re.findall('..', '%012x' % uuid.getnode()))
+        info['processor'] = platform.processor()
+        info['ram'] = str(round(psutil.virtual_memory().total / (1024.0 ** 3))) + " GB"
+        return json.dumps(info)
+    except Exception as e:
+        logging.exception(e)
+
+
+class SheetLogger:
+    def __init__(self, log_repo: SheetCRUDRepository, config_repo: SheetCRUDRepository):
+        self.log_repo = log_repo
+        self.config_repo = config_repo
+
+    def log(self, log='', nb='', username=''):
+        self.log_repo.create({
+            "time": str(get_now()),
+            "notebook_name": nb,
+            "kaggle_username": username,
+            "log": log,
+            "device": str(get_sys_info()),
+            "version": VERSION
+        })
+
+    def update_job_status(self, row, validate_status: str = None, notebook_status: str = None):
+        data = self.config_repo.read(row)
+        data.update({"last_updated": str(get_now())})
+        if validate_status is not None:
+            data.update({"validate_status": validate_status})
+        if notebook_status is not None:
+            data.update({"notebook_status": notebook_status})
+        self.config_repo.update(row, data)
+        # print(self.config_repo.find({"config": "hahunavth/vlsp-sv-2023-s2pecnet-train"}))
+
+
+sheet_logger = SheetLogger(log_repo, conf_repo)
+
+if __name__ == "__main__":
+    sheet_logger.update_job_status(5, "abc")
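
A minimal usage sketch for the logger above; the row number and messages are illustrative, and update_job_status assumes the config sheet already contains that row, since it reads the row before writing it back:

from logger import sheet_logger, get_now

# Append one row to the log sheet; nb/username may be left empty.
sheet_logger.log(log=f"heartbeat at {get_now()}",
                 nb="someuser/some-notebook", username="someuser")

# Merge a status into config-sheet row 5 and refresh its last_updated stamp.
sheet_logger.update_job_status(5, validate_status="success")
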
main.py
CHANGED
@@ -1,99 +1,109 @@
+from typing import Annotated
+
+from apscheduler.schedulers.background import BackgroundScheduler
+from fastapi.encoders import jsonable_encoder
+from fastapi.exceptions import RequestValidationError
+from starlette.middleware.cors import CORSMiddleware
+from fastapi import FastAPI, Header, UploadFile, Depends, HTTPException, status
+import base64
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from starlette.responses import JSONResponse
+from collections import defaultdict
+from pydantic import BaseModel
+from threading import Lock
+
+from logger import get_now
+from run import main as run_main
+
+START_AT = get_now()
+app = FastAPI()
+
+lock = Lock()
+n_run = 0
+last_run = None
+is_running = False
+
+
+def scheduled_job():
+    global n_run, last_run, is_running
+
+    with lock:
+        if is_running:
+            return False
+        is_running = True
+
+    print("Job is running!")
+    try:
+        run_main()
+    finally:
+        # always clear the flag, otherwise a crashed run would block all future runs
+        with lock:
+            n_run = n_run + 1
+            last_run = get_now()
+            is_running = False
+    return True
+
+
+# Create a scheduler
+scheduler = BackgroundScheduler()
+
+# Add the scheduled job to the scheduler
+scheduler.add_job(scheduled_job, 'interval', minutes=30)
+
+# Start the scheduler
+scheduler.start()
+
+# Stop the scheduler when the FastAPI application shuts down
+@app.on_event("shutdown")
+def shutdown_event():
+    scheduler.shutdown()
+
+
+class BaseResponse(BaseModel):
+    status: int = 1
+    message: str = ""
+    result: object = None
+
+
+@app.exception_handler(HTTPException)
+async def http_exception_handler(request, exc: HTTPException):
+    return JSONResponse(
+        status_code=status.HTTP_400_BAD_REQUEST,
+        content=jsonable_encoder(BaseResponse(status=0, message=exc.detail))
+    )
+
+
+@app.exception_handler(RequestValidationError)
+def validation_exception_handler(request, exc: RequestValidationError) -> JSONResponse:
+    reformatted_message = defaultdict(list)
+    for pydantic_error in exc.errors():
+        loc, msg = pydantic_error["loc"], pydantic_error["msg"]
+        filtered_loc = loc[1:] if loc[0] in ("body", "query", "path") else loc
+        field_string = ".".join(filtered_loc)
+        reformatted_message[field_string].append(msg)
+
+    return JSONResponse(
+        status_code=status.HTTP_400_BAD_REQUEST,
+        content=jsonable_encoder(BaseResponse(status=0, message="Invalid request", result=reformatted_message))
+    )
+
+
+@app.get("/status", response_model=BaseResponse)
+def get_status():  # named get_status so it does not shadow fastapi.status used by the handlers above
+    return BaseResponse(result={
+        "start_at": START_AT,
+        "current": get_now(),
+        "n_runs": n_run,
+        "last_run": last_run,
+    })
+
+
+@app.get("/run")
+def run_once():
+    print("Running the job once.")
+    success = scheduled_job()  # manually trigger the job; blocks until the run finishes
+    if not success:
+        return BaseResponse(message="Job is already running; not starting a new one")
+    return BaseResponse(message="Job executed once.")
+
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
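
Two details worth noting in main.py: the lock only guards the is_running flag, so a 30-minute tick and a manual /run can never start two overlapping runs, while the slow run_main() call itself happens outside the lock; and /run executes the whole job synchronously, so the request blocks until the run finishes. A smoke test against a local instance might look like this (the host and port are assumptions, not part of the code above):

import requests

BASE = "http://127.0.0.1:7860"  # hypothetical address of the running Space

print(requests.get(f"{BASE}/status").json())  # uptime and run counters
print(requests.get(f"{BASE}/run").json())     # triggers one run, or reports one already in flight
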
requirements.txt
CHANGED
@@ -1,3 +1,8 @@
-kaggle
-gspread
-oauth2client
+kaggle
+gspread
+oauth2client
+pytz
+apscheduler
+fastapi
+starlette
+pydantic
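
Two modules imported elsewhere in this commit are not pinned here: psutil (logger.py) and python-dateutil (run_stt.py). The latter arrives transitively with kaggle, but psutil presumably has to come from the Dockerfile or base image, and an ASGI server is still needed to serve main.py. Assuming uvicorn is available (it is not listed above), a hypothetical launcher could be:

# serve.py - sketch only; uvicorn itself is an assumption,
# since it does not appear in requirements.txt.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860)
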
run.py
ADDED
@@ -0,0 +1,130 @@
+from google_sheet import conf_repo, secret_repo
+import json
+
+from kaggle_service import KernelRerunService, NbJob
+from logger import sheet_logger, get_now
+from run_stt import run_stt_service
+
+
+def get_secret_dict():
+    # load all accounts and secrets into a dict
+    secret_dict = {}
+    try:
+        for i in range(2, 50):  # note: read rows 2 - 50
+            rs = secret_repo.read(i)
+            if not rs:
+                break
+            assert 'username' in rs
+            assert 'secret' in rs
+
+            username = rs['username'].rstrip()
+            secret = rs['secret'].rstrip()
+
+            secret_dict[username] = secret
+    except Exception as e:
+        sheet_logger.log(log="Get secret table failed!!" + str(e))
+    return secret_dict
+
+
+def get_config_list(secret_dict):
+    configs = []
+    ids = []
+    try:
+        for i in range(2, 50):  # note: read rows 2 - 50
+            rs = conf_repo.read(i)
+            if not rs:
+                break
+            if not rs['config']:
+                # an empty config cell marks the end of the table;
+                # clear any leftover cells in that row before stopping
+                reset_keys = []
+                for key in rs:
+                    if rs[key]:
+                        reset_keys.append(key)
+                if len(reset_keys) > 0:
+                    conf_repo.update(row_index=i, data={k: '' for k in reset_keys})
+                break
+            print(i, rs)
+            try:
+                # validation
+                # assert 'config' in rs, 'require config column'
+                assert 'usernames' in rs, 'require usernames column'
+                assert 'enable' in rs, 'require enable column'
+
+                assert rs['enable'].rstrip() in ['enable', 'disable'], rs['enable']
+                assert 'slug' in rs['config'], 'require config.slug'
+
+                cfg = json.loads(rs['config'])
+                usernames = set(rs['usernames']
+                                .rstrip().replace('\n', ' ').replace(',', ' ').replace(';', ' ').replace('|', ' ')
+                                .split(' '))
+                usernames = [u for u in usernames if u]
+                is_enabled = rs['enable'].rstrip() == 'enable'
+                if not is_enabled:
+                    sheet_logger.log(log="Disabled, skip!", nb=cfg['slug'])
+                    print("skip ", cfg['slug'])
+                    continue
+
+                assert len(usernames) > 0, 'len usernames == 0'
+
+                # process config
+                accounts = {u: secret_dict[u] for u in secret_dict if u in usernames}
+                assert not set(usernames).difference(set(accounts.keys())), set(usernames).difference(set(accounts.keys()))
+                cfg = {**cfg, "accounts": accounts}
+
+                # save
+                configs.append(cfg)
+                ids.append(i)
+                print(cfg)
+            except AssertionError:
+                import traceback
+                sheet_logger.update_job_status(i, validate_status=str(traceback.format_exc()))
+    except Exception:
+        import traceback
+        sheet_logger.log(log="Get config failed!!" + str(traceback.format_exc()))
+    return configs, ids
+
+
+def create_service(configs, ids):
+    service = KernelRerunService()
+    for config, idx in zip(configs, ids):
+        try:
+            service.add_job(NbJob.from_dict(config, id=idx))
+        except Exception as e:
+            sheet_logger.update_job_status(idx, validate_status=str(e))
+    return service
+
+
+def main():
+    sheet_logger.log("========= start ==========")
+
+    obj = run_stt_service.get_obj()
+    if run_stt_service.is_need_to_run(obj):
+        # start
+        run_stt_service.set_is_running({})
+
+        secret_dict = get_secret_dict()
+
+        # load config from the Google Sheet file
+        configs, ids = get_config_list(secret_dict)
+
+        # add configs to the service
+        service = create_service(configs, ids)
+
+        # run the service
+        try:
+            service.validate_all()
+            # service.status_all()
+            service.run_all()
+        except Exception as e:
+            sheet_logger.log(log=str(e))
+
+        # done
+        run_stt_service.set_run_done()
+
+    else:
+        sheet_logger.log(f"Current time {get_now()} is not after {obj.get('last_run')} + {obj.get('auto_run_after_last_run')}, and no new run was requested")
+
+    sheet_logger.log("========= end ==========")
+
+
+if __name__ == "__main__":
+    main()
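
For reference, a sketch of one config-sheet row in the shape get_config_list accepts, using the column names asserted above; all values here are placeholders. The config cell holds the JSON that ends up in NbJob.from_dict, and usernames may be separated by spaces, commas, semicolons, pipes, or newlines:

# Hypothetical row as conf_repo.read(i) would return it; every cell is a string.
row = {
    "config": '{"slug": "someuser/some-notebook", "rerun_status": ["complete", "error"]}',
    "usernames": "someuser, someuser2; someuser3",
    "enable": "enable",  # must be exactly "enable" or "disable"
}
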
run_stt.py
ADDED
@@ -0,0 +1,68 @@
+from google_sheet import run_stt_repo as _run_stt_repo, Converter
+from logger import get_now
+from dateutil import parser
+import datetime as dt
+
+
+class RunSttService:
+    def __init__(self, run_stt_repo):
+        self.run_stt_repo = run_stt_repo
+
+    def get_row(self):
+        config = self.run_stt_repo.read(2)
+        return config
+
+    def get_obj(self):
+        return Converter.convert_to_obj(self.get_row())
+
+    def is_need_to_run(self, obj):
+        is_running = obj['is_running']
+        if is_running:
+            return False
+        if obj['require_new_run']:
+            return True
+        # guard against an empty last_run before parsing (parser.parse('') raises)
+        if not obj['last_run']:
+            return True
+        last_run = parser.parse(obj['last_run'])
+        duration = obj['auto_run_after_last_run']
+        if not duration:
+            return False
+        # print(duration)
+        if get_now() - last_run > dt.timedelta(**duration):
+            return True
+        return False
+
+    def get_const_cfg(self, const_keys=['require_new_run', 'auto_run_after_last_run']):
+        row = self.get_row()
+        row = {k: row[k] for k in row if k in const_keys}
+        return row
+
+    def set_is_running(self, obj):
+        obj['last_run'] = str(get_now())
+        obj['is_running'] = True
+        obj = {**obj, **self.get_const_cfg()}
+        row = Converter.convert_to_row(obj)
+        print(row)
+        self.run_stt_repo.update(row_index=2, data=row)
+
+    def set_run_done(self):
+        obj = {
+            'is_running': False,
+            "require_new_run": False,
+            **self.get_const_cfg(['auto_run_after_last_run', 'last_run'])
+        }
+        row = Converter.convert_to_row(obj)
+        self.run_stt_repo.update(row_index=2, data=row)
+
+
+run_stt_service = RunSttService(_run_stt_repo)
+
+
+# if __name__ == '__main__':
+#     run_stt_service = RunSttService(_run_stt_repo)
+#     obj = run_stt_service.get_obj()
+#     print(run_stt_service.is_need_to_run(obj))
+#     run_stt_service.set_is_running({})
+#     import time
+#     time.sleep(10)
+#     run_stt_service.set_run_done()
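
is_need_to_run passes auto_run_after_last_run straight into dt.timedelta(**duration), so after Converter.convert_to_obj that field must decode to a dict of timedelta keyword arguments, and last_run must be parseable by dateutil. A sketch of the decoded control object (all values illustrative):

import datetime as dt

# Hypothetical decoded control row; the keys mirror the fields read above.
obj = {
    "is_running": False,
    "require_new_run": False,
    "last_run": "2023-11-05 10:00:00+07:00",  # dateutil-parseable timestamp
    "auto_run_after_last_run": {"hours": 6},  # becomes dt.timedelta(hours=6)
}
assert dt.timedelta(**obj["auto_run_after_last_run"]) == dt.timedelta(hours=6)
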
test.ipynb
CHANGED
@@ -1,160 +1,160 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2023-11-05T00:47:48.131663100Z",
+     "start_time": "2023-11-05T00:47:35.527891600Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting kaggle\r\n",
+      "  Downloading kaggle-1.5.16.tar.gz (83 kB)\r\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m83.6/83.6 kB\u001B[0m \u001B[31m444.7 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m0:01\u001B[0m00:01\u001B[0m\r\n",
+      "\u001B[?25h  Preparing metadata (setup.py) ... \u001B[?25ldone\r\n",
+      "\u001B[?25hRequirement already satisfied: six>=1.10 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (1.16.0)\r\n",
+      "Requirement already satisfied: certifi in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2023.7.22)\r\n",
+      "Requirement already satisfied: python-dateutil in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.8.2)\r\n",
+      "Requirement already satisfied: requests in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.31.0)\r\n",
+      "Collecting tqdm (from kaggle)\r\n",
+      "  Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\r\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m57.6/57.6 kB\u001B[0m \u001B[31m332.3 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m \u001B[36m0:00:01\u001B[0m\r\n",
+      "\u001B[?25hCollecting python-slugify (from kaggle)\r\n",
+      "  Downloading python_slugify-8.0.1-py2.py3-none-any.whl (9.7 kB)\r\n",
+      "Requirement already satisfied: urllib3 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (2.0.7)\r\n",
+      "Requirement already satisfied: bleach in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from kaggle) (6.1.0)\r\n",
+      "Requirement already satisfied: webencodings in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from bleach->kaggle) (0.5.1)\r\n",
+      "Collecting text-unidecode>=1.3 (from python-slugify->kaggle)\r\n",
+      "  Downloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)\r\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m78.2/78.2 kB\u001B[0m \u001B[31m944.9 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0ma \u001B[36m0:00:01\u001B[0m\r\n",
+      "\u001B[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from requests->kaggle) (3.3.1)\r\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /home/kryo/miniconda3/envs/vlsp2023-tts-api/lib/python3.10/site-packages (from requests->kaggle) (3.4)\r\n",
+      "Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)\r\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m78.3/78.3 kB\u001B[0m \u001B[31m748.5 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m \u001B[36m0:00:01\u001B[0m\r\n",
+      "\u001B[?25hBuilding wheels for collected packages: kaggle\r\n",
+      "  Building wheel for kaggle (setup.py) ... \u001B[?25ldone\r\n",
+      "\u001B[?25h  Created wheel for kaggle: filename=kaggle-1.5.16-py3-none-any.whl size=110683 sha256=fbd426d35c51820adb030ee423c20d997221fd1cdb7093a4b6af3afa94d170f3\r\n",
+      "  Stored in directory: /home/kryo/.cache/pip/wheels/43/4b/fb/736478af5e8004810081a06259f9aa2f7c3329fc5d03c2c412\r\n",
+      "Successfully built kaggle\r\n",
+      "Installing collected packages: text-unidecode, tqdm, python-slugify, kaggle\r\n",
+      "Successfully installed kaggle-1.5.16 python-slugify-8.0.1 text-unidecode-1.3 tqdm-4.66.1\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install kaggle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "55f5e4a1d2ec2ecc"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001B[31mERROR: Could not find a version that satisfies the requirement kaggle_api_extended (from versions: none)\u001B[0m\u001B[31m\r\n",
+      "\u001B[0m\u001B[31mERROR: No matching distribution found for kaggle_api_extended\u001B[0m\u001B[31m\r\n",
+      "\u001B[0m"
+     ]
+    }
+   ],
+   "source": [],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-11-05T00:51:45.918622100Z",
+     "start_time": "2023-11-05T00:51:45.022051100Z"
+    }
+   },
+   "id": "28cea8fd0c172fff"
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## merge files"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "fd44ed9dee4275cf"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Files merged successfully!\n"
+     ]
+    }
+   ],
+   "source": [
+    "import shutil\n",
+    "from pathlib import Path\n",
+    "try:\n",
+    "    with open(Path('merged_file.py'), 'wb') as destination_file:\n",
+    "        with open(Path('google_sheet.py'), 'rb') as file1:\n",
+    "            shutil.copyfileobj(file1, destination_file)\n",
+    "        with open(Path('logger.py'), 'rb') as file2:\n",
+    "            shutil.copyfileobj(file2, destination_file)\n",
+    "        with open(Path('kaggle_service.py'), 'rb') as file3:\n",
+    "            shutil.copyfileobj(file3, destination_file)\n",
+    "        with open(Path('main.py'), 'rb') as file4:\n",
+    "            shutil.copyfileobj(file4, destination_file)\n",
+    "    print(\"Files merged successfully!\")\n",
+    "except :\n",
+    "    raise Exception(\"Cannot merge files!\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-11-05T02:15:34.843461500Z",
+     "start_time": "2023-11-05T02:15:34.781001200Z"
+    }
+   },
+   "id": "2568f49413e2057c"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
version.py
CHANGED
@@ -1 +1 @@
-VERSION="v1.0.1"
+VERSION="v1.0.1"