File size: 10,110 Bytes
4ef32b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import ccxt
import sys
import os
import argparse
import time
from datetime import datetime, timezone
import pytz
import threading
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib

def fetch_markets(exchange):
    """Return all spot */USDT symbols listed on the exchange.

    Exits the process with status 1 if the exchange cannot be queried,
    since no further work is possible without a market list.
    """
    try:
        markets = exchange.fetch_markets()
        return [
            market['symbol']
            for market in markets
            if market['spot'] and market['symbol'].endswith('/USDT')
        ]
    except (ccxt.ExchangeError, ccxt.NetworkError, ccxt.DDoSProtection) as e:
        print(f"Exchange error: {str(e)}")
        # Bug fix: the previous code SIGKILLed its own process
        # (os.kill(os.getpid(), 9)), which skips all cleanup/flushing and
        # made the following sys.exit(-999) unreachable. A plain exit is
        # both sufficient and orderly.
        sys.exit(1)

def fetch_ohlcv(exchange, symbol, timeframe, limit=1000):
    """Download the full OHLCV history for *symbol*, paging forward in time.

    Starts at 2020-01-01 UTC and keeps requesting batches of up to *limit*
    candles until the exchange returns a short or empty page. Returns an
    empty list if any request fails.
    """
    collected = []
    try:
        cursor = exchange.parse8601('2020-01-01T00:00:00Z')  # Start date can be adjusted
        while True:
            batch = exchange.fetch_ohlcv(symbol, timeframe, since=cursor, limit=limit)
            if not batch:
                break
            collected.extend(batch)
            if len(batch) < limit:
                # Short page: we have reached the newest available candle.
                break
            # Resume one millisecond past the last candle we received.
            cursor = batch[-1][0] + 1
        return collected
    except Exception as e:
        print(f"Error fetching OHLCV data for {symbol}: {str(e)}")
        return []

def fetch_current_price(exchange, symbol):
    """Return the current ask price for *symbol*, or None.

    None is returned both when the ticker has no 'ask' field and when
    fetching the ticker fails (the error is logged).
    """
    try:
        ticker = exchange.fetch_ticker(symbol)
        # dict.get is the idiomatic form of "'ask' in t ? t['ask'] : None".
        return ticker.get('ask')
    except Exception as e:
        print(f"Error fetching current price for {symbol}: {str(e)}")
        return None

def format_candle_time(timestamp):
    """Render a millisecond epoch timestamp as a UTC 'YYYY-MM-DD HH:MM:SS' string."""
    moment = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
    return moment.strftime('%Y-%m-%d %H:%M:%S')

def save_history_to_file(symbol, timeframe, ohlcv):
    """Write OHLCV candles to downloaded_history/<BASE>_<QUOTE>_<tf>_<start>_<end>.csv.

    Does nothing (beyond a notice) when *ohlcv* is empty. '/' in the
    symbol is replaced with '_' so the name is filesystem-safe.
    """
    directory = "downloaded_history"
    # exist_ok avoids the exists()/makedirs() race of the original code.
    os.makedirs(directory, exist_ok=True)

    if not ohlcv:
        print(f"No OHLCV data to save for {symbol}")
        return

    # Candle layout: [timestamp_ms, open, high, low, close, volume].
    start_date = format_candle_time(ohlcv[0][0]).split()[0]
    end_date = format_candle_time(ohlcv[-1][0]).split()[0]
    filename = (f"{directory}/{symbol.replace('/', '_')}_{timeframe}_{start_date}_{end_date}.csv").replace(" ", "_").replace(":", "-")

    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
        for candle in ohlcv:
            timestamp, open_price, high_price, low_price, close_price, volume = candle
            writer.writerow([format_candle_time(timestamp), open_price, high_price, low_price, close_price, volume])

    # Bug fix: this previously printed a literal placeholder instead of
    # the actual path that was written.
    print(f"Saved history to {filename}")

def load_data_from_files(directory, symbol_filter=None):
    """Load every history CSV under *directory* into one DataFrame.

    Filenames follow save_history_to_file's convention,
    BASE_QUOTE_TIMEFRAME_START_END.csv (e.g. BTC_USDT_1h_2020-01-01_...),
    so the symbol spans the first TWO underscore-separated parts and the
    timeframe is the third.

    Returns (concatenated DataFrame, list of file paths); the frame is
    empty when nothing matched. *symbol_filter* keeps only files whose
    name starts with it (e.g. 'BTC_USDT').
    """
    all_data = []
    filenames = []
    for filename in os.listdir(directory):
        if not filename.endswith(".csv"):
            continue
        if symbol_filter and not filename.startswith(symbol_filter):
            continue  # Skip files that do not start with the symbol filter
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        parts = filename.split('_')
        # Bug fix: symbols are written as BASE_QUOTE, so parts[0] alone
        # ('BTC') is not the symbol, and parts[1] ('USDT') was previously
        # mistaken for the timeframe. Join the first two parts for the
        # symbol and take the third part as the timeframe.
        df['symbol'] = '_'.join(parts[:2])
        df['Timeframe'] = parts[2] if len(parts) > 2 else ''
        all_data.append(df)
        filenames.append(filepath)
    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame(), filenames

def preprocess_data(df):
    """Turn raw candle rows into (features, target) for model training.

    Mutates *df* in place: indexes it by timestamp, sorts it, and adds a
    'Target' column holding the next row's close. Rows without a target
    (the final row) or with other NaNs are dropped.
    """
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df.set_index('Timestamp', inplace=True)
    df.sort_index(inplace=True)
    # The next candle's close is what the model learns to predict.
    df['Target'] = df['Close'].shift(-1)
    df.dropna(inplace=True)

    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    return df[feature_columns], df['Target']

def train_model(X, y):
    """Fit a random-forest close-price regressor on an 80/20 split.

    Features are standardized with a StandardScaler fitted on the
    training partition only. Prints the hold-out RMSE and returns
    (model, scaler).

    NOTE(review): train_test_split shuffles by default, which leaks
    future candles into the training set for time-series data — confirm
    whether shuffle=False is intended before relying on the RMSE.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    train_features = scaler.fit_transform(X_train)
    test_features = scaler.transform(X_test)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(train_features, y_train)

    # Report the hold-out error so runs can be compared across data sets.
    rmse = mean_squared_error(y_test, model.predict(test_features)) ** 0.5
    print(f"Model trained. RMSE: {rmse:.4f}")

    return model, scaler

def predict_next_candle(model, scaler, data):
    """Scale the OHLCV feature columns of *data* and return *model*'s
    per-row predictions."""
    features = data[['Open', 'High', 'Low', 'Close', 'Volume']]
    return model.predict(scaler.transform(features))

def analyze_symbol(exchange, symbol, timeframe, output_file):
    """Report a symbol's widest-range candle alongside its current price.

    Downloads the full history, saves it to CSV, finds the candle with
    the largest high-low spread, and appends a formatted summary to
    *output_file*. Failures are logged and swallowed so a scan over many
    symbols keeps going.
    """
    try:
        ohlcv = fetch_ohlcv(exchange, symbol, timeframe)
        if not ohlcv:
            return

        save_history_to_file(symbol, timeframe, ohlcv)

        # Candle layout: [timestamp, open, high, low, close, volume];
        # "greatest" means the largest high-low spread.
        biggest = max(ohlcv, key=lambda candle: candle[2] - candle[3])
        stamp = biggest[0]
        open_px, high_px, low_px, close_px = biggest[1], biggest[2], biggest[3], biggest[4]

        greatest_candle_info = (
            f"Symbol: {symbol}, Timeframe: {timeframe}, "
            f"Open: {open_px:.4f}, Close: {close_px:.4f}, "
            f"High: {high_px:.4f}, Low: {low_px:.4f}, "
            f"Range: {high_px - low_px:.4f}, "
            f"Greatest Candle DateTime: {format_candle_time(stamp)}\n"
        )

        current_price = fetch_current_price(exchange, symbol)
        if current_price is None:
            return

        now_utc = datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')
        result = (
            f"[{now_utc}] {symbol} (BINANCE:{symbol.replace('/', '')}) "
            f"Timeframe: {timeframe}, Current price: {current_price:.4f}\n"
            f"{greatest_candle_info}\n"
        )
        print(result.strip())

        with open(output_file, 'a') as f:
            f.write(result)

    except Exception as e:
        print(f"Error analyzing symbol {symbol}: {str(e)}")

def write_predictions_to_file(output_file, predictions):
    """Append one human-readable entry per prediction to *output_file*.

    Each element of *predictions* is (symbol, last_timestamp, timeframe,
    prediction_array). Millisecond-epoch int timestamps are converted to
    aware UTC datetimes before formatting.
    """
    with open(output_file, 'a') as f:
        for symbol, last_timestamp, timeframe, prediction in predictions:
            if isinstance(last_timestamp, int):
                # Epoch milliseconds -> aware UTC datetime.
                last_timestamp = datetime.fromtimestamp(last_timestamp / 1000, tz=timezone.utc)

            stamp_text = last_timestamp.strftime('%Y-%m-%d %H:%M:%S')
            f.write(f"Prediction for {symbol} ({timeframe}):\n")
            f.write(f"- Last Timestamp: {stamp_text}\n")
            # Only the most recent value in the prediction array is reported.
            f.write(f"- Predicted Value: {prediction[-1]:.4f}\n")
            f.write("\n")

def worker(exchange, symbols, timeframe, output_file):
    """Analyze each requested symbol that the exchange actually lists.

    Performance fix: the previous version called fetch_markets() — a
    network round-trip — once PER SYMBOL inside the loop. The market
    list is loop-invariant, so fetch it once up front and keep it as a
    set for O(1) membership tests.
    """
    valid_symbols = set(fetch_markets(exchange))
    for symbol in symbols:
        if symbol in valid_symbols:
            analyze_symbol(exchange, symbol, timeframe, output_file)
        else:
            print(f"Skipping invalid symbol {symbol}")

def main():
    """Entry point: scan candles, train a model, or run predictions.

    Modes (chosen by flags; --timeframe is always required):
      --train         fit a RandomForest on saved BTC_USDT history and
                      dump model.pkl / scaler.pkl
      --use-existing  load model.pkl / scaler.pkl and write next-close
                      predictions for the saved BTC_USDT history
      (neither)       scan every spot */USDT pair for its widest candle
    """
    parser = argparse.ArgumentParser(description='Show details of the greatest historical candle, save historical data, and train a model.')
    parser.add_argument('--timeframe', type=str, required=True, help='Timeframe for the candlestick analysis')
    parser.add_argument('--train', action='store_true', help='Train model using existing historical data')
    parser.add_argument('--use-existing', action='store_true', help='Use existing historical data files')
    args = parser.parse_args()

    timeframe = args.timeframe
    train_model_flag = args.train
    use_existing_flag = args.use_existing
    script_name = os.path.basename(__file__).split('.')[0]
    result_directory = f"scan_results_{script_name}"

    # exist_ok avoids the exists()/makedirs() race of the original code.
    os.makedirs(result_directory, exist_ok=True)

    # Timestamped so repeated runs never clobber each other's output.
    output_file = os.path.join(result_directory, f"{datetime.now(pytz.UTC).strftime('%Y%m%d_%H%M%S')}_{timeframe}_greatest_candles.txt")

    if train_model_flag:
        historical_data_dir = "downloaded_history"
        symbol_filter = 'BTC_USDT'  # Filter files that start with BTC_USDT
        df, filenames = load_data_from_files(historical_data_dir, symbol_filter)
        if df.empty:
            print("No historical data found for training.")
            return

        print("Training model using the following BTC-related files:")
        for filename in filenames:
            # Bug fix: this previously printed a literal placeholder
            # instead of the file path.
            print(f"- {filename}")

        X, y = preprocess_data(df)
        if X.empty or y.empty:
            print("No valid data available for training.")
            return

        model, scaler = train_model(X, y)

        # Persist both artifacts; --use-existing reloads this exact pair.
        joblib.dump(model, 'model.pkl')
        joblib.dump(scaler, 'scaler.pkl')

    elif use_existing_flag:
        model = joblib.load('model.pkl')
        scaler = joblib.load('scaler.pkl')

        historical_data_dir = "downloaded_history"
        symbol_filter = 'BTC_USDT'  # Filter files that start with BTC_USDT
        df, filenames = load_data_from_files(historical_data_dir, symbol_filter)
        if df.empty:
            print("No historical data found for prediction.")
            return

        print("Using the following BTC-related files for prediction:")
        for filename in filenames:
            # Bug fix: this previously printed a literal placeholder
            # instead of the file path.
            print(f"- {filename}")

        predictions = []
        for symbol in df['symbol'].unique():
            symbol_data = df[df['symbol'] == symbol]
            timeframe = symbol_data['Timeframe'].iloc[0]  # Get timeframe for prediction
            last_timestamp = symbol_data.index[-1]  # Get the last timestamp
            next_candle_prediction = predict_next_candle(model, scaler, symbol_data)
            predictions.append((symbol, last_timestamp, timeframe, next_candle_prediction))

        write_predictions_to_file(output_file, predictions)

    else:
        # Default mode: live scan of every spot */USDT pair on Binance.
        exchange = ccxt.binance()
        symbols = fetch_markets(exchange)
        worker(exchange, symbols, timeframe, output_file)

if __name__ == "__main__":
    main()