File size: 10,110 Bytes

4ef32b8

import ccxt
import sys
import os
import argparse
import time
from datetime import datetime, timezone
import pytz
import threading
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib

def fetch_markets(exchange):
    try:
        markets = exchange.fetch_markets()
        symbols = [market['symbol'] for market in markets if market['spot'] and market['symbol'].endswith('/USDT')]
        return symbols
    except (ccxt.ExchangeError, ccxt.NetworkError, ccxt.DDoSProtection) as e:
        print(f"Exchange error: {str(e)}")
        os.kill(os.getpid(), 9)
        sys.exit(-999)

def fetch_ohlcv(exchange, symbol, timeframe, limit=1000):
    try:
        all_candles = []
        since = exchange.parse8601('2020-01-01T00:00:00Z')  # Start date can be adjusted
        while True:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=limit)
            if not candles:
                break
            all_candles.extend(candles)
            since = candles[-1][0] + 1
            if len(candles) < limit:
                break
        return all_candles
    except Exception as e:
        print(f"Error fetching OHLCV data for {symbol}: {str(e)}")
        return []

def fetch_current_price(exchange, symbol):
    try:
        ticker = exchange.fetch_ticker(symbol)
        return ticker['ask'] if 'ask' in ticker else None
    except Exception as e:
        print(f"Error fetching current price for {symbol}: {str(e)}")
        return None

def format_candle_time(timestamp):
    return datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

def save_history_to_file(symbol, timeframe, ohlcv):
    directory = "downloaded_history"
    if not os.path.exists(directory):
        os.makedirs(directory)

    if not ohlcv:
        print(f"No OHLCV data to save for {symbol}")
        return

    start_date = format_candle_time(ohlcv[0][0]).split()[0]
    end_date = format_candle_time(ohlcv[-1][0]).split()[0]
    filename = (f"{directory}/{symbol.replace('/', '_')}_{timeframe}_{start_date}_{end_date}.csv").replace(" ", "_").replace(":", "-")

    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
        for candle in ohlcv:
            timestamp, open_price, high_price, low_price, close_price, volume = candle
            writer.writerow([format_candle_time(timestamp), open_price, high_price, low_price, close_price, volume])

    print(f"Saved history to {filename}")

def load_data_from_files(directory, symbol_filter=None):
    all_data = []
    filenames = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            if symbol_filter and not filename.startswith(symbol_filter):
                continue  # Skip files that do not start with the symbol filter
            filepath = os.path.join(directory, filename)
            df = pd.read_csv(filepath)
            df['symbol'] = filename.split('_')[0]  # Adjust this if your filenames have different formats
            timeframe = filename.split('_')[1]  # Extract timeframe from filename
            df['Timeframe'] = timeframe
            all_data.append(df)
            filenames.append(filepath)
    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame(), filenames

def preprocess_data(df):
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df.set_index('Timestamp', inplace=True)
    df.sort_index(inplace=True)
    df['Target'] = df['Close'].shift(-1)
    df.dropna(inplace=True)
    
    X = df[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = df['Target']
    
    return X, y

def train_model(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)
    
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    rmse = mse**0.5
    print(f"Model trained. RMSE: {rmse:.4f}")
    
    return model, scaler

def predict_next_candle(model, scaler, data):
    data_scaled = scaler.transform(data[['Open', 'High', 'Low', 'Close', 'Volume']])
    prediction = model.predict(data_scaled)
    return prediction

def analyze_symbol(exchange, symbol, timeframe, output_file):
    try:
        ohlcv = fetch_ohlcv(exchange, symbol, timeframe)
        if not ohlcv:
            return

        save_history_to_file(symbol, timeframe, ohlcv)

        max_candle = max(ohlcv, key=lambda x: x[2] - x[3])
        open_price = max_candle[1]
        close_price = max_candle[4]
        highest_price = max_candle[2]
        lowest_price = max_candle[3]
        timestamp = max_candle[0]
        candle_date_time = format_candle_time(timestamp)

        greatest_candle_info = (
            f"Symbol: {symbol}, Timeframe: {timeframe}, "
            f"Open: {open_price:.4f}, Close: {close_price:.4f}, "
            f"High: {highest_price:.4f}, Low: {lowest_price:.4f}, "
            f"Range: {highest_price - lowest_price:.4f}, "
            f"Greatest Candle DateTime: {candle_date_time}\n"
        )
        
        current_price = fetch_current_price(exchange, symbol)
        if current_price is None:
            return

        current_time = datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')
        result = (
            f"[{current_time}] {symbol} (BINANCE:{symbol.replace('/', '')}) "
            f"Timeframe: {timeframe}, Current price: {current_price:.4f}\n"
            f"{greatest_candle_info}\n"
        )
        print(result.strip())

        with open(output_file, 'a') as f:
            f.write(result)

    except Exception as e:
        print(f"Error analyzing symbol {symbol}: {str(e)}")

def write_predictions_to_file(output_file, predictions):
    with open(output_file, 'a') as f:
        for symbol, last_timestamp, timeframe, prediction in predictions:
            # Convert integer timestamp to datetime if needed
            if isinstance(last_timestamp, int):
                last_timestamp = datetime.fromtimestamp(last_timestamp / 1000, tz=timezone.utc)
            
            last_timestamp_str = last_timestamp.strftime('%Y-%m-%d %H:%M:%S')
            f.write(f"Prediction for {symbol} ({timeframe}):\n")
            f.write(f"- Last Timestamp: {last_timestamp_str}\n")
            f.write(f"- Predicted Value: {prediction[-1]:.4f}\n")  # Show the last prediction value
            f.write("\n")

def worker(exchange, symbols, timeframe, output_file):
    for symbol in symbols:
        if symbol in fetch_markets(exchange):
            analyze_symbol(exchange, symbol, timeframe, output_file)
        else:
            print(f"Skipping invalid symbol {symbol}")

def main():
    parser = argparse.ArgumentParser(description='Show details of the greatest historical candle, save historical data, and train a model.')
    parser.add_argument('--timeframe', type=str, required=True, help='Timeframe for the candlestick analysis')
    parser.add_argument('--train', action='store_true', help='Train model using existing historical data')
    parser.add_argument('--use-existing', action='store_true', help='Use existing historical data files')
    args = parser.parse_args()

    timeframe = args.timeframe
    train_model_flag = args.train
    use_existing_flag = args.use_existing
    script_name = os.path.basename(__file__).split('.')[0]
    result_directory = f"scan_results_{script_name}"

    if not os.path.exists(result_directory):
        os.makedirs(result_directory)

    output_file = os.path.join(result_directory, f"{datetime.now(pytz.UTC).strftime('%Y%m%d_%H%M%S')}_{timeframe}_greatest_candles.txt")

    if train_model_flag:
        historical_data_dir = "downloaded_history"
        symbol_filter = 'BTC_USDT'  # Filter files that start with BTC_USDT
        df, filenames = load_data_from_files(historical_data_dir, symbol_filter)
        if df.empty:
            print("No historical data found for training.")
            return

        print("Training model using the following BTC-related files:")
        for filename in filenames:
            print(f"- {filename}")

        X, y = preprocess_data(df)
        if X.empty or y.empty:
            print("No valid data available for training.")
            return

        model, scaler = train_model(X, y)

        joblib.dump(model, 'model.pkl')
        joblib.dump(scaler, 'scaler.pkl')
        
    elif use_existing_flag:
        model = joblib.load('model.pkl')
        scaler = joblib.load('scaler.pkl')
        
        historical_data_dir = "downloaded_history"
        symbol_filter = 'BTC_USDT'  # Filter files that start with BTC_USDT
        df, filenames = load_data_from_files(historical_data_dir, symbol_filter)
        if df.empty:
            print("No historical data found for prediction.")
            return

        print("Using the following BTC-related files for prediction:")
        for filename in filenames:
            print(f"- {filename}")

        predictions = []
        for symbol in df['symbol'].unique():
            symbol_data = df[df['symbol'] == symbol]
            timeframe = symbol_data['Timeframe'].iloc[0]  # Get timeframe for prediction
            last_timestamp = symbol_data.index[-1]  # Get the last timestamp
            next_candle_prediction = predict_next_candle(model, scaler, symbol_data)
            predictions.append((symbol, last_timestamp, timeframe, next_candle_prediction))

        write_predictions_to_file(output_file, predictions)

    else:
        exchange = ccxt.binance()
        symbols = fetch_markets(exchange)
        worker(exchange, symbols, timeframe, output_file)

if __name__ == "__main__":
    main()