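# RandomForestBTCUSDTModel / bluewenne8.py
# Hosting-page metadata captured together with the file:
#   uploader: solanaexpert ("Upload 10 files", commit 4ef32b8, verified)
#   page links: raw, history, blame
#   scan status: no virus; size: 10.1 kB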
import ccxt
import sys
import os
import argparse
import time
from datetime import datetime, timezone
import pytz
import threading
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib
def fetch_markets(exchange):
    """Return the symbols of every spot market quoted in USDT.

    Args:
        exchange: Client exposing ``fetch_markets()``, which returns market
            dicts carrying ``'symbol'`` and ``'spot'`` entries.

    Returns:
        A list of symbol strings that end with ``/USDT``, in the order the
        exchange reported them.

    On a ccxt exchange, network or DDoS-protection error the message is
    printed and the current process is killed with signal 9.
    """
    try:
        usdt_spot_symbols = []
        for entry in exchange.fetch_markets():
            if entry['spot'] and entry['symbol'].endswith('/USDT'):
                usdt_spot_symbols.append(entry['symbol'])
        return usdt_spot_symbols
    except (ccxt.ExchangeError, ccxt.NetworkError, ccxt.DDoSProtection) as e:
        print(f"Exchange error: {str(e)}")
        # Hard-kill the whole process; the sys.exit below is only reached if
        # the kill call returns.
        os.kill(os.getpid(), 9)
        sys.exit(-999)
def fetch_ohlcv(exchange, symbol, timeframe, limit=1000, start_date='2020-01-01T00:00:00Z'):
    """Download the full candle history of ``symbol`` by paging forward in time.

    Args:
        exchange: Client exposing ``parse8601(text)`` and
            ``fetch_ohlcv(symbol, timeframe, since=..., limit=...)``.
        symbol: Market symbol passed through to the exchange.
        timeframe: Candle timeframe passed through to the exchange.
        limit: Maximum number of candles requested per page.
        start_date: ISO-8601 string marking where the download starts.
            Defaults to the date that used to be hard-coded.

    Returns:
        A list of candles (each indexable, with the timestamp at position 0)
        in the order received, or ``[]`` if any request raised.
    """
    try:
        all_candles = []
        since = exchange.parse8601(start_date)
        while True:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=limit)
            if not candles:
                break
            newest_timestamp = candles[-1][0]
            # A page that does not reach past what we already hold means the
            # exchange is re-sending data; without this check a full page of
            # repeats would keep the loop running forever.
            if all_candles and newest_timestamp <= all_candles[-1][0]:
                break
            all_candles.extend(candles)
            since = newest_timestamp + 1
            # A short page is the last one available.
            if len(candles) < limit:
                break
        return all_candles
    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller skip
        # this symbol instead of aborting the whole scan.
        print(f"Error fetching OHLCV data for {symbol}: {str(e)}")
        return []
def fetch_current_price(exchange, symbol):
    """Return the current ask price of ``symbol``.

    Args:
        exchange: Client exposing ``fetch_ticker(symbol)``.
        symbol: Market symbol to query.

    Returns:
        The ticker's ``'ask'`` entry, or ``None`` when the ticker has no such
        entry or the request raised (the error is printed).
    """
    try:
        quote = exchange.fetch_ticker(symbol)
        if 'ask' in quote:
            return quote['ask']
        return None
    except Exception as e:
        print(f"Error fetching current price for {symbol}: {str(e)}")
        return None
def format_candle_time(timestamp):
    """Render a millisecond epoch timestamp as ``YYYY-MM-DD HH:MM:SS`` in UTC."""
    seconds = timestamp / 1000
    moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
    return f"{moment:%Y-%m-%d %H:%M:%S}"
def save_history_to_file(symbol, timeframe, ohlcv):
    """Write a candle history to a CSV file under ``downloaded_history``.

    The file is named ``<symbol>_<timeframe>_<first day>_<last day>.csv``,
    with ``/`` in the symbol replaced by ``_`` and any spaces or colons in
    the whole path replaced by ``_`` and ``-``.

    Args:
        symbol: Market symbol the candles belong to.
        timeframe: Candle timeframe, used only in the file name.
        ohlcv: Sequence of six-item candles
            ``(timestamp_ms, open, high, low, close, volume)``. When empty,
            a message is printed and no file is written.
    """
    directory = "downloaded_history"
    # The directory is created even when there turns out to be nothing to save.
    if not os.path.exists(directory):
        os.makedirs(directory)

    if not ohlcv:
        print(f"No OHLCV data to save for {symbol}")
        return

    first_day = format_candle_time(ohlcv[0][0]).split()[0]
    last_day = format_candle_time(ohlcv[-1][0]).split()[0]
    raw_name = f"{directory}/{symbol.replace('/', '_')}_{timeframe}_{first_day}_{last_day}.csv"
    filename = raw_name.replace(" ", "_").replace(":", "-")

    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
        writer.writerows(
            [format_candle_time(stamp), open_price, high_price, low_price, close_price, volume]
            for stamp, open_price, high_price, low_price, close_price, volume in ohlcv
        )
    print(f"Saved history to {filename}")
def load_data_from_files(directory, symbol_filter=None):
    """Load every history CSV in ``directory`` into one DataFrame.

    File names are expected in the form written by ``save_history_to_file``:
    ``<symbol>_<timeframe>_<start>_<end>.csv``, where the symbol itself
    contains an underscore (``BTC/USDT`` is stored as ``BTC_USDT``). The name
    is therefore parsed from the right, so the symbol keeps its underscore
    and the timeframe is not mistaken for the quote currency.

    Args:
        directory: Folder to scan for ``.csv`` files.
        symbol_filter: Optional file-name prefix; files not starting with it
            are skipped.

    Returns:
        A ``(frame, filenames)`` tuple. ``frame`` is the concatenation of all
        loaded files with added ``symbol`` and ``Timeframe`` columns (an
        empty DataFrame when nothing was loaded); ``filenames`` lists the
        paths that were read, in sorted file-name order.
    """
    # A missing folder simply means there is no history yet.
    if not os.path.isdir(directory):
        return pd.DataFrame(), []

    all_data = []
    filenames = []
    # os.listdir order is arbitrary; sort so the row order is reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".csv"):
            continue
        if symbol_filter and not filename.startswith(symbol_filter):
            continue  # Skip files that do not start with the symbol filter

        stem = filename[:-len(".csv")]
        parts = stem.rsplit('_', 3)
        if len(parts) == 4:
            symbol, timeframe = parts[0], parts[1]
        else:
            # Name does not follow the saved format: fall back to the first
            # two underscore-separated tokens.
            tokens = stem.split('_')
            symbol = tokens[0]
            timeframe = tokens[1] if len(tokens) > 1 else ''

        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        df['symbol'] = symbol
        df['Timeframe'] = timeframe
        all_data.append(df)
        filenames.append(filepath)

    combined = pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()
    return combined, filenames
def preprocess_data(df):
    """Build the feature matrix and next-close target from candle rows.

    The input frame is left untouched; all work happens on a copy, so the
    function can be called repeatedly on the same frame.

    Args:
        df: DataFrame with ``Timestamp``, ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns. When ``symbol`` and/or
            ``Timeframe`` columns are present, each combination is treated as
            its own series.

    Returns:
        ``(X, y)`` indexed by timestamp: ``X`` holds the five OHLCV columns
        and ``y`` the close of the following candle in the same series. Rows
        containing any missing value (including each series' final candle,
        which has no successor) are dropped.
    """
    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    frame = df.copy()
    frame['Timestamp'] = pd.to_datetime(frame['Timestamp'])
    # Stable sort keeps same-timestamp rows in their original relative order.
    frame = frame.set_index('Timestamp').sort_index(kind='mergesort')

    series_keys = [key for key in ('symbol', 'Timeframe') if key in frame.columns]
    if series_keys:
        # Shift inside each series so a candle's target is never taken from a
        # different symbol or timeframe that happens to sort next to it.
        next_close = frame.groupby(series_keys, sort=False)['Close'].shift(-1)
    else:
        next_close = frame['Close'].shift(-1)
    # Assign by position: timestamps may repeat across series.
    frame['Target'] = next_close.to_numpy()

    frame = frame.dropna()
    return frame[feature_columns], frame['Target']
def train_model(X, y):
    """Fit a random-forest regressor on standardized features.

    The data is split 80/20 with a fixed random state; the scaler is fitted
    on the training part only, and the RMSE on the held-out part is printed.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.

    Returns:
        ``(model, scaler)``: the fitted ``RandomForestRegressor`` and the
        fitted ``StandardScaler``.
    """
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    fit_features = scaler.fit_transform(X_fit)
    holdout_features = scaler.transform(X_holdout)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(fit_features, y_fit)

    holdout_predictions = model.predict(holdout_features)
    rmse = mean_squared_error(y_holdout, holdout_predictions) ** 0.5
    print(f"Model trained. RMSE: {rmse:.4f}")
    return model, scaler
def predict_next_candle(model, scaler, data):
    """Return the model's predictions for the OHLCV rows of ``data``.

    Args:
        model: Object exposing ``predict(features)``.
        scaler: Object exposing ``transform(frame)``.
        data: DataFrame containing at least the ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns; other columns are ignored.

    Returns:
        Whatever ``model.predict`` returns for the scaled feature rows.
    """
    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    scaled_features = scaler.transform(data[feature_columns])
    return model.predict(scaled_features)
def analyze_symbol(exchange, symbol, timeframe, output_file):
    """Report the widest-range candle of ``symbol`` next to its current price.

    Downloads the history, saves it to disk, picks the candle with the
    largest high-low range, then prints the report and appends it to
    ``output_file``. Nothing is reported when the history is empty or the
    current price is unavailable. Any exception is printed and swallowed.

    Args:
        exchange: Exchange client passed to the fetch helpers.
        symbol: Market symbol to analyze.
        timeframe: Candle timeframe to download.
        output_file: Path of the text file the report is appended to.
    """
    def candle_range(candle):
        # Candle layout: [timestamp, open, high, low, close, volume].
        return candle[2] - candle[3]

    try:
        history = fetch_ohlcv(exchange, symbol, timeframe)
        if not history:
            return

        save_history_to_file(symbol, timeframe, history)

        widest = max(history, key=candle_range)
        opened = widest[1]
        closed = widest[4]
        high = widest[2]
        low = widest[3]
        when = format_candle_time(widest[0])

        greatest_candle_info = (
            f"Symbol: {symbol}, Timeframe: {timeframe}, "
            f"Open: {opened:.4f}, Close: {closed:.4f}, "
            f"High: {high:.4f}, Low: {low:.4f}, "
            f"Range: {high - low:.4f}, "
            f"Greatest Candle DateTime: {when}\n"
        )

        current_price = fetch_current_price(exchange, symbol)
        if current_price is None:
            return

        now_text = datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')
        report = (
            f"[{now_text}] {symbol} (BINANCE:{symbol.replace('/', '')}) "
            f"Timeframe: {timeframe}, Current price: {current_price:.4f}\n"
            f"{greatest_candle_info}\n"
        )

        print(report.strip())
        with open(output_file, 'a') as f:
            f.write(report)
    except Exception as e:
        print(f"Error analyzing symbol {symbol}: {str(e)}")
def write_predictions_to_file(output_file, predictions):
    """Append one human-readable entry per prediction to ``output_file``.

    Args:
        output_file: Path of the text file to append to.
        predictions: Iterable of ``(symbol, last_timestamp, timeframe,
            prediction)`` tuples. ``last_timestamp`` may be a datetime-like
            object (anything with ``strftime``), an already formatted string,
            or a number of milliseconds since the epoch (Python or NumPy
            numeric). ``prediction`` is a sequence whose last element is the
            value written.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    with open(output_file, 'a') as f:
        for symbol, last_timestamp, timeframe, prediction in predictions:
            if hasattr(last_timestamp, 'strftime'):
                last_timestamp_str = last_timestamp.strftime(time_format)
            elif isinstance(last_timestamp, str):
                # Already text (e.g. read straight from a CSV column).
                last_timestamp_str = last_timestamp
            else:
                # Epoch milliseconds. Checking for `int` alone would miss
                # NumPy integer scalars, which pandas hands out when
                # indexing, and then fail on the missing strftime.
                moment = datetime.fromtimestamp(float(last_timestamp) / 1000, tz=timezone.utc)
                last_timestamp_str = moment.strftime(time_format)

            f.write(
                f"Prediction for {symbol} ({timeframe}):\n"
                f"- Last Timestamp: {last_timestamp_str}\n"
                # Only the final prediction value is reported.
                f"- Predicted Value: {prediction[-1]:.4f}\n"
                "\n"
            )
def worker(exchange, symbols, timeframe, output_file):
    """Analyze each requested symbol that the exchange currently lists.

    Args:
        exchange: Exchange client passed to the helpers.
        symbols: Iterable of market symbols to process.
        timeframe: Candle timeframe to analyze.
        output_file: Path of the report file results are appended to.
    """
    requested = list(symbols)
    if not requested:
        return

    # Fetch the market list once instead of once per symbol: it is a network
    # request, and a set makes each membership test constant-time.
    tradable = set(fetch_markets(exchange))
    for symbol in requested:
        if symbol in tradable:
            analyze_symbol(exchange, symbol, timeframe, output_file)
        else:
            print(f"Skipping invalid symbol {symbol}")
def main():
    """Command-line entry point.

    Modes, selected by flags:

    * ``--train``: load the saved BTC/USDT history files, train the model and
      store it as ``model.pkl`` / ``scaler.pkl``.
    * ``--use-existing``: load the stored model and write a next-candle
      prediction for every symbol/timeframe series found in the saved
      history.
    * neither: scan all USDT spot markets on Binance and report each one's
      widest candle.

    ``--timeframe`` is required in every mode.
    """
    parser = argparse.ArgumentParser(description='Show details of the greatest historical candle, save historical data, and train a model.')
    parser.add_argument('--timeframe', type=str, required=True, help='Timeframe for the candlestick analysis')
    parser.add_argument('--train', action='store_true', help='Train model using existing historical data')
    parser.add_argument('--use-existing', action='store_true', help='Use existing historical data files')
    args = parser.parse_args()
    timeframe = args.timeframe

    script_name = os.path.basename(__file__).split('.')[0]
    result_directory = f"scan_results_{script_name}"
    os.makedirs(result_directory, exist_ok=True)
    output_file = os.path.join(result_directory, f"{datetime.now(pytz.UTC).strftime('%Y%m%d_%H%M%S')}_{timeframe}_greatest_candles.txt")

    historical_data_dir = "downloaded_history"
    symbol_filter = 'BTC_USDT'  # Filter files that start with BTC_USDT

    if args.train:
        df, filenames = load_data_from_files(historical_data_dir, symbol_filter)
        if df.empty:
            print("No historical data found for training.")
            return
        print("Training model using the following BTC-related files:")
        for filename in filenames:
            print(f"- {filename}")
        X, y = preprocess_data(df)
        if X.empty or y.empty:
            print("No valid data available for training.")
            return
        model, scaler = train_model(X, y)
        joblib.dump(model, 'model.pkl')
        joblib.dump(scaler, 'scaler.pkl')
    elif args.use_existing:
        # NOTE(review): joblib.load unpickles its input; only load model
        # files produced by a trusted --train run.
        try:
            model = joblib.load('model.pkl')
            scaler = joblib.load('scaler.pkl')
        except FileNotFoundError as e:
            print(f"Trained model files not found ({e}). Run with --train first.")
            return

        df, filenames = load_data_from_files(historical_data_dir, symbol_filter)
        if df.empty:
            print("No historical data found for prediction.")
            return
        print("Using the following BTC-related files for prediction:")
        for filename in filenames:
            print(f"- {filename}")

        predictions = []
        # One prediction per (symbol, timeframe) series, so candles of
        # different timeframes are never mixed under a single label.
        for (symbol, series_timeframe), series in df.groupby(['symbol', 'Timeframe'], sort=False):
            # Files are concatenated in arbitrary order: sort by time so the
            # "last" row really is the most recent candle.
            series = series.assign(Timestamp=pd.to_datetime(series['Timestamp']))
            series = series.sort_values('Timestamp', kind='mergesort')
            # Report the candle's own time, not its row number in the frame.
            last_timestamp = series['Timestamp'].iloc[-1]
            # Only the newest candle is needed to predict the next one.
            next_candle_prediction = predict_next_candle(model, scaler, series.tail(1))
            predictions.append((symbol, last_timestamp, series_timeframe, next_candle_prediction))
        write_predictions_to_file(output_file, predictions)
    else:
        exchange = ccxt.binance()
        symbols = fetch_markets(exchange)
        worker(exchange, symbols, timeframe, output_file)
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()