|
import ccxt |
|
import sys |
|
import os |
|
import argparse |
|
import time |
|
from datetime import datetime, timezone |
|
import pytz |
|
import threading |
|
import csv |
|
import pandas as pd |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.ensemble import RandomForestRegressor |
|
from sklearn.preprocessing import StandardScaler |
|
from sklearn.metrics import mean_squared_error |
|
import joblib |
|
|
|
def fetch_markets(exchange):
    """Return all spot market symbols quoted in USDT.

    Args:
        exchange: a ccxt exchange instance.

    Returns:
        List of symbol strings such as 'BTC/USDT'.

    Exits the process with status 1 on exchange/network errors.
    """
    try:
        markets = exchange.fetch_markets()
        return [
            market['symbol']
            for market in markets
            if market['spot'] and market['symbol'].endswith('/USDT')
        ]
    except (ccxt.ExchangeError, ccxt.NetworkError, ccxt.DDoSProtection) as e:
        print(f"Exchange error: {str(e)}")
        # Bug fix: os.kill(pid, SIGKILL) terminated the interpreter
        # immediately, skipping atexit/finally cleanup and stream flushing,
        # and made the old sys.exit(-999) unreachable. A plain sys.exit(1)
        # preserves the "abort on exchange failure" behavior cleanly.
        sys.exit(1)
|
|
|
def fetch_ohlcv(exchange, symbol, timeframe, limit=1000):
    """Download the full OHLCV history for *symbol* since 2020-01-01 UTC.

    Pages through the exchange API in batches of at most *limit* candles,
    concatenating the results. Returns [] on any fetch error (best effort).
    """
    try:
        collected = []
        cursor = exchange.parse8601('2020-01-01T00:00:00Z')
        while True:
            batch = exchange.fetch_ohlcv(symbol, timeframe, since=cursor, limit=limit)
            if not batch:
                break
            collected.extend(batch)
            # Advance just past the newest candle so it is not refetched.
            cursor = batch[-1][0] + 1
            if len(batch) < limit:
                # A short page means the history is exhausted.
                break
        return collected
    except Exception as e:
        print(f"Error fetching OHLCV data for {symbol}: {str(e)}")
        return []
|
|
|
def fetch_current_price(exchange, symbol):
    """Return the current ask price for *symbol*, or None on error or if absent."""
    try:
        ticker = exchange.fetch_ticker(symbol)
    except Exception as e:
        print(f"Error fetching current price for {symbol}: {str(e)}")
        return None
    if 'ask' in ticker:
        return ticker['ask']
    return None
|
|
|
def format_candle_time(timestamp):
    """Convert a millisecond epoch timestamp to 'YYYY-mm-dd HH:MM:SS' in UTC."""
    moment = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
def save_history_to_file(symbol, timeframe, ohlcv):
    """Write OHLCV candles to a CSV file under downloaded_history/.

    The filename encodes the symbol, the timeframe, and the dates of the
    first and last candle. Prints a message and returns without writing
    anything when *ohlcv* is empty.

    Args:
        symbol: market symbol such as 'BTC/USDT'.
        timeframe: candle timeframe string such as '1h'.
        ohlcv: list of [timestamp_ms, open, high, low, close, volume] rows.
    """
    directory = "downloaded_history"
    # exist_ok avoids the exists()/makedirs() race of the original code.
    os.makedirs(directory, exist_ok=True)

    if not ohlcv:
        print(f"No OHLCV data to save for {symbol}")
        return

    start_date = format_candle_time(ohlcv[0][0]).split()[0]
    end_date = format_candle_time(ohlcv[-1][0]).split()[0]
    filename = (
        f"{directory}/{symbol.replace('/', '_')}_{timeframe}_{start_date}_{end_date}.csv"
    ).replace(" ", "_").replace(":", "-")

    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
        for timestamp, open_price, high_price, low_price, close_price, volume in ohlcv:
            writer.writerow([format_candle_time(timestamp), open_price,
                             high_price, low_price, close_price, volume])

    # Bug fix: the message previously printed the literal placeholder
    # "(unknown)" instead of the actual path written.
    print(f"Saved history to {filename}")
|
|
|
def load_data_from_files(directory, symbol_filter=None):
    """Load every history CSV from *directory* into one DataFrame.

    Args:
        directory: folder containing '<BASE>_<QUOTE>_<timeframe>_<start>_<end>.csv'
            files as produced by save_history_to_file.
        symbol_filter: optional filename prefix (e.g. 'BTC_USDT') restricting
            which files are loaded.

    Returns:
        (df, filenames): the concatenated DataFrame (empty when nothing
        matched) with added 'symbol' and 'Timeframe' columns, and the list
        of file paths that were read.
    """
    frames = []
    filenames = []
    for filename in os.listdir(directory):
        if not filename.endswith(".csv"):
            continue
        if symbol_filter and not filename.startswith(symbol_filter):
            continue
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        # Filenames look like 'BTC_USDT_1h_<start>_<end>.csv'.
        # Bug fix: the old parsing used parts[0] as the symbol (dropping the
        # quote currency, giving 'BTC') and parts[1] as the timeframe (which
        # is actually the quote currency 'USDT'). The symbol is the first
        # two underscore-separated parts and the timeframe is the third.
        parts = filename.split('_')
        df['symbol'] = '_'.join(parts[:2])
        df['Timeframe'] = parts[2] if len(parts) > 2 else ''
        frames.append(df)
        filenames.append(filepath)
    combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    return combined, filenames
|
|
|
def preprocess_data(df):
    """Build a supervised-learning dataset from raw OHLCV rows.

    Sorts by timestamp and uses the next row's Close as the target; the
    final row (which has no next close) is dropped.

    Args:
        df: DataFrame with 'Timestamp', 'Open', 'High', 'Low', 'Close',
            'Volume' columns.

    Returns:
        (X, y): feature frame [Open, High, Low, Close, Volume] and the
        next-close target series, both indexed by Timestamp.
    """
    # Bug fix: the original mutated the caller's DataFrame in place
    # (set_index/dropna with inplace=True); work on a copy instead.
    df = df.copy()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df.set_index('Timestamp', inplace=True)
    df.sort_index(inplace=True)
    # NOTE(review): shift(-1) spans symbol boundaries when multiple symbols
    # are concatenated — the last row of one symbol targets the first row of
    # the next. Confirm this is intended for multi-symbol input.
    df['Target'] = df['Close'].shift(-1)
    df.dropna(inplace=True)

    X = df[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = df['Target']

    return X, y
|
|
|
def train_model(X, y):
    """Fit a RandomForestRegressor on a scaled 80/20 split and report RMSE.

    Args:
        X: feature DataFrame.
        y: target series aligned with X.

    Returns:
        (model, scaler): the fitted regressor and the StandardScaler that
        must be applied to any future inputs before prediction.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    train_features = scaler.fit_transform(X_train)
    test_features = scaler.transform(X_test)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(train_features, y_train)

    # Evaluate on the held-out 20% and report root-mean-squared error.
    rmse = mean_squared_error(y_test, model.predict(test_features)) ** 0.5
    print(f"Model trained. RMSE: {rmse:.4f}")

    return model, scaler
|
|
|
def predict_next_candle(model, scaler, data):
    """Predict the next close for each row of *data* with a fitted model/scaler."""
    features = data[['Open', 'High', 'Low', 'Close', 'Volume']]
    return model.predict(scaler.transform(features))
|
|
|
def analyze_symbol(exchange, symbol, timeframe, output_file):
    """Report the widest-range historical candle for *symbol*.

    Downloads the full OHLCV history, saves it to disk, locates the candle
    with the largest high-low spread, fetches the current ask price, prints
    a summary and appends the same text to *output_file*. Any failure is
    logged and swallowed so a single symbol cannot abort a scan.
    """
    try:
        ohlcv = fetch_ohlcv(exchange, symbol, timeframe)
        if not ohlcv:
            return

        save_history_to_file(symbol, timeframe, ohlcv)

        # Candle with the widest high (index 2) minus low (index 3) spread.
        widest = max(ohlcv, key=lambda candle: candle[2] - candle[3])
        timestamp = widest[0]
        open_price = widest[1]
        highest_price = widest[2]
        lowest_price = widest[3]
        close_price = widest[4]
        candle_date_time = format_candle_time(timestamp)

        greatest_candle_info = (
            f"Symbol: {symbol}, Timeframe: {timeframe}, "
            f"Open: {open_price:.4f}, Close: {close_price:.4f}, "
            f"High: {highest_price:.4f}, Low: {lowest_price:.4f}, "
            f"Range: {highest_price - lowest_price:.4f}, "
            f"Greatest Candle DateTime: {candle_date_time}\n"
        )

        # Bail out quietly when no ask price is available.
        current_price = fetch_current_price(exchange, symbol)
        if current_price is None:
            return

        current_time = datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')
        result = (
            f"[{current_time}] {symbol} (BINANCE:{symbol.replace('/', '')}) "
            f"Timeframe: {timeframe}, Current price: {current_price:.4f}\n"
            f"{greatest_candle_info}\n"
        )
        print(result.strip())

        with open(output_file, 'a') as f:
            f.write(result)

    except Exception as e:
        print(f"Error analyzing symbol {symbol}: {str(e)}")
|
|
|
def write_predictions_to_file(output_file, predictions):
    """Append a human-readable summary of model predictions to *output_file*.

    Args:
        output_file: path of the text file to append to.
        predictions: iterable of (symbol, last_timestamp, timeframe,
            prediction) tuples; last_timestamp may be a millisecond epoch
            int or a datetime, and prediction is indexable (last value used).
    """
    with open(output_file, 'a') as f:
        for symbol, last_timestamp, timeframe, prediction in predictions:
            # Normalize millisecond epoch ints to aware UTC datetimes.
            if isinstance(last_timestamp, int):
                last_timestamp = datetime.fromtimestamp(
                    last_timestamp / 1000, tz=timezone.utc
                )
            stamp = last_timestamp.strftime('%Y-%m-%d %H:%M:%S')
            f.writelines([
                f"Prediction for {symbol} ({timeframe}):\n",
                f"- Last Timestamp: {stamp}\n",
                f"- Predicted Value: {prediction[-1]:.4f}\n",
                "\n",
            ])
|
|
|
def worker(exchange, symbols, timeframe, output_file):
    """Analyze each requested symbol that exists on the exchange.

    Args:
        exchange: ccxt exchange instance.
        symbols: iterable of candidate symbols to analyze.
        timeframe: candle timeframe string passed through to analyze_symbol.
        output_file: path appended to by analyze_symbol.
    """
    # Bug fix: the original called fetch_markets(exchange) inside the loop,
    # performing one full market-list download (a network round-trip) per
    # symbol. Fetch the valid-symbol set once; set membership is O(1).
    valid_symbols = set(fetch_markets(exchange))
    for symbol in symbols:
        if symbol in valid_symbols:
            analyze_symbol(exchange, symbol, timeframe, output_file)
        else:
            print(f"Skipping invalid symbol {symbol}")
|
|
|
def main():
    """CLI entry point with three mutually exclusive modes (checked in order).

    --train:        train a model on saved BTC_USDT history and persist
                    model.pkl / scaler.pkl via joblib.
    --use-existing: load the persisted model/scaler and write next-candle
                    predictions for each symbol found on disk.
    (default):      scan every USDT spot symbol on Binance for its
                    greatest historical candle.
    """
    parser = argparse.ArgumentParser(description='Show details of the greatest historical candle, save historical data, and train a model.')
    parser.add_argument('--timeframe', type=str, required=True, help='Timeframe for the candlestick analysis')
    parser.add_argument('--train', action='store_true', help='Train model using existing historical data')
    parser.add_argument('--use-existing', action='store_true', help='Use existing historical data files')
    args = parser.parse_args()

    timeframe = args.timeframe
    # Results directory is derived from this script's basename.
    script_name = os.path.basename(__file__).split('.')[0]
    result_directory = f"scan_results_{script_name}"
    # exist_ok avoids the exists()/makedirs() race of the original code.
    os.makedirs(result_directory, exist_ok=True)

    output_file = os.path.join(
        result_directory,
        f"{datetime.now(pytz.UTC).strftime('%Y%m%d_%H%M%S')}_{timeframe}_greatest_candles.txt",
    )

    if args.train:
        df, filenames = load_data_from_files("downloaded_history", 'BTC_USDT')
        if df.empty:
            print("No historical data found for training.")
            return

        print("Training model using the following BTC-related files:")
        for filename in filenames:
            # Bug fix: previously printed the literal placeholder "(unknown)"
            # instead of the file path.
            print(f"- {filename}")

        X, y = preprocess_data(df)
        if X.empty or y.empty:
            print("No valid data available for training.")
            return

        model, scaler = train_model(X, y)

        # Persist both artifacts; --use-existing reloads them later.
        joblib.dump(model, 'model.pkl')
        joblib.dump(scaler, 'scaler.pkl')

    elif args.use_existing:
        model = joblib.load('model.pkl')
        scaler = joblib.load('scaler.pkl')

        df, filenames = load_data_from_files("downloaded_history", 'BTC_USDT')
        if df.empty:
            print("No historical data found for prediction.")
            return

        print("Using the following BTC-related files for prediction:")
        for filename in filenames:
            # Bug fix: previously printed the literal placeholder "(unknown)"
            # instead of the file path.
            print(f"- {filename}")

        predictions = []
        for symbol in df['symbol'].unique():
            symbol_data = df[df['symbol'] == symbol]
            # sym_timeframe: avoid shadowing the CLI `timeframe` above.
            sym_timeframe = symbol_data['Timeframe'].iloc[0]
            # NOTE(review): df here is not preprocessed, so this index is a
            # plain row number, not an epoch timestamp — downstream it is
            # formatted as a 1970 date. Confirm intent.
            last_timestamp = symbol_data.index[-1]
            next_candle_prediction = predict_next_candle(model, scaler, symbol_data)
            predictions.append((symbol, last_timestamp, sym_timeframe, next_candle_prediction))

        write_predictions_to_file(output_file, predictions)

    else:
        # Default mode: scan every USDT spot symbol on Binance.
        exchange = ccxt.binance()
        symbols = fetch_markets(exchange)
        worker(exchange, symbols, timeframe, output_file)
|
|
|
# Entry-point guard: run main() only when executed as a script, not on import.
if __name__ == "__main__":

    main()
|
|