Spaces:

sasha
/

AI_Carbon

Runtime error

AI_Carbon / app.py

sashavor

trying out image

248e2bb almost 2 years ago

5.61 kB

	import streamlit as st
	import pandas as pd
	import os, csv
	from huggingface_hub import hf_hub_download, HfApi

	# Hugging Face Hub token taken from the environment; None when the variable is unset.
	HF_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN')

	# Download the shared submissions CSV from the dataset repo at start-up.
	# The returned value is used below as the local file path to append to.
	CACHED_FILE_PATH = hf_hub_download(
	    repo_id="sasha/co2_submissions",
	    filename="dynamic_emissions.csv",
	    repo_type="dataset",
	)

	# Hub client used by write_to_csv to push the updated CSV back.
	api = HfApi()

	def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions):
	    """Append one submission to the cached CSV and upload the file to the Hub.

	    Args:
	        hardware: Name of the GPU selected by the user.
	        training_time: Total number of GPU hours entered by the user.
	        provider: Provider option selected by the user.
	        carbon_intensity: Carbon intensity used for the estimate.
	        dynamic_emissions: Computed dynamic emissions value.

	    The row is appended to the local copy at CACHED_FILE_PATH, then the whole
	    file replaces "dynamic_emissions.csv" in the "sasha/co2_submissions"
	    dataset repo.
	    """
	    row = [hardware, training_time, provider, carbon_intensity, dynamic_emissions]

	    # newline='' leaves line endings to the csv module; an explicit encoding
	    # keeps the file identical regardless of the host's locale.
	    with open(CACHED_FILE_PATH, 'a', newline='', encoding='utf-8') as f:
	        csv.writer(f).writerow(row)

	    # Upload only after the with-block has closed the file, so the new row is
	    # flushed to disk before the file is read for upload.
	    api.upload_file(
	        path_or_fileobj=CACHED_FILE_PATH,
	        path_in_repo="dynamic_emissions.csv",
	        repo_id="sasha/co2_submissions",
	        repo_type="dataset",
	        # Pass the token read from the environment explicitly; when it is
	        # None the client falls back to its default credential lookup.
	        token=HF_TOKEN,
	    )



	# Page-level settings: browser tab title and full-width layout.
	st.set_page_config(page_title="AI Carbon Calculator", layout="wide")

	# CSV files hosted in the mlco2/impact GitHub repository.
	_mlco2_data_root = "https://raw.githubusercontent.com/mlco2/impact/master/data"
	tdp_url = _mlco2_data_root + "/gpus.csv"
	compute_url = _mlco2_data_root + "/impact.csv"
	electricity_url = _mlco2_data_root + "/2021-10-27yearly_averages.csv"


	def _gviz_csv_url(sheet_id, sheet_name):
	    """Build the CSV-export URL for one tab of a Google spreadsheet.

	    `sheet_name` is inserted as-is, so it must already be URL-encoded.
	    """
	    return (
	        "https://docs.google.com/spreadsheets/d/"
	        + sheet_id
	        + "/gviz/tq?tqx=out:csv&sheet="
	        + sheet_name
	    )


	# Both tabs below live in the same spreadsheet (identical sheet ids).
	server_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
	server_sheet_name = "Server%20Carbon%20Footprint"
	server_url = _gviz_csv_url(server_sheet_id, server_sheet_name)

	embodied_gpu_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
	embodied_gpu_sheet_name = "Scope%203%20Ratios"
	embodied_gpu_url = _gviz_csv_url(embodied_gpu_sheet_id, embodied_gpu_sheet_name)

	# GPU catalogue; the 'name' and 'tdp_watts' columns are used below.
	TDP = pd.read_csv(tdp_url)

	# Cloud compute catalogue; the 'provider', 'region' and 'impact' columns are used below.
	instances = pd.read_csv(compute_url)

	# Option for users whose hardware is not hosted by a listed cloud provider.
	# A single constant keeps the selectbox entry, the help text and the branch
	# test below in agreement (the entry used to be spelled "Infastructure",
	# which did not match the help text).
	LOCAL_PROVIDER = 'Local/Private Infrastructure'
	providers = [name.upper() for name in instances['provider'].unique().tolist()]
	providers.append(LOCAL_PROVIDER)

	# Kilograms of CO2 per mile used for the "miles driven" comparison
	# (see the energy.gov source linked in the text below).
	kg_per_mile = 0.348

	# Loaded at start-up; not referenced by the code visible in this section.
	electricity = pd.read_csv(electricity_url)
	servers = pd.read_csv(server_url)
	embodied_gpu = pd.read_csv(embodied_gpu_url)

	st.image('images/MIT_carbon_image_narrow.png')
	st.title("AI Carbon Calculator")

	st.markdown("## Estimate your model's CO2 carbon footprint!")

	st.markdown("##### You can use this tool to calculate different aspects of your model's carbon footprint.")

	st.markdown('### Dynamic Emissions')
	st.markdown('##### These are the emissions produced by generating the electricity needed to train your model.')
	with st.expander("Calculate the emissions produced by energy consumption of model training"):
	    col1, col2, col3, col4 = st.columns(4)
	    with col1:
	        hardware = st.selectbox('GPU used', TDP['name'].tolist())
	        # First TDP value listed for the selected GPU name.
	        gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
	        st.markdown("Different GPUs have different TDP (Thermal Design Power), which impacts how much energy you use.")
	    with col2:
	        training_time = st.number_input('Total number of GPU hours')
	        st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
	                    'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
	    with col3:
	        provider = st.selectbox('Provider used', providers)
	        st.markdown(f'If you can\'t find your provider here, select "{LOCAL_PROVIDER}".')
	    with col4:
	        if provider != LOCAL_PROVIDER:
	            # Provider names are stored in the catalogue in lower case; the
	            # selectbox shows them upper-cased.
	            provider_rows = instances['provider'] == provider.lower()
	            provider_instances = instances['region'][provider_rows].unique().tolist()
	            # This widget picks a region, so it gets its own label instead of
	            # repeating 'Provider used'.
	            region = st.selectbox('Region used', provider_instances)
	            # NOTE(review): 'impact' is presumably in grams of CO2 per kWh,
	            # like the manual input in the else-branch -- confirm against the data.
	            carbon_intensity = instances['impact'][provider_rows & (instances['region'] == region)].tolist()[0]
	        else:
	            carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
	            st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
	                        ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')

	    # watts * hours gives Wh; multiplying by g CO2 per kWh and dividing by
	    # 1,000,000 converts Wh -> kWh and g -> kg. Rounded to whole kilograms.
	    dynamic_emissions = round(gpu_tdp * training_time * carbon_intensity / 1000000)
	    st.metric(label="Dynamic emissions", value=f'{dynamic_emissions} kilograms of CO2eq')
	    st.markdown(f'This is roughly equivalent to {round(dynamic_emissions / kg_per_mile, 1)} miles driven in an average US car'
	                ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
	    # Pass the current values through `args` so the callback receives exactly
	    # what was displayed when the button was rendered.
	    st.button(
	        label="Anonymously share my data",
	        help="Share the data from your model anonymously for research purposes!",
	        on_click=write_to_csv,
	        args=(hardware, training_time, provider, carbon_intensity, dynamic_emissions),
	    )



	# Idle emissions section: headings and prompt only, no inputs or calculation here.
	st.markdown('### Idle Emissions')
	# The trailing space before the line break matters: the two literals are
	# concatenated, and without it the text reads "infrastructureused".
	st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure '
	            'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
	st.markdown('Do you know what the PUE (Power Usage Effectiveness) of your infrastructure is?')



	# Embodied emissions section: heading and introductory text only.
	st.markdown('### Embodied Emissions')
	st.markdown('Choose your hardware, runtime and cloud provider/physical infrastructure to estimate the carbon impact of your research.')



	# Collapsible section describing the methodology behind the calculator.
	# NOTE(review): this copy of the file has lost its indentation; the
	# st.markdown call below is the expander's body and must be indented under
	# the `with` for the file to run.
	with st.expander("More information about our Methodology"):
	st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
	' other aspects of your model\'s carbon footprint based on the LCA methodology.')


	# Illustration of the LCA methodology, loaded from a path relative to the app.
	# NOTE(review): cannot tell from this copy whether the image belongs inside
	# the expander above or at top level -- confirm against the original file.
	st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')