import os

from datasets.download.download_config import DownloadConfig
from datasets.utils.file_utils import cached_path
from datasets.utils.hub import hf_hub_url


def get_readme_path(dataset_name):
    """Return a local (cached) path to the dataset's README.md on the Hub.

    Args:
        dataset_name: Hugging Face Hub dataset repo id.

    Returns:
        Filesystem path of the downloaded/cached README.md.
    """
    readme_path = hf_hub_url(dataset_name, "README.md")
    return cached_path(readme_path, download_config=DownloadConfig())


def update_readme(dataset_name, subreddit, latest_date):
    """Regenerate the auto-generated section of the dataset's README.

    Args:
        dataset_name: Hugging Face Hub dataset repo id.
        subreddit: Subreddit whose submissions the dataset collects.
        latest_date: Most recent day of data included in the dataset.

    Returns:
        The README fragment that was written below the marker.
    """
    path = get_readme_path(dataset_name=dataset_name)
    # NOTE: "I'm" fixes a typo ("Im") in the previously generated text.
    readme_text = f"""
## Dataset Overview
The goal is to have an open dataset of `{subreddit}` submissions. I'm leveraging PRAW and the reddit API to get downloads.

There is a limit of 1000 in an API call and limited search functionality, so this is run every day to get new submissions.

# Dataset Name
{dataset_name}

## Update Frequency
The dataset is updated daily with the most recent day being: {latest_date}
"""
    append_readme(path=path, readme_text=readme_text)
    return readme_text


def append_readme(path, readme_text):
    """Insert readme_text after a marker line, replacing any prior generated text.

    If the marker is already present, everything after it is overwritten so
    repeated runs don't accumulate stale sections; otherwise the marker and
    the text are appended to the end of the file.

    Args:
        path: Filesystem path of the README file to modify in place.
        readme_text: Text to place below the marker.
    """
    generated_below_marker = "--- Generated Below ---"
    # Explicit UTF-8: Hub READMEs are UTF-8; the locale default is not portable.
    with open(path, "r", encoding="utf-8") as file:
        content = file.read()

    if generated_below_marker in content:
        # Keep everything up to (and including) the marker; drop the old
        # generated section and write the fresh one.
        index = content.index(generated_below_marker) + len(generated_below_marker)
        content = content[:index] + "\n\n" + readme_text
    else:
        content += "\n\n" + generated_below_marker + "\n\n" + readme_text + "\n"

    with open(path, "w", encoding="utf-8") as file:
        file.write(content)