{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "parquet-to-jsonl-intro",
   "metadata": {},
   "source": [
    "# Convert a Parquet file to JSON Lines\n",
    "\n",
    "Reads the Parquet file at `path` into a pandas DataFrame and writes it to `output_file_name` with one JSON object per row."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9140a01-4f24-4dc2-8d8f-686f38dd5385",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Input Parquet file and destination JSONL file.\n",
    "path = '/root/autodl-tmp/labeled-recipes/data/train-00000-of-00001-5dd0d415a357ff24.parquet'\n",
    "output_file_name = '/root/autodl-tmp/data/train.jsonl'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24d9b3a4-81e1-44c4-a25b-38c2bda4fdac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Parquet file into a DataFrame\n",
    "df = pd.read_parquet(path, engine='pyarrow')\n",
    "\n",
    "# Make sure the destination folder exists before writing.\n",
    "Path(output_file_name).parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Write the DataFrame as JSONL: one JSON object per row.\n",
    "# Serialising the whole frame in one call keeps each column's dtype; iterating with\n",
    "# iterrows() converts every row to a single-dtype Series first, which can upcast\n",
    "# integers to floats and is much slower on large frames.\n",
    "df.to_json(output_file_name, orient='records', lines=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}