import gradio as gr
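
# `docs` caches the paperqa.Docs index between questions; validate_dataset resets
# it to None whenever the dataset or API key changes so the index is rebuilt.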
docs = None


def request_pathname(files):
    # turn uploaded files into [filepath, citation string] rows for the dataframe
    if files is None:
        return [[]]
    return [[file.name, file.name.split('/')[-1]] for file in files]


def validate_dataset(dataset, openapi):
    global docs
    docs = None  # clear it out if dataset is modified
    docs_ready = dataset.iloc[-1, 0] != ""
    if docs_ready and type(openapi) is str and len(openapi) > 0:
        return "✨Ready✨"
    elif docs_ready:
        return "⚠️Waiting for key..."
    elif type(openapi) is str and len(openapi) > 0:
        return "⚠️Waiting for documents..."
    else:
        return "⚠️Waiting for documents and key..."
def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
    global docs
    docs_ready = dataset.iloc[-1, 0] != ""
    if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
        if docs is None:  # don't want to rebuild index if it's already built
            import os
            os.environ['OPENAI_API_KEY'] = openapi.strip()
            import paperqa
            docs = paperqa.Docs()
            # dataset is a pandas dataframe
            for _, row in dataset.iterrows():
                key = None
                if ',' not in row['citation string']:
                    key = row['citation string']
                docs.add(row['filepath'], row['citation string'], key=key)
    else:
        return "", ""
    progress(0, "Building Index...")
    docs._build_faiss_index()
    progress(0.25, "Querying...")
    result = docs.query(question)
    progress(1.0, "Done!")
    return result.formatted_answer, result.context
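

# Gradio layout: the API key, file upload, and citation dataframe feed a status
# textbox; do_ask checks that status before building the index and answering.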
with gr.Blocks() as demo:
    gr.Markdown("""
    # Document Question and Answer

    This tool lets you ask questions of your uploaded text or PDF documents.
    It uses OpenAI's GPT models, so you must enter your API key below. The
    tool is under active development and currently uses many tokens - up to 10,000
    for a single query. That is $0.10-0.20 per query, so please be careful!

    * [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
    * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.

    ## Instructions

    1. Enter your OpenAI API key ([What is that?](https://openai.com/api/))
    2. Upload your documents and edit the citation strings if you want (to look prettier)
    3. Ask a question and press "Ask Question"
    """)
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password")
    uploaded_files = gr.File(
        label="Your Documents Upload (PDF or txt)", file_count="multiple")
    dataset = gr.Dataframe(
        headers=["filepath", "citation string"],
        datatype=["str", "str"],
        col_count=(2, "fixed"),
        interactive=True,
        label="Documents and Citations"
    )
    buildb = gr.Textbox("⚠️Waiting for documents and key...",
                        label="Status", interactive=False, show_label=True)
    # keep the status box in sync with the key and dataset, and fill the
    # dataframe whenever files are uploaded
    openai_api_key.change(validate_dataset, inputs=[dataset, openai_api_key], outputs=[buildb])
    dataset.change(validate_dataset, inputs=[dataset, openai_api_key], outputs=[buildb])
    uploaded_files.change(request_pathname, inputs=[uploaded_files], outputs=[dataset])
    query = gr.Textbox(
        placeholder="Enter your question here...", label="Question")
    ask = gr.Button("Ask Question")
    gr.Markdown("## Answer")
    answer = gr.Markdown(label="Answer")
    with gr.Accordion("Context", open=False):
        gr.Markdown(
            "### Context\n\nThe following context was used to generate the answer:")
        context = gr.Markdown(label="Context")
    ask.click(fn=do_ask, inputs=[query, buildb, openai_api_key, dataset],
              outputs=[answer, context])

demo.queue(concurrency_count=20)
demo.launch(show_error=True)