pseudotensor committed
Commit • edf6dca • Parent: ce8ae40

Update with h2oGPT hash a9971663accc92add02bde0be7622726ef2db350
Browse files
- client_test.py +5 -3
- enums.py +12 -9
- evaluate_params.py +1 -1
- gen.py +117 -50
- gpt_langchain.py +67 -92
- gradio_runner.py +572 -446
- gradio_themes.py +8 -0
- gradio_utils/__pycache__/css.cpython-310.pyc +0 -0
- gradio_utils/__pycache__/prompt_form.cpython-310.pyc +0 -0
- gradio_utils/css.py +4 -1
- gradio_utils/prompt_form.py +0 -27
- loaders.py +18 -10
- prompter.py +69 -7
- requirements.txt +4 -4
- utils.py +60 -1
client_test.py
CHANGED

@@ -7,7 +7,7 @@ python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6_9b

 NOTE: For private models, add --use-auth_token=True

-NOTE: --
+NOTE: --use_gpu_id=True (default) must be used for multi-GPU in case see failures with cuda:x cuda:y mismatches.
 Currently, this will force model to be on a single GPU.

 Then run this client as:

@@ -98,7 +98,8 @@ def get_args(prompt, prompt_type, chat=False, stream_output=False,
                   top_k_docs=top_k_docs,
                   chunk=True,
                   chunk_size=512,
-
+                  document_subset=DocumentChoices.Relevant.name,
+                  document_choice=[],
                   )
     from evaluate_params import eval_func_param_names
     assert len(set(eval_func_param_names).difference(set(list(kwargs.keys())))) == 0

@@ -203,7 +204,8 @@ def run_client_nochat_api_lean_morestuff(prompt, prompt_type='human_bot', max_ne
                   langchain_mode='Disabled',
                   langchain_action=LangChainAction.QUERY.value,
                   top_k_docs=4,
-
+                  document_subset=DocumentChoices.Relevant.name,
+                  document_choice=[],
                   )

     api_name = '/submit_nochat_api'  # NOTE: like submit_nochat but stable API for string dict passing
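The two new keys travel in the same kwargs dict that client_test.py already serializes for /submit_nochat_api. Below is a minimal caller sketch, not a definitive client: it assumes a local h2oGPT server on port 7860 and the gradio_client package, the instruction text and values are placeholders, and only a subset of the parameters is shown (the real endpoint expects the full eval_func_param_names set).

from gradio_client import Client

kwargs = dict(
    instruction_nochat="Who are you?",
    prompt_type='human_bot',
    langchain_mode='Disabled',
    langchain_action='Query',
    top_k_docs=4,
    chunk=True,
    chunk_size=512,
    document_subset='Relevant',   # new in this commit (DocumentChoices.Relevant.name)
    document_choice=[],           # new in this commit: explicit list of chosen documents
)

client = Client("http://localhost:7860")
# /submit_nochat_api accepts a single string-encoded dict, mirroring client_test.py
res = client.predict(str(kwargs), api_name='/submit_nochat_api')
print(res)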
enums.py
CHANGED

@@ -28,16 +28,21 @@ class PromptType(Enum):
     gptj = 22
     prompt_answer_openllama = 23
     vicuna11 = 24
+    mptinstruct = 25
+    mptchat = 26
+    falcon = 27


 class DocumentChoices(Enum):
-
-
-
-    Just_LLM = 3
+    Relevant = 0
+    Sources = 1
+    All = 2


-non_query_commands = [
+non_query_commands = [
+    DocumentChoices.Sources.name,
+    DocumentChoices.All.name
+]


 class LangChainMode(Enum):

@@ -60,7 +65,7 @@ class LangChainAction(Enum):

     QUERY = "Query"
     # WIP:
-    #SUMMARIZE_MAP = "Summarize_map_reduce"
+    # SUMMARIZE_MAP = "Summarize_map_reduce"
     SUMMARIZE_MAP = "Summarize"
     SUMMARIZE_ALL = "Summarize_all"
     SUMMARIZE_REFINE = "Summarize_refine"

@@ -68,7 +73,6 @@ class LangChainAction(Enum):

 no_server_str = no_lora_str = no_model_str = '[None/Remove]'

-
 # from site-packages/langchain/llms/openai.py
 # but needed since ChatOpenAI doesn't have this information
 model_token_mapping = {

@@ -77,7 +81,7 @@ model_token_mapping = {
     "gpt-4-32k": 32768,
     "gpt-4-32k-0314": 32768,
     "gpt-3.5-turbo": 4096,
-    "gpt-3.5-turbo-16k": 16*1024,
+    "gpt-3.5-turbo-16k": 16 * 1024,
     "gpt-3.5-turbo-0301": 4096,
     "text-ada-001": 2049,
     "ada": 2049,

@@ -94,6 +98,5 @@ model_token_mapping = {
     "code-cushman-001": 2048,
 }

-
 source_prefix = "Sources [Score | Link]:"
 source_postfix = "End Sources<p>"
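Downstream, the renamed DocumentChoices entries split "query the collection" from the two listing commands; a small self-contained illustration of how non_query_commands is meant to be consumed (the usage pattern matches the checks added later in gpt_langchain.py):

from enum import Enum

class DocumentChoices(Enum):
    Relevant = 0
    Sources = 1
    All = 2

non_query_commands = [
    DocumentChoices.Sources.name,
    DocumentChoices.All.name
]

def is_query(document_subset: str) -> bool:
    # Sources/All list documents instead of running the LLM over retrieved chunks
    return document_subset not in non_query_commands

assert is_query(DocumentChoices.Relevant.name)
assert not is_query(DocumentChoices.All.name)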
evaluate_params.py
CHANGED

@@ -34,6 +34,7 @@ eval_func_param_names = ['instruction',
                          'top_k_docs',
                          'chunk',
                          'chunk_size',
+                         'document_subset',
                          'document_choice',
                          ]

@@ -43,5 +44,4 @@ for k in no_default_param_names:
     if k in eval_func_param_names_defaults:
         eval_func_param_names_defaults.remove(k)

-
 eval_extra_columns = ['prompt', 'response', 'score']
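Adding 'document_subset' here is what keeps the client-side kwargs and evaluate() in sync; client_test.py enforces that with a set-difference assert. A condensed, runnable sketch of that contract (the real list is much longer):

eval_func_param_names = ['top_k_docs', 'chunk', 'chunk_size', 'document_subset', 'document_choice']

kwargs = dict(top_k_docs=4, chunk=True, chunk_size=512,
              document_subset='Relevant', document_choice=[])

# every evaluate() parameter name must have a matching key in the client kwargs
assert len(set(eval_func_param_names).difference(set(list(kwargs.keys())))) == 0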
gen.py
CHANGED

@@ -32,7 +32,8 @@ from enums import DocumentChoices, LangChainMode, no_lora_str, model_token_mappi
     source_postfix, LangChainAction
 from loaders import get_loaders
 from utils import set_seed, clear_torch_cache, save_generate_output, NullContext, wrapped_partial, EThread, get_githash, \
-    import_matplotlib, get_device, makedirs, get_kwargs, start_faulthandler, get_hf_server, FakeTokenizer, remove
+    import_matplotlib, get_device, makedirs, get_kwargs, start_faulthandler, get_hf_server, FakeTokenizer, remove, \
+    have_langchain

 start_faulthandler()
 import_matplotlib()

@@ -60,7 +61,9 @@ def main(
         load_8bit: bool = False,
         load_4bit: bool = False,
         load_half: bool = True,
-
+        load_gptq: str = '',
+        use_safetensors: bool = False,
+        use_gpu_id: bool = True,
         base_model: str = '',
         tokenizer_base_model: str = '',
         lora_weights: str = "",

@@ -91,7 +94,7 @@
         memory_restriction_level: int = None,
         debug: bool = False,
         save_dir: str = None,
-        share: bool =
+        share: bool = False,
         local_files_only: bool = False,
         resume_download: bool = True,
         use_auth_token: Union[str, bool] = False,

@@ -138,14 +141,15 @@
         eval_prompts_only_seed: int = 1234,
         eval_as_output: bool = False,

-        langchain_mode: str =
+        langchain_mode: str = None,
         langchain_action: str = LangChainAction.QUERY.value,
         force_langchain_evaluate: bool = False,
         visible_langchain_modes: list = ['UserData', 'MyData'],
         # WIP:
         # visible_langchain_actions: list = langchain_actions.copy(),
         visible_langchain_actions: list = [LangChainAction.QUERY.value, LangChainAction.SUMMARIZE_MAP.value],
-
+        document_subset: str = DocumentChoices.Relevant.name,
+        document_choice: list = [],
         user_path: str = None,
         detect_user_path_changes_every_query: bool = False,
         load_db_if_exists: bool = True,

@@ -177,11 +181,13 @@
     :param load_8bit: load model in 8-bit using bitsandbytes
     :param load_4bit: load model in 4-bit using bitsandbytes
     :param load_half: load model in float16
-    :param
+    :param load_gptq: to load model with GPTQ, put model_basename here, e.g. gptq_model-4bit--1g
+    :param use_safetensors: to use safetensors version (assumes file/HF points to safe tensors version)
+    :param use_gpu_id: whether to control devices with gpu_id.  If False, then spread across GPUs
     :param base_model: model HF-type name.  If use --base_model to preload model, cannot unload in gradio in models tab
     :param tokenizer_base_model: tokenizer HF-type name.  Usually not required, inferred from base_model.
     :param lora_weights: LORA weights path/HF link
-    :param gpu_id: if
+    :param gpu_id: if use_gpu_id, then use gpu_id for cuda device ID, or auto mode if gpu_id != -1
     :param compile_model Whether to compile the model
     :param use_cache: Whether to use caching in model (some models fail when multiple threads use)
     :param inference_server: Consume base_model as type of model at this address

@@ -289,7 +295,8 @@
            Default: If only want to consume local files, e.g. prepared by make_db.py, only include ['UserData']
            FIXME: Avoid 'All' for now, not implemented
     :param visible_langchain_actions: Which actions to allow
-    :param
+    :param document_subset: Default document choice when taking subset of collection
+    :param document_choice: Chosen document(s) by internal name
     :param load_db_if_exists: Whether to load chroma db if exists or re-generate db
     :param keep_sources_in_context: Whether to keep url sources in context, not helpful usually
     :param db_type: 'faiss' for in-memory or 'chroma' or 'weaviate' for persisted on disk

@@ -379,10 +386,12 @@
     # allow enabling langchain via ENV
     # FIRST PLACE where LangChain referenced, but no imports related to it
     langchain_mode = os.environ.get("LANGCHAIN_MODE", langchain_mode)
-
+    if langchain_mode is not None:
+        assert langchain_mode in langchain_modes, "Invalid langchain_mode %s" % langchain_mode
     visible_langchain_modes = ast.literal_eval(os.environ.get("visible_langchain_modes", str(visible_langchain_modes)))
     if langchain_mode not in visible_langchain_modes and langchain_mode in langchain_modes:
-
+        if langchain_mode is not None:
+            visible_langchain_modes += [langchain_mode]

     assert langchain_action in langchain_actions, "Invalid langchain_action %s" % langchain_action

@@ -392,6 +401,25 @@
     if LangChainMode.USER_DATA.value not in visible_langchain_modes:
         allow_upload_to_user_data = False

+    # auto-set langchain_mode
+    if have_langchain and langchain_mode is None:
+        if allow_upload_to_user_data and not is_public and user_path:
+            langchain_mode = 'UserData'
+            print("Auto set langchain_mode=%s" % langchain_mode, flush=True)
+        elif allow_upload_to_my_data:
+            langchain_mode = 'MyData'
+            print("Auto set langchain_mode=%s."
+                  " To use UserData to pull files from disk,"
+                  " set user_path and ensure allow_upload_to_user_data=True" % langchain_mode, flush=True)
+        else:
+            raise RuntimeError("Please pass --langchain_mode=<chosen mode> out of %s" % langchain_modes)
+    if not have_langchain and langchain_mode not in [None, LangChainMode.DISABLED.value, LangChainMode.LLM.value, LangChainMode.CHAT_LLM.value]:
+        raise RuntimeError("Asked for LangChain mode but langchain python package cannot be found.")
+    if langchain_mode is None:
+        # if not set yet, disable
+        langchain_mode = LangChainMode.DISABLED.value
+        print("Auto set langchain_mode=%s" % langchain_mode, flush=True)
+
     if is_public:
         allow_upload_to_user_data = False
         input_lines = 1  # ensure set, for ease of use

@@ -458,7 +486,9 @@
             load_8bit = False
             load_4bit = False
             load_half = False
-
+            load_gptq = ''
+            use_safetensors = False
+            use_gpu_id = False
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.enabled = False
             torch.set_default_dtype(torch.float32)

@@ -714,7 +744,9 @@ def get_config(base_model,
     return config, model


-def get_non_lora_model(base_model, model_loader, load_half,
+def get_non_lora_model(base_model, model_loader, load_half,
+                       load_gptq, use_safetensors,
+                       model_kwargs, reward_type,
                        config, model,
                        gpu_id=0,
                        ):

@@ -761,16 +793,25 @@ def get_non_lora_model(base_model, model_loader, load_half, model_kwargs, reward
     load_in_8bit = model_kwargs.get('load_in_8bit', False)
     load_in_4bit = model_kwargs.get('load_in_4bit', False)
     model_kwargs['device_map'] = device_map
+    model_kwargs['use_safetensors'] = use_safetensors
     pop_unused_model_kwargs(model_kwargs)

-    if
-
+    if load_gptq:
+        model_kwargs.pop('torch_dtype', None)
+        model_kwargs.pop('device_map')
+        model = model_loader(
+            model_name_or_path=base_model,
+            model_basename=load_gptq,
+            **model_kwargs,
+        )
+    elif load_in_8bit or load_in_4bit or not load_half:
+        model = model_loader(
             base_model,
             config=config,
             **model_kwargs,
         )
     else:
-        model = model_loader
+        model = model_loader(
             base_model,
             config=config,
             **model_kwargs,

@@ -778,7 +819,7 @@ def get_non_lora_model(base_model, model_loader, load_half, model_kwargs, reward
     return model


-def get_client_from_inference_server(inference_server, raise_connection_exception=False):
+def get_client_from_inference_server(inference_server, base_model=None, raise_connection_exception=False):
     inference_server, headers = get_hf_server(inference_server)
     # preload client since slow for gradio case especially
     from gradio_utils.grclient import GradioClient

@@ -786,7 +827,7 @@ def get_client_from_inference_server(inference_server, raise_connection_exceptio
     hf_client = None
     if headers is None:
         try:
-            print("GR Client Begin: %s" % inference_server, flush=True)
+            print("GR Client Begin: %s %s" % (inference_server, base_model), flush=True)
             # first do sanity check if alive, else gradio client takes too long by default
             requests.get(inference_server, timeout=int(os.getenv('REQUEST_TIMEOUT', '30')))
             gr_client = GradioClient(inference_server)

@@ -794,19 +835,19 @@
         except (OSError, ValueError) as e:
             # Occurs when wrong endpoint and should have been HF client, so don't hard raise, just move to HF
             gr_client = None
-            print("GR Client Failed %s: %s" % (inference_server, str(e)), flush=True)
+            print("GR Client Failed %s %s: %s" % (inference_server, base_model, str(e)), flush=True)
         except (ConnectTimeoutError, ConnectTimeout, MaxRetryError, ConnectionError, ConnectionError2,
                 JSONDecodeError, ReadTimeout2, KeyError) as e:
             t, v, tb = sys.exc_info()
             ex = ''.join(traceback.format_exception(t, v, tb))
-            print("GR Client Failed %s: %s" % (inference_server, str(ex)), flush=True)
+            print("GR Client Failed %s %s: %s" % (inference_server, base_model, str(ex)), flush=True)
             if raise_connection_exception:
                 raise

     if gr_client is None:
         res = None
         from text_generation import Client as HFClient
-        print("HF Client Begin: %s" % inference_server)
+        print("HF Client Begin: %s %s" % (inference_server, base_model))
         try:
             hf_client = HFClient(inference_server, headers=headers, timeout=int(os.getenv('REQUEST_TIMEOUT', '30')))
             # quick check valid TGI endpoint

@@ -817,10 +858,10 @@ def get_client_from_inference_server(inference_server, raise_connection_exceptio
             hf_client = None
             t, v, tb = sys.exc_info()
             ex = ''.join(traceback.format_exception(t, v, tb))
-            print("HF Client Failed %s: %s" % (inference_server, str(ex)))
+            print("HF Client Failed %s %s: %s" % (inference_server, base_model, str(ex)))
             if raise_connection_exception:
                 raise
-        print("HF Client End: %s %s" % (inference_server, res))
+        print("HF Client End: %s %s : %s" % (inference_server, base_model, res))
     return inference_server, gr_client, hf_client

@@ -828,7 +869,9 @@ def get_model(
         load_8bit: bool = False,
         load_4bit: bool = False,
         load_half: bool = True,
-
+        load_gptq: str = '',
+        use_safetensors: bool = False,
+        use_gpu_id: bool = True,
         base_model: str = '',
         inference_server: str = "",
         tokenizer_base_model: str = '',

@@ -850,7 +893,9 @@
     :param load_8bit: load model in 8-bit, not supported by all models
     :param load_4bit: load model in 4-bit, not supported by all models
     :param load_half: load model in 16-bit
-    :param
+    :param load_gptq: GPTQ model_basename
+    :param use_safetensors: use safetensors file
+    :param use_gpu_id: Use torch infer of optimal placement of layers on devices (for non-lora case)
            For non-LORA case, False will spread shards across multiple GPUs, but this can lead to cuda:x cuda:y mismatches
            So it is not the default
     :param base_model: name/path of base model

@@ -868,8 +913,7 @@
     :param verbose:
     :return:
     """
-
-    print("Get %s model" % base_model, flush=True)
+    print("Starting get_model: %s %s" % (base_model, inference_server), flush=True)

     triton_attn = False
     long_sequence = True

@@ -893,7 +937,8 @@
         print("Detected as llama type from"
               " config (%s) or name (%s)" % (llama_type_from_config, llama_type_from_name), flush=True)

-    model_loader, tokenizer_loader = get_loaders(model_name=base_model, reward_type=reward_type, llama_type=llama_type
+    model_loader, tokenizer_loader = get_loaders(model_name=base_model, reward_type=reward_type, llama_type=llama_type,
+                                                 load_gptq=load_gptq)

     tokenizer_kwargs = dict(local_files_only=local_files_only,
                             resume_download=resume_download,

@@ -917,7 +962,8 @@
             tokenizer = FakeTokenizer()

     if isinstance(inference_server, str) and inference_server.startswith("http"):
-        inference_server, gr_client, hf_client = get_client_from_inference_server(inference_server
+        inference_server, gr_client, hf_client = get_client_from_inference_server(inference_server,
+                                                                                  base_model=base_model)
         client = gr_client or hf_client
         # Don't return None, None for model, tokenizer so triggers
         return client, tokenizer, 'http'

@@ -937,7 +983,9 @@
     return get_hf_model(load_8bit=load_8bit,
                         load_4bit=load_4bit,
                         load_half=load_half,
-
+                        load_gptq=load_gptq,
+                        use_safetensors=use_safetensors,
+                        use_gpu_id=use_gpu_id,
                         base_model=base_model,
                         tokenizer_base_model=tokenizer_base_model,
                         lora_weights=lora_weights,

@@ -961,7 +1009,9 @@
 def get_hf_model(load_8bit: bool = False,
                  load_4bit: bool = False,
                  load_half: bool = True,
-
+                 load_gptq: str = '',
+                 use_safetensors: bool = False,
+                 use_gpu_id: bool = True,
                  base_model: str = '',
                  tokenizer_base_model: str = '',
                  lora_weights: str = "",

@@ -998,7 +1048,8 @@ def get_hf_model(load_8bit: bool = False,
             "Please choose a base model with --base_model (CLI) or load one from Models Tab (gradio)"
         )

-    model_loader, tokenizer_loader = get_loaders(model_name=base_model, reward_type=reward_type, llama_type=llama_type
+    model_loader, tokenizer_loader = get_loaders(model_name=base_model, reward_type=reward_type, llama_type=llama_type,
+                                                 load_gptq=load_gptq)

     config, _ = get_config(base_model, return_model=False, raise_exception=True, **config_kwargs)

@@ -1015,7 +1066,7 @@
                               device=0 if device == "cuda" else -1,
                               torch_dtype=torch.float16 if device == 'cuda' else torch.float32)
     else:
-        assert device in ["cuda", "cpu"], "Unsupported device %s" % device
+        assert device in ["cuda", "cpu", "mps"], "Unsupported device %s" % device
         model_kwargs = dict(local_files_only=local_files_only,
                             torch_dtype=torch.float16 if device == 'cuda' else torch.float32,
                             resume_download=resume_download,

@@ -1024,11 +1075,16 @@
                             offload_folder=offload_folder,
                             )
         if 'mbart-' not in base_model.lower() and 'mpt-' not in base_model.lower():
+            if use_gpu_id and gpu_id is not None and gpu_id >= 0 and device == 'cuda':
+                device_map = {"": gpu_id}
+            else:
+                device_map = "auto"
             model_kwargs.update(dict(load_in_8bit=load_8bit,
                                      load_in_4bit=load_4bit,
-                                     device_map=
+                                     device_map=device_map,
                                      ))
         if 'mpt-' in base_model.lower() and gpu_id is not None and gpu_id >= 0:
+            # MPT doesn't support spreading over GPUs
             model_kwargs.update(dict(device_map={"": gpu_id} if device == 'cuda' else "cpu"))

         if 'OpenAssistant/reward-model'.lower() in base_model.lower():

@@ -1038,29 +1094,32 @@
         pop_unused_model_kwargs(model_kwargs)

         if not lora_weights:
-
+            # torch.device context uses twice memory for AutoGPTQ
+            context = NullContext if load_gptq else torch.device
+            with context(device):

-            if
+                if use_gpu_id:
                     config, model = get_config(base_model, return_model=True, raise_exception=True, **config_kwargs)
-                    model = get_non_lora_model(base_model, model_loader, load_half,
+                    model = get_non_lora_model(base_model, model_loader, load_half, load_gptq, use_safetensors,
+                                               model_kwargs, reward_type,
                                                config, model,
                                                gpu_id=gpu_id,
                                                )
                 else:
                     config, _ = get_config(base_model, **config_kwargs)
-                    if load_half and not (load_8bit or load_4bit):
-                        model = model_loader
+                    if load_half and not (load_8bit or load_4bit or load_gptq):
+                        model = model_loader(
                             base_model,
                             config=config,
                             **model_kwargs).half()
                     else:
-                        model = model_loader
+                        model = model_loader(
                             base_model,
                             config=config,
                             **model_kwargs)
         elif load_8bit or load_4bit:
             config, _ = get_config(base_model, **config_kwargs)
-            model = model_loader
+            model = model_loader(
                 base_model,
                 config=config,
                 **model_kwargs

@@ -1080,7 +1139,7 @@ def get_hf_model(load_8bit: bool = False,
         else:
             with torch.device(device):
                 config, _ = get_config(base_model, raise_exception=True, **config_kwargs)
-                model = model_loader
+                model = model_loader(
                     base_model,
                     config=config,
                     **model_kwargs

@@ -1097,7 +1156,7 @@ def get_hf_model(load_8bit: bool = False,
                     offload_folder=offload_folder,
                     device_map="auto",
                 )
-        if load_half:
+        if load_half and not load_gptq:
             model.half()

     # unwind broken decapoda-research config

@@ -1156,7 +1215,8 @@ def get_score_model(score_model: str = None,
                     load_8bit: bool = False,
                     load_4bit: bool = False,
                     load_half: bool = True,
-
+                    load_gptq: str = '',
+                    use_gpu_id: bool = True,
                     base_model: str = '',
                     inference_server: str = '',
                     tokenizer_base_model: str = '',

@@ -1177,6 +1237,8 @@
         load_8bit = False
         load_4bit = False
         load_half = False
+        load_gptq = ''
+        use_safetensors = False
         base_model = score_model.strip()
         tokenizer_base_model = ''
         lora_weights = ''

@@ -1219,6 +1281,7 @@ def evaluate(
         top_k_docs,
         chunk,
         chunk_size,
+        document_subset,
         document_choice,
         # END NOTE: Examples must have same order of parameters
         src_lang=None,

@@ -1435,6 +1498,7 @@
                 chunk_size=chunk_size,
                 langchain_mode=langchain_mode,
                 langchain_action=langchain_action,
+                document_subset=document_subset,
                 document_choice=document_choice,
                 db_type=db_type,
                 top_k_docs=top_k_docs,

@@ -1462,6 +1526,7 @@
                 inference_server=inference_server,
                 langchain_mode=langchain_mode,
                 langchain_action=langchain_action,
+                document_subset=document_subset,
                 document_choice=document_choice,
                 num_prompt_tokens=num_prompt_tokens,
                 instruction=instruction,

@@ -1563,7 +1628,8 @@
             gr_client = None
             hf_client = model
         else:
-            inference_server, gr_client, hf_client = get_client_from_inference_server(inference_server
+            inference_server, gr_client, hf_client = get_client_from_inference_server(inference_server,
+                                                                                      base_model=base_model)

         # quick sanity check to avoid long timeouts, just see if can reach server
         requests.get(inference_server, timeout=int(os.getenv('REQUEST_TIMEOUT_FAST', '10')))

@@ -1631,7 +1697,8 @@
                                  top_k_docs=top_k_docs,
                                  chunk=chunk,
                                  chunk_size=chunk_size,
-
+                                 document_subset=DocumentChoices.Relevant.name,
+                                 document_choice=[],
                                  )
             api_name = '/submit_nochat_api'  # NOTE: like submit_nochat but stable API for string dict passing
             if not stream_output:

@@ -1830,7 +1897,7 @@

     with torch.no_grad():
         have_lora_weights = lora_weights not in [no_lora_str, '', None]
-        context_class_cast = NullContext if device == 'cpu' or have_lora_weights else torch.autocast
+        context_class_cast = NullContext if device == 'cpu' or have_lora_weights or device == 'mps' else torch.autocast
         with context_class_cast(device):
             # protection for gradio not keeping track of closed users,
             # else hit bitsandbytes lack of thread safety:

@@ -2207,8 +2274,8 @@ y = np.random.randint(0, 1, 100)

     # move to correct position
     for example in examples:
-        example += [chat, '', '',
-                    top_k_docs, chunk, chunk_size, [DocumentChoices.
+        example += [chat, '', '', LangChainMode.DISABLED.value, LangChainAction.QUERY.value,
+                    top_k_docs, chunk, chunk_size, [DocumentChoices.Relevant.name], []
                     ]
     # adjust examples if non-chat mode
     if not chat:

@@ -2431,9 +2498,9 @@ def entrypoint_main():

     python generate.py --base_model='togethercomputer/GPT-NeoXT-Chat-Base-20B' --prompt_type='human_bot' --lora_weights='GPT-NeoXT-Chat-Base-20B.merged.json.8_epochs.57b2892c53df5b8cefac45f84d019cace803ef26.28'

-    must have 4*48GB GPU and run without 8bit in order for sharding to work with
+    must have 4*48GB GPU and run without 8bit in order for sharding to work with use_gpu_id=False
     can also pass --prompt_type='human_bot' and model can somewhat handle instructions without being instruct tuned
-    python generate.py --base_model=decapoda-research/llama-65b-hf --load_8bit=False --
+    python generate.py --base_model=decapoda-research/llama-65b-hf --load_8bit=False --use_gpu_id=False --prompt_type='human_bot'

     python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6_9b
     """
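The loading changes above amount to two paths: either hand the whole model to one GPU (or let accelerate shard it) via device_map, or route through AutoGPTQ when load_gptq names a quantized checkpoint. The sketch below is illustrative only, not the repo's implementation: the model id and basename are placeholders, and the AutoGPTQ call is inferred from the model_name_or_path/model_basename keywords seen in get_non_lora_model (loaders.py itself is not shown in this view).

import torch
from transformers import AutoModelForCausalLM

def load_base_model(base_model, load_gptq='', use_safetensors=False,
                    use_gpu_id=True, gpu_id=0, load_half=True):
    if load_gptq:
        # GPTQ path: the checkpoint is already quantized, so no .half() afterwards
        from auto_gptq import AutoGPTQForCausalLM
        return AutoGPTQForCausalLM.from_quantized(base_model,
                                                  model_basename=load_gptq,
                                                  use_safetensors=use_safetensors)
    # HF path: use_gpu_id=True pins every layer to gpu_id; False lets accelerate spread
    # shards ("auto"), which is where the cuda:x vs cuda:y mismatches can show up.
    device_map = {"": gpu_id} if use_gpu_id and torch.cuda.is_available() else "auto"
    dtype = torch.float16 if load_half else torch.float32
    return AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=dtype,
                                                device_map=device_map)

On the command line this surfaces exactly as the updated docstring examples show, e.g. python generate.py --base_model=decapoda-research/llama-65b-hf --load_8bit=False --use_gpu_id=False --prompt_type='human_bot' for cross-GPU sharding, or --load_gptq=gptq_model-4bit--1g --use_safetensors=True for a GPTQ checkpoint.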
gpt_langchain.py
CHANGED
@@ -29,7 +29,8 @@ from evaluate_params import gen_hyper
|
|
29 |
from gen import get_model, SEED
|
30 |
from prompter import non_hf_types, PromptType, Prompter
|
31 |
from utils import wrapped_partial, EThread, import_matplotlib, sanitize_filename, makedirs, get_url, flatten_list, \
|
32 |
-
get_device, ProgressParallel, remove, hash_file, clear_torch_cache, NullContext, get_hf_server, FakeTokenizer
|
|
|
33 |
from utils_langchain import StreamingGradioCallbackHandler
|
34 |
|
35 |
import_matplotlib()
|
@@ -387,7 +388,8 @@ class GradioInference(LLM):
|
|
387 |
top_k_docs=top_k_docs,
|
388 |
chunk=chunk,
|
389 |
chunk_size=chunk_size,
|
390 |
-
|
|
|
391 |
)
|
392 |
api_name = '/submit_nochat_api' # NOTE: like submit_nochat but stable API for string dict passing
|
393 |
if not stream_output:
|
@@ -913,40 +915,6 @@ def get_dai_docs(from_hf=False, get_pickle=True):
|
|
913 |
return sources
|
914 |
|
915 |
|
916 |
-
import distutils.spawn
|
917 |
-
|
918 |
-
have_tesseract = distutils.spawn.find_executable("tesseract")
|
919 |
-
have_libreoffice = distutils.spawn.find_executable("libreoffice")
|
920 |
-
|
921 |
-
import pkg_resources
|
922 |
-
|
923 |
-
try:
|
924 |
-
assert pkg_resources.get_distribution('arxiv') is not None
|
925 |
-
assert pkg_resources.get_distribution('pymupdf') is not None
|
926 |
-
have_arxiv = True
|
927 |
-
except (pkg_resources.DistributionNotFound, AssertionError):
|
928 |
-
have_arxiv = False
|
929 |
-
|
930 |
-
try:
|
931 |
-
assert pkg_resources.get_distribution('pymupdf') is not None
|
932 |
-
have_pymupdf = True
|
933 |
-
except (pkg_resources.DistributionNotFound, AssertionError):
|
934 |
-
have_pymupdf = False
|
935 |
-
|
936 |
-
try:
|
937 |
-
assert pkg_resources.get_distribution('selenium') is not None
|
938 |
-
have_selenium = True
|
939 |
-
except (pkg_resources.DistributionNotFound, AssertionError):
|
940 |
-
have_selenium = False
|
941 |
-
|
942 |
-
try:
|
943 |
-
assert pkg_resources.get_distribution('playwright') is not None
|
944 |
-
have_playwright = True
|
945 |
-
except (pkg_resources.DistributionNotFound, AssertionError):
|
946 |
-
have_playwright = False
|
947 |
-
|
948 |
-
# disable, hangs too often
|
949 |
-
have_playwright = False
|
950 |
|
951 |
image_types = ["png", "jpg", "jpeg"]
|
952 |
non_image_types = ["pdf", "txt", "csv", "toml", "py", "rst", "rtf",
|
@@ -973,7 +941,7 @@ def add_meta(docs1, file):
|
|
973 |
|
974 |
|
975 |
def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False,
|
976 |
-
chunk=True, chunk_size=512,
|
977 |
is_url=False, is_txt=False,
|
978 |
enable_captions=True,
|
979 |
captions_model=None,
|
@@ -1208,6 +1176,7 @@ def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False,
|
|
1208 |
|
1209 |
def path_to_doc1(file, verbose=False, fail_any_exception=False, return_file=True,
|
1210 |
chunk=True, chunk_size=512,
|
|
|
1211 |
is_url=False, is_txt=False,
|
1212 |
enable_captions=True,
|
1213 |
captions_model=None,
|
@@ -1224,6 +1193,7 @@ def path_to_doc1(file, verbose=False, fail_any_exception=False, return_file=True
|
|
1224 |
# don't pass base_path=path, would infinitely recurse
|
1225 |
res = file_to_doc(file, base_path=None, verbose=verbose, fail_any_exception=fail_any_exception,
|
1226 |
chunk=chunk, chunk_size=chunk_size,
|
|
|
1227 |
is_url=is_url, is_txt=is_txt,
|
1228 |
enable_captions=enable_captions,
|
1229 |
captions_model=captions_model,
|
@@ -1236,7 +1206,8 @@ def path_to_doc1(file, verbose=False, fail_any_exception=False, return_file=True
|
|
1236 |
else:
|
1237 |
exception_doc = Document(
|
1238 |
page_content='',
|
1239 |
-
metadata={"source": file, "exception":
|
|
|
1240 |
res = [exception_doc]
|
1241 |
if return_file:
|
1242 |
base_tmp = "temp_path_to_doc1"
|
@@ -1326,6 +1297,7 @@ def path_to_docs(path_or_paths, verbose=False, fail_any_exception=False, n_jobs=
|
|
1326 |
kwargs = dict(verbose=verbose, fail_any_exception=fail_any_exception,
|
1327 |
return_file=return_file,
|
1328 |
chunk=chunk, chunk_size=chunk_size,
|
|
|
1329 |
is_url=is_url,
|
1330 |
is_txt=is_txt,
|
1331 |
enable_captions=enable_captions,
|
@@ -1802,7 +1774,8 @@ def _run_qa_db(query=None,
|
|
1802 |
num_return_sequences=1,
|
1803 |
langchain_mode=None,
|
1804 |
langchain_action=None,
|
1805 |
-
|
|
|
1806 |
n_jobs=-1,
|
1807 |
verbose=False,
|
1808 |
cli=False,
|
@@ -1873,19 +1846,13 @@ def _run_qa_db(query=None,
|
|
1873 |
if isinstance(document_choice, str):
|
1874 |
# support string as well
|
1875 |
document_choice = [document_choice]
|
1876 |
-
|
1877 |
-
|
1878 |
-
cmd = [x for x in document_choice if x in doc_choices_set]
|
1879 |
-
cmd = None if len(cmd) == 0 else cmd[0]
|
1880 |
-
# now have cmd, filter out for only docs
|
1881 |
-
document_choice = [x for x in document_choice if x not in doc_choices_set]
|
1882 |
-
|
1883 |
-
func_names = list(inspect.signature(get_similarity_chain).parameters)
|
1884 |
sim_kwargs = {k: v for k, v in locals().items() if k in func_names}
|
1885 |
missing_kwargs = [x for x in func_names if x not in sim_kwargs]
|
1886 |
assert not missing_kwargs, "Missing: %s" % missing_kwargs
|
1887 |
-
docs, chain, scores, use_context, have_any_docs =
|
1888 |
-
if
|
1889 |
formatted_doc_chunks = '\n\n'.join([get_url(x) + '\n\n' + x.page_content for x in docs])
|
1890 |
yield formatted_doc_chunks, ''
|
1891 |
return
|
@@ -1963,36 +1930,36 @@ def _run_qa_db(query=None,
|
|
1963 |
return
|
1964 |
|
1965 |
|
1966 |
-
def
|
1967 |
-
|
1968 |
-
|
1969 |
-
|
1970 |
-
|
1971 |
-
|
1972 |
-
|
1973 |
-
|
1974 |
-
|
1975 |
-
|
1976 |
-
|
1977 |
-
|
1978 |
-
|
1979 |
-
|
1980 |
-
|
1981 |
-
|
1982 |
-
|
1983 |
-
|
1984 |
-
|
1985 |
-
|
1986 |
-
|
1987 |
-
|
1988 |
-
|
1989 |
-
|
1990 |
-
|
1991 |
-
|
1992 |
-
|
1993 |
-
|
1994 |
-
|
1995 |
-
|
1996 |
# determine whether use of context out of docs is planned
|
1997 |
if not use_openai_model and prompt_type not in ['plain'] or model_name in non_hf_types:
|
1998 |
if langchain_mode in ['Disabled', 'ChatLLM', 'LLM']:
|
@@ -2086,12 +2053,25 @@ def get_similarity_chain(query=None,
|
|
2086 |
use_template = False
|
2087 |
|
2088 |
if db and use_context:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2089 |
if not isinstance(db, Chroma):
|
2090 |
# only chroma supports filtering
|
2091 |
filter_kwargs = {}
|
2092 |
else:
|
2093 |
-
|
2094 |
-
if len(document_choice) >=
|
|
|
|
|
|
|
|
|
|
|
2095 |
or_filter = [{"source": {"$eq": x}} for x in document_choice]
|
2096 |
filter_kwargs = dict(filter={"$or": or_filter})
|
2097 |
elif len(document_choice) == 1:
|
@@ -2101,10 +2081,10 @@ def get_similarity_chain(query=None,
|
|
2101 |
else:
|
2102 |
# shouldn't reach
|
2103 |
filter_kwargs = {}
|
2104 |
-
if
|
2105 |
docs = []
|
2106 |
scores = []
|
2107 |
-
elif
|
2108 |
db_documents, db_metadatas = get_docs_and_meta(db, top_k_docs, filter_kwargs=filter_kwargs)
|
2109 |
# similar to langchain's chroma's _results_to_docs_and_scores
|
2110 |
docs_with_score = [(Document(page_content=result[0], metadata=result[1] or {}), 0)
|
@@ -2127,13 +2107,7 @@ def get_similarity_chain(query=None,
|
|
2127 |
if top_k_docs == -1 or auto_reduce_chunks:
|
2128 |
# docs_with_score = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)[:top_k_docs]
|
2129 |
top_k_docs_tokenize = 100
|
2130 |
-
|
2131 |
-
makedirs(base_path)
|
2132 |
-
if hasattr(db, '_persist_directory'):
|
2133 |
-
name_path = "sim_%s.lock" % os.path.basename(db._persist_directory)
|
2134 |
-
else:
|
2135 |
-
name_path = "sim.lock"
|
2136 |
-
with filelock.FileLock(os.path.join(base_path, name_path)):
|
2137 |
docs_with_score = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)[
|
2138 |
:top_k_docs_tokenize]
|
2139 |
if hasattr(llm, 'pipeline') and hasattr(llm.pipeline, 'tokenizer'):
|
@@ -2189,7 +2163,8 @@ def get_similarity_chain(query=None,
|
|
2189 |
top_k_docs = 1
|
2190 |
docs_with_score = docs_with_score[:top_k_docs]
|
2191 |
else:
|
2192 |
-
|
|
|
2193 |
# put most relevant chunks closest to question,
|
2194 |
# esp. if truncation occurs will be "oldest" or "farthest from response" text that is truncated
|
2195 |
# BUT: for small models, e.g. 6_9 pythia, if sees some stuff related to h2oGPT first, it can connect that and not listen to rest
|
@@ -2210,7 +2185,7 @@ def get_similarity_chain(query=None,
|
|
2210 |
# if HF type and have no docs, can bail out
|
2211 |
return docs, None, [], False, have_any_docs
|
2212 |
|
2213 |
-
if
|
2214 |
# no LLM use
|
2215 |
return docs, None, [], False, have_any_docs
|
2216 |
|
|
|
29 |
from gen import get_model, SEED
|
30 |
from prompter import non_hf_types, PromptType, Prompter
|
31 |
from utils import wrapped_partial, EThread, import_matplotlib, sanitize_filename, makedirs, get_url, flatten_list, \
|
32 |
+
get_device, ProgressParallel, remove, hash_file, clear_torch_cache, NullContext, get_hf_server, FakeTokenizer, \
|
33 |
+
have_libreoffice, have_arxiv, have_playwright, have_selenium, have_tesseract, have_pymupdf
|
34 |
from utils_langchain import StreamingGradioCallbackHandler
|
35 |
|
36 |
import_matplotlib()
|
|
|
388 |
top_k_docs=top_k_docs,
|
389 |
chunk=chunk,
|
390 |
chunk_size=chunk_size,
|
391 |
+
document_subset=DocumentChoices.Relevant.name,
|
392 |
+
document_choice=[],
|
393 |
)
|
394 |
api_name = '/submit_nochat_api' # NOTE: like submit_nochat but stable API for string dict passing
|
395 |
if not stream_output:
|
|
|
915 |
return sources
|
916 |
|
917 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
918 |
|
919 |
image_types = ["png", "jpg", "jpeg"]
|
920 |
non_image_types = ["pdf", "txt", "csv", "toml", "py", "rst", "rtf",
|
|
|
941 |
|
942 |
|
943 |
def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False,
|
944 |
+
chunk=True, chunk_size=512, n_jobs=-1,
|
945 |
is_url=False, is_txt=False,
|
946 |
enable_captions=True,
|
947 |
captions_model=None,
|
|
|
1176 |
|
1177 |
def path_to_doc1(file, verbose=False, fail_any_exception=False, return_file=True,
|
1178 |
chunk=True, chunk_size=512,
|
1179 |
+
n_jobs=-1,
|
1180 |
is_url=False, is_txt=False,
|
1181 |
enable_captions=True,
|
1182 |
captions_model=None,
|
|
|
1193 |
# don't pass base_path=path, would infinitely recurse
|
1194 |
res = file_to_doc(file, base_path=None, verbose=verbose, fail_any_exception=fail_any_exception,
|
1195 |
chunk=chunk, chunk_size=chunk_size,
|
1196 |
+
n_jobs=n_jobs,
|
1197 |
is_url=is_url, is_txt=is_txt,
|
1198 |
enable_captions=enable_captions,
|
1199 |
captions_model=captions_model,
|
|
|
1206 |
else:
|
1207 |
exception_doc = Document(
|
1208 |
page_content='',
|
1209 |
+
metadata={"source": file, "exception": '%s hit %s' % (file, str(e)),
|
1210 |
+
"traceback": traceback.format_exc()})
|
1211 |
res = [exception_doc]
|
1212 |
if return_file:
|
1213 |
base_tmp = "temp_path_to_doc1"
|
|
|
1297 |
kwargs = dict(verbose=verbose, fail_any_exception=fail_any_exception,
|
1298 |
return_file=return_file,
|
1299 |
chunk=chunk, chunk_size=chunk_size,
|
1300 |
+
n_jobs=n_jobs,
|
1301 |
is_url=is_url,
|
1302 |
is_txt=is_txt,
|
1303 |
enable_captions=enable_captions,
|
|
|
1774 |
num_return_sequences=1,
|
1775 |
langchain_mode=None,
|
1776 |
langchain_action=None,
|
1777 |
+
document_subset=DocumentChoices.Relevant.name,
|
1778 |
+
document_choice=[],
|
1779 |
n_jobs=-1,
|
1780 |
verbose=False,
|
1781 |
cli=False,
|
|
|
1846 |
if isinstance(document_choice, str):
|
1847 |
# support string as well
|
1848 |
document_choice = [document_choice]
|
1849 |
+
|
1850 |
+
func_names = list(inspect.signature(get_chain).parameters)
|
|
|
|
|
|
|
|
|
|
|
|
|
1851 |
sim_kwargs = {k: v for k, v in locals().items() if k in func_names}
|
1852 |
missing_kwargs = [x for x in func_names if x not in sim_kwargs]
|
1853 |
assert not missing_kwargs, "Missing: %s" % missing_kwargs
|
1854 |
+
docs, chain, scores, use_context, have_any_docs = get_chain(**sim_kwargs)
|
1855 |
+
if document_subset in non_query_commands:
|
1856 |
formatted_doc_chunks = '\n\n'.join([get_url(x) + '\n\n' + x.page_content for x in docs])
|
1857 |
yield formatted_doc_chunks, ''
|
1858 |
return
|
|
|
1930 |
return
|
1931 |
|
1932 |
|
1933 |
+
def get_chain(query=None,
|
1934 |
+
iinput=None,
|
1935 |
+
use_openai_model=False, use_openai_embedding=False,
|
1936 |
+
first_para=False, text_limit=None, top_k_docs=4, chunk=True, chunk_size=512,
|
1937 |
+
user_path=None,
|
1938 |
+
detect_user_path_changes_every_query=False,
|
1939 |
+
db_type='faiss',
|
1940 |
+
model_name=None,
|
1941 |
+
inference_server='',
|
1942 |
+
hf_embedding_model="sentence-transformers/all-MiniLM-L6-v2",
|
1943 |
+
prompt_type=None,
|
1944 |
+
prompt_dict=None,
|
1945 |
+
cut_distanct=1.1,
|
1946 |
+
load_db_if_exists=False,
|
1947 |
+
db=None,
|
1948 |
+
langchain_mode=None,
|
1949 |
+
langchain_action=None,
|
1950 |
+
document_subset=DocumentChoices.Relevant.name,
|
1951 |
+
document_choice=[],
|
1952 |
+
n_jobs=-1,
|
1953 |
+
# beyond run_db_query:
|
1954 |
+
llm=None,
|
1955 |
+
tokenizer=None,
|
1956 |
+
verbose=False,
|
1957 |
+
reverse_docs=True,
|
1958 |
+
|
1959 |
+
# local
|
1960 |
+
auto_reduce_chunks=True,
|
1961 |
+
max_chunks=100,
|
1962 |
+
):
|
1963 |
# determine whether use of context out of docs is planned
|
1964 |
if not use_openai_model and prompt_type not in ['plain'] or model_name in non_hf_types:
|
1965 |
if langchain_mode in ['Disabled', 'ChatLLM', 'LLM']:
|
|
|
         use_template = False
 
     if db and use_context:
+        base_path = 'locks'
+        makedirs(base_path)
+        if hasattr(db, '_persist_directory'):
+            name_path = "sim_%s.lock" % os.path.basename(db._persist_directory)
+        else:
+            name_path = "sim.lock"
+        lock_file = os.path.join(base_path, name_path)
+
         if not isinstance(db, Chroma):
             # only chroma supports filtering
             filter_kwargs = {}
         else:
+            assert document_choice is not None, "Document choice was None"
+            if len(document_choice) >= 1 and document_choice[0] == DocumentChoices.All.name:
+                filter_kwargs = {}
+            elif len(document_choice) >= 2:
+                if document_choice[0] == DocumentChoices.All.name:
+                    # remove 'All'
+                    document_choice = document_choice[1:]
                 or_filter = [{"source": {"$eq": x}} for x in document_choice]
                 filter_kwargs = dict(filter={"$or": or_filter})
             elif len(document_choice) == 1:
 
             else:
                 # shouldn't reach
                 filter_kwargs = {}
+        if langchain_mode in [LangChainMode.LLM.value, LangChainMode.CHAT_LLM.value]:
             docs = []
             scores = []
+        elif document_subset == DocumentChoices.All.name or query in [None, '', '\n']:
             db_documents, db_metadatas = get_docs_and_meta(db, top_k_docs, filter_kwargs=filter_kwargs)
             # similar to langchain's chroma's _results_to_docs_and_scores
             docs_with_score = [(Document(page_content=result[0], metadata=result[1] or {}), 0)
 
         if top_k_docs == -1 or auto_reduce_chunks:
             # docs_with_score = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)[:top_k_docs]
             top_k_docs_tokenize = 100
+            with filelock.FileLock(lock_file):
                 docs_with_score = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)[
                                   :top_k_docs_tokenize]
             if hasattr(llm, 'pipeline') and hasattr(llm.pipeline, 'tokenizer'):
 
                 top_k_docs = 1
             docs_with_score = docs_with_score[:top_k_docs]
         else:
+            with filelock.FileLock(lock_file):
+                docs_with_score = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)[:top_k_docs]
         # put most relevant chunks closest to question,
         # esp. if truncation occurs will be "oldest" or "farthest from response" text that is truncated
         # BUT: for small models, e.g. 6_9 pythia, if sees some stuff related to h2oGPT first, it can connect that and not listen to rest
 
         # if HF type and have no docs, can bail out
         return docs, None, [], False, have_any_docs
 
+    if document_subset in non_query_commands:
         # no LLM use
         return docs, None, [], False, have_any_docs

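The hunk above narrows retrieval to user-selected sources with a Chroma metadata filter and serializes concurrent searches against one persisted store with a per-database lock file. A minimal standalone sketch of that pattern, assuming the langchain Chroma wrapper and the filelock package; names such as selected_sources and lock_dir are illustrative and not taken from the diff:

import os
import filelock
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

def locked_filtered_search(persist_directory, query, selected_sources, k=4, lock_dir='locks'):
    # Mirror the diff's branches: no filter, a single "$eq" filter, or an "$or" of "$eq" filters.
    filter_kwargs = {}
    if len(selected_sources) == 1:
        filter_kwargs = dict(filter={"source": {"$eq": selected_sources[0]}})
    elif len(selected_sources) >= 2:
        filter_kwargs = dict(filter={"$or": [{"source": {"$eq": s}} for s in selected_sources]})

    db = Chroma(persist_directory=persist_directory,
                embedding_function=HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-L6-v2"))

    # one lock per persisted collection, similar in spirit to the sim_<dir>.lock naming above
    os.makedirs(lock_dir, exist_ok=True)
    lock_file = os.path.join(lock_dir, "sim_%s.lock" % os.path.basename(persist_directory))
    with filelock.FileLock(lock_file):
        return db.similarity_search_with_score(query, k=k, **filter_kwargs)

The lock guards only the similarity-search call, matching the narrow critical section used in the hunk above.
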
gradio_runner.py
CHANGED
@@ -20,7 +20,7 @@ import tabulate
 from iterators import TimeoutIterator
 
 from gradio_utils.css import get_css
-from gradio_utils.prompt_form import
 
 # This is a hack to prevent Gradio from phoning home when it gets imported
 os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
@@ -56,7 +56,7 @@ from gradio_themes import H2oTheme, SoftTheme, get_h2o_title, get_simple_title,
 from prompter import prompt_type_to_model_name, prompt_types_strings, inv_prompt_type_to_model_lower, non_hf_types, \
     get_prompt
 from utils import get_githash, flatten_list, zip_data, s3up, clear_torch_cache, get_torch_allocated, system_info_print, \
-    ping, get_short_name,
 from gen import get_model, languages_covered, evaluate, score_qa, langchain_modes, inputs_kwargs_list, scratch_base_dir, \
     get_max_max_new_tokens, get_minmax_top_k_docs, history_to_context, langchain_actions
 from evaluate_params import eval_func_param_names, no_default_param_names, eval_func_param_names_defaults
@@ -118,6 +118,13 @@ def go_gradio(**kwargs):
     allow_upload = allow_upload_to_user_data or allow_upload_to_my_data
     kwargs.update(locals())
 
     if 'mbart-' in kwargs['model_lower']:
         instruction_label_nochat = "Text to translate"
     else:
@@ -134,8 +141,7 @@ def go_gradio(**kwargs):
     """
     else:
         description = more_info
-    description_bottom = "If this host is busy, try [
-    description_bottom += """<p>By using h2oGPT, you accept our [Terms of Service](https://github.com/h2oai/h2ogpt/blob/main/docs/tos.md)</p>"""
     if is_hf:
         description_bottom += '''<a href="https://huggingface.co/spaces/h2oai/h2ogpt-chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" style="white-space: nowrap" alt="Duplicate Space"></a>'''
 
@@ -160,7 +166,7 @@ def go_gradio(**kwargs):
     theme_kwargs = dict()
     if kwargs['gradio_size'] == 'xsmall':
        theme_kwargs.update(dict(spacing_size=spacing_xsm, text_size=text_xsm, radius_size=radius_xsm))
-    elif kwargs['gradio_size']
        theme_kwargs.update(dict(spacing_size=gr.themes.sizes.spacing_sm, text_size=gr.themes.sizes.text_sm,
                                 radius_size=gr.themes.sizes.spacing_sm))
    elif kwargs['gradio_size'] == 'large':
@@ -262,14 +268,14 @@ def go_gradio(**kwargs):
         model_options_state = gr.State([model_options])
         lora_options_state = gr.State([lora_options])
         server_options_state = gr.State([server_options])
-
-        my_db_state = gr.State([None, str(uuid.uuid4())])
         chat_state = gr.State({})
-
-        docs_state00 = kwargs['document_choice'] + [x.name for x in list(DocumentChoices)]
         docs_state0 = []
         [docs_state0.append(x) for x in docs_state00 if x not in docs_state0]
-        docs_state = gr.State(docs_state0)
         gr.Markdown(f"""
         {get_h2o_title(title, description) if kwargs['h2ocolors'] else get_simple_title(title, description)}
         """)
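The removed docs_state00 handling above de-duplicates while preserving order via a list-comprehension side effect. An equivalent, clearer idiom, shown only as an illustration and not part of the diff:

def dedup_preserve_order(items):
    # dict keys keep insertion order in Python 3.7+, so the first occurrence of each entry wins
    return list(dict.fromkeys(items))

# example values only
assert dedup_preserve_order(['UserData', 'All', 'Relevant', 'All']) == ['UserData', 'All', 'Relevant']
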
@@ -282,179 +288,208 @@ def go_gradio(**kwargs):
         res_value = "Response Score: NA" if not kwargs[
             'model_lock'] else "Response Scores: %s" % nas
 
-
         with normal_block:
-
             instruction_nochat = gr.Textbox(
                 lines=kwargs['input_lines'],
                 label=instruction_label_nochat,
                 placeholder=kwargs['placeholder_instruction'],
             )
             iinput_nochat = gr.Textbox(lines=4, label="Input context for Instruction",
-                                       placeholder=kwargs['placeholder_input']
-            if is_hf:
-                # don't show 'wiki' since only usually useful for internal testing at moment
-                no_show_modes = ['Disabled', 'wiki']
-            else:
-                no_show_modes = ['Disabled']
-            allowed_modes = visible_langchain_modes.copy()
-            allowed_modes = [x for x in allowed_modes if x in dbs]
-            allowed_modes += ['ChatLLM', 'LLM']
-            if allow_upload_to_my_data and 'MyData' not in allowed_modes:
-                allowed_modes += ['MyData']
-            if allow_upload_to_user_data and 'UserData' not in allowed_modes:
-                allowed_modes += ['UserData']
-            langchain_mode = gr.Radio(
-                [x for x in langchain_modes if x in allowed_modes and x not in no_show_modes],
-                value=kwargs['langchain_mode'],
-                label="Data Collection of Sources",
-                visible=kwargs['langchain_mode'] != 'Disabled')
-            allowed_actions = [x for x in langchain_actions if x in visible_langchain_actions]
-            langchain_action = gr.Radio(
-                allowed_actions,
-                value=allowed_actions[0] if len(allowed_actions) > 0 else None,
-                label="Data Action",
-                visible=True)
-            data_row2 = gr.Row(visible=kwargs['langchain_mode'] != 'Disabled')
-            with data_row2:
-                with gr.Column(scale=50):
-                    document_choice = gr.Dropdown(docs_state.value,
-                                                  label="Choose Subset of Doc(s) in Collection [click get sources to update]",
-                                                  value=docs_state.value[0],
-                                                  interactive=True,
-                                                  multiselect=True,
-                                                  )
-                with gr.Row(visible=kwargs['langchain_mode'] != 'Disabled' and enable_sources_list):
-                    get_sources_btn = gr.Button(value="Get Sources", scale=0, size='sm')
-                    show_sources_btn = gr.Button(value="Show Sources", scale=0, size='sm')
-                    refresh_sources_btn = gr.Button(value="Refresh Sources", scale=0, size='sm')
-
-            # import control
-            if kwargs['langchain_mode'] != 'Disabled':
-                from gpt_langchain import file_types, have_arxiv
-            else:
-                have_arxiv = False
-                file_types = []
 
-            upload_row = gr.Row(visible=kwargs['langchain_mode'] != 'Disabled' and allow_upload,
-                                equal_height=False)
-            with upload_row:
-                with gr.Column():
-                    file_types_str = '[' + ' '.join(file_types) + ']'
-                    fileup_output = gr.File(label=f'Upload {file_types_str}',
-                                            file_types=file_types,
-                                            file_count="multiple",
-                                            elem_id="warning", elem_classes="feedback")
                     with gr.Row():
-                                   elem_id='small_btn' if allow_upload_to_user_data else None,
-                                   size='sm' if not allow_upload_to_user_data else None)
-                with gr.Column(
-                        visible=kwargs['langchain_mode'] != 'Disabled' and allow_upload and enable_text_upload):
-                    user_text_text = gr.Textbox(label='Paste Text [Shift-Enter more lines]',
-                                                placeholder="Click Add to Submit" if
-                                                allow_upload_to_my_data and
-                                                allow_upload_to_user_data else
-                                                "Enter to Submit, Shift-Enter for more lines",
-                                                interactive=True)
-                    with gr.Row():
-                        user_text_user_btn = gr.Button(value='Add Text to Shared UserData',
-                                                       visible=allow_upload_to_user_data and allow_upload_to_my_data,
-                                                       elem_id='small_btn')
-                        user_text_my_btn = gr.Button(value='Add Text to Scratch MyData',
-                                                     visible=allow_upload_to_my_data and allow_upload_to_user_data,
-                                                     elem_id='small_btn' if allow_upload_to_user_data else None,
-                                                     size='sm' if not allow_upload_to_user_data else None)
-                with gr.Column(visible=False):
-                    # WIP:
-                    with gr.Row(visible=False, equal_height=False):
-                        github_textbox = gr.Textbox(label="Github URL")
-                    with gr.Row(visible=True):
-                        github_shared_btn = gr.Button(value="Add Github to Shared UserData",
-                                                      visible=allow_upload_to_user_data,
-                                                      elem_id='small_btn')
-                        github_my_btn = gr.Button(value="Add Github to Scratch MyData",
-                                                  visible=allow_upload_to_my_data, elem_id='small_btn')
             sources_row = gr.Row(visible=kwargs['langchain_mode'] != 'Disabled' and enable_sources_list,
                                  equal_height=False)
             with sources_row:
                 with gr.Column(scale=1):
                     file_source = gr.File(interactive=False,
-                                          label="Download File w/Sources
                 with gr.Column(scale=2):
                     sources_text = gr.HTML(label='Sources Added', interactive=False)
 
             with gr.TabItem("Chat History"):
                 with gr.Row():
                     if 'mbart-' in kwargs['model_lower']:
                         src_lang = gr.Dropdown(list(languages_covered().keys()),
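The large hunk above removes the old inline data controls; their replacement (shown in full on the '+' side later in this diff) moves them into a sidebar column with accordions next to a tabbed main area. A minimal, self-contained sketch of that Gradio layout pattern, with placeholder component names not taken from the diff:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row(equal_height=False):
        with gr.Column(scale=1, min_width=100):      # narrow sidebar
            with gr.Accordion("Chats", open=False):
                saved_chats = gr.Radio(choices=[], show_label=False)
            with gr.Accordion("Upload", open=False):
                uploads = gr.File(file_count="multiple", show_label=False)
        with gr.Column(scale=10):                    # wide tabbed main area
            with gr.Tabs():
                with gr.TabItem("Chat"):
                    chatbot = gr.Chatbot()
                    prompt = gr.Textbox(label="Ask anything")
                with gr.TabItem("Document Selection"):
                    doc_choice = gr.Dropdown(choices=["All"], value="All", multiselect=True)

if __name__ == "__main__":
    demo.launch()
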
@@ -463,20 +498,9 @@ def go_gradio(**kwargs):
                     tgt_lang = gr.Dropdown(list(languages_covered().keys()),
                                            value=kwargs['tgt_lang'],
                                            label="Output Language")
-                radio_chats = gr.Radio(value=None, label="Saved Chats", visible=True, interactive=True,
-                                       type='value')
-                with gr.Row():
-                    clear_chat_btn = gr.Button(value="Clear Chat", visible=True, size='sm')
-                    export_chats_btn = gr.Button(value="Export Chats to Download", size='sm')
-                    remove_chat_btn = gr.Button(value="Remove Selected Chat", visible=True, size='sm')
-                    add_to_chats_btn = gr.Button("Import Chats from Upload", size='sm')
-                with gr.Row():
-                    chats_file = gr.File(interactive=False, label="Download Exported Chats")
-                    chatsup_output = gr.File(label="Upload Chat File(s)",
-                                             file_types=['.json'],
-                                             file_count='multiple',
-                                             elem_id="warning", elem_classes="feedback")
 
                 with gr.TabItem("Expert"):
                     with gr.Row():
                         with gr.Column():
@@ -555,7 +579,7 @@ def go_gradio(**kwargs):
                                 info="Directly pre-appended without prompt processing",
                                 interactive=not is_public)
                             chat = gr.components.Checkbox(label="Chat mode", value=kwargs['chat'],
-                                                          visible=
                                                           interactive=not is_public,
                                                           )
                             count_chat_tokens_btn = gr.Button(value="Count Chat Tokens",
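The "Count Chat Tokens" button retained above reports how much of the context window the current chat consumes. A hedged sketch of that kind of check with a Hugging Face tokenizer; the flattening logic here is illustrative, not the diff's count_chat_tokens implementation:

from transformers import AutoTokenizer

def count_chat_tokens(history, model_name="h2oai/h2ogpt-oig-oasst1-512-6_9b"):
    # history is a list of [user, bot] pairs, as gr.Chatbot stores them
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    text = "\n".join(turn for pair in history for turn in pair if turn)
    return len(tokenizer(text)["input_ids"])

print(count_chat_tokens([["Hello", "Hi there"], ["What is h2oGPT?", None]]))
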
@@ -614,9 +638,9 @@ def go_gradio(**kwargs):
                     model_load8bit_checkbox = gr.components.Checkbox(
                         label="Load 8-bit [requires support]",
                         value=kwargs['load_8bit'], interactive=not is_public)
-
                         label="Choose Devices [If not Checked, use all GPUs]",
-                        value=kwargs['
                     model_gpu = gr.Dropdown(n_gpus_list,
                                             label="GPU ID [-1 = all GPUs, if Choose is enabled]",
                                             value=kwargs['gpu_id'], interactive=not is_public)
@@ -649,10 +673,10 @@ def go_gradio(**kwargs):
                     model_load8bit_checkbox2 = gr.components.Checkbox(
                         label="Load 8-bit 2 [requires support]",
                         value=kwargs['load_8bit'], interactive=not is_public)
-
                         label="Choose Devices 2 [If not Checked, use all GPUs]",
                         value=kwargs[
-                            '
                     model_gpu2 = gr.Dropdown(n_gpus_list,
                                              label="GPU ID 2 [-1 = all GPUs, if choose is enabled]",
                                              value=kwargs['gpu_id'], interactive=not is_public)
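The two checkboxes above are renamed from the old gpu_id-only wording to use_gpu_id (see the '+' side later in this diff). The usual effect of such a flag when loading with Transformers, sketched here as an assumption about intent rather than h2oGPT's exact loader code, is choosing between pinning to one device and letting accelerate spread the model:

from transformers import AutoModelForCausalLM

def load_model(base_model, use_gpu_id=True, gpu_id=0, load_8bit=False):
    # pin to a single GPU when use_gpu_id is set, otherwise shard across all visible GPUs
    device_map = {"": gpu_id} if use_gpu_id and gpu_id >= 0 else "auto"
    return AutoModelForCausalLM.from_pretrained(base_model,
                                                device_map=device_map,
                                                load_in_8bit=load_8bit)
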
@@ -679,35 +703,52 @@ def go_gradio(**kwargs):
                     add_model_lora_server_button = gr.Button("Add new Model, Lora, Server url:port", scale=0,
                                                              size='sm', interactive=not is_public)
                 with gr.TabItem("System"):
                     admin_row = gr.Row()
                     with admin_row:
-
-
                     system_row = gr.Row(visible=not is_public)
                     with system_row:
                         with gr.Column():
                             with gr.Row():
-                                system_btn = gr.Button(value='Get System Info')
                                 system_text = gr.Textbox(label='System Info', interactive=False, show_copy_button=True)
                             with gr.Row():
                                 system_input = gr.Textbox(label='System Info Dict Password', interactive=True,
                                                           visible=not is_public)
-                                system_btn2 = gr.Button(value='Get System Info Dict', visible=not is_public)
                                 system_text2 = gr.Textbox(label='System Info Dict', interactive=False,
                                                           visible=not is_public, show_copy_button=True)
                             with gr.Row():
-                                system_btn3 = gr.Button(value='Get Hash', visible=not is_public)
                                 system_text3 = gr.Textbox(label='Hash', interactive=False,
                                                           visible=not is_public, show_copy_button=True)
 
                             with gr.Row():
-                                zip_btn = gr.Button("Zip")
                                 zip_text = gr.Textbox(label="Zip file name", interactive=False)
                                 file_output = gr.File(interactive=False, label="Zip file to Download")
                             with gr.Row():
-                                s3up_btn = gr.Button("S3UP")
                                 s3up_text = gr.Textbox(label='S3UP result', interactive=False)
-
         description = ""
         description += """<p><b> DISCLAIMERS: </b><ul><i><li>The model was trained on The Pile and other data, which may contain objectionable content. Use at own risk.</i></li>"""
         if kwargs['load_8bit']:
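In the System tab above, the "Get System Info Dict" button is paired with a password textbox. A minimal sketch of that gating pattern; the environment-variable name and returned payload are assumptions for illustration only:

import json
import os

def get_system_info_dict(password):
    # only reveal details when the caller knows the admin password (assumed stored in ADMIN_PASS)
    if password != os.getenv("ADMIN_PASS", ""):
        return json.dumps({})
    return json.dumps({"hash": os.getenv("GIT_HASH", "unknown")})

# in a Gradio app this would be wired roughly as:
# system_btn2.click(get_system_info_dict, inputs=system_input, outputs=system_text2)
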
@@ -718,17 +759,18 @@ def go_gradio(**kwargs):
         description += """<i><li>By using h2oGPT, you accept our <a href="https://github.com/h2oai/h2ogpt/blob/main/docs/tos.md">Terms of Service</a></i></li></ul></p>"""
         gr.Markdown(value=description, show_label=False, interactive=False)
 
-
 
         # Get flagged data
         zip_data1 = functools.partial(zip_data, root_dirs=['flagged_data_points', kwargs['save_dir']])
-        zip_btn.click(zip_data1, inputs=None, outputs=[file_output, zip_text], queue=False,
-
-        s3up_btn.click(s3up, inputs=zip_text, outputs=s3up_text, queue=False,
-
 
         def clear_file_list():
             return None
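zip_data1 above bundles the flagged data points and the save directory into one archive for the Zip/S3UP buttons. A small illustrative equivalent using only the standard library; directory names are examples, not the diff's configuration:

import os
import zipfile

def zip_dirs(root_dirs, zip_path="data.zip"):
    # walk each root and store files with their relative paths inside the archive
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for root_dir in root_dirs:
            for dirpath, _, filenames in os.walk(root_dir):
                for name in filenames:
                    full = os.path.join(dirpath, name)
                    zf.write(full, arcname=os.path.relpath(full, start=root_dir))
    return zip_path

zip_dirs(["flagged_data_points"])
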
@@ -746,182 +788,204 @@ def go_gradio(**kwargs):
|
|
746 |
return tuple([gr.update(interactive=True)] * len(args))
|
747 |
|
748 |
# Add to UserData
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
outputs=add_file_outputs + [sources_text],
|
767 |
queue=queue,
|
768 |
-
api_name='
|
769 |
|
770 |
-
if allow_upload_to_user_data and not allow_upload_to_my_data:
|
771 |
-
func1 = fileup_output.change
|
772 |
-
else:
|
773 |
-
func1 = add_to_shared_db_btn.click
|
774 |
# then no need for add buttons, only single changeable db
|
775 |
-
eventdb1a =
|
776 |
-
|
777 |
-
eventdb1 = eventdb1a.then(**add_file_kwargs, show_progress='
|
778 |
-
eventdb1.then(make_interactive, inputs=add_file_outputs, outputs=add_file_outputs,
|
|
|
779 |
|
780 |
# note for update_user_db_func output is ignored for db
|
781 |
|
782 |
def clear_textbox():
|
783 |
return gr.Textbox.update(value='')
|
784 |
|
785 |
-
update_user_db_url_func = functools.partial(
|
786 |
|
787 |
-
add_url_outputs = [url_text, langchain_mode
|
788 |
add_url_kwargs = dict(fn=update_user_db_url_func,
|
789 |
-
inputs=[url_text, my_db_state,
|
790 |
-
|
791 |
-
outputs=add_url_outputs + [sources_text],
|
792 |
queue=queue,
|
793 |
-
api_name='
|
794 |
|
795 |
-
|
796 |
-
|
797 |
-
else:
|
798 |
-
func2 = url_user_btn.click
|
799 |
-
eventdb2a = func2(fn=dummy_fun, inputs=url_text, outputs=url_text, queue=queue,
|
800 |
-
show_progress='minimal')
|
801 |
# work around https://github.com/gradio-app/gradio/issues/4733
|
802 |
eventdb2b = eventdb2a.then(make_non_interactive, inputs=add_url_outputs, outputs=add_url_outputs,
|
803 |
show_progress='minimal')
|
804 |
-
eventdb2 = eventdb2b.then(**add_url_kwargs, show_progress='
|
805 |
-
eventdb2.then(make_interactive, inputs=add_url_outputs, outputs=add_url_outputs,
|
|
|
806 |
|
807 |
-
update_user_db_txt_func = functools.partial(
|
808 |
-
add_text_outputs = [user_text_text, langchain_mode
|
809 |
add_text_kwargs = dict(fn=update_user_db_txt_func,
|
810 |
-
inputs=[user_text_text, my_db_state,
|
811 |
-
|
812 |
-
outputs=add_text_outputs + [sources_text],
|
813 |
queue=queue,
|
814 |
-
api_name='
|
815 |
)
|
816 |
-
|
817 |
-
|
818 |
-
else:
|
819 |
-
func3 = user_text_user_btn.click
|
820 |
-
|
821 |
-
eventdb3a = func3(fn=dummy_fun, inputs=user_text_text, outputs=user_text_text, queue=queue,
|
822 |
-
show_progress='minimal')
|
823 |
eventdb3b = eventdb3a.then(make_non_interactive, inputs=add_text_outputs, outputs=add_text_outputs,
|
824 |
show_progress='minimal')
|
825 |
-
eventdb3 = eventdb3b.then(**add_text_kwargs, show_progress='
|
826 |
-
eventdb3.then(make_interactive, inputs=add_text_outputs, outputs=add_text_outputs,
|
827 |
-
|
828 |
-
|
829 |
-
|
830 |
-
|
831 |
-
hf_embedding_model=hf_embedding_model,
|
832 |
-
enable_captions=enable_captions,
|
833 |
-
captions_model=captions_model,
|
834 |
-
enable_ocr=enable_ocr,
|
835 |
-
caption_loader=caption_loader,
|
836 |
-
verbose=kwargs['verbose'],
|
837 |
-
user_path=kwargs['user_path'],
|
838 |
-
n_jobs=kwargs['n_jobs'],
|
839 |
-
)
|
840 |
-
|
841 |
-
add_my_file_outputs = [fileup_output, langchain_mode, my_db_state, add_to_shared_db_btn, add_to_my_db_btn]
|
842 |
-
add_my_file_kwargs = dict(fn=update_my_db_func,
|
843 |
-
inputs=[fileup_output, my_db_state, add_to_shared_db_btn, add_to_my_db_btn,
|
844 |
-
chunk, chunk_size],
|
845 |
-
outputs=add_my_file_outputs + [sources_text],
|
846 |
-
queue=queue,
|
847 |
-
api_name='add_to_my' if allow_api and allow_upload_to_my_data else None)
|
848 |
-
|
849 |
-
if not allow_upload_to_user_data and allow_upload_to_my_data:
|
850 |
-
func4 = fileup_output.change
|
851 |
-
else:
|
852 |
-
func4 = add_to_my_db_btn.click
|
853 |
-
|
854 |
-
eventdb4a = func4(make_non_interactive, inputs=add_my_file_outputs,
|
855 |
-
outputs=add_my_file_outputs,
|
856 |
-
show_progress='minimal')
|
857 |
-
eventdb4 = eventdb4a.then(**add_my_file_kwargs, show_progress='minimal')
|
858 |
-
eventdb4.then(make_interactive, inputs=add_my_file_outputs, outputs=add_my_file_outputs,
|
859 |
-
show_progress='minimal')
|
860 |
-
|
861 |
-
update_my_db_url_func = functools.partial(update_my_db_func, is_url=True)
|
862 |
-
add_my_url_outputs = [url_text, langchain_mode, my_db_state, url_user_btn, url_my_btn]
|
863 |
-
add_my_url_kwargs = dict(fn=update_my_db_url_func,
|
864 |
-
inputs=[url_text, my_db_state, url_user_btn, url_my_btn,
|
865 |
-
chunk, chunk_size],
|
866 |
-
outputs=add_my_url_outputs + [sources_text],
|
867 |
-
queue=queue,
|
868 |
-
api_name='add_url_to_my' if allow_api and allow_upload_to_my_data else None)
|
869 |
-
if not allow_upload_to_user_data and allow_upload_to_my_data:
|
870 |
-
func5 = url_text.submit
|
871 |
-
else:
|
872 |
-
func5 = url_my_btn.click
|
873 |
-
eventdb5a = func5(fn=dummy_fun, inputs=url_text, outputs=url_text, queue=queue,
|
874 |
-
show_progress='minimal')
|
875 |
-
eventdb5b = eventdb5a.then(make_non_interactive, inputs=add_my_url_outputs, outputs=add_my_url_outputs,
|
876 |
-
show_progress='minimal')
|
877 |
-
eventdb5 = eventdb5b.then(**add_my_url_kwargs, show_progress='minimal')
|
878 |
-
eventdb5.then(make_interactive, inputs=add_my_url_outputs, outputs=add_my_url_outputs,
|
879 |
-
show_progress='minimal')
|
880 |
-
|
881 |
-
update_my_db_txt_func = functools.partial(update_my_db_func, is_txt=True)
|
882 |
-
|
883 |
-
add_my_text_outputs = [user_text_text, langchain_mode, my_db_state, user_text_user_btn,
|
884 |
-
user_text_my_btn]
|
885 |
-
add_my_text_kwargs = dict(fn=update_my_db_txt_func,
|
886 |
-
inputs=[user_text_text, my_db_state, user_text_user_btn, user_text_my_btn,
|
887 |
-
chunk, chunk_size],
|
888 |
-
outputs=add_my_text_outputs + [sources_text],
|
889 |
-
queue=queue,
|
890 |
-
api_name='add_txt_to_my' if allow_api and allow_upload_to_my_data else None)
|
891 |
-
if not allow_upload_to_user_data and allow_upload_to_my_data:
|
892 |
-
func6 = user_text_text.submit
|
893 |
-
else:
|
894 |
-
func6 = user_text_my_btn.click
|
895 |
-
|
896 |
-
eventdb6a = func6(fn=dummy_fun, inputs=user_text_text, outputs=user_text_text, queue=queue,
|
897 |
-
show_progress='minimal')
|
898 |
-
eventdb6b = eventdb6a.then(make_non_interactive, inputs=add_my_text_outputs, outputs=add_my_text_outputs,
|
899 |
-
show_progress='minimal')
|
900 |
-
eventdb6 = eventdb6b.then(**add_my_text_kwargs, show_progress='minimal')
|
901 |
-
eventdb6.then(make_interactive, inputs=add_my_text_outputs, outputs=add_my_text_outputs,
|
902 |
-
show_progress='minimal')
|
903 |
|
904 |
get_sources1 = functools.partial(get_sources, dbs=dbs, docs_state0=docs_state0)
|
905 |
|
906 |
# if change collection source, must clear doc selections from it to avoid inconsistency
|
907 |
def clear_doc_choice():
|
908 |
-
return gr.Dropdown.update(choices=docs_state0, value=
|
|
|
|
|
909 |
|
910 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
911 |
|
912 |
def update_dropdown(x):
|
913 |
return gr.Dropdown.update(choices=x, value=[docs_state0[0]])
|
914 |
|
915 |
-
|
916 |
-
|
917 |
-
|
918 |
-
|
|
|
|
|
919 |
.then(fn=update_dropdown, inputs=docs_state, outputs=document_choice)
|
920 |
# show button, else only show when add. Could add to above get_sources for download/dropdown, but bit much maybe
|
921 |
show_sources1 = functools.partial(get_source_files_given_langchain_mode, dbs=dbs)
|
922 |
eventdb8 = show_sources_btn.click(fn=show_sources1, inputs=[my_db_state, langchain_mode], outputs=sources_text,
|
923 |
api_name='show_sources' if allow_api else None)
|
924 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
925 |
# Get inputs to evaluate() and make_db()
|
926 |
# don't deepcopy, can contain model itself
|
927 |
all_kwargs = kwargs.copy()
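The hunk above also encodes the rule that switching the source collection must clear any per-document selections, to avoid stale choices carrying over. A reduced sketch of that wiring pattern, using illustrative component names rather than the diff's exact ones:

import gradio as gr

with gr.Blocks() as demo:
    collection = gr.Radio(choices=["UserData", "MyData", "LLM"], value="UserData", label="Collections")
    doc_choice = gr.Dropdown(choices=["All"], value=["All"], multiselect=True, label="Documents")

    def clear_doc_choice():
        # reset the document subset whenever the collection changes
        return gr.Dropdown.update(choices=["All"], value=["All"])

    collection.change(fn=clear_doc_choice, inputs=None, outputs=doc_choice, queue=False)
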
|
@@ -1008,9 +1072,6 @@ def go_gradio(**kwargs):
|
|
1008 |
**kwargs_evaluate
|
1009 |
)
|
1010 |
|
1011 |
-
dark_mode_btn = gr.Button("Dark Mode", variant="primary", size="sm")
|
1012 |
-
# FIXME: Could add exceptions for non-chat but still streaming
|
1013 |
-
exception_text = gr.Textbox(value="", visible=kwargs['chat'], label='Chat Exceptions', interactive=False)
|
1014 |
dark_mode_btn.click(
|
1015 |
None,
|
1016 |
None,
|
@@ -1020,20 +1081,19 @@ def go_gradio(**kwargs):
|
|
1020 |
queue=False,
|
1021 |
)
|
1022 |
|
1023 |
-
|
1024 |
-
|
1025 |
-
return gr.Column.update(visible=
|
1026 |
|
1027 |
-
|
1028 |
-
|
|
|
|
|
1029 |
|
1030 |
-
|
1031 |
-
|
1032 |
-
|
1033 |
-
|
1034 |
-
.then(col_chat_fun, chat, col_chat) \
|
1035 |
-
.then(context_fun, chat, context) \
|
1036 |
-
.then(col_chat_fun, chat, exception_text)
|
1037 |
|
1038 |
# examples after submit or any other buttons for chat or no chat
|
1039 |
if kwargs['examples'] is not None and kwargs['show_examples']:
|
@@ -1154,6 +1214,7 @@ def go_gradio(**kwargs):
|
|
1154 |
prompt_type1 = args_list[eval_func_param_names.index('prompt_type')]
|
1155 |
langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
|
1156 |
langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
|
|
|
1157 |
document_choice1 = args_list[eval_func_param_names.index('document_choice')]
|
1158 |
if not prompt_type1:
|
1159 |
# shouldn't have to specify if CLI launched model
|
@@ -1186,7 +1247,7 @@ def go_gradio(**kwargs):
|
|
1186 |
return history
|
1187 |
if user_message1 in ['', None, '\n']:
|
1188 |
if langchain_action1 in LangChainAction.QUERY.value and \
|
1189 |
-
DocumentChoices.
|
1190 |
or \
|
1191 |
langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]:
|
1192 |
# reject non-retry submit/enter
|
@@ -1249,6 +1310,7 @@ def go_gradio(**kwargs):
|
|
1249 |
args_list = args_list[:-3] # only keep rest needed for evaluate()
|
1250 |
langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
|
1251 |
langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
|
|
|
1252 |
document_choice1 = args_list[eval_func_param_names.index('document_choice')]
|
1253 |
if not history:
|
1254 |
print("No history", flush=True)
|
@@ -1261,7 +1323,7 @@ def go_gradio(**kwargs):
|
|
1261 |
history[-1][1] = None
|
1262 |
elif not instruction1:
|
1263 |
if langchain_action1 in LangChainAction.QUERY.value and \
|
1264 |
-
DocumentChoices.
|
1265 |
or \
|
1266 |
langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]:
|
1267 |
# if not retrying, then reject empty query
|
@@ -1432,11 +1494,11 @@ def go_gradio(**kwargs):
|
|
1432 |
)
|
1433 |
bot_args = dict(fn=bot,
|
1434 |
inputs=inputs_list + [model_state, my_db_state] + [text_output],
|
1435 |
-
outputs=[text_output,
|
1436 |
)
|
1437 |
retry_bot_args = dict(fn=functools.partial(bot, retry=True),
|
1438 |
inputs=inputs_list + [model_state, my_db_state] + [text_output],
|
1439 |
-
outputs=[text_output,
|
1440 |
)
|
1441 |
retry_user_args = dict(fn=functools.partial(user, retry=True),
|
1442 |
inputs=inputs_list + [text_output],
|
@@ -1454,11 +1516,11 @@ def go_gradio(**kwargs):
|
|
1454 |
)
|
1455 |
bot_args2 = dict(fn=bot,
|
1456 |
inputs=inputs_list2 + [model_state2, my_db_state] + [text_output2],
|
1457 |
-
outputs=[text_output2,
|
1458 |
)
|
1459 |
retry_bot_args2 = dict(fn=functools.partial(bot, retry=True),
|
1460 |
inputs=inputs_list2 + [model_state2, my_db_state] + [text_output2],
|
1461 |
-
outputs=[text_output2,
|
1462 |
)
|
1463 |
retry_user_args2 = dict(fn=functools.partial(user, retry=True),
|
1464 |
inputs=inputs_list2 + [text_output2],
|
@@ -1479,11 +1541,11 @@ def go_gradio(**kwargs):
|
|
1479 |
)
|
1480 |
all_bot_args = dict(fn=functools.partial(all_bot, model_states1=model_states),
|
1481 |
inputs=inputs_list + [my_db_state] + text_outputs,
|
1482 |
-
outputs=text_outputs + [
|
1483 |
)
|
1484 |
all_retry_bot_args = dict(fn=functools.partial(all_bot, model_states1=model_states, retry=True),
|
1485 |
inputs=inputs_list + [my_db_state] + text_outputs,
|
1486 |
-
outputs=text_outputs + [
|
1487 |
)
|
1488 |
all_retry_user_args = dict(fn=functools.partial(all_user, retry=True,
|
1489 |
sanitize_user_prompt=kwargs['sanitize_user_prompt'],
|
@@ -1681,13 +1743,26 @@ def go_gradio(**kwargs):
|
|
1681 |
return False
|
1682 |
return is_same
|
1683 |
|
1684 |
-
def save_chat(*args):
|
1685 |
args_list = list(args)
|
1686 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1687 |
# remove None histories
|
1688 |
chat_list_not_none = [x for x in chat_list if x and len(x) > 0 and len(x[0]) == 2 and x[0][1] is not None]
|
1689 |
-
|
1690 |
-
|
|
|
|
|
|
|
1691 |
short_chats = list(chat_state1.keys())
|
1692 |
if len(chat_list_not_none) > 0:
|
1693 |
# make short_chat key from only first history, based upon question that is same anyways
|
@@ -1699,13 +1774,14 @@ def go_gradio(**kwargs):
|
|
1699 |
if not already_exists:
|
1700 |
chat_state1[short_chat] = chat_list.copy()
|
1701 |
# clear chat_list so saved and then new conversation starts
|
1702 |
-
|
1703 |
-
|
|
|
|
|
|
|
|
|
1704 |
return tuple(ret_list)
|
1705 |
|
1706 |
-
def update_radio_chats(chat_state1):
|
1707 |
-
return gr.update(choices=list(chat_state1.keys()), value=None)
|
1708 |
-
|
1709 |
def switch_chat(chat_key, chat_state1, num_model_lock=0):
|
1710 |
chosen_chat = chat_state1[chat_key]
|
1711 |
# deal with possible different size of chat list vs. current list
|
@@ -1729,11 +1805,13 @@ def go_gradio(**kwargs):
|
|
1729 |
.then(clear_scores, outputs=[score_text, score_text2, score_text_nochat])
|
1730 |
|
1731 |
def remove_chat(chat_key, chat_state1):
|
1732 |
-
|
1733 |
-
|
|
|
1734 |
|
1735 |
-
remove_chat_btn.click(remove_chat,
|
1736 |
-
|
|
|
1737 |
|
1738 |
def get_chats1(chat_state1):
|
1739 |
base = 'chats'
|
@@ -1743,18 +1821,19 @@ def go_gradio(**kwargs):
|
|
1743 |
f.write(json.dumps(chat_state1, indent=2))
|
1744 |
return filename
|
1745 |
|
1746 |
-
export_chats_btn.click(get_chats1, inputs=chat_state, outputs=chats_file, queue=False,
|
1747 |
-
|
1748 |
|
1749 |
-
def add_chats_from_file(file, chat_state1,
|
1750 |
if not file:
|
1751 |
-
return chat_state1,
|
1752 |
if isinstance(file, str):
|
1753 |
files = [file]
|
1754 |
else:
|
1755 |
files = file
|
1756 |
if not files:
|
1757 |
-
return chat_state1,
|
|
|
1758 |
for file1 in files:
|
1759 |
try:
|
1760 |
if hasattr(file1, 'name'):
|
@@ -1763,33 +1842,42 @@ def go_gradio(**kwargs):
|
|
1763 |
new_chats = json.loads(f.read())
|
1764 |
for chat1_k, chat1_v in new_chats.items():
|
1765 |
# ignore chat1_k, regenerate and de-dup to avoid loss
|
1766 |
-
_, chat_state1 = save_chat(chat1_v, chat_state1)
|
1767 |
except BaseException as e:
|
1768 |
t, v, tb = sys.exc_info()
|
1769 |
ex = ''.join(traceback.format_exception(t, v, tb))
|
1770 |
-
|
1771 |
-
|
|
|
|
|
|
|
1772 |
|
1773 |
# note for update_user_db_func output is ignored for db
|
1774 |
-
|
1775 |
-
|
1776 |
-
|
1777 |
-
|
1778 |
-
|
1779 |
-
|
1780 |
-
|
1781 |
-
|
1782 |
-
|
1783 |
-
|
1784 |
-
|
|
|
1785 |
.then(deselect_radio_chats, inputs=None, outputs=radio_chats, queue=False) \
|
1786 |
.then(clear_scores, outputs=[score_text, score_text2, score_text_nochat])
|
1787 |
|
1788 |
-
|
1789 |
-
|
1790 |
-
|
1791 |
-
|
1792 |
-
|
|
|
|
|
|
|
|
|
|
|
1793 |
.then(update_radio_chats, inputs=chat_state, outputs=radio_chats,
|
1794 |
api_name='update_chats' if allow_api else None) \
|
1795 |
.then(clear_scores, outputs=[score_text, score_text2, score_text_nochat])
|
@@ -1823,7 +1911,7 @@ def go_gradio(**kwargs):
|
|
1823 |
.then(clear_torch_cache)
|
1824 |
|
1825 |
def load_model(model_name, lora_weights, server_name, model_state_old, prompt_type_old, load_8bit,
|
1826 |
-
|
1827 |
# ensure no API calls reach here
|
1828 |
if is_public:
|
1829 |
raise RuntimeError("Illegal access for %s" % model_name)
|
@@ -1867,7 +1955,7 @@ def go_gradio(**kwargs):
|
|
1867 |
all_kwargs1 = all_kwargs.copy()
|
1868 |
all_kwargs1['base_model'] = model_name.strip()
|
1869 |
all_kwargs1['load_8bit'] = load_8bit
|
1870 |
-
all_kwargs1['
|
1871 |
all_kwargs1['gpu_id'] = int(gpu_id) # detranscribe
|
1872 |
model_lower = model_name.strip().lower()
|
1873 |
if model_lower in inv_prompt_type_to_model_lower:
|
@@ -1920,8 +2008,9 @@ def go_gradio(**kwargs):
|
|
1920 |
|
1921 |
get_prompt_str_func1 = functools.partial(get_prompt_str, which=1)
|
1922 |
get_prompt_str_func2 = functools.partial(get_prompt_str, which=2)
|
1923 |
-
prompt_type.change(fn=get_prompt_str_func1, inputs=[prompt_type, prompt_dict], outputs=prompt_dict)
|
1924 |
-
prompt_type2.change(fn=get_prompt_str_func2, inputs=[prompt_type2, prompt_dict2], outputs=prompt_dict2
|
|
|
1925 |
|
1926 |
def dropdown_prompt_type_list(x):
|
1927 |
return gr.Dropdown.update(value=x)
|
@@ -1931,7 +2020,7 @@ def go_gradio(**kwargs):
|
|
1931 |
|
1932 |
load_model_args = dict(fn=load_model,
|
1933 |
inputs=[model_choice, lora_choice, server_choice, model_state, prompt_type,
|
1934 |
-
model_load8bit_checkbox,
|
1935 |
outputs=[model_state, model_used, lora_used, server_used,
|
1936 |
# if prompt_type changes, prompt_dict will change via change rule
|
1937 |
prompt_type, max_new_tokens, min_new_tokens,
|
@@ -1939,28 +2028,27 @@ def go_gradio(**kwargs):
|
|
1939 |
prompt_update_args = dict(fn=dropdown_prompt_type_list, inputs=prompt_type, outputs=prompt_type)
|
1940 |
chatbot_update_args = dict(fn=chatbot_list, inputs=[text_output, model_used], outputs=text_output)
|
1941 |
nochat_update_args = dict(fn=chatbot_list, inputs=[text_output_nochat, model_used], outputs=text_output_nochat)
|
1942 |
-
|
1943 |
-
|
1944 |
-
|
1945 |
-
|
1946 |
-
|
1947 |
-
|
1948 |
|
1949 |
load_model_args2 = dict(fn=load_model,
|
1950 |
inputs=[model_choice2, lora_choice2, server_choice2, model_state2, prompt_type2,
|
1951 |
-
model_load8bit_checkbox2,
|
1952 |
outputs=[model_state2, model_used2, lora_used2, server_used2,
|
1953 |
# if prompt_type2 changes, prompt_dict2 will change via change rule
|
1954 |
prompt_type2, max_new_tokens2, min_new_tokens2
|
1955 |
])
|
1956 |
prompt_update_args2 = dict(fn=dropdown_prompt_type_list, inputs=prompt_type2, outputs=prompt_type2)
|
1957 |
chatbot_update_args2 = dict(fn=chatbot_list, inputs=[text_output2, model_used2], outputs=text_output2)
|
1958 |
-
|
1959 |
-
|
1960 |
-
|
1961 |
-
|
1962 |
-
|
1963 |
-
.then(clear_torch_cache)
|
1964 |
|
1965 |
def dropdown_model_lora_server_list(model_list0, model_x,
|
1966 |
lora_list0, lora_x,
|
@@ -2009,7 +2097,8 @@ def go_gradio(**kwargs):
|
|
2009 |
server_options_state],
|
2010 |
queue=False)
|
2011 |
|
2012 |
-
go_btn.click(lambda: gr.update(visible=False), None, go_btn, api_name="go" if allow_api else None,
|
|
|
2013 |
.then(lambda: gr.update(visible=True), None, normal_block, queue=False) \
|
2014 |
.then(**load_model_args, queue=False).then(**prompt_update_args, queue=False)
|
2015 |
|
@@ -2077,23 +2166,11 @@ def go_gradio(**kwargs):
|
|
2077 |
def get_hash():
|
2078 |
return kwargs['git_hash']
|
2079 |
|
2080 |
-
system_btn3.click(get_hash,
|
2081 |
-
|
2082 |
-
|
2083 |
-
|
2084 |
-
|
2085 |
-
|
2086 |
-
# don't pass text_output, don't want to clear output, just stop it
|
2087 |
-
# cancel only stops outer generation, not inner generation or non-generation
|
2088 |
-
stop_btn.click(lambda: None, None, None,
|
2089 |
-
cancels=submits1 + submits2 + submits3 +
|
2090 |
-
submits4 +
|
2091 |
-
[submit_event_nochat, submit_event_nochat2] +
|
2092 |
-
[eventdb1, eventdb2, eventdb3,
|
2093 |
-
eventdb4, eventdb5, eventdb6] +
|
2094 |
-
[eventdb7, eventdb8, eventdb9]
|
2095 |
-
,
|
2096 |
-
queue=False, api_name='stop' if allow_api else None).then(clear_torch_cache, queue=False)
|
2097 |
|
2098 |
def count_chat_tokens(model_state1, chat1, prompt_type1, prompt_dict1,
|
2099 |
memory_restriction_level1=0,
|
@@ -2121,9 +2198,25 @@ def go_gradio(**kwargs):
|
|
2121 |
count_chat_tokens_func = functools.partial(count_chat_tokens,
|
2122 |
memory_restriction_level1=memory_restriction_level,
|
2123 |
keep_sources_in_context1=kwargs['keep_sources_in_context'])
|
2124 |
-
count_chat_tokens_btn.click(fn=count_chat_tokens,
|
2125 |
-
|
2126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2127 |
|
2128 |
demo.load(None, None, None, _js=get_dark_js() if kwargs['h2ocolors'] and False else None) # light best
|
2129 |
|
@@ -2196,6 +2289,8 @@ def get_inputs_list(inputs_dict, model_lower, model_id=1):
|
|
2196 |
|
2197 |
|
2198 |
def get_sources(db1, langchain_mode, dbs=None, docs_state0=None):
|
|
|
|
|
2199 |
if langchain_mode in ['ChatLLM', 'LLM']:
|
2200 |
source_files_added = "NA"
|
2201 |
source_list = []
|
@@ -2226,9 +2321,24 @@ def get_sources(db1, langchain_mode, dbs=None, docs_state0=None):
|
|
2226 |
return sources_file, source_list
|
2227 |
|
2228 |
|
2229 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2230 |
try:
|
2231 |
-
return _update_user_db(file, db1,
|
|
|
|
|
2232 |
except BaseException as e:
|
2233 |
print(traceback.format_exc(), flush=True)
|
2234 |
# gradio has issues if except, so fail semi-gracefully, else would hang forever in processing textbox
|
@@ -2245,15 +2355,14 @@ def update_user_db(file, db1, x, y, *args, dbs=None, langchain_mode='UserData',
|
|
2245 |
</body>
|
2246 |
</html>
|
2247 |
""".format(ex_str)
|
2248 |
-
|
2249 |
-
|
2250 |
-
else:
|
2251 |
-
return None, langchain_mode, x, y, source_files_added
|
2252 |
finally:
|
2253 |
clear_torch_cache()
|
2254 |
|
2255 |
|
2256 |
def get_lock_file(db1, langchain_mode):
|
|
|
2257 |
assert len(db1) == 2 and db1[1] is not None and isinstance(db1[1], str)
|
2258 |
user_id = db1[1]
|
2259 |
base_path = 'locks'
|
@@ -2262,7 +2371,10 @@ def get_lock_file(db1, langchain_mode):
|
|
2262 |
return lock_file
|
2263 |
|
2264 |
|
2265 |
-
def _update_user_db(file,
|
|
|
|
|
|
|
2266 |
user_path=None,
|
2267 |
use_openai_embedding=None,
|
2268 |
hf_embedding_model=None,
|
@@ -2273,6 +2385,9 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2273 |
verbose=None,
|
2274 |
is_url=None, is_txt=None,
|
2275 |
n_jobs=-1):
|
|
|
|
|
|
|
2276 |
assert use_openai_embedding is not None
|
2277 |
assert hf_embedding_model is not None
|
2278 |
assert caption_loader is not None
|
@@ -2281,6 +2396,8 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2281 |
assert enable_ocr is not None
|
2282 |
assert verbose is not None
|
2283 |
|
|
|
|
|
2284 |
if dbs is None:
|
2285 |
dbs = {}
|
2286 |
assert isinstance(dbs, dict), "Wrong type for dbs: %s" % str(type(dbs))
|
@@ -2295,6 +2412,14 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2295 |
if not isinstance(file, (list, tuple, typing.Generator)) and isinstance(file, str):
|
2296 |
file = [file]
|
2297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2298 |
if langchain_mode == 'UserData' and user_path is not None:
|
2299 |
# move temp files from gradio upload to stable location
|
2300 |
for fili, fil in enumerate(file):
|
@@ -2323,6 +2448,7 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2323 |
caption_loader=caption_loader,
|
2324 |
)
|
2325 |
exceptions = [x for x in sources if x.metadata.get('exception')]
|
|
|
2326 |
sources = [x for x in sources if 'exception' not in x.metadata]
|
2327 |
|
2328 |
lock_file = get_lock_file(db1, langchain_mode)
|
@@ -2349,7 +2475,7 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2349 |
if db is not None:
|
2350 |
db1[0] = db
|
2351 |
source_files_added = get_source_files(db=db1[0], exceptions=exceptions)
|
2352 |
-
return None, langchain_mode,
|
2353 |
else:
|
2354 |
from gpt_langchain import get_persist_directory
|
2355 |
persist_directory = get_persist_directory(langchain_mode)
|
@@ -2367,10 +2493,10 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2367 |
hf_embedding_model=hf_embedding_model)
|
2368 |
dbs[langchain_mode] = db
|
2369 |
# NOTE we do not return db, because function call always same code path
|
2370 |
-
# return dbs[langchain_mode]
|
2371 |
# db in this code path is updated in place
|
2372 |
source_files_added = get_source_files(db=dbs[langchain_mode], exceptions=exceptions)
|
2373 |
-
return None, langchain_mode,
|
2374 |
|
2375 |
|
2376 |
def get_db(db1, langchain_mode, dbs=None):
|
|
|
20 |
from iterators import TimeoutIterator
|
21 |
|
22 |
from gradio_utils.css import get_css
|
23 |
+
from gradio_utils.prompt_form import make_chatbots
|
24 |
|
25 |
# This is a hack to prevent Gradio from phoning home when it gets imported
|
26 |
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
|
|
|
56 |
from prompter import prompt_type_to_model_name, prompt_types_strings, inv_prompt_type_to_model_lower, non_hf_types, \
|
57 |
get_prompt
|
58 |
from utils import get_githash, flatten_list, zip_data, s3up, clear_torch_cache, get_torch_allocated, system_info_print, \
|
59 |
+
ping, get_short_name, makedirs, get_kwargs, remove, system_info, ping_gpu, get_url, get_local_ip
|
60 |
from gen import get_model, languages_covered, evaluate, score_qa, langchain_modes, inputs_kwargs_list, scratch_base_dir, \
|
61 |
get_max_max_new_tokens, get_minmax_top_k_docs, history_to_context, langchain_actions
|
62 |
from evaluate_params import eval_func_param_names, no_default_param_names, eval_func_param_names_defaults
|
|
|
118 |
allow_upload = allow_upload_to_user_data or allow_upload_to_my_data
|
119 |
kwargs.update(locals())
|
120 |
|
121 |
+
# import control
|
122 |
+
if kwargs['langchain_mode'] != 'Disabled':
|
123 |
+
from gpt_langchain import file_types, have_arxiv
|
124 |
+
else:
|
125 |
+
have_arxiv = False
|
126 |
+
file_types = []
|
127 |
+
|
128 |
if 'mbart-' in kwargs['model_lower']:
|
129 |
instruction_label_nochat = "Text to translate"
|
130 |
else:
|
|
|
141 |
"""
|
142 |
else:
|
143 |
description = more_info
|
144 |
+
description_bottom = "If this host is busy, try [Multi-Model](https://gpt.h2o.ai), [Falcon 40B](http://falcon.h2o.ai), [HF Spaces1](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot) or [HF Spaces2](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot2)<br>"
|
|
|
145 |
if is_hf:
|
146 |
description_bottom += '''<a href="https://huggingface.co/spaces/h2oai/h2ogpt-chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" style="white-space: nowrap" alt="Duplicate Space"></a>'''
|
147 |
|
|
|
166 |
theme_kwargs = dict()
|
167 |
if kwargs['gradio_size'] == 'xsmall':
|
168 |
theme_kwargs.update(dict(spacing_size=spacing_xsm, text_size=text_xsm, radius_size=radius_xsm))
|
169 |
+
elif kwargs['gradio_size'] in [None, 'small']:
|
170 |
theme_kwargs.update(dict(spacing_size=gr.themes.sizes.spacing_sm, text_size=gr.themes.sizes.text_sm,
|
171 |
radius_size=gr.themes.sizes.spacing_sm))
|
172 |
elif kwargs['gradio_size'] == 'large':
|
|
|
268 |
model_options_state = gr.State([model_options])
|
269 |
lora_options_state = gr.State([lora_options])
|
270 |
server_options_state = gr.State([server_options])
|
271 |
+
my_db_state = gr.State([None, None])
|
|
|
272 |
chat_state = gr.State({})
|
273 |
+
docs_state00 = kwargs['document_choice'] + [DocumentChoices.All.name]
|
|
|
274 |
docs_state0 = []
|
275 |
[docs_state0.append(x) for x in docs_state00 if x not in docs_state0]
|
276 |
+
docs_state = gr.State(docs_state0)
|
277 |
+
viewable_docs_state0 = []
|
278 |
+
viewable_docs_state = gr.State(viewable_docs_state0)
|
279 |
gr.Markdown(f"""
|
280 |
{get_h2o_title(title, description) if kwargs['h2ocolors'] else get_simple_title(title, description)}
|
281 |
""")
|
|
|
288 |
res_value = "Response Score: NA" if not kwargs[
|
289 |
'model_lock'] else "Response Scores: %s" % nas
|
290 |
|
291 |
+
if kwargs['langchain_mode'] != LangChainMode.DISABLED.value:
|
292 |
+
extra_prompt_form = ". For summarization, empty submission uses first top_k_docs documents."
|
293 |
+
else:
|
294 |
+
extra_prompt_form = ""
|
295 |
+
if kwargs['input_lines'] > 1:
|
296 |
+
instruction_label = "Shift-Enter to Submit, Enter for more lines%s" % extra_prompt_form
|
297 |
+
else:
|
298 |
+
instruction_label = "Enter to Submit, Shift-Enter for more lines%s" % extra_prompt_form
|
299 |
+
|
300 |
+
normal_block = gr.Row(visible=not base_wanted, equal_height=False)
|
301 |
with normal_block:
|
302 |
+
side_bar = gr.Column(elem_id="col_container", scale=1, min_width=100)
|
303 |
+
with side_bar:
|
304 |
+
with gr.Accordion("Chats", open=False, visible=True):
|
305 |
+
radio_chats = gr.Radio(value=None, label="Saved Chats", show_label=False,
|
306 |
+
visible=True, interactive=True,
|
307 |
+
type='value')
|
308 |
+
upload_visible = kwargs['langchain_mode'] != 'Disabled' and allow_upload
|
309 |
+
with gr.Accordion("Upload", open=False, visible=upload_visible):
|
310 |
+
with gr.Column():
|
311 |
+
with gr.Row(equal_height=False):
|
312 |
+
file_types_str = '[' + ' '.join(file_types) + ' URL ArXiv TEXT' + ']'
|
313 |
+
fileup_output = gr.File(label=f'Upload {file_types_str}',
|
314 |
+
show_label=False,
|
315 |
+
file_types=file_types,
|
316 |
+
file_count="multiple",
|
317 |
+
scale=1,
|
318 |
+
min_width=0,
|
319 |
+
elem_id="warning", elem_classes="feedback")
|
320 |
+
url_visible = kwargs['langchain_mode'] != 'Disabled' and allow_upload and enable_url_upload
|
321 |
+
url_label = 'URL/ArXiv' if have_arxiv else 'URL'
|
322 |
+
url_text = gr.Textbox(label=url_label,
|
323 |
+
# placeholder="Enter Submits",
|
324 |
+
max_lines=1,
|
325 |
+
interactive=True)
|
326 |
+
text_visible = kwargs['langchain_mode'] != 'Disabled' and allow_upload and enable_text_upload
|
327 |
+
user_text_text = gr.Textbox(label='Paste Text',
|
328 |
+
# placeholder="Enter Submits",
|
329 |
+
interactive=True,
|
330 |
+
visible=text_visible)
|
331 |
+
github_textbox = gr.Textbox(label="Github URL", visible=False) # FIXME WIP
|
332 |
+
database_visible = kwargs['langchain_mode'] != 'Disabled'
|
333 |
+
with gr.Accordion("Database", open=False, visible=database_visible):
|
334 |
+
if is_hf:
|
335 |
+
# don't show 'wiki' since only usually useful for internal testing at moment
|
336 |
+
no_show_modes = ['Disabled', 'wiki']
|
337 |
+
else:
|
338 |
+
no_show_modes = ['Disabled']
|
339 |
+
allowed_modes = visible_langchain_modes.copy()
|
340 |
+
allowed_modes = [x for x in allowed_modes if x in dbs]
|
341 |
+
allowed_modes += ['ChatLLM', 'LLM']
|
342 |
+
if allow_upload_to_my_data and 'MyData' not in allowed_modes:
|
343 |
+
allowed_modes += ['MyData']
|
344 |
+
if allow_upload_to_user_data and 'UserData' not in allowed_modes:
|
345 |
+
allowed_modes += ['UserData']
|
346 |
+
langchain_mode = gr.Radio(
|
347 |
+
[x for x in langchain_modes if x in allowed_modes and x not in no_show_modes],
|
348 |
+
value=kwargs['langchain_mode'],
|
349 |
+
label="Collections",
|
350 |
+
show_label=True,
|
351 |
+
visible=kwargs['langchain_mode'] != 'Disabled',
|
352 |
+
min_width=100)
|
353 |
+
document_subset = gr.Radio([x.name for x in DocumentChoices],
|
354 |
+
label="Subset",
|
355 |
+
value=DocumentChoices.Relevant.name,
|
356 |
+
interactive=True,
|
357 |
+
)
|
358 |
+
allowed_actions = [x for x in langchain_actions if x in visible_langchain_actions]
|
359 |
+
langchain_action = gr.Radio(
|
360 |
+
allowed_actions,
|
361 |
+
value=allowed_actions[0] if len(allowed_actions) > 0 else None,
|
362 |
+
label="Action",
|
363 |
+
visible=True)
|
364 |
+
col_tabs = gr.Column(elem_id="col_container", scale=10)
|
365 |
+
with (col_tabs, gr.Tabs()):
|
366 |
+
with gr.TabItem("Chat"):
|
367 |
+
if kwargs['langchain_mode'] == 'Disabled':
|
368 |
+
text_output_nochat = gr.Textbox(lines=5, label=output_label0, show_copy_button=True,
|
369 |
+
visible=not kwargs['chat'])
|
370 |
+
else:
|
371 |
+
# text looks a bit worse, but HTML links work
|
372 |
+
text_output_nochat = gr.HTML(label=output_label0, visible=not kwargs['chat'])
|
373 |
+
with gr.Row():
|
374 |
+
# NOCHAT
|
375 |
instruction_nochat = gr.Textbox(
|
376 |
lines=kwargs['input_lines'],
|
377 |
label=instruction_label_nochat,
|
378 |
placeholder=kwargs['placeholder_instruction'],
|
379 |
+
visible=not kwargs['chat'],
|
380 |
)
|
381 |
iinput_nochat = gr.Textbox(lines=4, label="Input context for Instruction",
|
382 |
+
placeholder=kwargs['placeholder_input'],
|
383 |
+
visible=not kwargs['chat'])
|
384 |
+
submit_nochat = gr.Button("Submit", size='sm', visible=not kwargs['chat'])
|
385 |
+
flag_btn_nochat = gr.Button("Flag", size='sm', visible=not kwargs['chat'])
|
386 |
+
score_text_nochat = gr.Textbox("Response Score: NA", show_label=False,
|
387 |
+
visible=not kwargs['chat'])
|
388 |
+
submit_nochat_api = gr.Button("Submit nochat API", visible=False)
|
389 |
+
inputs_dict_str = gr.Textbox(label='API input for nochat', show_label=False, visible=False)
|
390 |
+
text_output_nochat_api = gr.Textbox(lines=5, label='API nochat output', visible=False,
|
391 |
+
show_copy_button=True)
|
392 |
+
|
393 |
+
# CHAT
|
394 |
+
col_chat = gr.Column(visible=kwargs['chat'])
|
395 |
+
with col_chat:
|
396 |
+
with gr.Row(): # elem_id='prompt-form-area'):
|
397 |
+
with gr.Column(scale=50):
|
398 |
+
instruction = gr.Textbox(
|
399 |
+
lines=kwargs['input_lines'],
|
400 |
+
label='Ask anything',
|
401 |
+
placeholder=instruction_label,
|
402 |
+
info=None,
|
403 |
+
elem_id='prompt-form',
|
404 |
+
container=True,
|
405 |
+
)
|
406 |
+
submit_buttons = gr.Row(equal_height=False)
|
407 |
+
with submit_buttons:
|
408 |
+
mw1 = 50
|
409 |
+
mw2 = 50
|
410 |
+
with gr.Column(min_width=mw1):
|
411 |
+
submit = gr.Button(value='Submit', variant='primary', scale=0, size='sm',
|
412 |
+
min_width=mw1)
|
413 |
+
stop_btn = gr.Button(value="Stop", variant='secondary', scale=0, size='sm',
|
414 |
+
min_width=mw1)
|
415 |
+
save_chat_btn = gr.Button("Save", size='sm', min_width=mw1)
|
416 |
+
with gr.Column(min_width=mw2):
|
417 |
+
retry_btn = gr.Button("Redo", size='sm', min_width=mw2)
|
418 |
+
undo = gr.Button("Undo", size='sm', min_width=mw2)
|
419 |
+
clear_chat_btn = gr.Button(value="Clear", size='sm', min_width=mw2)
|
420 |
+
text_output, text_output2, text_outputs = make_chatbots(output_label0, output_label0_model2,
|
421 |
+
**kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
with gr.Row():
|
424 |
+
with gr.Column(visible=kwargs['score_model']):
|
425 |
+
score_text = gr.Textbox(res_value,
|
426 |
+
show_label=False,
|
427 |
+
visible=True)
|
428 |
+
score_text2 = gr.Textbox("Response Score2: NA", show_label=False,
|
429 |
+
visible=False and not kwargs['model_lock'])
|
430 |
+
|
431 |
+
with gr.TabItem("Document Selection"):
|
432 |
+
document_choice = gr.Dropdown(docs_state0,
|
433 |
+
label="Select Subset of Document(s) %s" % file_types_str,
|
434 |
+
value='All',
|
435 |
+
interactive=True,
|
436 |
+
multiselect=True,
|
437 |
+
)
|
438 |
+
sources_visible = kwargs['langchain_mode'] != 'Disabled' and enable_sources_list
|
439 |
+
with gr.Row():
|
440 |
+
get_sources_btn = gr.Button(value="Update UI with Document(s) from DB", scale=0, size='sm',
|
441 |
+
visible=sources_visible)
|
442 |
+
show_sources_btn = gr.Button(value="Show Sources from DB", scale=0, size='sm',
|
443 |
+
visible=sources_visible)
|
444 |
+
refresh_sources_btn = gr.Button(value="Update DB with new/changed files on disk", scale=0,
|
445 |
+
size='sm',
|
446 |
+
visible=sources_visible and allow_upload_to_user_data)
|
447 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
sources_row = gr.Row(visible=kwargs['langchain_mode'] != 'Disabled' and enable_sources_list,
|
449 |
equal_height=False)
|
450 |
with sources_row:
|
451 |
with gr.Column(scale=1):
|
452 |
file_source = gr.File(interactive=False,
|
453 |
+
label="Download File w/Sources")
|
454 |
with gr.Column(scale=2):
|
455 |
sources_text = gr.HTML(label='Sources Added', interactive=False)
|
456 |
|
457 |
+
doc_exception_text = gr.Textbox(value="", visible=True, label='Document Exceptions',
|
458 |
+
interactive=False)
|
459 |
+
with gr.TabItem("Document Viewer"):
|
460 |
+
with gr.Row():
|
461 |
+
with gr.Column(scale=2):
|
462 |
+
get_viewable_sources_btn = gr.Button(value="Update UI with Document(s) from DB", scale=0,
|
463 |
+
size='sm',
|
464 |
+
visible=sources_visible)
|
465 |
+
view_document_choice = gr.Dropdown(viewable_docs_state0,
|
466 |
+
label="Select Single Document",
|
467 |
+
value=None,
|
468 |
+
interactive=True,
|
469 |
+
multiselect=False,
|
470 |
+
)
|
471 |
+
with gr.Column(scale=4):
|
472 |
+
pass
|
473 |
+
document = 'http://infolab.stanford.edu/pub/papers/google.pdf'
|
474 |
+
doc_view = gr.HTML(visible=False)
|
475 |
+
doc_view2 = gr.Dataframe(visible=False)
|
476 |
+
doc_view3 = gr.JSON(visible=False)
|
477 |
+
doc_view4 = gr.Markdown(visible=False)
|
478 |
+
|
479 |
with gr.TabItem("Chat History"):
|
480 |
+
with gr.Row():
|
481 |
+
with gr.Column(scale=1):
|
482 |
+
remove_chat_btn = gr.Button(value="Remove Selected Saved Chats", visible=True, size='sm')
|
483 |
+
flag_btn = gr.Button("Flag Current Chat", size='sm')
|
484 |
+
export_chats_btn = gr.Button(value="Export Chats to Download", size='sm')
|
485 |
+
with gr.Column(scale=4):
|
486 |
+
pass
|
487 |
+
with gr.Row():
|
488 |
+
chats_file = gr.File(interactive=False, label="Download Exported Chats")
|
489 |
+
chatsup_output = gr.File(label="Upload Chat File(s)",
|
490 |
+
file_types=['.json'],
|
491 |
+
file_count='multiple',
|
492 |
+
elem_id="warning", elem_classes="feedback")
|
493 |
with gr.Row():
|
494 |
if 'mbart-' in kwargs['model_lower']:
|
495 |
src_lang = gr.Dropdown(list(languages_covered().keys()),
|
|
|
498 |
tgt_lang = gr.Dropdown(list(languages_covered().keys()),
|
499 |
value=kwargs['tgt_lang'],
|
500 |
label="Output Language")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
|
502 |
+
chat_exception_text = gr.Textbox(value="", visible=True, label='Chat Exceptions',
|
503 |
+
interactive=False)
|
504 |
with gr.TabItem("Expert"):
|
505 |
with gr.Row():
|
506 |
with gr.Column():
|
|
|
579 |
info="Directly pre-appended without prompt processing",
|
580 |
interactive=not is_public)
|
581 |
chat = gr.components.Checkbox(label="Chat mode", value=kwargs['chat'],
|
582 |
+
visible=False, # no longer support nochat in UI
|
583 |
interactive=not is_public,
|
584 |
)
|
585   count_chat_tokens_btn = gr.Button(value="Count Chat Tokens",

638   model_load8bit_checkbox = gr.components.Checkbox(
639   label="Load 8-bit [requires support]",
640   value=kwargs['load_8bit'], interactive=not is_public)
641 + model_use_gpu_id_checkbox = gr.components.Checkbox(
642   label="Choose Devices [If not Checked, use all GPUs]",
643 + value=kwargs['use_gpu_id'], interactive=not is_public)
644   model_gpu = gr.Dropdown(n_gpus_list,
645   label="GPU ID [-1 = all GPUs, if Choose is enabled]",
646   value=kwargs['gpu_id'], interactive=not is_public)

673   model_load8bit_checkbox2 = gr.components.Checkbox(
674   label="Load 8-bit 2 [requires support]",
675   value=kwargs['load_8bit'], interactive=not is_public)
676 + model_use_gpu_id_checkbox2 = gr.components.Checkbox(
677   label="Choose Devices 2 [If not Checked, use all GPUs]",
678   value=kwargs[
679 + 'use_gpu_id'], interactive=not is_public)
680   model_gpu2 = gr.Dropdown(n_gpus_list,
681   label="GPU ID 2 [-1 = all GPUs, if choose is enabled]",
682   value=kwargs['gpu_id'], interactive=not is_public)

703   add_model_lora_server_button = gr.Button("Add new Model, Lora, Server url:port", scale=0,
704   size='sm', interactive=not is_public)
705   with gr.TabItem("System"):
706 + with gr.Row():
707 + with gr.Column(scale=1):
708 + side_bar_text = gr.Textbox('on', visible=False, interactive=False)
709 + submit_buttons_text = gr.Textbox('on', visible=False, interactive=False)
710 +
711 + side_bar_btn = gr.Button("Toggle SideBar", variant="secondary", size="sm")
712 + submit_buttons_btn = gr.Button("Toggle Submit Buttons", variant="secondary", size="sm")
713 + col_tabs_scale = gr.Slider(minimum=1, maximum=20, value=10, step=1, label='Window Size')
714 + text_outputs_height = gr.Slider(minimum=100, maximum=1000, value=kwargs['height'] or 400,
715 + step=100, label='Chat Height')
716 + dark_mode_btn = gr.Button("Dark Mode", variant="secondary", size="sm")
717 + with gr.Column(scale=4):
718 + pass
719   admin_row = gr.Row()
720   with admin_row:
721 + with gr.Column(scale=1):
722 + admin_pass_textbox = gr.Textbox(label="Admin Password", type='password', visible=is_public)
723 + admin_btn = gr.Button(value="Admin Access", visible=is_public, size='sm')
724 + with gr.Column(scale=4):
725 + pass
726   system_row = gr.Row(visible=not is_public)
727   with system_row:
728   with gr.Column():
729   with gr.Row():
730 + system_btn = gr.Button(value='Get System Info', size='sm')
731   system_text = gr.Textbox(label='System Info', interactive=False, show_copy_button=True)
732   with gr.Row():
733   system_input = gr.Textbox(label='System Info Dict Password', interactive=True,
734   visible=not is_public)
735 + system_btn2 = gr.Button(value='Get System Info Dict', visible=not is_public, size='sm')
736   system_text2 = gr.Textbox(label='System Info Dict', interactive=False,
737   visible=not is_public, show_copy_button=True)
738   with gr.Row():
739 + system_btn3 = gr.Button(value='Get Hash', visible=not is_public, size='sm')
740   system_text3 = gr.Textbox(label='Hash', interactive=False,
741   visible=not is_public, show_copy_button=True)
742
743   with gr.Row():
744 + zip_btn = gr.Button("Zip", size='sm')
745   zip_text = gr.Textbox(label="Zip file name", interactive=False)
746   file_output = gr.File(interactive=False, label="Zip file to Download")
747   with gr.Row():
748 + s3up_btn = gr.Button("S3UP", size='sm')
749   s3up_text = gr.Textbox(label='S3UP result', interactive=False)
750 +
751 + with gr.TabItem("Terms of Service"):
752   description = ""
753   description += """<p><b> DISCLAIMERS: </b><ul><i><li>The model was trained on The Pile and other data, which may contain objectionable content. Use at own risk.</i></li>"""
754   if kwargs['load_8bit']:

759   description += """<i><li>By using h2oGPT, you accept our <a href="https://github.com/h2oai/h2ogpt/blob/main/docs/tos.md">Terms of Service</a></i></li></ul></p>"""
760   gr.Markdown(value=description, show_label=False, interactive=False)
761
762 + with gr.TabItem("Hosts"):
763 + gr.Markdown(f"""
764 + {description_bottom}
765 + {task_info_md}
766 + """)
767
768   # Get flagged data
769   zip_data1 = functools.partial(zip_data, root_dirs=['flagged_data_points', kwargs['save_dir']])
770 + zip_event = zip_btn.click(zip_data1, inputs=None, outputs=[file_output, zip_text], queue=False,
771 + api_name='zip_data' if allow_api else None)
772 + s3up_event = s3up_btn.click(s3up, inputs=zip_text, outputs=s3up_text, queue=False,
773 + api_name='s3up_data' if allow_api else None)
774
775   def clear_file_list():
776   return None

788   return tuple([gr.update(interactive=True)] * len(args))
789
790   # Add to UserData
791 + update_db_func = functools.partial(update_user_db,
792 + dbs=dbs,
793 + db_type=db_type,
794 + use_openai_embedding=use_openai_embedding,
795 + hf_embedding_model=hf_embedding_model,
796 + enable_captions=enable_captions,
797 + captions_model=captions_model,
798 + enable_ocr=enable_ocr,
799 + caption_loader=caption_loader,
800 + verbose=kwargs['verbose'],
801 + user_path=kwargs['user_path'],
802 + n_jobs=kwargs['n_jobs'],
803 + )
804 + add_file_outputs = [fileup_output, langchain_mode]
805 + add_file_kwargs = dict(fn=update_db_func,
806 + inputs=[fileup_output, my_db_state, chunk, chunk_size, langchain_mode],
807 + outputs=add_file_outputs + [sources_text, doc_exception_text],
808   queue=queue,
809 + api_name='add_file' if allow_api and allow_upload_to_user_data else None)
810
811   # then no need for add buttons, only single changeable db
812 + eventdb1a = fileup_output.upload(make_non_interactive, inputs=add_file_outputs, outputs=add_file_outputs,
813 + show_progress='minimal')
814 + eventdb1 = eventdb1a.then(**add_file_kwargs, show_progress='full')
815 + eventdb1b = eventdb1.then(make_interactive, inputs=add_file_outputs, outputs=add_file_outputs,
816 + show_progress='minimal')
817
818   # note for update_user_db_func output is ignored for db
819
820   def clear_textbox():
821   return gr.Textbox.update(value='')
822
823 + update_user_db_url_func = functools.partial(update_db_func, is_url=True)
824
825 + add_url_outputs = [url_text, langchain_mode]
826   add_url_kwargs = dict(fn=update_user_db_url_func,
827 + inputs=[url_text, my_db_state, chunk, chunk_size, langchain_mode],
828 + outputs=add_url_outputs + [sources_text, doc_exception_text],
829   queue=queue,
830 + api_name='add_url' if allow_api and allow_upload_to_user_data else None)
831
832 + eventdb2a = url_text.submit(fn=dummy_fun, inputs=url_text, outputs=url_text, queue=queue,
833 + show_progress='minimal')
834   # work around https://github.com/gradio-app/gradio/issues/4733
835   eventdb2b = eventdb2a.then(make_non_interactive, inputs=add_url_outputs, outputs=add_url_outputs,
836   show_progress='minimal')
837 + eventdb2 = eventdb2b.then(**add_url_kwargs, show_progress='full')
838 + eventdb2c = eventdb2.then(make_interactive, inputs=add_url_outputs, outputs=add_url_outputs,
839 + show_progress='minimal')
840
841 + update_user_db_txt_func = functools.partial(update_db_func, is_txt=True)
842 + add_text_outputs = [user_text_text, langchain_mode]
843   add_text_kwargs = dict(fn=update_user_db_txt_func,
844 + inputs=[user_text_text, my_db_state, chunk, chunk_size, langchain_mode],
845 + outputs=add_text_outputs + [sources_text, doc_exception_text],
846   queue=queue,
847 + api_name='add_text' if allow_api and allow_upload_to_user_data else None
848   )
849 + eventdb3a = user_text_text.submit(fn=dummy_fun, inputs=user_text_text, outputs=user_text_text, queue=queue,
850 + show_progress='minimal')
851   eventdb3b = eventdb3a.then(make_non_interactive, inputs=add_text_outputs, outputs=add_text_outputs,
852   show_progress='minimal')
853 + eventdb3 = eventdb3b.then(**add_text_kwargs, show_progress='full')
854 + eventdb3c = eventdb3.then(make_interactive, inputs=add_text_outputs, outputs=add_text_outputs,
855 + show_progress='minimal')
856 + db_events = [eventdb1a, eventdb1, eventdb1b,
857 + eventdb2a, eventdb2, eventdb2b, eventdb2c,
858 + eventdb3a, eventdb3b, eventdb3, eventdb3c]
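The three ingestion paths above (file upload, URL, pasted text) all use the same three-step event chain: make the input components non-interactive, run the shared database-update handler, then re-enable the inputs. A minimal, self-contained sketch of that pattern, with hypothetical component and handler names rather than the actual h2oGPT ones:

    import gradio as gr

    def lock(*args):
        # grey out inputs while the long-running ingestion runs
        return tuple(gr.update(interactive=False) for _ in args)

    def unlock(*args):
        return tuple(gr.update(interactive=True) for _ in args)

    def ingest(file_value, status_value):
        # stand-in for the real update_user_db-style work
        return file_value, "ingested"

    with gr.Blocks() as demo:
        up = gr.File()
        status = gr.Textbox(label="Status")
        up.upload(lock, inputs=[up], outputs=[up], show_progress='minimal') \
            .then(ingest, inputs=[up, status], outputs=[up, status], show_progress='full') \
            .then(unlock, inputs=[up], outputs=[up], show_progress='minimal')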
859
860   get_sources1 = functools.partial(get_sources, dbs=dbs, docs_state0=docs_state0)
861
862   # if change collection source, must clear doc selections from it to avoid inconsistency
863   def clear_doc_choice():
864 + return gr.Dropdown.update(choices=docs_state0, value=DocumentChoices.All.name)
865 +
866 + langchain_mode.change(clear_doc_choice, inputs=None, outputs=document_choice, queue=False)
867
868 + def resize_col_tabs(x):
869 + return gr.Dropdown.update(scale=x)
870 +
871 + col_tabs_scale.change(fn=resize_col_tabs, inputs=col_tabs_scale, outputs=col_tabs)
872 +
873 + def resize_chatbots(x, num_model_lock=0):
874 + if num_model_lock == 0:
875 + num_model_lock = 3 # 2 + 1 (which is dup of first)
876 + else:
877 + num_model_lock = 2 + num_model_lock
878 + return tuple([gr.update(height=x)] * num_model_lock)
879 +
880 + resize_chatbots_func = functools.partial(resize_chatbots, num_model_lock=len(text_outputs))
881 + text_outputs_height.change(fn=resize_chatbots_func, inputs=text_outputs_height,
882 + outputs=[text_output, text_output2] + text_outputs)
883
884   def update_dropdown(x):
885   return gr.Dropdown.update(choices=x, value=[docs_state0[0]])
886
887 + get_sources_args = dict(fn=get_sources1, inputs=[my_db_state, langchain_mode],
888 + outputs=[file_source, docs_state],
889 + queue=queue,
890 + api_name='get_sources' if allow_api else None)
891 +
892 + eventdb7 = get_sources_btn.click(**get_sources_args) \
893   .then(fn=update_dropdown, inputs=docs_state, outputs=document_choice)
894   # show button, else only show when add. Could add to above get_sources for download/dropdown, but bit much maybe
895   show_sources1 = functools.partial(get_source_files_given_langchain_mode, dbs=dbs)
896   eventdb8 = show_sources_btn.click(fn=show_sources1, inputs=[my_db_state, langchain_mode], outputs=sources_text,
897   api_name='show_sources' if allow_api else None)
898
899 + def update_viewable_dropdown(x):
900 + return gr.Dropdown.update(choices=x,
901 + value=viewable_docs_state0[0] if len(viewable_docs_state0) > 0 else None)
902 +
903 + get_viewable_sources1 = functools.partial(get_sources, dbs=dbs, docs_state0=viewable_docs_state0)
904 + get_viewable_sources_args = dict(fn=get_viewable_sources1, inputs=[my_db_state, langchain_mode],
905 + outputs=[file_source, viewable_docs_state],
906 + queue=queue,
907 + api_name='get_viewable_sources' if allow_api else None)
908 + eventdb12 = get_viewable_sources_btn.click(**get_viewable_sources_args) \
909 + .then(fn=update_viewable_dropdown, inputs=viewable_docs_state,
910 + outputs=view_document_choice)
911 +
912 + def show_doc(file):
913 + dummy1 = gr.update(visible=False, value=None)
914 + dummy_ret = dummy1, dummy1, dummy1, dummy1
915 + if not isinstance(file, str):
916 + return dummy_ret
917 +
918 + if file.endswith('.md'):
919 + try:
920 + with open(file, 'rt') as f:
921 + content = f.read()
922 + return dummy1, dummy1, dummy1, gr.update(visible=True, value=content)
923 + except:
924 + return dummy_ret
925 +
926 + if file.endswith('.py'):
927 + try:
928 + with open(file, 'rt') as f:
929 + content = f.read()
930 + content = f"```python\n{content}\n```"
931 + return dummy1, dummy1, dummy1, gr.update(visible=True, value=content)
932 + except:
933 + return dummy_ret
934 +
935 + if file.endswith('.txt') or file.endswith('.rst') or file.endswith('.rtf') or file.endswith('.toml'):
936 + try:
937 + with open(file, 'rt') as f:
938 + content = f.read()
939 + content = f"```text\n{content}\n```"
940 + return dummy1, dummy1, dummy1, gr.update(visible=True, value=content)
941 + except:
942 + return dummy_ret
943 +
944 + func = None
945 + if file.endswith(".csv"):
946 + func = pd.read_csv
947 + elif file.endswith(".pickle"):
948 + func = pd.read_pickle
949 + elif file.endswith(".xls") or file.endswith("xlsx"):
950 + func = pd.read_excel
951 + elif file.endswith('.json'):
952 + func = pd.read_json
953 + elif file.endswith('.xml'):
954 + func = pd.read_xml
955 + if func is not None:
956 + try:
957 + df = func(file).head(100)
958 + except:
959 + return dummy_ret
960 + return dummy1, gr.update(visible=True, value=df), dummy1, dummy1
961 + port = int(os.getenv('GRADIO_SERVER_PORT', '7860'))
962 + import pathlib
963 + absolute_path_string = os.path.abspath(file)
964 + url_path = pathlib.Path(absolute_path_string).as_uri()
965 + url = get_url(absolute_path_string, from_str=True)
966 + img_url = url.replace("""<a href=""", """<img src=""")
967 + if file.endswith('.png') or file.endswith('.jpg') or file.endswith('.jpeg'):
968 + return gr.update(visible=True, value=img_url), dummy1, dummy1, dummy1
969 + elif file.endswith('.pdf') or 'arxiv.org/pdf' in file:
970 + if file.startswith('http') or file.startswith('https'):
971 + # if file is online, then might as well use google(?)
972 + document1 = file
973 + return gr.update(visible=True, value=f"""<iframe width="1000" height="800" src="https://docs.google.com/viewerng/viewer?url={document1}&embedded=true" frameborder="0" height="100%" width="100%">
974 + </iframe>
975 + """), dummy1, dummy1, dummy1
976 + else:
977 + ip = get_local_ip()
978 + document1 = url_path.replace('file://', f'http://{ip}:{port}/')
979 + # document1 = url
980 + return gr.update(visible=True, value=f"""<object data="{document1}" type="application/pdf">
981 + <iframe src="https://docs.google.com/viewer?url={document1}&embedded=true"></iframe>
982 + </object>"""), dummy1, dummy1, dummy1
983 + else:
984 + return dummy_ret
985 +
986 + view_document_choice.select(fn=show_doc, inputs=view_document_choice,
987 + outputs=[doc_view, doc_view2, doc_view3, doc_view4])
988 +
989   # Get inputs to evaluate() and make_db()
990   # don't deepcopy, can contain model itself
991   all_kwargs = kwargs.copy()
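show_doc above previews tabular files by dispatching on the file extension to a pandas reader and showing only the first 100 rows. The same dispatch, reduced to its core (the gradio wiring and the non-tabular branches are omitted):

    import pandas as pd

    # extension -> pandas reader, mirroring the mapping in show_doc
    READERS = {
        '.csv': pd.read_csv,
        '.pickle': pd.read_pickle,
        '.xls': pd.read_excel,
        '.xlsx': pd.read_excel,
        '.json': pd.read_json,
        '.xml': pd.read_xml,
    }

    def preview_table(path, nrows=100):
        for ext, reader in READERS.items():
            if path.endswith(ext):
                try:
                    # only show the head, like show_doc, to keep the UI responsive
                    return reader(path).head(nrows)
                except Exception:
                    return None
        return None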
1072   **kwargs_evaluate
1073   )
1074

1075   dark_mode_btn.click(
1076   None,
1077   None,

1081   queue=False,
1082   )
1083
1084 + def visible_toggle(x):
1085 + x = 'off' if x == 'on' else 'on'
1086 + return x, gr.Column.update(visible=True if x == 'on' else False)
1087
1088 + side_bar_btn.click(fn=visible_toggle,
1089 + inputs=side_bar_text,
1090 + outputs=[side_bar_text, side_bar],
1091 + queue=False)
1092
1093 + submit_buttons_btn.click(fn=visible_toggle,
1094 + inputs=submit_buttons_text,
1095 + outputs=[submit_buttons_text, submit_buttons],
1096 + queue=False)
1097
1098   # examples after submit or any other buttons for chat or no chat
1099   if kwargs['examples'] is not None and kwargs['show_examples']:

1214   prompt_type1 = args_list[eval_func_param_names.index('prompt_type')]
1215   langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
1216   langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
1217 + document_subset1 = args_list[eval_func_param_names.index('document_subset')]
1218   document_choice1 = args_list[eval_func_param_names.index('document_choice')]
1219   if not prompt_type1:
1220   # shouldn't have to specify if CLI launched model

1247   return history
1248   if user_message1 in ['', None, '\n']:
1249   if langchain_action1 in LangChainAction.QUERY.value and \
1250 + DocumentChoices.All.name != document_subset1 \
1251   or \
1252   langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]:
1253   # reject non-retry submit/enter

1310   args_list = args_list[:-3] # only keep rest needed for evaluate()
1311   langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
1312   langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
1313 + document_subset1 = args_list[eval_func_param_names.index('document_subset')]
1314   document_choice1 = args_list[eval_func_param_names.index('document_choice')]
1315   if not history:
1316   print("No history", flush=True)

1323   history[-1][1] = None
1324   elif not instruction1:
1325   if langchain_action1 in LangChainAction.QUERY.value and \
1326 + DocumentChoices.All.name != document_choice1 \
1327   or \
1328   langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]:
1329   # if not retrying, then reject empty query

1494   )
1495   bot_args = dict(fn=bot,
1496   inputs=inputs_list + [model_state, my_db_state] + [text_output],
1497 + outputs=[text_output, chat_exception_text],
1498   )
1499   retry_bot_args = dict(fn=functools.partial(bot, retry=True),
1500   inputs=inputs_list + [model_state, my_db_state] + [text_output],
1501 + outputs=[text_output, chat_exception_text],
1502   )
1503   retry_user_args = dict(fn=functools.partial(user, retry=True),
1504   inputs=inputs_list + [text_output],

1516   )
1517   bot_args2 = dict(fn=bot,
1518   inputs=inputs_list2 + [model_state2, my_db_state] + [text_output2],
1519 + outputs=[text_output2, chat_exception_text],
1520   )
1521   retry_bot_args2 = dict(fn=functools.partial(bot, retry=True),
1522   inputs=inputs_list2 + [model_state2, my_db_state] + [text_output2],
1523 + outputs=[text_output2, chat_exception_text],
1524   )
1525   retry_user_args2 = dict(fn=functools.partial(user, retry=True),
1526   inputs=inputs_list2 + [text_output2],

1541   )
1542   all_bot_args = dict(fn=functools.partial(all_bot, model_states1=model_states),
1543   inputs=inputs_list + [my_db_state] + text_outputs,
1544 + outputs=text_outputs + [chat_exception_text],
1545   )
1546   all_retry_bot_args = dict(fn=functools.partial(all_bot, model_states1=model_states, retry=True),
1547   inputs=inputs_list + [my_db_state] + text_outputs,
1548 + outputs=text_outputs + [chat_exception_text],
1549   )
1550   all_retry_user_args = dict(fn=functools.partial(all_user, retry=True,
1551   sanitize_user_prompt=kwargs['sanitize_user_prompt'],

1743   return False
1744   return is_same
1745
1746 + def save_chat(*args, chat_is_list=False):
1747   args_list = list(args)
1748 + if not chat_is_list:
1749 + # list of chatbot histories,
1750 + # can't pass in list with list of chatbot histories and state due to gradio limits
1751 + chat_list = args_list[:-1]
1752 + else:
1753 + assert len(args_list) == 2
1754 + chat_list = args_list[0]
1755 + # if old chat file with single chatbot, get into shape
1756 + if isinstance(chat_list, list) and len(chat_list) > 0 and isinstance(chat_list[0], list) and len(
1757 + chat_list[0]) == 2 and isinstance(chat_list[0][0], str) and isinstance(chat_list[0][1], str):
1758 + chat_list = [chat_list]
1759   # remove None histories
1760   chat_list_not_none = [x for x in chat_list if x and len(x) > 0 and len(x[0]) == 2 and x[0][1] is not None]
1761 + chat_list_none = [x for x in chat_list if x not in chat_list_not_none]
1762 + if len(chat_list_none) > 0 and len(chat_list_not_none) == 0:
1763 + raise ValueError("Invalid chat file")
1764 + # dict with keys of short chat names, values of list of list of chatbot histories
1765 + chat_state1 = args_list[-1]
1766   short_chats = list(chat_state1.keys())
1767   if len(chat_list_not_none) > 0:
1768   # make short_chat key from only first history, based upon question that is same anyways

1774   if not already_exists:
1775   chat_state1[short_chat] = chat_list.copy()
1776   # clear chat_list so saved and then new conversation starts
1777 + # FIXME: seems less confusing to clear, since have clear button right next
1778 + # chat_list = [[]] * len(chat_list)
1779 + if not chat_is_list:
1780 + ret_list = chat_list + [chat_state1]
1781 + else:
1782 + ret_list = [chat_list] + [chat_state1]
1783   return tuple(ret_list)
1784
1785   def switch_chat(chat_key, chat_state1, num_model_lock=0):
1786   chosen_chat = chat_state1[chat_key]
1787   # deal with possible different size of chat list vs. current list

1805   .then(clear_scores, outputs=[score_text, score_text2, score_text_nochat])
1806
1807   def remove_chat(chat_key, chat_state1):
1808 + if isinstance(chat_key, str):
1809 + chat_state1.pop(chat_key, None)
1810 + return gr.update(choices=list(chat_state1.keys()), value=None), chat_state1
1811
1812 + remove_chat_event = remove_chat_btn.click(remove_chat,
1813 + inputs=[radio_chats, chat_state], outputs=[radio_chats, chat_state],
1814 + queue=False)
1815
1816   def get_chats1(chat_state1):
1817   base = 'chats'

1821   f.write(json.dumps(chat_state1, indent=2))
1822   return filename
1823
1824 + export_chat_event = export_chats_btn.click(get_chats1, inputs=chat_state, outputs=chats_file, queue=False,
1825 + api_name='export_chats' if allow_api else None)
1826
1827 + def add_chats_from_file(file, chat_state1, radio_chats1, chat_exception_text1):
1828   if not file:
1829 + return None, chat_state1, gr.update(choices=list(chat_state1.keys()), value=None), chat_exception_text1
1830   if isinstance(file, str):
1831   files = [file]
1832   else:
1833   files = file
1834   if not files:
1835 + return None, chat_state1, gr.update(choices=list(chat_state1.keys()), value=None), chat_exception_text1
1836 + chat_exception_list = []
1837   for file1 in files:
1838   try:
1839   if hasattr(file1, 'name'):

1842   new_chats = json.loads(f.read())
1843   for chat1_k, chat1_v in new_chats.items():
1844   # ignore chat1_k, regenerate and de-dup to avoid loss
1845 + _, chat_state1 = save_chat(chat1_v, chat_state1, chat_is_list=True)
1846   except BaseException as e:
1847   t, v, tb = sys.exc_info()
1848   ex = ''.join(traceback.format_exception(t, v, tb))
1849 + ex_str = "File %s exception: %s" % (file1, str(e))
1850 + print(ex_str, flush=True)
1851 + chat_exception_list.append(ex_str)
1852 + chat_exception_text1 = '\n'.join(chat_exception_list)
1853 + return None, chat_state1, gr.update(choices=list(chat_state1.keys()), value=None), chat_exception_text1
1854
1855   # note for update_user_db_func output is ignored for db
1856 + chatup_change_event = chatsup_output.change(add_chats_from_file,
1857 + inputs=[chatsup_output, chat_state, radio_chats,
1858 + chat_exception_text],
1859 + outputs=[chatsup_output, chat_state, radio_chats,
1860 + chat_exception_text],
1861 + queue=False,
1862 + api_name='add_to_chats' if allow_api else None)
1863 +
1864 + clear_chat_event = clear_chat_btn.click(fn=clear_texts,
1865 + inputs=[text_output, text_output2] + text_outputs,
1866 + outputs=[text_output, text_output2] + text_outputs,
1867 + queue=False, api_name='clear' if allow_api else None) \
1868   .then(deselect_radio_chats, inputs=None, outputs=radio_chats, queue=False) \
1869   .then(clear_scores, outputs=[score_text, score_text2, score_text_nochat])
1870
1871 + def update_radio_chats(chat_state1):
1872 + # reverse so newest at top
1873 + choices = list(chat_state1.keys()).copy()
1874 + choices.reverse()
1875 + return gr.update(choices=choices, value=None)
1876 +
1877 + clear_event = save_chat_btn.click(save_chat,
1878 + inputs=[text_output, text_output2] + text_outputs + [chat_state],
1879 + outputs=[text_output, text_output2] + text_outputs + [chat_state],
1880 + api_name='save_chat' if allow_api else None) \
1881   .then(update_radio_chats, inputs=chat_state, outputs=radio_chats,
1882   api_name='update_chats' if allow_api else None) \
1883   .then(clear_scores, outputs=[score_text, score_text2, score_text_nochat])

1911   .then(clear_torch_cache)
1912
1913   def load_model(model_name, lora_weights, server_name, model_state_old, prompt_type_old, load_8bit,
1914 + use_gpu_id, gpu_id):
1915   # ensure no API calls reach here
1916   if is_public:
1917   raise RuntimeError("Illegal access for %s" % model_name)

1955   all_kwargs1 = all_kwargs.copy()
1956   all_kwargs1['base_model'] = model_name.strip()
1957   all_kwargs1['load_8bit'] = load_8bit
1958 + all_kwargs1['use_gpu_id'] = use_gpu_id
1959   all_kwargs1['gpu_id'] = int(gpu_id) # detranscribe
1960   model_lower = model_name.strip().lower()
1961   if model_lower in inv_prompt_type_to_model_lower:

2008
2009   get_prompt_str_func1 = functools.partial(get_prompt_str, which=1)
2010   get_prompt_str_func2 = functools.partial(get_prompt_str, which=2)
2011 + prompt_type.change(fn=get_prompt_str_func1, inputs=[prompt_type, prompt_dict], outputs=prompt_dict, queue=False)
2012 + prompt_type2.change(fn=get_prompt_str_func2, inputs=[prompt_type2, prompt_dict2], outputs=prompt_dict2,
2013 + queue=False)
2014
2015   def dropdown_prompt_type_list(x):
2016   return gr.Dropdown.update(value=x)

2020
2021   load_model_args = dict(fn=load_model,
2022   inputs=[model_choice, lora_choice, server_choice, model_state, prompt_type,
2023 + model_load8bit_checkbox, model_use_gpu_id_checkbox, model_gpu],
2024   outputs=[model_state, model_used, lora_used, server_used,
2025   # if prompt_type changes, prompt_dict will change via change rule
2026   prompt_type, max_new_tokens, min_new_tokens,

2028   prompt_update_args = dict(fn=dropdown_prompt_type_list, inputs=prompt_type, outputs=prompt_type)
2029   chatbot_update_args = dict(fn=chatbot_list, inputs=[text_output, model_used], outputs=text_output)
2030   nochat_update_args = dict(fn=chatbot_list, inputs=[text_output_nochat, model_used], outputs=text_output_nochat)
2031 + load_model_event = load_model_button.click(**load_model_args,
2032 + api_name='load_model' if allow_api and is_public else None) \
2033 + .then(**prompt_update_args) \
2034 + .then(**chatbot_update_args) \
2035 + .then(**nochat_update_args) \
2036 + .then(clear_torch_cache)
2037
2038   load_model_args2 = dict(fn=load_model,
2039   inputs=[model_choice2, lora_choice2, server_choice2, model_state2, prompt_type2,
2040 + model_load8bit_checkbox2, model_use_gpu_id_checkbox2, model_gpu2],
2041   outputs=[model_state2, model_used2, lora_used2, server_used2,
2042   # if prompt_type2 changes, prompt_dict2 will change via change rule
2043   prompt_type2, max_new_tokens2, min_new_tokens2
2044   ])
2045   prompt_update_args2 = dict(fn=dropdown_prompt_type_list, inputs=prompt_type2, outputs=prompt_type2)
2046   chatbot_update_args2 = dict(fn=chatbot_list, inputs=[text_output2, model_used2], outputs=text_output2)
2047 + load_model_event2 = load_model_button2.click(**load_model_args2,
2048 + api_name='load_model2' if allow_api and is_public else None) \
2049 + .then(**prompt_update_args2) \
2050 + .then(**chatbot_update_args2) \
2051 + .then(clear_torch_cache)

2052
2053   def dropdown_model_lora_server_list(model_list0, model_x,
2054   lora_list0, lora_x,

2097   server_options_state],
2098   queue=False)
2099
2100 + go_event = go_btn.click(lambda: gr.update(visible=False), None, go_btn, api_name="go" if allow_api else None,
2101 + queue=False) \
2102   .then(lambda: gr.update(visible=True), None, normal_block, queue=False) \
2103   .then(**load_model_args, queue=False).then(**prompt_update_args, queue=False)
2104

2166   def get_hash():
2167   return kwargs['git_hash']
2168
2169 + system_event = system_btn3.click(get_hash,
2170 + outputs=system_text3,
2171 + api_name='system_hash' if allow_api else None,
2172 + queue=False,
2173 + )
2174
2175   def count_chat_tokens(model_state1, chat1, prompt_type1, prompt_dict1,
2176   memory_restriction_level1=0,

2198   count_chat_tokens_func = functools.partial(count_chat_tokens,
2199   memory_restriction_level1=memory_restriction_level,
2200   keep_sources_in_context1=kwargs['keep_sources_in_context'])
2201 + count_tokens_event = count_chat_tokens_btn.click(fn=count_chat_tokens,
2202 + inputs=[model_state, text_output, prompt_type, prompt_dict],
2203 + outputs=chat_token_count,
2204 + api_name='count_tokens' if allow_api else None)
2205 +
2206 + # don't pass text_output, don't want to clear output, just stop it
2207 + # cancel only stops outer generation, not inner generation or non-generation
2208 + stop_btn.click(lambda: None, None, None,
2209 + cancels=submits1 + submits2 + submits3 + submits4 +
2210 + [submit_event_nochat, submit_event_nochat2] +
2211 + [eventdb1, eventdb2, eventdb3] +
2212 + [eventdb7, eventdb8, eventdb9, eventdb12] +
2213 + db_events +
2214 + [clear_event] +
2215 + [submit_event_nochat_api, submit_event_nochat] +
2216 + [load_model_event, load_model_event2] +
2217 + [count_tokens_event]
2218 + ,
2219 + queue=False, api_name='stop' if allow_api else None).then(clear_torch_cache, queue=False)
2220
2221   demo.load(None, None, None, _js=get_dark_js() if kwargs['h2ocolors'] and False else None) # light best
2222

2289
2290
2291   def get_sources(db1, langchain_mode, dbs=None, docs_state0=None):
2292 + set_userid(db1)
2293 +
2294   if langchain_mode in ['ChatLLM', 'LLM']:
2295   source_files_added = "NA"
2296   source_list = []

2321   return sources_file, source_list
2322
2323
2324 + def set_userid(db1):
2325 + # can only call this after function called so for specific userr, not in gr.State() that occurs during app init
2326 + assert db1 is not None and len(db1) == 2
2327 + if db1[1] is None:
2328 + # uuid in db is used as user ID
2329 + db1[1] = str(uuid.uuid4())
2330 +
2331 +
2332 + def update_user_db(file, db1, chunk, chunk_size, langchain_mode, dbs=None, **kwargs):
2333 + set_userid(db1)
2334 +
2335 + if file is None:
2336 + raise RuntimeError("Don't use change, use input")
2337 +
2338   try:
2339 + return _update_user_db(file, db1=db1, chunk=chunk, chunk_size=chunk_size,
2340 + langchain_mode=langchain_mode, dbs=dbs,
2341 + **kwargs)
2342   except BaseException as e:
2343   print(traceback.format_exc(), flush=True)
2344   # gradio has issues if except, so fail semi-gracefully, else would hang forever in processing textbox

2355   </body>
2356   </html>
2357   """.format(ex_str)
2358 + doc_exception_text = str(e)
2359 + return None, langchain_mode, source_files_added, doc_exception_text
2360   finally:
2361   clear_torch_cache()
2362
2363
2364   def get_lock_file(db1, langchain_mode):
2365 + set_userid(db1)
2366   assert len(db1) == 2 and db1[1] is not None and isinstance(db1[1], str)
2367   user_id = db1[1]
2368   base_path = 'locks'

2371   return lock_file
2372
2373
2374 + def _update_user_db(file,
2375 + db1=None,
2376 + chunk=None, chunk_size=None,
2377 + dbs=None, db_type=None, langchain_mode='UserData',
2378   user_path=None,
2379   use_openai_embedding=None,
2380   hf_embedding_model=None,

2385   verbose=None,
2386   is_url=None, is_txt=None,
2387   n_jobs=-1):
2388 + assert db1 is not None
2389 + assert chunk is not None
2390 + assert chunk_size is not None
2391   assert use_openai_embedding is not None
2392   assert hf_embedding_model is not None
2393   assert caption_loader is not None

2396   assert enable_ocr is not None
2397   assert verbose is not None
2398
2399 + set_userid(db1)
2400 +
2401   if dbs is None:
2402   dbs = {}
2403   assert isinstance(dbs, dict), "Wrong type for dbs: %s" % str(type(dbs))

2412   if not isinstance(file, (list, tuple, typing.Generator)) and isinstance(file, str):
2413   file = [file]
2414
2415 + if langchain_mode == LangChainMode.DISABLED.value:
2416 + return None, langchain_mode, get_source_files(), ""
2417 +
2418 + if langchain_mode in [LangChainMode.CHAT_LLM.value, LangChainMode.CHAT_LLM.value]:
2419 + # then switch to MyData, so langchain_mode also becomes way to select where upload goes
2420 + # but default to mydata if nothing chosen, since safest
2421 + langchain_mode = LangChainMode.MY_DATA.value
2422 +
2423   if langchain_mode == 'UserData' and user_path is not None:
2424   # move temp files from gradio upload to stable location
2425   for fili, fil in enumerate(file):

2448   caption_loader=caption_loader,
2449   )
2450   exceptions = [x for x in sources if x.metadata.get('exception')]
2451 + exceptions_strs = [x.metadata['exception'] for x in exceptions]
2452   sources = [x for x in sources if 'exception' not in x.metadata]
2453
2454   lock_file = get_lock_file(db1, langchain_mode)

2475   if db is not None:
2476   db1[0] = db
2477   source_files_added = get_source_files(db=db1[0], exceptions=exceptions)
2478 + return None, langchain_mode, source_files_added, '\n'.join(exceptions_strs)
2479   else:
2480   from gpt_langchain import get_persist_directory
2481   persist_directory = get_persist_directory(langchain_mode)

2493   hf_embedding_model=hf_embedding_model)
2494   dbs[langchain_mode] = db
2495   # NOTE we do not return db, because function call always same code path
2496 + # return dbs[langchain_mode]
2497   # db in this code path is updated in place
2498   source_files_added = get_source_files(db=dbs[langchain_mode], exceptions=exceptions)
2499 + return None, langchain_mode, source_files_added, '\n'.join(exceptions_strs)
2500
2501
2502   def get_db(db1, langchain_mode, dbs=None):
gradio_themes.py
CHANGED
@@ -133,6 +133,11 @@ class H2oTheme(Soft):
 background_fill_primary_dark="*block_background_fill",
 block_radius="0 0 8px 8px",
 checkbox_label_text_color_selected_dark='#000000',
+#checkbox_label_text_size="*text_xs", # too small for iPhone etc. but good if full large screen zoomed to fit
+checkbox_label_text_size="*text_sm",
+#radio_circle="""url("data:image/svg+xml,%3csvg viewBox='0 0 32 32' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='32' cy='32' r='1'/%3e%3c/svg%3e")""",
+#checkbox_border_width=1,
+#heckbox_border_width_dark=1,
 )
 
 
@@ -173,6 +178,9 @@ class SoftTheme(Soft):
 font=font,
 font_mono=font_mono,
 )
+super().set(
+checkbox_label_text_size="*text_sm",
+)
 
 
 h2o_logo = '<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" width="100%" height="100%"' \
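checkbox_label_text_size is a standard gradio theme variable, so the same override can be applied to a stock theme without subclassing; a small sketch, assuming a recent gradio 3.x with the themes API:

    import gradio as gr

    # shrink checkbox/radio label text the same way the H2o and Soft themes now do
    theme = gr.themes.Soft().set(checkbox_label_text_size="*text_sm")

    with gr.Blocks(theme=theme) as demo:
        gr.Checkbox(label="example checkbox")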
gradio_utils/__pycache__/css.cpython-310.pyc
CHANGED
Binary files a/gradio_utils/__pycache__/css.cpython-310.pyc and b/gradio_utils/__pycache__/css.cpython-310.pyc differ

gradio_utils/__pycache__/prompt_form.cpython-310.pyc
CHANGED
Binary files a/gradio_utils/__pycache__/prompt_form.cpython-310.pyc and b/gradio_utils/__pycache__/prompt_form.cpython-310.pyc differ
gradio_utils/css.py
CHANGED
@@ -12,7 +12,10 @@ def get_css(kwargs) -> str:
 
 
 def make_css_base() -> str:
-
+css1 = """
+#col_container {margin-left: auto; margin-right: auto; text-align: left;}
+"""
+return css1 + """
 @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap');
 
 body.dark{#warning {background-color: #555555};}
gradio_utils/prompt_form.py
CHANGED
@@ -93,30 +93,3 @@ def make_chatbots(output_label0, output_label0_model2, **kwargs):
 text_output2 = gr.Chatbot(label=output_label0_model2,
 visible=False and not kwargs['model_lock'], height=kwargs['height'] or 400)
 return text_output, text_output2, text_outputs
-
-
-def make_prompt_form(kwargs, LangChainMode):
-if kwargs['langchain_mode'] != LangChainMode.DISABLED.value:
-extra_prompt_form = ". For summarization, empty submission uses first top_k_docs documents."
-else:
-extra_prompt_form = ""
-if kwargs['input_lines'] > 1:
-instruction_label = "Shift-Enter to Submit, Enter for more lines%s" % extra_prompt_form
-else:
-instruction_label = "Enter to Submit, Shift-Enter for more lines%s" % extra_prompt_form
-
-with gr.Row():#elem_id='prompt-form-area'):
-with gr.Column(scale=50):
-instruction = gr.Textbox(
-lines=kwargs['input_lines'],
-label='Ask anything',
-placeholder=instruction_label,
-info=None,
-elem_id='prompt-form',
-container=True,
-)
-with gr.Row():
-submit = gr.Button(value='Submit', variant='primary', scale=0, size='sm')
-stop_btn = gr.Button(value="Stop", variant='secondary', scale=0, size='sm')
-
-return instruction, submit, stop_btn
loaders.py
CHANGED
@@ -1,40 +1,48 @@
-
+import functools
+
+
+def get_loaders(model_name, reward_type, llama_type=None, load_gptq=''):
 # NOTE: Some models need specific new prompt_type
 # E.g. t5_xxl_true_nli_mixture has input format: "premise: PREMISE_TEXT hypothesis: HYPOTHESIS_TEXT".)
+if load_gptq:
+from transformers import AutoTokenizer
+from auto_gptq import AutoGPTQForCausalLM
+use_triton = False
+functools.partial(AutoGPTQForCausalLM.from_quantized, quantize_config=None, use_triton=use_triton)
+return AutoGPTQForCausalLM.from_quantized, AutoTokenizer
 if llama_type is None:
 llama_type = "llama" in model_name.lower()
 if llama_type:
 from transformers import LlamaForCausalLM, LlamaTokenizer
-
-tokenizer_loader = LlamaTokenizer
+return LlamaForCausalLM.from_pretrained, LlamaTokenizer
 elif 'distilgpt2' in model_name.lower():
 from transformers import AutoModelForCausalLM, AutoTokenizer
-return AutoModelForCausalLM, AutoTokenizer
+return AutoModelForCausalLM.from_pretrained, AutoTokenizer
 elif 'gpt2' in model_name.lower():
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
-return GPT2LMHeadModel, GPT2Tokenizer
+return GPT2LMHeadModel.from_pretrained, GPT2Tokenizer
 elif 'mbart-' in model_name.lower():
 from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
-return MBartForConditionalGeneration, MBart50TokenizerFast
+return MBartForConditionalGeneration.from_pretrained, MBart50TokenizerFast
 elif 't5' == model_name.lower() or \
 't5-' in model_name.lower() or \
 'flan-' in model_name.lower():
 from transformers import AutoTokenizer, T5ForConditionalGeneration
-return T5ForConditionalGeneration, AutoTokenizer
+return T5ForConditionalGeneration.from_pretrained, AutoTokenizer
 elif 'bigbird' in model_name:
 from transformers import BigBirdPegasusForConditionalGeneration, AutoTokenizer
-return BigBirdPegasusForConditionalGeneration, AutoTokenizer
+return BigBirdPegasusForConditionalGeneration.from_pretrained, AutoTokenizer
 elif 'bart-large-cnn-samsum' in model_name or 'flan-t5-base-samsum' in model_name:
 from transformers import pipeline
 return pipeline, "summarization"
 elif reward_type or 'OpenAssistant/reward-model'.lower() in model_name.lower():
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-return AutoModelForSequenceClassification, AutoTokenizer
+return AutoModelForSequenceClassification.from_pretrained, AutoTokenizer
 else:
 from transformers import AutoTokenizer, AutoModelForCausalLM
 model_loader = AutoModelForCausalLM
 tokenizer_loader = AutoTokenizer
-
+return model_loader.from_pretrained, tokenizer_loader
 
 
 def get_tokenizer(tokenizer_loader, tokenizer_base_model, local_files_only, resume_download, use_auth_token):
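With this change, get_loaders returns a callable loader (typically a *.from_pretrained method, or AutoGPTQForCausalLM.from_quantized when load_gptq is set) together with a tokenizer class, instead of returning the classes themselves. A rough usage sketch under that reading; the model name is only an example:

    from loaders import get_loaders

    model_name = 'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b'
    # loader_fn is already a from_pretrained-style callable
    loader_fn, tokenizer_loader = get_loaders(model_name, reward_type=False, load_gptq='')
    tokenizer = tokenizer_loader.from_pretrained(model_name)
    model = loader_fn(model_name)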
prompter.py
CHANGED
@@ -23,9 +23,6 @@ prompt_type_to_model_name = {
 'gpt2',
 'distilgpt2',
 'mosaicml/mpt-7b-storywriter',
-'mosaicml/mpt-7b-instruct', # internal code handles instruct
-'mosaicml/mpt-7b-chat', # NC, internal code handles instruct
-'mosaicml/mpt-30b-instruct', # internal code handles instruct
 ],
 'gptj': ['gptj', 'gpt4all_llama'],
 'prompt_answer': [
@@ -41,6 +38,7 @@ prompt_type_to_model_name = {
 'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2',
 'h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k',
 'h2oai/h2ogpt-gm-oasst1-multilang-xgen-7b-8k',
+'TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2-GPTQ',
 ],
 'prompt_answer_openllama': [
 'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt',
@@ -49,7 +47,7 @@ prompt_type_to_model_name = {
 'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b',
 'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b',
 ],
-'instruct': [],
+'instruct': ['TheBloke/llama-30b-supercot-SuperHOT-8K-fp16'], # https://huggingface.co/TheBloke/llama-30b-supercot-SuperHOT-8K-fp16#prompting
 'instruct_with_end': ['databricks/dolly-v2-12b'],
 'quality': [],
 'human_bot': [
@@ -74,8 +72,11 @@ prompt_type_to_model_name = {
 "wizard_mega": ['openaccess-ai-collective/wizard-mega-13b'],
 "instruct_simple": ['JosephusCheung/Guanaco'],
 "wizard_vicuna": ['ehartford/Wizard-Vicuna-13B-Uncensored'],
-"wizard2": ['llama'
+"wizard2": ['llama'],
+"mptinstruct": ['mosaicml/mpt-30b-instruct', 'mosaicml/mpt-7b-instruct', 'mosaicml/mpt-30b-instruct'],
+"mptchat": ['mosaicml/mpt-7b-chat', 'mosaicml/mpt-30b-chat', 'TheBloke/mpt-30B-chat-GGML'],
 "vicuna11": ['lmsys/vicuna-33b-v1.3'],
+"falcon": ['tiiuae/falcon-40b-instruct', 'tiiuae/falcon-40b', 'tiiuae/falcon-7b-instruct', 'tiiuae/falcon-7b'],
 # could be plain, but default is correct prompt_type for default TheBloke model ggml-wizardLM-7B.q4_2.bin
 }
 if os.getenv('OPENAI_API_KEY'):
@@ -293,7 +294,7 @@ Current Time: {}
 humanstr = prompt_tokens
 botstr = answer_tokens
 terminate_response = [humanstr, PreResponse, eos]
-chat_sep =
+chat_sep = eos
 chat_turn_sep = eos
 elif prompt_type in [PromptType.prompt_answer_openllama.value, str(PromptType.prompt_answer_openllama.value),
 PromptType.prompt_answer_openllama.name]:
@@ -309,7 +310,7 @@ Current Time: {}
 humanstr = prompt_tokens
 botstr = answer_tokens
 terminate_response = [humanstr, PreResponse, eos]
-chat_sep =
+chat_sep = eos
 chat_turn_sep = eos
 elif prompt_type in [PromptType.open_assistant.value, str(PromptType.open_assistant.value),
 PromptType.open_assistant.name]:
@@ -520,6 +521,67 @@ ASSISTANT:
 # normally LLM adds space after this, because was how trained.
 # if add space here, non-unique tokenization will often make LLM produce wrong output
 PreResponse = PreResponse
+elif prompt_type in [PromptType.mptinstruct.value, str(PromptType.mptinstruct.value),
+PromptType.mptinstruct.name]:
+# https://huggingface.co/mosaicml/mpt-30b-instruct#formatting
+promptA = promptB = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n' if not (
+chat and reduced) else ''
+
+PreInstruct = """
+### Instruction
+"""
+
+PreInput = """
+### Input
+"""
+
+PreResponse = """
+### Response
+"""
+terminate_response = None
+chat_turn_sep = chat_sep = '\n'
+humanstr = PreInstruct
+botstr = PreResponse
+elif prompt_type in [PromptType.mptchat.value, str(PromptType.mptchat.value),
+PromptType.mptchat.name]:
+# https://huggingface.co/TheBloke/mpt-30B-chat-GGML#prompt-template
+promptA = promptB = """<|im_start|>system\nA conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.\n<|im_end|>""" if not (
+chat and reduced) else ''
+
+PreInstruct = """<|im_start|>user
+"""
+
+PreInput = None
+
+PreResponse = """<|im_end|><|im_start|>assistant
+"""
+terminate_response = ['<|im_end|>']
+chat_sep = ''
+chat_turn_sep = '<|im_end|>'
+humanstr = PreInstruct
+botstr = PreResponse
+elif prompt_type in [PromptType.falcon.value, str(PromptType.falcon.value),
+PromptType.falcon.name]:
+promptA = promptB = "" if not (chat and reduced) else ''
+
+PreInstruct = """User: """
+
+PreInput = None
+
+PreResponse = """Assistant:"""
+terminate_response = ['\nUser', "<|endoftext|>"]
+chat_sep = '\n\n'
+chat_turn_sep = '\n\n'
+humanstr = PreInstruct
+botstr = PreResponse
+if making_context:
+# when making context, want it to appear as-if LLM generated, which starts with space after :
+PreResponse = 'Assistant: '
+else:
+# normally LLM adds space after this, because was how trained.
+# if add space here, non-unique tokenization will often make LLM produce wrong output
+PreResponse = PreResponse
+# generates_leading_space = True
 else:
 raise RuntimeError("No such prompt_type=%s" % prompt_type)
 
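For orientation, the new mptchat entries compose into a ChatML-style prompt roughly as below; the real Prompter assembles these same pieces with extra handling for chat history, PreInput, and the reduced-context case, so treat this as an approximation only:

    system = ("<|im_start|>system\n"
              "A conversation between a user and an LLM-based AI assistant. "
              "The assistant gives helpful and honest answers.\n<|im_end|>")
    pre_instruct = "<|im_start|>user\n"
    pre_response = "<|im_end|><|im_start|>assistant\n"

    def mptchat_prompt(instruction):
        # chat_sep is '' and chat_turn_sep is '<|im_end|>' per the diff above
        return system + pre_instruct + instruction + pre_response

    print(mptchat_prompt("Summarize the following document."))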
requirements.txt
CHANGED
@@ -6,7 +6,7 @@ huggingface_hub==0.15.1
 appdirs==1.4.4
 fire==0.5.0
 docutils==0.20.1
-torch==2.0.1
+torch==2.0.1; sys_platform != "darwin" and platform_machine != "arm64"
 evaluate==0.4.0
 rouge_score==0.1.2
 sacrebleu==2.3.1
@@ -19,7 +19,7 @@ matplotlib==3.7.1
 loralib==0.1.1
 bitsandbytes==0.39.0
 accelerate==0.20.3
-git+https://github.com/huggingface/peft.git@
+git+https://github.com/huggingface/peft.git@06fd06a4d2e8ed8c3a253c67d9c3cb23e0f497ad
 transformers==4.30.2
 tokenizers==0.13.3
 APScheduler==3.10.1
@@ -45,8 +45,8 @@ pytest-xdist==3.2.1
 nltk==3.8.1
 textstat==0.7.3
 # pandoc==2.3
-
-pypandoc_binary==1.11
+pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
+pypandoc_binary==1.11; platform_machine == "x86_64"
 openpyxl==3.1.2
 lm_dataformat==0.0.20
 bioc==2.0
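The new environment markers make pip skip a requirement unless the platform condition holds (torch is skipped on Apple Silicon, and pypandoc vs. pypandoc_binary is chosen per architecture). To see which branch applies on a given machine, inspect the two values the markers test:

    import sys, platform
    # markers compare against these two values
    print(sys.platform, platform.machine())  # e.g. 'darwin arm64' or 'linux x86_64'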
utils.py
CHANGED
@@ -97,6 +97,8 @@ def get_device():
 import torch
 if torch.cuda.is_available():
 device = "cuda"
+elif torch.backends.mps.is_built():
+device = "mps"
 else:
 device = "cpu"
 
@@ -138,7 +140,7 @@ def system_info():
 gpu_memory_frac_dict = {k: gpu_memory_free_dict[k] / gpu_memory_total_dict[k] for k in gpu_memory_total_dict}
 for k, v in gpu_memory_frac_dict.items():
 system[f'GPU_M/%s' % k] = v
-except ModuleNotFoundError:
+except (KeyError, ModuleNotFoundError):
 pass
 system['hash'] = get_githash()
 
@@ -926,3 +928,60 @@
 
 def __call__(self, x, *args, **kwargs):
 return self.encode(x, *args, **kwargs)
+
+
+def get_local_ip():
+import socket
+s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+try:
+# doesn't even have to be reachable
+s.connect(('10.255.255.255', 1))
+IP = s.getsockname()[0]
+except Exception:
+IP = '127.0.0.1'
+finally:
+s.close()
+return IP
+
+
+try:
+assert pkg_resources.get_distribution('langchain') is not None
+have_langchain = True
+except (pkg_resources.DistributionNotFound, AssertionError):
+have_langchain = False
+
+
+import distutils.spawn
+
+have_tesseract = distutils.spawn.find_executable("tesseract")
+have_libreoffice = distutils.spawn.find_executable("libreoffice")
+
+import pkg_resources
+
+try:
+assert pkg_resources.get_distribution('arxiv') is not None
+assert pkg_resources.get_distribution('pymupdf') is not None
+have_arxiv = True
+except (pkg_resources.DistributionNotFound, AssertionError):
+have_arxiv = False
+
+try:
+assert pkg_resources.get_distribution('pymupdf') is not None
+have_pymupdf = True
+except (pkg_resources.DistributionNotFound, AssertionError):
+have_pymupdf = False
+
+try:
+assert pkg_resources.get_distribution('selenium') is not None
+have_selenium = True
+except (pkg_resources.DistributionNotFound, AssertionError):
+have_selenium = False
+
+try:
+assert pkg_resources.get_distribution('playwright') is not None
+have_playwright = True
+except (pkg_resources.DistributionNotFound, AssertionError):
+have_playwright = False
+
+# disable, hangs too often
+have_playwright = False
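get_local_ip and get_device are consumed elsewhere in this commit (gradio_runner.py builds the local PDF-viewer URL from the IP and GRADIO_SERVER_PORT). A small usage sketch, assuming the default port:

    import os
    from utils import get_device, get_local_ip

    device = get_device()  # "cuda", "mps" on Apple Silicon builds of torch, else "cpu"
    port = int(os.getenv('GRADIO_SERVER_PORT', '7860'))
    base_url = f"http://{get_local_ip()}:{port}/"
    print(device, base_url)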