bishmoy committed on
Commit
777c2c7
1 Parent(s): 5628a76

Added experimental Arxiv Support

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +1 -1
  2. app.py +52 -17
  3. colbert/indexes/arxiv_colbert/0.codes.pt +1 -1
  4. colbert/indexes/arxiv_colbert/0.residuals.pt +1 -1
  5. colbert/indexes/arxiv_colbert/1.codes.pt +1 -1
  6. colbert/indexes/arxiv_colbert/1.residuals.pt +1 -1
  7. colbert/indexes/arxiv_colbert/10.codes.pt +2 -2
  8. colbert/indexes/arxiv_colbert/10.metadata.json +2 -2
  9. colbert/indexes/arxiv_colbert/10.residuals.pt +2 -2
  10. colbert/indexes/arxiv_colbert/11.codes.pt +2 -2
  11. colbert/indexes/arxiv_colbert/11.metadata.json +2 -2
  12. colbert/indexes/arxiv_colbert/11.residuals.pt +2 -2
  13. colbert/indexes/arxiv_colbert/12.codes.pt +2 -2
  14. colbert/indexes/arxiv_colbert/12.metadata.json +2 -2
  15. colbert/indexes/arxiv_colbert/12.residuals.pt +2 -2
  16. colbert/indexes/arxiv_colbert/13.codes.pt +2 -2
  17. colbert/indexes/arxiv_colbert/13.metadata.json +3 -3
  18. colbert/indexes/arxiv_colbert/13.residuals.pt +2 -2
  19. colbert/indexes/arxiv_colbert/2.codes.pt +2 -2
  20. colbert/indexes/arxiv_colbert/2.metadata.json +1 -1
  21. colbert/indexes/arxiv_colbert/2.residuals.pt +2 -2
  22. colbert/indexes/arxiv_colbert/3.codes.pt +1 -1
  23. colbert/indexes/arxiv_colbert/3.metadata.json +1 -1
  24. colbert/indexes/arxiv_colbert/3.residuals.pt +1 -1
  25. colbert/indexes/arxiv_colbert/4.codes.pt +1 -1
  26. colbert/indexes/arxiv_colbert/4.metadata.json +2 -2
  27. colbert/indexes/arxiv_colbert/4.residuals.pt +1 -1
  28. colbert/indexes/arxiv_colbert/5.codes.pt +2 -2
  29. colbert/indexes/arxiv_colbert/5.metadata.json +2 -2
  30. colbert/indexes/arxiv_colbert/5.residuals.pt +2 -2
  31. colbert/indexes/arxiv_colbert/6.codes.pt +2 -2
  32. colbert/indexes/arxiv_colbert/6.metadata.json +2 -2
  33. colbert/indexes/arxiv_colbert/6.residuals.pt +2 -2
  34. colbert/indexes/arxiv_colbert/7.codes.pt +2 -2
  35. colbert/indexes/arxiv_colbert/7.metadata.json +2 -2
  36. colbert/indexes/arxiv_colbert/7.residuals.pt +2 -2
  37. colbert/indexes/arxiv_colbert/8.codes.pt +2 -2
  38. colbert/indexes/arxiv_colbert/8.metadata.json +2 -2
  39. colbert/indexes/arxiv_colbert/8.residuals.pt +2 -2
  40. colbert/indexes/arxiv_colbert/9.codes.pt +2 -2
  41. colbert/indexes/arxiv_colbert/9.metadata.json +2 -2
  42. colbert/indexes/arxiv_colbert/9.residuals.pt +2 -2
  43. colbert/indexes/arxiv_colbert/buckets.pt +1 -1
  44. colbert/indexes/arxiv_colbert/centroids.pt +1 -1
  45. colbert/indexes/arxiv_colbert/collection.json +2 -2
  46. colbert/indexes/arxiv_colbert/docid_metadata_map.json +2 -2
  47. colbert/indexes/arxiv_colbert/doclens.10.json +0 -0
  48. colbert/indexes/arxiv_colbert/doclens.11.json +0 -0
  49. colbert/indexes/arxiv_colbert/doclens.12.json +0 -0
  50. colbert/indexes/arxiv_colbert/doclens.13.json +0 -0
README.md CHANGED
@@ -11,4 +11,4 @@ license: cc0-1.0
11
  ---
12
 
13
  ## Arxiv-CS-RAG
14
- Index Last Updated : 2024-03-03
 
11
  ---
12
 
13
  ## Arxiv-CS-RAG
14
+ Index Last Updated : 2024-03-10
app.py CHANGED
@@ -7,6 +7,9 @@ import re
7
  from datetime import datetime
8
  import json
9
 
 
 
 
10
  retrieve_results = 10
11
  show_examples = False
12
  llm_models_to_choose = ['mistralai/Mixtral-8x7B-Instruct-v0.1','mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
@@ -18,6 +21,7 @@ generate_kwargs = dict(
18
  do_sample = False,
19
  )
20
 
 
21
  RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
22
 
23
  try:
@@ -28,8 +32,9 @@ try:
28
  except:
29
  gr.Warning("Retriever not working!")
30
 
 
31
  mark_text = '# 🔍 Search Results\n'
32
- header_text = "# ArXivCS RAG \n"
33
 
34
  try:
35
  with open("README.md", "r") as f:
@@ -39,10 +44,24 @@ try:
39
  date = match.group().split(': ')[1]
40
  formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
41
  header_text += f'Index Last Updated: {formatted_date}\n'
42
-
43
  except:
44
- pass
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
46
  if show_examples:
47
  with open("sample_outputs.json", "r") as f:
48
  sample_outputs = json.load(f)
@@ -91,27 +110,43 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
91
  with gr.Row(equal_height = True):
92
  llm_model = gr.Dropdown(choices = llm_models_to_choose, value = 'mistralai/Mistral-7B-Instruct-v0.2', label = 'LLM Model')
93
  llm_results = gr.Slider(minimum=4, maximum=10, value=5, step=1, interactive=True, label="Top n results as context")
94
- stream_results = gr.Checkbox(value = True, label = "Stream output")
 
95
 
96
  output_text = gr.Textbox(show_label = True, container = True, label = 'LLM Answer', visible = True, placeholder = output_placeholder)
97
  input = gr.Textbox(show_label = False, visible = False)
98
  gr_md = gr.Markdown(mark_text + md_text_initial)
99
 
100
- def update_with_rag_md(message, llm_results_use = 5, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2'):
101
- rag_out = get_rag(message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  md_text_updated = mark_text
103
  for i in range(retrieve_results):
104
  rag_answer = rag_out[i]
105
- title = rag_answer['document_metadata']['title'].replace('\n','')
106
-
107
- date = rag_answer['document_metadata']['_time']
108
- paper_title = f'''### {date} | [{title}](https://arxiv.org/abs/{rag_answer['document_id']}) | [⬇️](https://arxiv.org/pdf/{rag_answer['document_id']})\n'''
109
- paper_abs = rag_answer['content']
110
- authors = rag_answer['document_metadata']['authors'].replace('\n','')
111
- authors_formatted = f'*{authors}*' + ' \n\n'
112
-
113
- md_text_updated += paper_title + authors_formatted + paper_abs + '\n---------------\n'+ '\n'
114
- prompt = get_prompt_text(message, '\n\n'.join(rag_cleaner(out) for out in rag_out[:llm_results_use]), llm_model_picked = llm_model_picked)
115
  return md_text_updated, prompt
116
 
117
  def ask_llm(prompt, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2', stream_outputs = False):
@@ -144,6 +179,6 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
144
  return stream
145
 
146
 
147
- msg.submit(update_with_rag_md, [msg, llm_results, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)
148
 
149
  demo.queue().launch()
 
7
  from datetime import datetime
8
  import json
9
 
10
+ import arxiv
11
+ from utils import get_md_text_abstract, search_cleaner, get_arxiv_live_search
12
+
13
  retrieve_results = 10
14
  show_examples = False
15
  llm_models_to_choose = ['mistralai/Mixtral-8x7B-Instruct-v0.1','mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
 
21
  do_sample = False,
22
  )
23
 
24
+ ## RAG Model
25
  RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
26
 
27
  try:
 
32
  except:
33
  gr.Warning("Retriever not working!")
34
 
35
+ ## Header
36
  mark_text = '# 🔍 Search Results\n'
37
+ header_text = "# ArXiv CS RAG \n"
38
 
39
  try:
40
  with open("README.md", "r") as f:
 
44
  date = match.group().split(': ')[1]
45
  formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
46
  header_text += f'Index Last Updated: {formatted_date}\n'
47
+ index_info = f"Semantic Search - up to {formatted_date}"
48
  except:
49
+ index_info = "Semantic Search"
50
+
51
+ database_choices = [index_info,'Arxiv Search - Latest - (EXPERIMENTAL)']
52
+
53
+ ## Arxiv API
54
+ arx_client = arxiv.Client()
55
+ is_arxiv_available = True
56
+ check_arxiv_result = get_arxiv_live_search("What is Mistral?", arx_client, retrieve_results)
57
+ if len(check_arxiv_result) == 0:
58
+ is_arxiv_available = False
59
+ print("Arxiv search not working, switching to default search ...")
60
+ database_choices = [index_info]
61
+
62
 
63
+
64
+ ## Show examples (disabled)
65
  if show_examples:
66
  with open("sample_outputs.json", "r") as f:
67
  sample_outputs = json.load(f)
 
110
  with gr.Row(equal_height = True):
111
  llm_model = gr.Dropdown(choices = llm_models_to_choose, value = 'mistralai/Mistral-7B-Instruct-v0.2', label = 'LLM Model')
112
  llm_results = gr.Slider(minimum=4, maximum=10, value=5, step=1, interactive=True, label="Top n results as context")
113
+ database_src = gr.Dropdown(choices = database_choices, value = index_info, label = 'Search Source')
114
+ stream_results = gr.Checkbox(value = True, label = "Stream output", visible = False)
115
 
116
  output_text = gr.Textbox(show_label = True, container = True, label = 'LLM Answer', visible = True, placeholder = output_placeholder)
117
  input = gr.Textbox(show_label = False, visible = False)
118
  gr_md = gr.Markdown(mark_text + md_text_initial)
119
 
120
+ def update_with_rag_md(message, llm_results_use = 5, database_choice = index_info, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2'):
121
+ prompt_text_from_data = ""
122
+ database_to_use = database_choice
123
+ if database_choice == index_info:
124
+ rag_out = get_rag(message)
125
+ else:
126
+ arxiv_search_success = True
127
+ try:
128
+ rag_out = get_arxiv_live_search(message, arx_client, retrieve_results)
129
+ if len(rag_out) == 0:
130
+ arxiv_search_success = False
131
+ except:
132
+ arxiv_search_success = False
133
+
134
+
135
+ if not arxiv_search_success:
136
+ gr.Warning("Arxiv Search not working, switching to semantic search ...")
137
+ rag_out = get_rag(message)
138
+ database_to_use = index_info
139
+
140
  md_text_updated = mark_text
141
  for i in range(retrieve_results):
142
  rag_answer = rag_out[i]
143
+ if i < llm_results_use:
144
+ md_text_paper, prompt_text = get_md_text_abstract(rag_answer, source = database_to_use, return_prompt_formatting = True)
145
+ prompt_text_from_data += f"{i+1}. {prompt_text}"
146
+ else:
147
+ md_text_paper = get_md_text_abstract(rag_answer, source = database_to_use)
148
+ md_text_updated += md_text_paper
149
+ prompt = get_prompt_text(message, prompt_text_from_data, llm_model_picked = llm_model_picked)
 
 
 
150
  return md_text_updated, prompt
151
 
152
  def ask_llm(prompt, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2', stream_outputs = False):
 
179
  return stream
180
 
181
 
182
+ msg.submit(update_with_rag_md, [msg, llm_results, database_src, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)
183
 
184
  demo.queue().launch()
colbert/indexes/arxiv_colbert/0.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02b786be4899edd43c88a271b10d650c702685b0de0198fd35d11cf776711d12
3
  size 17224796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9233ec6d9d53438a7d87ac1427e363ce0021b9b5856d14ee02b05ec420482a34
3
  size 17224796
colbert/indexes/arxiv_colbert/0.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:596b8f4f2a014c4f32e404fd71ef9f06b68501fa6c978f09ea09256a212999ca
3
  size 137790512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a4fb10b5e65af293fd55722c825a34711e80173b928d56af051af2f7cf2c2a2
3
  size 137790512
colbert/indexes/arxiv_colbert/1.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f032bc6b476431e756f4ebbfbeb7fe41c50e3ae5c00f8d4ebdf2ec89ba57966
3
  size 18657244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00ebcc4851d6d9ebe0773b49edb01229a9b64ce01b5d21c7598d00166f13be18
3
  size 18657244
colbert/indexes/arxiv_colbert/1.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e35a3ef5f4327be903c8ba7d8e36321326b6372f7afc3e99f5dbc8851cd8253
3
  size 149249776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e66224c0cd2080c5ae292121711c73e7b4b364c364fe32ac13b25eadc9a80c
3
  size 149249776
colbert/indexes/arxiv_colbert/10.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07855f999051c8cf69c84e70e5c98250cd36fed529bff2051aca48daa66ed21e
3
- size 21300641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90cd30a8d65798482e301f84e5588129814b66dd3c03a41e1fecbe62261d530e
3
+ size 21302241
colbert/indexes/arxiv_colbert/10.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 250000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5324869,
5
- "embedding_offset": 49558824
6
  }
 
1
  {
2
  "passage_offset": 250000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5325266,
5
+ "embedding_offset": 49559057
6
  }
colbert/indexes/arxiv_colbert/10.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f95236110cc436b548b255001cf1fd18b707bd2098db6cd77b4c5749e7b6c78c
3
- size 170396981
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9425a008bfdf1583cf39a17822ecc3d9bfb5a23de64667bcd8c8dbe7399887
3
+ size 170409717
colbert/indexes/arxiv_colbert/11.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df3a9605cf26f8a94310238d4b3953ae915a7514732c1b2260b13b659e076ee3
3
- size 21343585
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f62a9a28737ccdcfe088d3e8cab11a347b8a4bc61ea9fe4614fb411b0c3c3aa
3
+ size 21343713
colbert/indexes/arxiv_colbert/11.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 275000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5335612,
5
- "embedding_offset": 54883693
6
  }
 
1
  {
2
  "passage_offset": 275000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5335645,
5
+ "embedding_offset": 54884323
6
  }
colbert/indexes/arxiv_colbert/11.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10f19e52e503c4cbc514d0cc039b6700477bc1c986b26c6f5625978feda90353
3
- size 170740789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25eee695d53dab6bc3e7b02abb63cd82ed14297d270119cc785beb825b672b5
3
+ size 170741813
colbert/indexes/arxiv_colbert/12.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e740d1ae131a2b18a355017a2c464f53e927750a299176eeb05281f236752de
3
- size 21517665
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241ed1eba6c00d899e24195658138dd4706621cffa4e93366fa023ca18de3f24
3
+ size 21518177
colbert/indexes/arxiv_colbert/12.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 300000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5379134,
5
- "embedding_offset": 60219305
6
  }
 
1
  {
2
  "passage_offset": 300000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5379263,
5
+ "embedding_offset": 60219968
6
  }
colbert/indexes/arxiv_colbert/12.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6cf0ab13a172649ebac79caff4904b95f381ce1e92d9e7f983cda10005e1df6
3
- size 172133493
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d252697525846a8a3d04375376b60aa8249f5848a48810354694a7befbde47c
3
+ size 172137589
colbert/indexes/arxiv_colbert/13.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b993050459a61fb92c0dc83c13eca96b579baa076032dab5c96bd3f20cacc5c
3
- size 14703969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da50be3ade8c22b1d885e21c0255862c8b35d785a847f015caeb267d361429d6
3
+ size 16133153
colbert/indexes/arxiv_colbert/13.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 325000,
3
- "num_passages": 18284,
4
- "num_embeddings": 3675697,
5
- "embedding_offset": 65598439
6
  }
 
1
  {
2
  "passage_offset": 325000,
3
+ "num_passages": 19957,
4
+ "num_embeddings": 4033002,
5
+ "embedding_offset": 65599231
6
  }
colbert/indexes/arxiv_colbert/13.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74abf66bdbb8b8ca205bbf28ecd62c5c99fea5c9ffccdc86684c019e032c2acc
3
- size 117623477
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ddb758cc21cae775847adf144dc81f49d6deaa328cbb3a4b4decfc8dc55da85
3
+ size 129057269
colbert/indexes/arxiv_colbert/2.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb9c97c10f869355677d800cb3d1480991522736005813635f09a1657250b7d5
3
- size 19123036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a37f8390b58ace74df2176722ae65c31f1ba3986bc69f90b233d3fc8c32491
3
+ size 19122908
colbert/indexes/arxiv_colbert/2.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 50000,
3
  "num_passages": 25000,
4
- "num_embeddings": 4780476,
5
  "embedding_offset": 8969935
6
  }
 
1
  {
2
  "passage_offset": 50000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 4780440,
5
  "embedding_offset": 8969935
6
  }
colbert/indexes/arxiv_colbert/2.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04c15e645fbcee5378246466d9fc206b491f3e0daad7e58d44682a450cb419c4
3
- size 152976432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ea1f7004b9dd497306bdca872fc181e803a937203253d38081a62ad27f2b2af
3
+ size 152975280
colbert/indexes/arxiv_colbert/3.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c3b7b142607749941de818debc51a9dc14757ad069c84e3dc796f644202f399
3
  size 19531484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127a154192c8444def0e5f11b9ffb845e60947513cff266b6d399bb8e0939a6f
3
  size 19531484
colbert/indexes/arxiv_colbert/3.metadata.json CHANGED
@@ -2,5 +2,5 @@
2
  "passage_offset": 75000,
3
  "num_passages": 25000,
4
  "num_embeddings": 4882584,
5
- "embedding_offset": 13750411
6
  }
 
2
  "passage_offset": 75000,
3
  "num_passages": 25000,
4
  "num_embeddings": 4882584,
5
+ "embedding_offset": 13750375
6
  }
colbert/indexes/arxiv_colbert/3.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaaff7e6e361f26f8be33201d391d83f6a58491d8d236f8531e46e430a96c857
3
  size 156243888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98eed69ae9b86900bed76837111982a91f80ba51be8d3ce0e10477df7cdaf67
3
  size 156243888
colbert/indexes/arxiv_colbert/4.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26b7424ec40d6aee6cd938daf31f0c6769b76e9c57f2d83f0d3876b0f331b524
3
  size 19962524
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:647e2266da43a402faac17f51c870b96b7c1e915c33758dc3cbf6bf6edbf96ae
3
  size 19962524
colbert/indexes/arxiv_colbert/4.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 100000,
3
  "num_passages": 25000,
4
- "num_embeddings": 4990348,
5
- "embedding_offset": 18632995
6
  }
 
1
  {
2
  "passage_offset": 100000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 4990349,
5
+ "embedding_offset": 18632959
6
  }
colbert/indexes/arxiv_colbert/4.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c2b8a4b85ba90d2c4fc76bb6d0c75cf7222ee878088dd80d1d8a41f0560c612
3
  size 159692336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e428585dc9a4611abe8e789c3f34ac992d2f7398e9203b183a06d14179c07431
3
  size 159692336
colbert/indexes/arxiv_colbert/5.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6e121cf813e9c01dd1db42391f3ded109d2ff23eef42c0cfd2291a561ba3b15
3
- size 20303068
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d3754ba1252b5943ff2502d84dc5857d43b281a4decab589a58d5052ab44e2
3
+ size 20303580
colbert/indexes/arxiv_colbert/5.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 125000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5075480,
5
- "embedding_offset": 23623343
6
  }
 
1
  {
2
  "passage_offset": 125000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5075614,
5
+ "embedding_offset": 23623308
6
  }
colbert/indexes/arxiv_colbert/5.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe127677d3ac6f547045bc4e08c7d02f342ee0fb74b9930e28a4fb0def30906e
3
- size 162416560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84115a366b7cb6aaf6f971a916d064ac73e878a9cb63960190268e78807c3b7c
3
+ size 162420848
colbert/indexes/arxiv_colbert/6.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b4596a5628764e7a81c77e77831be5a63d18870c2060a311b30856e240b457b
3
- size 20472796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2e2c32c99abc9191959eb90fdc3e251fca517c78efeddc499d402640e5fb2c
3
+ size 20473180
colbert/indexes/arxiv_colbert/6.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 150000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5117915,
5
- "embedding_offset": 28698823
6
  }
 
1
  {
2
  "passage_offset": 150000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5118011,
5
+ "embedding_offset": 28698922
6
  }
colbert/indexes/arxiv_colbert/6.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1fa2fd34658f6099bdf17f5154c054adae2f6969e646d57a6785165f0739c8f
3
- size 163774448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36c449b807be2ee56d03a1a1d3bace6a7702b3682715f011c682712cd172b120
3
+ size 163777520
colbert/indexes/arxiv_colbert/7.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd442e5168eedc3762451461ae72c680d870e70a2e3a74328541d69137e14eb2
3
- size 20836892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03f4544c82f93eda118eb8a4cce6bf9636a4bc05cb248b4bcbe3b4c08dc3e3f
3
+ size 20836636
colbert/indexes/arxiv_colbert/7.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 175000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5208942,
5
- "embedding_offset": 33816738
6
  }
 
1
  {
2
  "passage_offset": 175000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5208871,
5
+ "embedding_offset": 33816933
6
  }
colbert/indexes/arxiv_colbert/7.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:089557df32c0678b171d6fa8cd077032be52ec1362952a2e402157edd39f12e4
3
- size 166687344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2692e7da6ced1b077c1338e2725085c140c84c6868e31a8c28a7924b7383cd57
3
+ size 166685040
colbert/indexes/arxiv_colbert/8.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:180f6b3a35464ca0c2911df592d817271afa30695c6fcc42745f7db24a6514fc
3
- size 21021340
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d621ba8459bf75fedc6c3800c57e01f6c94ecd067396c3a9f5dbcbd851a100c2
3
+ size 21021404
colbert/indexes/arxiv_colbert/8.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 200000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5255043,
5
- "embedding_offset": 39025680
6
  }
 
1
  {
2
  "passage_offset": 200000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5255060,
5
+ "embedding_offset": 39025804
6
  }
colbert/indexes/arxiv_colbert/8.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d3b5159f26fec80325a56228f945d92bd15b27e1d3a704a2acd992075c0e71a
3
- size 168162544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d371e1d450e2092cc841c9aa2a3eba8927cc2e2f98867f52ae183a87d27247df
3
+ size 168163120
colbert/indexes/arxiv_colbert/9.codes.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be0ccfd90a5385296aa821d90f9ff15c78d7ef2ea2967adc14a1a75a171cb69a
3
- size 21113564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94be0d3ccf5606f49ca0c11f1007a028b14faaaeb77730ad65b1877f26a5e4f
3
+ size 21113948
colbert/indexes/arxiv_colbert/9.metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "passage_offset": 225000,
3
  "num_passages": 25000,
4
- "num_embeddings": 5278101,
5
- "embedding_offset": 44280723
6
  }
 
1
  {
2
  "passage_offset": 225000,
3
  "num_passages": 25000,
4
+ "num_embeddings": 5278193,
5
+ "embedding_offset": 44280864
6
  }
colbert/indexes/arxiv_colbert/9.residuals.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4a7d456fa97a60a90d0bb1c8d6caa05033364f6a5b75917ffd6bf7a85e3717a
3
- size 168900400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f3d5d2d05dceb31430cdcaf7f92151140e72d05a669c463e2354fcea65c5b3
3
+ size 168903344
colbert/indexes/arxiv_colbert/buckets.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:804931d6454d91dbcd74a19b2345189aa49b5056fce6582f7ede479a9730f52c
3
  size 1432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a58ea162ec2bbbc1dd21141742cbc82dbfe3a13d1d14c44f64e1603ee27b51
3
  size 1432
colbert/indexes/arxiv_colbert/centroids.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bc89b1ea0d60711634d5ec75d28580a750a7e073732157bec3b5319456ac401
3
  size 33555622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8cc9ba855afe896e60ababdb0296303783440ba6505f6c9d533fd2bb165e2d
3
  size 33555622
colbert/indexes/arxiv_colbert/collection.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbda587dd17f68c76673cd326294004eee932d542fd927c043789fb61db65bba
3
- size 409810003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666e887d3dc9aa45fa575c60547a4d151822e840feca87f48d17740ac0891f89
3
+ size 411963323
colbert/indexes/arxiv_colbert/docid_metadata_map.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a61ea67a0fb6939795f31877e69713cb83db43ec64148726841bedcb5816d9c0
3
- size 84148235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b05799713b16935bf43c6ac1f45bd466803c2cbc4201c367b09bc2ac0f4653
3
+ size 84578105
colbert/indexes/arxiv_colbert/doclens.10.json CHANGED
The diff for this file is too large to render. See raw diff
 
colbert/indexes/arxiv_colbert/doclens.11.json CHANGED
The diff for this file is too large to render. See raw diff
 
colbert/indexes/arxiv_colbert/doclens.12.json CHANGED
The diff for this file is too large to render. See raw diff
 
colbert/indexes/arxiv_colbert/doclens.13.json CHANGED
The diff for this file is too large to render. See raw diff