Spaces:
Running
Running
Added experimental Arxiv Support
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +1 -1
- app.py +52 -17
- colbert/indexes/arxiv_colbert/0.codes.pt +1 -1
- colbert/indexes/arxiv_colbert/0.residuals.pt +1 -1
- colbert/indexes/arxiv_colbert/1.codes.pt +1 -1
- colbert/indexes/arxiv_colbert/1.residuals.pt +1 -1
- colbert/indexes/arxiv_colbert/10.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/10.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/10.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/11.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/11.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/11.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/12.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/12.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/12.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/13.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/13.metadata.json +3 -3
- colbert/indexes/arxiv_colbert/13.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/2.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/2.metadata.json +1 -1
- colbert/indexes/arxiv_colbert/2.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/3.codes.pt +1 -1
- colbert/indexes/arxiv_colbert/3.metadata.json +1 -1
- colbert/indexes/arxiv_colbert/3.residuals.pt +1 -1
- colbert/indexes/arxiv_colbert/4.codes.pt +1 -1
- colbert/indexes/arxiv_colbert/4.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/4.residuals.pt +1 -1
- colbert/indexes/arxiv_colbert/5.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/5.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/5.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/6.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/6.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/6.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/7.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/7.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/7.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/8.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/8.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/8.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/9.codes.pt +2 -2
- colbert/indexes/arxiv_colbert/9.metadata.json +2 -2
- colbert/indexes/arxiv_colbert/9.residuals.pt +2 -2
- colbert/indexes/arxiv_colbert/buckets.pt +1 -1
- colbert/indexes/arxiv_colbert/centroids.pt +1 -1
- colbert/indexes/arxiv_colbert/collection.json +2 -2
- colbert/indexes/arxiv_colbert/docid_metadata_map.json +2 -2
- colbert/indexes/arxiv_colbert/doclens.10.json +0 -0
- colbert/indexes/arxiv_colbert/doclens.11.json +0 -0
- colbert/indexes/arxiv_colbert/doclens.12.json +0 -0
- colbert/indexes/arxiv_colbert/doclens.13.json +0 -0
README.md
CHANGED
@@ -11,4 +11,4 @@ license: cc0-1.0
|
|
11 |
---
|
12 |
|
13 |
## Arxiv-CS-RAG
|
14 |
-
Index Last Updated : 2024-03-
|
|
|
11 |
---
|
12 |
|
13 |
## Arxiv-CS-RAG
|
14 |
+
Index Last Updated : 2024-03-10
|
app.py
CHANGED
@@ -7,6 +7,9 @@ import re
|
|
7 |
from datetime import datetime
|
8 |
import json
|
9 |
|
|
|
|
|
|
|
10 |
retrieve_results = 10
|
11 |
show_examples = False
|
12 |
llm_models_to_choose = ['mistralai/Mixtral-8x7B-Instruct-v0.1','mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
|
@@ -18,6 +21,7 @@ generate_kwargs = dict(
|
|
18 |
do_sample = False,
|
19 |
)
|
20 |
|
|
|
21 |
RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
|
22 |
|
23 |
try:
|
@@ -28,8 +32,9 @@ try:
|
|
28 |
except:
|
29 |
gr.Warning("Retriever not working!")
|
30 |
|
|
|
31 |
mark_text = '# 🔍 Search Results\n'
|
32 |
-
header_text = "#
|
33 |
|
34 |
try:
|
35 |
with open("README.md", "r") as f:
|
@@ -39,10 +44,24 @@ try:
|
|
39 |
date = match.group().split(': ')[1]
|
40 |
formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
|
41 |
header_text += f'Index Last Updated: {formatted_date}\n'
|
42 |
-
|
43 |
except:
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
|
|
|
|
46 |
if show_examples:
|
47 |
with open("sample_outputs.json", "r") as f:
|
48 |
sample_outputs = json.load(f)
|
@@ -91,27 +110,43 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
|
|
91 |
with gr.Row(equal_height = True):
|
92 |
llm_model = gr.Dropdown(choices = llm_models_to_choose, value = 'mistralai/Mistral-7B-Instruct-v0.2', label = 'LLM Model')
|
93 |
llm_results = gr.Slider(minimum=4, maximum=10, value=5, step=1, interactive=True, label="Top n results as context")
|
94 |
-
|
|
|
95 |
|
96 |
output_text = gr.Textbox(show_label = True, container = True, label = 'LLM Answer', visible = True, placeholder = output_placeholder)
|
97 |
input = gr.Textbox(show_label = False, visible = False)
|
98 |
gr_md = gr.Markdown(mark_text + md_text_initial)
|
99 |
|
100 |
-
def update_with_rag_md(message, llm_results_use = 5, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2'):
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
md_text_updated = mark_text
|
103 |
for i in range(retrieve_results):
|
104 |
rag_answer = rag_out[i]
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
md_text_updated += paper_title + authors_formatted + paper_abs + '\n---------------\n'+ '\n'
|
114 |
-
prompt = get_prompt_text(message, '\n\n'.join(rag_cleaner(out) for out in rag_out[:llm_results_use]), llm_model_picked = llm_model_picked)
|
115 |
return md_text_updated, prompt
|
116 |
|
117 |
def ask_llm(prompt, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2', stream_outputs = False):
|
@@ -144,6 +179,6 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
|
|
144 |
return stream
|
145 |
|
146 |
|
147 |
-
msg.submit(update_with_rag_md, [msg, llm_results, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)
|
148 |
|
149 |
demo.queue().launch()
|
|
|
7 |
from datetime import datetime
|
8 |
import json
|
9 |
|
10 |
+
import arxiv
|
11 |
+
from utils import get_md_text_abstract, search_cleaner, get_arxiv_live_search
|
12 |
+
|
13 |
retrieve_results = 10
|
14 |
show_examples = False
|
15 |
llm_models_to_choose = ['mistralai/Mixtral-8x7B-Instruct-v0.1','mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
|
|
|
21 |
do_sample = False,
|
22 |
)
|
23 |
|
24 |
+
## RAG Model
|
25 |
RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
|
26 |
|
27 |
try:
|
|
|
32 |
except:
|
33 |
gr.Warning("Retriever not working!")
|
34 |
|
35 |
+
## Header
|
36 |
mark_text = '# 🔍 Search Results\n'
|
37 |
+
header_text = "# ArXiv CS RAG \n"
|
38 |
|
39 |
try:
|
40 |
with open("README.md", "r") as f:
|
|
|
44 |
date = match.group().split(': ')[1]
|
45 |
formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
|
46 |
header_text += f'Index Last Updated: {formatted_date}\n'
|
47 |
+
index_info = f"Semantic Search - up to {formatted_date}"
|
48 |
except:
|
49 |
+
index_info = "Semantic Search"
|
50 |
+
|
51 |
+
database_choices = [index_info,'Arxiv Search - Latest - (EXPERIMENTAL)']
|
52 |
+
|
53 |
+
## Arxiv API
|
54 |
+
arx_client = arxiv.Client()
|
55 |
+
is_arxiv_available = True
|
56 |
+
check_arxiv_result = get_arxiv_live_search("What is Mistral?", arx_client, retrieve_results)
|
57 |
+
if len(check_arxiv_result) == 0:
|
58 |
+
is_arxiv_available = False
|
59 |
+
print("Arxiv search not working, switching to default search ...")
|
60 |
+
database_choices = [index_info]
|
61 |
+
|
62 |
|
63 |
+
|
64 |
+
## Show examples (disabled)
|
65 |
if show_examples:
|
66 |
with open("sample_outputs.json", "r") as f:
|
67 |
sample_outputs = json.load(f)
|
|
|
110 |
with gr.Row(equal_height = True):
|
111 |
llm_model = gr.Dropdown(choices = llm_models_to_choose, value = 'mistralai/Mistral-7B-Instruct-v0.2', label = 'LLM Model')
|
112 |
llm_results = gr.Slider(minimum=4, maximum=10, value=5, step=1, interactive=True, label="Top n results as context")
|
113 |
+
database_src = gr.Dropdown(choices = database_choices, value = index_info, label = 'Search Source')
|
114 |
+
stream_results = gr.Checkbox(value = True, label = "Stream output", visible = False)
|
115 |
|
116 |
output_text = gr.Textbox(show_label = True, container = True, label = 'LLM Answer', visible = True, placeholder = output_placeholder)
|
117 |
input = gr.Textbox(show_label = False, visible = False)
|
118 |
gr_md = gr.Markdown(mark_text + md_text_initial)
|
119 |
|
120 |
+
def update_with_rag_md(message, llm_results_use = 5, database_choice = index_info, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2'):
|
121 |
+
prompt_text_from_data = ""
|
122 |
+
database_to_use = database_choice
|
123 |
+
if database_choice == index_info:
|
124 |
+
rag_out = get_rag(message)
|
125 |
+
else:
|
126 |
+
arxiv_search_success = True
|
127 |
+
try:
|
128 |
+
rag_out = get_arxiv_live_search(message, arx_client, retrieve_results)
|
129 |
+
if len(rag_out) == 0:
|
130 |
+
arxiv_search_success = False
|
131 |
+
except:
|
132 |
+
arxiv_search_success = False
|
133 |
+
|
134 |
+
|
135 |
+
if not arxiv_search_success:
|
136 |
+
gr.Warning("Arxiv Search not working, switching to semantic search ...")
|
137 |
+
rag_out = get_rag(message)
|
138 |
+
database_to_use = index_info
|
139 |
+
|
140 |
md_text_updated = mark_text
|
141 |
for i in range(retrieve_results):
|
142 |
rag_answer = rag_out[i]
|
143 |
+
if i < llm_results_use:
|
144 |
+
md_text_paper, prompt_text = get_md_text_abstract(rag_answer, source = database_to_use, return_prompt_formatting = True)
|
145 |
+
prompt_text_from_data += f"{i+1}. {prompt_text}"
|
146 |
+
else:
|
147 |
+
md_text_paper = get_md_text_abstract(rag_answer, source = database_to_use)
|
148 |
+
md_text_updated += md_text_paper
|
149 |
+
prompt = get_prompt_text(message, prompt_text_from_data, llm_model_picked = llm_model_picked)
|
|
|
|
|
|
|
150 |
return md_text_updated, prompt
|
151 |
|
152 |
def ask_llm(prompt, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2', stream_outputs = False):
|
|
|
179 |
return stream
|
180 |
|
181 |
|
182 |
+
msg.submit(update_with_rag_md, [msg, llm_results, database_src, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)
|
183 |
|
184 |
demo.queue().launch()
|
colbert/indexes/arxiv_colbert/0.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17224796
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9233ec6d9d53438a7d87ac1427e363ce0021b9b5856d14ee02b05ec420482a34
|
3 |
size 17224796
|
colbert/indexes/arxiv_colbert/0.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 137790512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a4fb10b5e65af293fd55722c825a34711e80173b928d56af051af2f7cf2c2a2
|
3 |
size 137790512
|
colbert/indexes/arxiv_colbert/1.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18657244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00ebcc4851d6d9ebe0773b49edb01229a9b64ce01b5d21c7598d00166f13be18
|
3 |
size 18657244
|
colbert/indexes/arxiv_colbert/1.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 149249776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19e66224c0cd2080c5ae292121711c73e7b4b364c364fe32ac13b25eadc9a80c
|
3 |
size 149249776
|
colbert/indexes/arxiv_colbert/10.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90cd30a8d65798482e301f84e5588129814b66dd3c03a41e1fecbe62261d530e
|
3 |
+
size 21302241
|
colbert/indexes/arxiv_colbert/10.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 250000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 250000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5325266,
|
5 |
+
"embedding_offset": 49559057
|
6 |
}
|
colbert/indexes/arxiv_colbert/10.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e9425a008bfdf1583cf39a17822ecc3d9bfb5a23de64667bcd8c8dbe7399887
|
3 |
+
size 170409717
|
colbert/indexes/arxiv_colbert/11.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f62a9a28737ccdcfe088d3e8cab11a347b8a4bc61ea9fe4614fb411b0c3c3aa
|
3 |
+
size 21343713
|
colbert/indexes/arxiv_colbert/11.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 275000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 275000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5335645,
|
5 |
+
"embedding_offset": 54884323
|
6 |
}
|
colbert/indexes/arxiv_colbert/11.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e25eee695d53dab6bc3e7b02abb63cd82ed14297d270119cc785beb825b672b5
|
3 |
+
size 170741813
|
colbert/indexes/arxiv_colbert/12.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:241ed1eba6c00d899e24195658138dd4706621cffa4e93366fa023ca18de3f24
|
3 |
+
size 21518177
|
colbert/indexes/arxiv_colbert/12.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 300000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 300000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5379263,
|
5 |
+
"embedding_offset": 60219968
|
6 |
}
|
colbert/indexes/arxiv_colbert/12.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d252697525846a8a3d04375376b60aa8249f5848a48810354694a7befbde47c
|
3 |
+
size 172137589
|
colbert/indexes/arxiv_colbert/13.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da50be3ade8c22b1d885e21c0255862c8b35d785a847f015caeb267d361429d6
|
3 |
+
size 16133153
|
colbert/indexes/arxiv_colbert/13.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 325000,
|
3 |
-
"num_passages":
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 325000,
|
3 |
+
"num_passages": 19957,
|
4 |
+
"num_embeddings": 4033002,
|
5 |
+
"embedding_offset": 65599231
|
6 |
}
|
colbert/indexes/arxiv_colbert/13.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ddb758cc21cae775847adf144dc81f49d6deaa328cbb3a4b4decfc8dc55da85
|
3 |
+
size 129057269
|
colbert/indexes/arxiv_colbert/2.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2a37f8390b58ace74df2176722ae65c31f1ba3986bc69f90b233d3fc8c32491
|
3 |
+
size 19122908
|
colbert/indexes/arxiv_colbert/2.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 50000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
"embedding_offset": 8969935
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 50000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 4780440,
|
5 |
"embedding_offset": 8969935
|
6 |
}
|
colbert/indexes/arxiv_colbert/2.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ea1f7004b9dd497306bdca872fc181e803a937203253d38081a62ad27f2b2af
|
3 |
+
size 152975280
|
colbert/indexes/arxiv_colbert/3.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 19531484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:127a154192c8444def0e5f11b9ffb845e60947513cff266b6d399bb8e0939a6f
|
3 |
size 19531484
|
colbert/indexes/arxiv_colbert/3.metadata.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"passage_offset": 75000,
|
3 |
"num_passages": 25000,
|
4 |
"num_embeddings": 4882584,
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
2 |
"passage_offset": 75000,
|
3 |
"num_passages": 25000,
|
4 |
"num_embeddings": 4882584,
|
5 |
+
"embedding_offset": 13750375
|
6 |
}
|
colbert/indexes/arxiv_colbert/3.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 156243888
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e98eed69ae9b86900bed76837111982a91f80ba51be8d3ce0e10477df7cdaf67
|
3 |
size 156243888
|
colbert/indexes/arxiv_colbert/4.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 19962524
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:647e2266da43a402faac17f51c870b96b7c1e915c33758dc3cbf6bf6edbf96ae
|
3 |
size 19962524
|
colbert/indexes/arxiv_colbert/4.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 100000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 100000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 4990349,
|
5 |
+
"embedding_offset": 18632959
|
6 |
}
|
colbert/indexes/arxiv_colbert/4.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 159692336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e428585dc9a4611abe8e789c3f34ac992d2f7398e9203b183a06d14179c07431
|
3 |
size 159692336
|
colbert/indexes/arxiv_colbert/5.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6d3754ba1252b5943ff2502d84dc5857d43b281a4decab589a58d5052ab44e2
|
3 |
+
size 20303580
|
colbert/indexes/arxiv_colbert/5.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 125000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 125000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5075614,
|
5 |
+
"embedding_offset": 23623308
|
6 |
}
|
colbert/indexes/arxiv_colbert/5.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84115a366b7cb6aaf6f971a916d064ac73e878a9cb63960190268e78807c3b7c
|
3 |
+
size 162420848
|
colbert/indexes/arxiv_colbert/6.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c2e2c32c99abc9191959eb90fdc3e251fca517c78efeddc499d402640e5fb2c
|
3 |
+
size 20473180
|
colbert/indexes/arxiv_colbert/6.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 150000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 150000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5118011,
|
5 |
+
"embedding_offset": 28698922
|
6 |
}
|
colbert/indexes/arxiv_colbert/6.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36c449b807be2ee56d03a1a1d3bace6a7702b3682715f011c682712cd172b120
|
3 |
+
size 163777520
|
colbert/indexes/arxiv_colbert/7.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c03f4544c82f93eda118eb8a4cce6bf9636a4bc05cb248b4bcbe3b4c08dc3e3f
|
3 |
+
size 20836636
|
colbert/indexes/arxiv_colbert/7.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 175000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 175000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5208871,
|
5 |
+
"embedding_offset": 33816933
|
6 |
}
|
colbert/indexes/arxiv_colbert/7.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2692e7da6ced1b077c1338e2725085c140c84c6868e31a8c28a7924b7383cd57
|
3 |
+
size 166685040
|
colbert/indexes/arxiv_colbert/8.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d621ba8459bf75fedc6c3800c57e01f6c94ecd067396c3a9f5dbcbd851a100c2
|
3 |
+
size 21021404
|
colbert/indexes/arxiv_colbert/8.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 200000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 200000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5255060,
|
5 |
+
"embedding_offset": 39025804
|
6 |
}
|
colbert/indexes/arxiv_colbert/8.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d371e1d450e2092cc841c9aa2a3eba8927cc2e2f98867f52ae183a87d27247df
|
3 |
+
size 168163120
|
colbert/indexes/arxiv_colbert/9.codes.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a94be0d3ccf5606f49ca0c11f1007a028b14faaaeb77730ad65b1877f26a5e4f
|
3 |
+
size 21113948
|
colbert/indexes/arxiv_colbert/9.metadata.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"passage_offset": 225000,
|
3 |
"num_passages": 25000,
|
4 |
-
"num_embeddings":
|
5 |
-
"embedding_offset":
|
6 |
}
|
|
|
1 |
{
|
2 |
"passage_offset": 225000,
|
3 |
"num_passages": 25000,
|
4 |
+
"num_embeddings": 5278193,
|
5 |
+
"embedding_offset": 44280864
|
6 |
}
|
colbert/indexes/arxiv_colbert/9.residuals.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04f3d5d2d05dceb31430cdcaf7f92151140e72d05a669c463e2354fcea65c5b3
|
3 |
+
size 168903344
|
colbert/indexes/arxiv_colbert/buckets.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17a58ea162ec2bbbc1dd21141742cbc82dbfe3a13d1d14c44f64e1603ee27b51
|
3 |
size 1432
|
colbert/indexes/arxiv_colbert/centroids.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 33555622
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f8cc9ba855afe896e60ababdb0296303783440ba6505f6c9d533fd2bb165e2d
|
3 |
size 33555622
|
colbert/indexes/arxiv_colbert/collection.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:666e887d3dc9aa45fa575c60547a4d151822e840feca87f48d17740ac0891f89
|
3 |
+
size 411963323
|
colbert/indexes/arxiv_colbert/docid_metadata_map.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76b05799713b16935bf43c6ac1f45bd466803c2cbc4201c367b09bc2ac0f4653
|
3 |
+
size 84578105
|
colbert/indexes/arxiv_colbert/doclens.10.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
colbert/indexes/arxiv_colbert/doclens.11.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
colbert/indexes/arxiv_colbert/doclens.12.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
colbert/indexes/arxiv_colbert/doclens.13.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|