ccm committed on
Commit
c6cda2e
•
1 Parent(s): 5837337

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +41 -0
main.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio
2
+ import numpy
3
+ import pandas
4
+ import sentence_transformers
5
+ import datasets
6
+ import faiss
7
+
8
# Sentence-embedding model used by search() to encode incoming queries.
model = sentence_transformers.SentenceTransformer('allenai-specter')

# Publication records as a DataFrame; each row carries a precomputed
# 'embedding' vector (presumably produced with the same model -- confirm).
data = datasets.load_dataset("ccm/publications")['train'].to_pandas()

# FAISS only accepts C-contiguous float32 matrices, so normalise the dtype and
# layout explicitly instead of relying on how the dataset stored the vectors.
vectors = numpy.ascontiguousarray(
    numpy.stack(data['embedding'].to_list(), axis=0),
    dtype=numpy.float32,
)

# Take the width from the stacked matrix rather than from the row labelled 0,
# so this does not depend on the DataFrame's index labels.
dimensionality = vectors.shape[1]
index = faiss.IndexFlatL2(dimensionality)

# Row i of `vectors` becomes FAISS id i, i.e. the positional row i of `data`.
index.add(vectors)
18
+
19
def search(query):
    """Return the publications nearest to *query* as a Markdown numbered list.

    The query text is embedded with the module-level ``model`` and matched
    against the module-level FAISS ``index``; the matching rows are read from
    the module-level ``data`` frame.

    Args:
        query: Free-text search terms.

    Returns:
        A string with one line per hit, formatted as
        ``N. "<title>" <citation>`` and followed by a ``[Full Paper](url)``
        link when the row has a non-empty string ``pub_url``. Every line ends
        with a newline. An empty string is returned for a blank query.
    """
    k = 5

    # Nothing meaningful to embed; return an empty result instead of the
    # arbitrary neighbours of an empty string.
    if not query or not query.strip():
        return ""

    # index.search expects a 2-D batch, so give the single embedding a
    # leading batch axis.
    query_vector = numpy.expand_dims(model.encode(query), axis=0)
    _, neighbor_ids = index.search(query_vector, k)

    # FAISS pads the id row with -1 when the index holds fewer than k vectors;
    # drop those so the lookup below cannot fail or pick the wrong row.
    positions = [int(i) for i in neighbor_ids[0] if i >= 0]

    # FAISS ids are insertion positions, so select rows positionally.
    top_hits = data.iloc[positions]

    lines = []
    hits = zip(top_hits["bibtex"], top_hits["pub_url"])
    for rank, (bibtex, pub_url) in enumerate(hits, start=1):
        line = f'{rank}. "{bibtex["title"]}" {bibtex["citation"]}'
        # Only link when a usable URL string is present (skips None, NaN and
        # empty strings, which would otherwise crash or render an empty link).
        if isinstance(pub_url, str) and pub_url:
            line += f" [Full Paper]({pub_url})"
        lines.append(line + "\n")
    return "".join(lines)
34
+
35
# Minimal search UI: a query box, a button, and a Markdown area for the hits.
with gradio.Blocks() as demo:
    query_box = gradio.Textbox(placeholder="Enter search terms...")
    search_button = gradio.Button("Search")
    results_panel = gradio.Markdown()

    # Clicking the button runs search() on the textbox contents and renders
    # the returned Markdown string in the results area.
    search_button.click(
        fn=search,
        inputs=[query_box],
        outputs=results_panel,
    )

demo.launch(debug=True)