Kohaku-Blueleaf committed on
Commit
293494b
1 Parent(s): 7af4f26

flash attn installation in runtime

Browse files
Files changed (2) hide show
  1. app.py +8 -0
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import os
2
  from time import time_ns
3
 
 
1
+ import subprocess
2
+
3
+ subprocess.run(
4
+ "pip install flash-attn --no-build-isolation",
5
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
6
+ shell=True,
7
+ )
8
+
9
  import os
10
  from time import time_ns
11
 
requirements.txt CHANGED
@@ -4,5 +4,4 @@ llama-cpp-python
4
  gradio
5
  requests
6
  sentencepiece
7
- spaces
8
- https://github.com/tridao/flash-attention-wheels/releases/download/v2.3.5.post7/flash_attn_wheels_test-2.3.5.post7+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
4
  gradio
5
  requests
6
  sentencepiece
7
+ spaces