praeclarumjj3 committed on
Commit
14f943f
1 Parent(s): 21d9813

Compile Worked, fix path data

Browse files
Files changed (2) hide show
  1. Dockerfile +18 -19
  2. oneformer/data/tokenizer.py +10 -5
Dockerfile CHANGED
@@ -31,32 +31,31 @@ RUN chmod -R 777 $WORKDIR
31
 
32
  COPY requirements.txt $WORKDIR/requirements.txt
33
  COPY . .
34
- RUN cat $WORKDIR/oneformer/data/bpe_simple_vocab_16e6.txt
35
 
36
- # RUN pip install --no-cache-dir --upgrade -r $WORKDIR/requirements.txt
37
 
38
- # ARG TORCH_CUDA_ARCH_LIST=7.5+PTX
39
 
40
 
41
- # RUN pip install ninja
42
 
43
- # USER root
44
- # RUN chown -R user:user /usr
45
- # RUN chmod -R 777 /usr
46
- # RUN chown -R user:user $HOME
47
- # RUN chmod -R 777 $HOME
48
- # RUN chown -R user:user $WORKDIR
49
- # RUN chmod -R 777 $WORKDIR
50
 
51
- # USER user
52
- # RUN ln -s $WORKDIR/oneformer/modeling/pixel_decoder/ops/ $WORKDIR/ && ls && cd ops/ && FORCE_CUDA=1 python setup.py build --build-base=$WORKDIR/ install --user && cd ..
53
- # RUN sh deform_setup.sh
54
 
55
- # USER user
56
- # RUN sh deform_setup.sh
57
 
58
- # USER user
59
 
60
- # EXPOSE 7860
61
 
62
- # ENTRYPOINT ["python", "gradio_app.py"]
 
31
 
32
  COPY requirements.txt $WORKDIR/requirements.txt
33
  COPY . .
 
34
 
35
+ RUN pip install --no-cache-dir --upgrade -r $WORKDIR/requirements.txt
36
 
37
+ ARG TORCH_CUDA_ARCH_LIST=7.5+PTX
38
 
39
 
40
+ RUN pip install ninja
41
 
42
+ USER root
43
+ RUN chown -R user:user /usr
44
+ RUN chmod -R 777 /usr
45
+ RUN chown -R user:user $HOME
46
+ RUN chmod -R 777 $HOME
47
+ RUN chown -R user:user $WORKDIR
48
+ RUN chmod -R 777 $WORKDIR
49
 
50
+ USER user
51
+ RUN ln -s $WORKDIR/oneformer/modeling/pixel_decoder/ops/ $WORKDIR/ && ls && cd ops/ && FORCE_CUDA=1 python setup.py build --build-base=$WORKDIR/ install --user && cd ..
52
+ RUN sh deform_setup.sh
53
 
54
+ USER user
55
+ RUN sh deform_setup.sh
56
 
57
+ USER user
58
 
59
+ EXPOSE 7860
60
 
61
+ ENTRYPOINT ["python", "gradio_app.py"]
oneformer/data/tokenizer.py CHANGED
@@ -24,7 +24,6 @@
24
  # Modified by Jiarui Xu
25
  # -------------------------------------------------------------------------
26
 
27
- import wget
28
  import gzip
29
  import html
30
  import os
@@ -37,9 +36,7 @@ import torch
37
 
38
  @lru_cache()
39
  def default_bpe():
40
- url = 'https://github.com/SHI-Labs/OneFormer/blob/main/oneformer/data/bpe_simple_vocab_16e6.txt.gz'
41
- wget.download(url, out=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt.gz'))
42
- return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt.gz')
43
 
44
  @lru_cache()
45
  def bytes_to_unicode():
@@ -124,7 +121,15 @@ class SimpleTokenizer(object):
124
  def __init__(self, bpe_path: str = default_bpe()):
125
  self.byte_encoder = bytes_to_unicode()
126
  self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
127
- merges = gzip.open(bpe_path).read().decode('utf-8').split('\n')
 
 
 
 
 
 
 
 
128
  merges = merges[1:49152 - 256 - 2 + 1]
129
  merges = [tuple(merge.split()) for merge in merges]
130
  vocab = list(bytes_to_unicode().values())
 
24
  # Modified by Jiarui Xu
25
  # -------------------------------------------------------------------------
26
 
 
27
  import gzip
28
  import html
29
  import os
 
36
 
37
  @lru_cache()
38
  def default_bpe():
39
+ return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt')
 
 
40
 
41
  @lru_cache()
42
  def bytes_to_unicode():
 
121
  def __init__(self, bpe_path: str = default_bpe()):
122
  self.byte_encoder = bytes_to_unicode()
123
  self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
124
+
125
+ with open(bpe_path) as f:
126
+ contents = f.readlines()
127
+ merges = []
128
+ for cnt in contents:
129
+ merges.append(cnt.split('\n')[0])
130
+ merges.append("")
131
+
132
+ # merges = gzip.open(bpe_path).read().decode('utf-8').split('\n')
133
  merges = merges[1:49152 - 256 - 2 + 1]
134
  merges = [tuple(merge.split()) for merge in merges]
135
  vocab = list(bytes_to_unicode().values())