OneFormerMentoria

Running

praeclarumjj3 committed on Nov 17, 2022

Commit

14f943f

•

1 Parent(s): 21d9813

Compile Worked, fix path data

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -31,32 +31,31 @@ RUN chmod -R 777 $WORKDIR
 COPY requirements.txt $WORKDIR/requirements.txt
 COPY . .
-RUN cat $WORKDIR/oneformer/data/bpe_simple_vocab_16e6.txt
-# RUN pip install --no-cache-dir --upgrade -r $WORKDIR/requirements.txt
-# ARG TORCH_CUDA_ARCH_LIST=7.5+PTX
-# RUN pip install ninja
-# USER root
-# RUN chown -R user:user /usr
-# RUN chmod -R 777 /usr
-# RUN chown -R user:user $HOME
-# RUN chmod -R 777 $HOME
-# RUN chown -R user:user $WORKDIR
-# RUN chmod -R 777 $WORKDIR
-# USER user
-# RUN ln -s $WORKDIR/oneformer/modeling/pixel_decoder/ops/ $WORKDIR/ && ls && cd ops/ && FORCE_CUDA=1 python setup.py build --build-base=$WORKDIR/ install --user && cd ..
-# RUN sh deform_setup.sh
-# USER user
-# RUN sh deform_setup.sh
-# USER user
-# EXPOSE 7860
-# ENTRYPOINT ["python", "gradio_app.py"]

 COPY requirements.txt $WORKDIR/requirements.txt
 COPY . .
+RUN pip install --no-cache-dir --upgrade -r $WORKDIR/requirements.txt
+ARG TORCH_CUDA_ARCH_LIST=7.5+PTX
+RUN pip install ninja
+USER root
+RUN chown -R user:user /usr
+RUN chmod -R 777 /usr
+RUN chown -R user:user $HOME
+RUN chmod -R 777 $HOME
+RUN chown -R user:user $WORKDIR
+RUN chmod -R 777 $WORKDIR
+USER user
+RUN ln -s $WORKDIR/oneformer/modeling/pixel_decoder/ops/ $WORKDIR/ && ls && cd ops/ && FORCE_CUDA=1 python setup.py build --build-base=$WORKDIR/ install --user && cd ..
+RUN sh deform_setup.sh
+USER user
+RUN sh deform_setup.sh
+USER user
+EXPOSE 7860
+ENTRYPOINT ["python", "gradio_app.py"]

oneformer/data/tokenizer.py CHANGED Viewed

@@ -24,7 +24,6 @@
 # Modified by Jiarui Xu
 # -------------------------------------------------------------------------
-import wget
 import gzip
 import html
 import os
@@ -37,9 +36,7 @@ import torch
 @lru_cache()
 def default_bpe():
-    url = 'https://github.com/SHI-Labs/OneFormer/blob/main/oneformer/data/bpe_simple_vocab_16e6.txt.gz'
-    wget.download(url, out=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt.gz'))
-    return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt.gz')
 @lru_cache()
 def bytes_to_unicode():
@@ -124,7 +121,15 @@ class SimpleTokenizer(object):
     def __init__(self, bpe_path: str = default_bpe()):
         self.byte_encoder = bytes_to_unicode()
         self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
-        merges = gzip.open(bpe_path).read().decode('utf-8').split('\n')
         merges = merges[1:49152 - 256 - 2 + 1]
         merges = [tuple(merge.split()) for merge in merges]
         vocab = list(bytes_to_unicode().values())

 # Modified by Jiarui Xu
 # -------------------------------------------------------------------------
 import gzip
 import html
 import os
 @lru_cache()
 def default_bpe():
+    return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt')
 @lru_cache()
 def bytes_to_unicode():
     def __init__(self, bpe_path: str = default_bpe()):
         self.byte_encoder = bytes_to_unicode()
         self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
+        with open(bpe_path) as f:
+            contents = f.readlines()
+        merges = []
+        for cnt in contents:
+            merges.append(cnt.split('\n')[0])
+        merges.append("")
+        # merges = gzip.open(bpe_path).read().decode('utf-8').split('\n')
         merges = merges[1:49152 - 256 - 2 + 1]
         merges = [tuple(merge.split()) for merge in merges]
         vocab = list(bytes_to_unicode().values())