andito
/

s2s

Inference Endpoints

Model card Files and versions Community

andito HF staff committed on Sep 19

Commit

3abafc4

•

1 Parent(s): f6f039f

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

handler.py +50 -34
test.py +57 -4

handler.py CHANGED Viewed

@@ -7,6 +7,7 @@ import numpy as np
 from queue import Queue, Empty
 import threading
 import base64
 class EndpointHandler:
     def __init__(self, path=""):
@@ -22,7 +23,7 @@ class EndpointHandler:
             self.parler_tts_handler_kwargs,
             self.melo_tts_handler_kwargs,
             self.chat_tts_handler_kwargs,
-        ) = get_default_arguments(mode='none', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', log_level='DEBUG')
         setup_logger(self.module_kwargs.log_level)
         prepare_all_args(
@@ -57,65 +58,80 @@ class EndpointHandler:
         # Add a new queue for collecting the final output
         self.final_output_queue = Queue()
-    def _collect_output(self):
         while True:
             try:
-                output = self.queues_and_events['send_audio_chunks_queue'].get(timeout=5)  # 2-second timeout
                 if isinstance(output, (str, bytes)) and output in (b"END", "END"):
-                    self.final_output_queue.put("END")
                     break
                 elif isinstance(output, np.ndarray):
-                    self.final_output_queue.put(output.tobytes())
                 else:
-                    self.final_output_queue.put(output)
             except Empty:
-                # If no output for 2 seconds, assume processing is complete
-                self.final_output_queue.put("END")
                 break
-    def __call__(self, data: Dict[str, Any]) -> Generator[Dict[str, Any], None, None]:
-        """
-        Args:
-            data (Dict[str, Any]): The input data containing the necessary arguments.
-        Returns:
-            Generator[Dict[str, Any], None, None]: A generator yielding output chunks from the model or pipeline.
-        """
-        # Start a thread to collect the final output
-        self.output_collector_thread = threading.Thread(target=self._collect_output)
-        self.output_collector_thread.start()
         input_type = data.get("input_type", "text")
         input_data = data.get("inputs", "")
         if input_type == "speech":
-            # Convert input audio data to numpy array
             audio_array = np.frombuffer(input_data, dtype=np.int16)
-            # Put audio data into the recv_audio_chunks_queue
             self.queues_and_events['recv_audio_chunks_queue'].put(audio_array.tobytes())
         elif input_type == "text":
-            # Put text data directly into the text_prompt_queue
             self.queues_and_events['text_prompt_queue'].put(input_data)
         else:
             raise ValueError(f"Unsupported input type: {input_type}")
-        # Collect all output chunks
-        output_chunks = []
-        while True:
-            chunk = self.final_output_queue.get()
-            if chunk == "END":
-                break
-            output_chunks.append(chunk)
-        # Combine all audio chunks into a single byte string
-        combined_audio = b''.join(output_chunks)
-        # Encode the combined audio as Base64
-        base64_audio = base64.b64encode(combined_audio).decode('utf-8')
-        return {"output": base64_audio}
     def cleanup(self):
         # Stop the pipeline

 from queue import Queue, Empty
 import threading
 import base64
+import uuid
 class EndpointHandler:
     def __init__(self, path=""):
             self.parler_tts_handler_kwargs,
             self.melo_tts_handler_kwargs,
             self.chat_tts_handler_kwargs,
+        ) = get_default_arguments(mode='none', log_level='DEBUG')
         setup_logger(self.module_kwargs.log_level)
         prepare_all_args(
         # Add a new queue for collecting the final output
         self.final_output_queue = Queue()
+        self.sessions = {}  # Store session information
+    def _collect_output(self, session_id):
         while True:
             try:
+                output = self.queues_and_events['send_audio_chunks_queue'].get(timeout=2)
                 if isinstance(output, (str, bytes)) and output in (b"END", "END"):
+                    self.sessions[session_id]['status'] = 'completed'
                     break
                 elif isinstance(output, np.ndarray):
+                    self.sessions[session_id]['chunks'].append(output.tobytes())
                 else:
+                    self.sessions[session_id]['chunks'].append(output)
             except Empty:
+                self.sessions[session_id]['status'] = 'completed'
                 break
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        request_type = data.get("request_type", "start")
+        if request_type == "start":
+            return self._handle_start_request(data)
+        elif request_type == "continue":
+            return self._handle_continue_request(data)
+        else:
+            raise ValueError(f"Unsupported request type: {request_type}")
+    def _handle_start_request(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        session_id = str(uuid.uuid4())
+        self.sessions[session_id] = {
+            'status': 'processing',
+            'chunks': [],
+            'last_sent_index': 0
+        }
         input_type = data.get("input_type", "text")
         input_data = data.get("inputs", "")
         if input_type == "speech":
             audio_array = np.frombuffer(input_data, dtype=np.int16)
             self.queues_and_events['recv_audio_chunks_queue'].put(audio_array.tobytes())
         elif input_type == "text":
             self.queues_and_events['text_prompt_queue'].put(input_data)
         else:
             raise ValueError(f"Unsupported input type: {input_type}")
+        # Start output collection in a separate thread
+        threading.Thread(target=self._collect_output, args=(session_id,)).start()
+        return {"session_id": session_id, "status": "processing"}
+    def _handle_continue_request(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        session_id = data.get("session_id")
+        if not session_id or session_id not in self.sessions:
+            raise ValueError("Invalid or missing session_id")
+        session = self.sessions[session_id]
+        chunks_to_send = session['chunks'][session['last_sent_index']:]
+        session['last_sent_index'] = len(session['chunks'])
+        if chunks_to_send:
+            combined_audio = b''.join(chunks_to_send)
+            base64_audio = base64.b64encode(combined_audio).decode('utf-8')
+            return {
+                "session_id": session_id,
+                "status": session['status'],
+                "output": base64_audio
+            }
+        else:
+            return {
+                "session_id": session_id,
+                "status": session['status'],
+                "output": None
+            }
     def cleanup(self):
         # Stop the pipeline

test.py CHANGED Viewed

@@ -1,7 +1,60 @@
 from handler import EndpointHandler
-endpoint = EndpointHandler('')
-for x in endpoint({'text': 'how are you?'}):
-    print('passed')
-    print(x)

 from handler import EndpointHandler
+import requests
+import base64
+import numpy as np
+import sounddevice as sd
+import time
+my_handler = EndpointHandler('')
+def play_audio(audio_data, sample_rate=16000):
+    sd.play(audio_data, sample_rate)
+    sd.wait()
+def stream_audio(session_id):
+    audio_chunks = []
+    while True:
+        continue_payload = {
+            "request_type": "continue",
+            "session_id": session_id
+        }
+        response = my_handler(continue_payload)
+        if response["status"] == "completed" and response["output"] is None:
+            break
+        if response["output"]:
+            audio_bytes = base64.b64decode(response["output"])
+            audio_np = np.frombuffer(audio_bytes, dtype=np.int16)
+            audio_chunks.append(audio_np)
+            # Play the chunk immediately (optional)
+            play_audio(audio_np)
+        time.sleep(0.01)  # Small delay to prevent overwhelming the server
+    return np.concatenate(audio_chunks) if audio_chunks else None
+# Test with text input
+text_payload = {
+    "request_type": "start",
+    "inputs": "Tell me a cool fact about Messi.",
+    "input_type": "text",
+}
+start_response = my_handler(text_payload)
+if "session_id" in start_response:
+    print(f"Session started. Session ID: {start_response['session_id']}")
+    print("Streaming audio response...")
+    full_audio = stream_audio(start_response['session_id'])
+    if full_audio is not None:
+        print("Received complete audio response. Playing...")
+    else:
+        print("No audio received.")
+else:
+    print("Error:", start_response)