using UnityEngine;
using Unity.Sentis;
using UnityEngine.Video;
using UnityEngine.UI;
using System.IO;
using Lays = Unity.Sentis.Layers;
using System.Collections.Generic;

/*
 *  Hand Landmarks Inference
 *  ========================
 *
 *  Basic inference script for blaze hand landmarks
 *
 *  Put this script on the Main Camera
 *  Drag the sentis file onto the asset field
 *  Create a RawImage in the scene
 *  Put a link to that image in previewUI
 *  Put a video in the Assets/StreamingAssets folder and put its name in videoName
 *  Or put a test image in inputImage
 *  Set inputType to the appropriate input
 */
public class RunHandLandmark : MonoBehaviour
{
    //Drag the *.sentis or *.onnx model asset here:
    public ModelAsset asset;

    //Only used by the alternative (commented-out) loader in SetupModel
    string modelName = "hand_landmark.sentis";

    //Drag a link to a raw image here:
    public RawImage previewUI = null;

    // Put your bounding box sprite image here (fallback marker sprite)
    public Sprite boxSprite;

    // Optional per-landmark sprites: entry j is used for landmark j,
    // landmarks without an entry are drawn with boxSprite
    public Sprite[] markerTextures;

    public string videoName = "chatting.mp4";

    public Texture2D inputImage;

    public InputType inputType = InputType.Video;

    //Resolution of preview image or video
    Vector2Int resolution = new Vector2Int(640, 640);
    WebCamTexture webcam;
    VideoPlayer video;

    const BackendType backend = BackendType.GPUCompute;

    RenderTexture targetTexture;
    public enum InputType { Image, Video, Webcam };

    IWorker worker;

    //Side length (in pixels) of the square image fed to the model
    const int size = 224;

    //Number of landmarks read from the "Identity" output (3 floats each)
    const int landmarkCount = 21;

    Model model;

    //webcam device name:
    const string deviceName = "";

    bool closing = false;

    public struct BoundingBox
    {
        public float centerX;
        public float centerY;
        public float width;
        public float height;
    }

    //Pool of marker GameObjects, indexed by landmark ID and reused every frame
    List<GameObject> boxPool = new();

    // Creates the preview target, then sets up the input source, model and worker.
    void Start()
    {
        // Without these references nothing below can work; disable the component
        // so Update/LateUpdate do not throw every frame.
        if (previewUI == null || asset == null)
        {
            Debug.LogError("RunHandLandmark: 'previewUI' and 'asset' must both be assigned in the inspector.");
            enabled = false;
            return;
        }

        //(Note: if using a webcam on mobile get permissions here first)
        targetTexture = new RenderTexture(resolution.x, resolution.y, 0);
        previewUI.texture = targetTexture;

        SetupInput();
        SetupModel();
        SetupEngine();
    }

    // Loads the model from the assigned asset.
    void SetupModel()
    {
        model = ModelLoader.Load(asset);
        //model = ModelLoader.Load(Path.Join(Application.streamingAssetsPath ,modelName));
    }

    // Creates the inference worker for the loaded model on the configured backend.
    public void SetupEngine()
    {
        worker = WorkerFactory.CreateWorker(backend, model);
    }

    // Starts the selected input source (webcam / video) or blits the still image once.
    void SetupInput()
    {
        switch (inputType)
        {
            case InputType.Webcam:
                {
                    webcam = new WebCamTexture(deviceName, resolution.x, resolution.y);
                    webcam.requestedFPS = 30;
                    webcam.Play();
                    break;
                }
            case InputType.Video:
                {
                    video = gameObject.AddComponent<VideoPlayer>();
                    video.renderMode = VideoRenderMode.APIOnly;
                    video.source = VideoSource.Url;
                    video.url = Application.streamingAssetsPath + "/" + videoName;
                    video.isLooping = true;
                    video.Play();
                    break;
                }
            default:
                {
                    Graphics.Blit(inputImage, targetTexture);
                    break;
                }
        }
    }

    // Refreshes the preview texture from the input, then handles keyboard shortcuts.
    void Update()
    {
        UpdatePreview();

        // Key handling runs every frame: GetKeyDown is only true on the frame of
        // the press, so it must not be skipped when the input has no new image.
        if (Input.GetKeyDown(KeyCode.Escape))
        {
            closing = true;
            Application.Quit();
        }

        if (Input.GetKeyDown(KeyCode.P))
        {
            previewUI.enabled = !previewUI.enabled;
        }
    }

    // Copies the current input frame (if any) into targetTexture.
    void UpdatePreview()
    {
        switch (inputType)
        {
            case InputType.Webcam:
                if (webcam == null || !webcam.didUpdateThisFrame)
                {
                    return;
                }
                BlitFitted(webcam, webcam.width, webcam.height, webcam.videoVerticallyMirrored);
                break;

            case InputType.Video:
                // Skip until the player exposes a texture; before that the
                // dimensions cannot be used for the aspect computation.
                if (video == null || video.texture == null)
                {
                    return;
                }
                BlitFitted(video.texture, video.width, video.height, false);
                break;

            case InputType.Image:
                Graphics.Blit(inputImage, targetTexture);
                break;
        }
    }

    // Blits 'source' into targetTexture, scaling horizontally by the ratio of the
    // target aspect to the source aspect and optionally flipping vertically.
    void BlitFitted(Texture source, float sourceWidth, float sourceHeight, bool vflip)
    {
        if (sourceWidth <= 0f || sourceHeight <= 0f)
        {
            // Avoids a division by zero producing NaN/Infinity scale values.
            return;
        }

        float sourceAspect = sourceWidth / sourceHeight;
        float targetAspect = (float)resolution.x / resolution.y;
        float gap = targetAspect / sourceAspect;

        var scale = new Vector2(gap, vflip ? -1 : 1);
        var offset = new Vector2((1 - gap) / 2, vflip ? 1 : 0);
        Graphics.Blit(source, targetTexture, scale, offset);
    }

    // Runs inference on the preview texture once per frame until shutdown starts.
    void LateUpdate()
    {
        if (closing || worker == null)
        {
            return;
        }

        RunInference(targetTexture);
    }

    // Draws one marker per landmark. 'landmarks' is indexed as [0, j * 3 + k]:
    // k = 0 and k = 1 are used as x and y in model-input pixels; the third value
    // of each triple is not used here. 'scale' converts model pixels to UI units.
    void DrawLandmarks(TensorFloat landmarks, Vector2 scale)
    {
        // Shifts coordinates so that the centre of the model image maps to (0, 0).
        float half = size / 2;

        for (int j = 0; j < landmarkCount; j++)
        {
            var marker = new BoundingBox
            {
                centerX = landmarks[0, j * 3] * scale.x - half * scale.x,
                centerY = landmarks[0, j * 3 + 1] * scale.y - half * scale.y,
                width = 8f * scale.x,
                height = 8f * scale.y,
            };

            bool hasOwnSprite = markerTextures != null && j < markerTextures.Length;
            DrawBox(marker, hasOwnSprite ? markerTextures[j] : boxSprite, j);
        }
    }

    // Converts 'source' to a size x size x 3 tensor, executes the model and draws
    // the landmarks found in the "Identity" output on top of the preview.
    void RunInference(Texture source)
    {
        // Named textureTransform so it does not hide Component.transform.
        var textureTransform = new TextureTransform();
        textureTransform.SetDimensions(size, size, 3);
        textureTransform.SetTensorLayout(0, 1, 2, 3);

        using var image = TextureConverter.ToTensor(source, textureTransform);

        worker.Execute(image);

        // NOTE(review): PeekOutput tensors are presumably owned by the worker; the
        // 'using' is kept from the original code - confirm against the Sentis
        // memory-management docs for the version in use.
        using var landmarks = worker.PeekOutput("Identity") as TensorFloat;
        if (landmarks == null)
        {
            Debug.LogError("RunHandLandmark: model output 'Identity' is missing or is not a TensorFloat.");
            return;
        }

        ClearAnnotations();

        Vector2 markerScale = previewUI.rectTransform.rect.size / size;

        // Make the tensor data readable on the CPU before indexing into it.
        landmarks.CompleteOperationsAndDownload();
        DrawLandmarks(landmarks, markerScale);

        bool showExtraInformation = false;
        if (showExtraInformation)
        {
            using var A = worker.PeekOutput("Identity_1") as TensorFloat;
            using var B = worker.PeekOutput("Identity_2") as TensorFloat;
            A.CompleteOperationsAndDownload();
            B.CompleteOperationsAndDownload();
            Debug.Log("A,B=" + A[0, 0] + "," + B[0, 0]);
        }
    }

    // Shows a marker for 'box' using 'sprite'. Markers are pooled by 'ID': a new
    // GameObject is created when ID is past the end of the pool, otherwise the
    // existing one is re-activated and updated.
    public void DrawBox(BoundingBox box, Sprite sprite, int ID)
    {
        GameObject panel;
        if (ID >= boxPool.Count)
        {
            panel = new GameObject("landmark");
            panel.AddComponent<CanvasRenderer>();
            panel.AddComponent<Image>();
            panel.transform.SetParent(previewUI.transform, false);
            boxPool.Add(panel);
        }
        else
        {
            panel = boxPool[ID];
            panel.SetActive(true);
        }

        var img = panel.GetComponent<Image>();
        img.color = Color.white;
        img.sprite = sprite;
        img.type = Image.Type.Sliced;

        // Y is negated when placing the marker under the preview image.
        panel.transform.localPosition = new Vector3(box.centerX, -box.centerY);
        RectTransform rt = panel.GetComponent<RectTransform>();
        rt.sizeDelta = new Vector2(box.width, box.height);
    }

    // Hides every pooled marker; DrawBox re-activates the ones still in use.
    public void ClearAnnotations()
    {
        foreach (GameObject marker in boxPool)
        {
            marker.SetActive(false);
        }
    }

    // Stops inference and releases the input sources, render target and worker.
    void CleanUp()
    {
        closing = true;

        if (webcam)
        {
            webcam.Stop();
            Destroy(webcam);
        }

        if (video)
        {
            Destroy(video);
        }

        RenderTexture.active = null;

        // targetTexture is not created when Start bails out early.
        if (targetTexture)
        {
            targetTexture.Release();
        }

        worker?.Dispose();
        worker = null;
    }

    void OnDestroy()
    {
        CleanUp();
    }
}