Browse Source

init_screenshot_project

screenshot
ydw 2 months ago
parent
commit
9fd5385071
43 changed files with 1880 additions and 0 deletions
  1. +21
    -0
      LICENSE
  2. BIN
      image/cropped_Left_1725688066.jpg
  3. +0
    -0
      main.py
  4. +108
    -0
      utils_en/GUI.py
  5. +0
    -0
      utils_en/__init__.py
  6. BIN
      utils_en/__pycache__/gesture_data.cpython-39.pyc
  7. BIN
      utils_en/__pycache__/hand_gesture.cpython-39.pyc
  8. BIN
      utils_en/__pycache__/index_finger.cpython-39.pyc
  9. BIN
      utils_en/__pycache__/kalman_filter.cpython-39.pyc
  10. BIN
      utils_en/__pycache__/model.cpython-39.pyc
  11. BIN
      utils_en/__pycache__/process_images.cpython-39.pyc
  12. BIN
      utils_en/__pycache__/video_recognition.cpython-39.pyc
  13. +35
    -0
      utils_en/finger_drawer.py
  14. +43
    -0
      utils_en/gesture_data.py
  15. +24
    -0
      utils_en/gesture_process.py
  16. +406
    -0
      utils_en/gesture_recognition.ipynb
  17. +61
    -0
      utils_en/hand_gesture.py
  18. +114
    -0
      utils_en/index_finger.py
  19. +36
    -0
      utils_en/kalman_filter.py
  20. +17
    -0
      utils_en/model.py
  21. +24
    -0
      utils_en/process_images.py
  22. +65
    -0
      utils_en/video_recognition.py
  23. +112
    -0
      utils_zh/GUI.py
  24. +0
    -0
      utils_zh/__init__.py
  25. BIN
      utils_zh/__pycache__/__init__.cpython-39.pyc
  26. BIN
      utils_zh/__pycache__/finger_drawer.cpython-39.pyc
  27. BIN
      utils_zh/__pycache__/gesture_data.cpython-39.pyc
  28. BIN
      utils_zh/__pycache__/hand_gesture.cpython-39.pyc
  29. BIN
      utils_zh/__pycache__/index_finger.cpython-39.pyc
  30. BIN
      utils_zh/__pycache__/kalman_filter.cpython-39.pyc
  31. BIN
      utils_zh/__pycache__/model.cpython-39.pyc
  32. BIN
      utils_zh/__pycache__/process_images.cpython-39.pyc
  33. BIN
      utils_zh/__pycache__/video_recognition.cpython-39.pyc
  34. +34
    -0
      utils_zh/finger_drawer.py
  35. +43
    -0
      utils_zh/gesture_data.py
  36. +24
    -0
      utils_zh/gesture_process.py
  37. +403
    -0
      utils_zh/gesture_recognition.ipynb
  38. +56
    -0
      utils_zh/hand_gesture.py
  39. +112
    -0
      utils_zh/index_finger.py
  40. +36
    -0
      utils_zh/kalman_filter.py
  41. +17
    -0
      utils_zh/model.py
  42. +24
    -0
      utils_zh/process_images.py
  43. +65
    -0
      utils_zh/video_recognition.py

+ 21
- 0
LICENSE View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 EzraZephyr
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

BIN
image/cropped_Left_1725688066.jpg View File

Width: 214  |  Height: 203  |  Size: 9.1 KiB

+ 0
- 0
main.py View File


+ 108
- 0
utils_en/GUI.py View File

@@ -0,0 +1,108 @@
import cv2
import tkinter as tk
from tkinter import filedialog, messagebox
from video_recognition import start_camera, upload_and_process_video, show_frame
from process_images import HandGestureProcessor
current_mode = None
current_cap = None
# To track the current mode and camera resources
def create_gui():
root = tk.Tk()
root.title("Gesture Recognition")
root.geometry("800x600")
canvas = tk.Canvas(root, width=640, height=480)
canvas.pack()
# Create a canvas to display video content
camera_button = tk.Button(
root,
text="Use Camera for Real-time Recognition",
command=lambda: switch_to_camera(canvas)
)
camera_button.pack(pady=10)
# Button to start real-time recognition using the camera
video_button = tk.Button(
root,
text="Upload Video File for Processing",
command=lambda: select_and_process_video(canvas, root)
)
video_button.pack(pady=10)
# Button to upload and process video files
root.mainloop()
def switch_to_camera(canvas):
global current_mode, current_cap
stop_current_operation()
# Stop the current operation and release the camera
current_mode = "camera"
canvas.delete("all")
# Set the current mode to camera and clear the Canvas
current_cap = cv2.VideoCapture(0)
if not current_cap.isOpened():
messagebox.showerror("Error", "Cannot open camera")
current_mode = None
return
# Start the camera
start_camera(canvas, current_cap)
# Pass the canvas and current_cap to start the camera
def select_and_process_video(canvas, root):
global current_mode, current_cap
stop_current_operation()
current_mode = "video"
canvas.delete("all")
video_path = filedialog.askopenfilename(
title="Select a Video File",
filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*"))
)
# Select a video file
if video_path:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
messagebox.showerror("Error", "Cannot open video file")
return
# Get video width and height, and adjust Canvas size
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
canvas.config(width=frame_width, height=frame_height)
root.geometry(f"{frame_width + 160}x{frame_height + 200}") # Adjust window size
# Get video dimensions and dynamically adjust the canvas size
error_message = upload_and_process_video(canvas, video_path)
if error_message:
messagebox.showerror("Error", error_message)
# Upload and process the video file
def stop_current_operation():
global current_cap
if current_cap and current_cap.isOpened():
current_cap.release()
cv2.destroyAllWindows()
current_cap = None
# Stop the current operation, release camera resources, and close all windows
def start_camera(canvas, cap):
if not cap.isOpened():
return "Cannot open camera"
gesture_processor = HandGestureProcessor()
show_frame(canvas, cap, gesture_processor)
# Start the camera for real-time gesture recognition
if __name__ == "__main__":
create_gui()

+ 0
- 0
utils_en/__init__.py View File


BIN
utils_en/__pycache__/gesture_data.cpython-39.pyc View File


BIN
utils_en/__pycache__/hand_gesture.cpython-39.pyc View File


BIN
utils_en/__pycache__/index_finger.cpython-39.pyc View File


BIN
utils_en/__pycache__/kalman_filter.cpython-39.pyc View File


BIN
utils_en/__pycache__/model.cpython-39.pyc View File


BIN
utils_en/__pycache__/process_images.cpython-39.pyc View File


BIN
utils_en/__pycache__/video_recognition.cpython-39.pyc View File


+ 35
- 0
utils_en/finger_drawer.py View File

@@ -0,0 +1,35 @@
import cv2
class FingerDrawer:
@staticmethod
def draw_finger_points(image, hand_21, temp_handness, width, height):
cz0 = hand_21.landmark[0].z
index_finger_tip_str = ''
for i in range(21):
cx = int(hand_21.landmark[i].x * width)
cy = int(hand_21.landmark[i].y * height)
cz = hand_21.landmark[i].z
depth_z = cz0 - cz
radius = max(int(6 * (1 + depth_z * 5)), 0)
# Adjust the radius of the circle based on depth
if i == 0:
image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
elif i == 8:
image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, '
elif i in [1, 5, 9, 13, 17]:
image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
elif i in [2, 6, 10, 14, 18]:
image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
elif i in [3, 7, 11, 15, 19]:
image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
elif i in [4, 12, 16, 20]:
image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
# Draw circles of different colors based on each group of joints and adjust according to the
# depth relative to the wrist
return image, index_finger_tip_str
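Note: the radius formula above scales each landmark's dot by its depth relative to the wrist (landmark 0); MediaPipe's z grows more negative toward the camera, so depth_z = cz0 - cz is positive for points in front of the wrist. A small illustrative sketch of that scaling (the sample values are made up, not from the project):

def landmark_radius(depth_z, base=6, gain=5):
    # Same expression as in draw_finger_points: larger when closer to the camera than the wrist
    return max(int(base * (1 + depth_z * gain)), 0)

for depth_z in (-0.20, -0.05, 0.0, 0.05, 0.20):
    print(f"depth_z={depth_z:+.2f} -> radius={landmark_radius(depth_z)}")
# prints radii 0, 4, 6, 7, 12 respectively: points behind the wrist shrink to nothing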

+ 43
- 0
utils_en/gesture_data.py View File

@@ -0,0 +1,43 @@
from collections import deque
class HandState:
def __init__(self):
self.gesture_locked = {'Left': False, 'Right': False}
self.gesture_start_time = {'Left': 0, 'Right': 0}
self.buffer_start_time = {'Left': 0, 'Right': 0}
self.start_drag_time = {'Left': 0, 'Right': 0}
self.dragging = {'Left': False, 'Right': False}
self.drag_point = {'Left': (0, 0), 'Right': (0, 0)}
self.buffer_duration = {'Left': 0.25, 'Right': 0.25}
self.is_index_finger_up = {'Left': False, 'Right': False}
self.index_finger_second = {'Left': 0, 'Right': 0}
self.index_finger_tip = {'Left': 0, 'Right': 0}
self.trajectory = {'Left': [], 'Right': []}
self.square_queue = deque()
self.wait_time = 1.5
self.kalman_wait_time = 0.5
self.wait_box = 2
self.rect_draw_time = {'Left': 0, 'Right': 0}
self.last_drawn_box = {'Left': None, 'Right': None}
def clear_hand_states(self, detected_hand='Both'):
hands_to_clear = {'Left', 'Right'}
if detected_hand == 'Both':
hands_to_clear = hands_to_clear
else:
hands_to_clear -= {detected_hand}
for h in hands_to_clear:
self.gesture_locked[h] = False
self.gesture_start_time[h] = 0
self.buffer_start_time[h] = 0
self.dragging[h] = False
self.drag_point[h] = (0, 0)
self.buffer_duration[h] = 0.25
self.is_index_finger_up[h] = False
self.trajectory[h].clear()
self.start_drag_time[h] = 0
self.rect_draw_time[h] = 0
self.last_drawn_box[h] = None
# Used to record information for the left and right hands separately to avoid data conflicts
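Note: despite its name, the detected_hand argument is the hand to keep; the method clears every other hand, and the default 'Both' clears both. A minimal usage sketch (assumes gesture_data.py is on the import path):

from gesture_data import HandState

state = HandState()
state.dragging['Right'] = True               # pretend the right hand was mid-gesture
state.trajectory['Right'].append((10, 20))

state.clear_hand_states(detected_hand='Left')    # only the left hand is visible now
print(state.dragging['Right'], state.trajectory['Right'])   # False []

state.clear_hand_states()                    # default 'Both' resets both hands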

+ 24
- 0
utils_en/gesture_process.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler
class HandGestureProcessor:
def __init__(self):
self.hand_handler = HandGestureHandler()
def process_image(self, image):
start_time = time.time()
height, width = image.shape[:2]
image = cv2.flip(image, 1)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Get image dimensions, flip, and convert color space
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video=False)  # handle_hand_gestures also expects the is_video flag
spend_time = time.time() - start_time
FPS = 1.0 / spend_time if spend_time > 0 else 0
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
# Calculate and display the frame rate
return image

+ 406
- 0
utils_en/gesture_recognition.ipynb View File

@@ -0,0 +1,406 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-09-07T05:10:50.912839Z",
"start_time": "2024-09-07T05:10:44.776680Z"
}
},
"source": [
"import cv2\n",
"import time\n",
"import mediapipe\n",
"import numpy as np\n",
"from collections import deque\n",
"from filterpy.kalman import KalmanFilter"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:50.928940Z",
"start_time": "2024-09-07T05:10:50.913896Z"
}
},
"cell_type": "code",
"source": [
"gesture_locked = {'Left':False,'Right':False}\n",
"gesture_start_time = {'Left':0,'Right':0}\n",
"buffer_start_time = {'Left':0,'Right':0}\n",
"start_drag_time = {'Left':0,'Right':0}\n",
"dragging = {'Left':False,'Right':False}\n",
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n",
"buffer_duration = {'Left':0.25,'Right':0.25}\n",
"is_index_finger_up = {'Left':False,'Right':False}\n",
"index_finger_second = {'Left':0,'Right':0}\n",
"index_finger_tip = {'Left':0,'Right':0}\n",
"trajectory = {'Left':[],'Right':[]}\n",
"square_queue = deque()\n",
"wait_time = 1.5\n",
"kalman_wait_time = 0.5\n",
"wait_box = 2\n",
"rect_draw_time = {'Left':0,'Right':0}\n",
"last_drawn_box = {'Left':None,'Right':None}\n",
"elapsed_time = {'Left':0,'Right':0}"
],
"id": "40aada17ccd31fe",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:55.708038Z",
"start_time": "2024-09-07T05:10:55.691926Z"
}
},
"cell_type": "code",
"source": [
"def clear_hand_states(detected_hand='Both'):\n",
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration, is_index_finger_up, trajectory, wait_time, kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
"\n",
" hands_to_clear = {'Left', 'Right'}\n",
" if detected_hand == 'Both':\n",
" hands_to_clear = hands_to_clear\n",
" else:\n",
" hands_to_clear -= {detected_hand}\n",
" # Reverse check for left and right hands\n",
"\n",
" for h in hands_to_clear:\n",
" gesture_locked[h] = False\n",
" gesture_start_time[h] = 0\n",
" buffer_start_time[h] = 0\n",
" dragging[h] = False\n",
" drag_point[h] = (0, 0)\n",
" buffer_duration[h] = 0.25\n",
" is_index_finger_up[h] = False\n",
" trajectory[h].clear()\n",
" start_drag_time[h] = 0\n",
" rect_draw_time[h] = 0\n",
" last_drawn_box[h] = None\n",
" elapsed_time[h] = 0\n",
" # Clear states for hands that are not detected"
],
"id": "2ee9323bb1c25cc0",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:56.547939Z",
"start_time": "2024-09-07T05:10:56.532265Z"
}
},
"cell_type": "code",
"source": [
"kalman_filters = {\n",
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n",
"}\n",
"\n",
"for key in kalman_filters:\n",
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n",
" kalman_filters[key].F = np.array([[1, 0, 1, 0],\n",
" [0, 1, 0, 1],\n",
" [0, 0, 1, 0],\n",
" [0, 0, 0, 1]])\n",
" # State transition matrix\n",
" kalman_filters[key].H = np.array([[1, 0, 0, 0],\n",
" [0, 1, 0, 0]])\n",
" # Observation matrix\n",
" kalman_filters[key].P *= 1000.\n",
" kalman_filters[key].R = 3\n",
" kalman_filters[key].Q = np.eye(4) * 0.01\n",
"\n",
"def kalman_filter_point(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.predict()\n",
" kf.update([x, y])\n",
" # Update state\n",
" return (kf.x[0], kf.x[1])\n",
"\n",
"def reset_kalman_filter(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.x = np.array([x, y, 0., 0.])\n",
" kf.P *= 1000.\n",
" # Reset"
],
"id": "96cf431d2562e7d",
"outputs": [],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:57.253008Z",
"start_time": "2024-09-07T05:10:57.231898Z"
}
},
"cell_type": "code",
"source": [
"mp_hands = mediapipe.solutions.hands\n",
"\n",
"hands = mp_hands.Hands(\n",
" static_image_mode=False,\n",
" max_num_hands=2,\n",
" # One hand is more stable\n",
" min_detection_confidence=0.5,\n",
" min_tracking_confidence=0.5\n",
")\n",
"\n",
"mp_drawing = mediapipe.solutions.drawing_utils\n",
"clear_hand_states()"
],
"id": "edc274b7ed495122",
"outputs": [],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:58.920644Z",
"start_time": "2024-09-07T05:10:58.881367Z"
}
},
"cell_type": "code",
"source": [
"def process_image(image):\n",
"\n",
" start_time = time.time()\n",
" height, width = image.shape[:2]\n",
" image = cv2.flip(image, 1)\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" # Preprocess the input frame\n",
"\n",
" results = hands.process(image)\n",
"\n",
" if results.multi_hand_landmarks:\n",
" # If hands are detected\n",
"\n",
" handness_str = ''\n",
" index_finger_tip_str = ''\n",
"\n",
" if len(results.multi_hand_landmarks) == 1:\n",
" clear_hand_states(detected_hand=results.multi_handedness[0].classification[0].label)\n",
" # If only one hand is detected, clear the data of the other hand to avoid conflicts that could cause instability.\n",
"\n",
" for hand_idx in range(len(results.multi_hand_landmarks)):\n",
"\n",
" hand_21 = results.multi_hand_landmarks[hand_idx]\n",
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n",
"\n",
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n",
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n",
" is_index_finger_up[temp_handness] = False\n",
" # Set to False first to prevent incorrect updates to raised when lowered\n",
"\n",
" cz0 = hand_21.landmark[0].z\n",
" index_finger_second[temp_handness] = hand_21.landmark[7]\n",
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n",
" # Index fingertip and first joint\n",
"\n",
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n",
"\n",
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n",
" is_index_finger_up[temp_handness] = True\n",
" # If the fingertip and second joint are higher than all other keypoints on the hand, consider it as a \"pointing\" gesture. \n",
"\n",
" if is_index_finger_up[temp_handness]:\n",
" if not gesture_locked[temp_handness]:\n",
" if gesture_start_time[temp_handness] == 0:\n",
" gesture_start_time[temp_handness] = time.time()\n",
" # Record the time when the index finger is raised\n",
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n",
" dragging[temp_handness] = True\n",
" gesture_locked[temp_handness] = True\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" # If the index finger is raised for longer than the set wait time, it is considered a \"pointing\" gesture.\n",
" buffer_start_time[temp_handness] = 0\n",
" # Refresh the buffer time whenever the index finger is raised\n",
" else:\n",
" if buffer_start_time[temp_handness] == 0:\n",
" buffer_start_time[temp_handness] = time.time()\n",
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n",
" gesture_start_time[temp_handness] = 0\n",
" gesture_locked[temp_handness] = False\n",
" dragging[temp_handness] = False\n",
" # If the buffer time exceeds the set limit, it indicates the end of the pointing gesture.\n",
" # This prevents incorrect clearing of the pointing gesture due to recognition errors in a single frame.\n",
"\n",
" if dragging[temp_handness]:\n",
"\n",
" if start_drag_time[temp_handness] == 0:\n",
" start_drag_time[temp_handness] = time.time()\n",
" reset_kalman_filter(temp_handness, index_x, index_y)\n",
" # Initialize the filter whenever a line is drawn\n",
"\n",
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n",
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n",
" # Create a circle based on the depth distance from the wrist root\n",
" # This is used to show that the pointing gesture has started\n",
" # The corresponding depth points below are scaled directly\n",
" drag_point_smooth = (smooth_x, smooth_y)\n",
"\n",
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n",
" trajectory[temp_handness].append(drag_point_smooth)\n",
" # The Kalman filter can be very unstable when initialized, with significant noise in the first few frames\n",
" # Wait until the first few frames have run before adding the coordinates to the trajectory list.\n",
" else:\n",
" if len(trajectory[temp_handness]) > 4:\n",
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
" rect = cv2.minAreaRect(contour)\n",
" box = cv2.boxPoints(rect)\n",
" box = np.int0(box)\n",
" rect_draw_time[temp_handness] = time.time()\n",
" last_drawn_box[temp_handness] = box\n",
" # If the pointing gesture ends and there are at least four points in the trajectory list,\n",
" # Use the minimum bounding box to adjust the irregular drawing to a rectangle.\n",
"\n",
" start_drag_time[temp_handness] = 0\n",
" trajectory[temp_handness].clear()\n",
"\n",
" for i in range(1, len(trajectory[temp_handness])):\n",
"\n",
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n",
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n",
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n",
" # Draw lines connecting trajectory points\n",
"\n",
" if last_drawn_box[temp_handness] is not None:\n",
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n",
"\n",
" if elapsed_time[temp_handness] < wait_box:\n",
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n",
" # Keep the rectangle visible for a while, otherwise, it's too fast to observe.\n",
"\n",
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n",
"\n",
" box = last_drawn_box[temp_handness]\n",
" x_min = max(0, min(box[:, 0]))\n",
" y_min = max(0, min(box[:, 1]))\n",
" x_max = min(image.shape[1], max(box[:, 0]))\n",
" y_max = min(image.shape[0], max(box[:, 1]))\n",
" cropped_image = image[y_min:y_max, x_min:x_max]\n",
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n",
" cv2.imwrite(filename, cropped_image)\n",
" last_drawn_box[temp_handness] = None\n",
" # The drawn image cannot be cropped immediately, as it might wrongly crop the hand into it.\n",
" # Wait a while to give the hand time to move away before extracting the rectangle from this frame.\n",
"\n",
" for i in range(21):\n",
"\n",
" cx = int(hand_21.landmark[i].x * width)\n",
" cy = int(hand_21.landmark[i].y * height)\n",
" cz = hand_21.landmark[i].z\n",
" depth_z = cz0 - cz\n",
" radius = max(int(6 * (1 + depth_z * 5)), 0)\n",
"\n",
" if i == 0:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n",
" if i == 8:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n",
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n",
" if i in [1, 5, 9, 13, 17]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n",
" if i in [2, 6, 10, 14, 18]:\n",
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n",
" if i in [3, 7, 11, 15, 19]:\n",
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n",
" if i in [4, 12, 16, 20]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n",
" # Extract each keypoint, assign corresponding colors, and set depth based on the wrist root.\n",
"\n",
" scaler = 1\n",
" image = cv2.putText(image, handness_str, (25 * scaler, 100 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2)\n",
" image = cv2.putText(image, index_finger_tip_str, (25 * scaler, 150 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2,)\n",
"\n",
" spend_time = time.time() - start_time\n",
" if spend_time > 0:\n",
" FPS = 1.0 / spend_time\n",
" else:\n",
" FPS = 0\n",
"\n",
" image = cv2.putText(image, 'FPS ' + str(int(FPS)), (25 * scaler, 50 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2,)\n",
" # Display FPS, detected hands, and the depth value of the index fingertip relative to the wrist root.\n",
"\n",
" else:\n",
" clear_hand_states()\n",
" # If no hands are detected, clear all information.\n",
"\n",
" return image"
],
"id": "51ff809ecaf1f899",
"outputs": [],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:15.392765Z",
"start_time": "2024-09-07T05:10:59.535594Z"
}
},
"cell_type": "code",
"source": [
"cap = cv2.VideoCapture(1)\n",
"cap.open(0)\n",
"\n",
"while cap.isOpened():\n",
" success, frame = cap.read()\n",
" if not success:\n",
" print(\"Camera Error\")\n",
" break\n",
"\n",
" frame = process_image(frame)\n",
" cv2.imshow('Video', frame)\n",
"\n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
"\n",
"cap.release()\n",
"cv2.destroyAllWindows() "
],
"id": "b7ce23e80ed36041",
"outputs": [],
"execution_count": 7
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "1102d2fc75310c6e"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 61
- 0
utils_en/hand_gesture.py View File

@@ -0,0 +1,61 @@
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from finger_drawer import FingerDrawer
class HandGestureHandler:
def __init__(self):
self.hand_state = HandState()
self.kalman_handler = KalmanHandler()
self.hand_tracker = HandTracker()
self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)
def handle_hand_gestures(self, image, width, height, is_video):
results = self.hand_tracker.process(image)
if results.multi_hand_landmarks:
handness_str = ''
index_finger_tip_str = ''
if len(results.multi_hand_landmarks) == 1:
detected_hand = results.multi_handedness[0].classification[0].label
self.hand_state.clear_hand_states(detected_hand)
# If only one hand is detected, clear the information of the other hand
# to prevent data conflicts when the second hand appears.
for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
self.hand_tracker.mp_drawing.draw_landmarks(
image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
)
# Draw the connections of hand keypoints
temp_handness = results.multi_handedness[hand_idx].classification[0].label
handness_str += f'{hand_idx}:{temp_handness}, '
self.hand_state.is_index_finger_up[temp_handness] = False
image = self.index_handler.handle_index_finger(
image, hand_21, temp_handness, width, height
)
# Handle the index finger
image, index_finger_tip_str = FingerDrawer.draw_finger_points(
image, hand_21, temp_handness, width, height
)
if is_video:
image = cv2.flip(image, 1)
image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
else:
if is_video:
image = cv2.flip(image, 1)
# If it's input video from a rear-facing camera, flip the image before processing
# to ensure correct left and right hand detection, and flip it back afterward
# to prevent mirrored output errors.
self.hand_state.clear_hand_states()
# Clear hand states if no hands are detected
return image
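Note: a minimal sketch of driving handle_hand_gestures on a single BGR frame outside the Tkinter GUI; the preprocessing mirrors HandGestureProcessor.process_image, and the image paths are hypothetical:

import cv2
from hand_gesture import HandGestureHandler   # assumes this module's imports resolve (run from utils_en)

handler = HandGestureHandler()
frame = cv2.imread("sample_frame.jpg")         # hypothetical BGR test image
height, width = frame.shape[:2]
frame = cv2.flip(frame, 1)                     # mirror, as the camera path does
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)   # MediaPipe expects RGB
annotated = handler.handle_hand_gestures(rgb, width, height, is_video=False)
cv2.imwrite("annotated_frame.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))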

+ 114
- 0
utils_en/index_finger.py View File

@@ -0,0 +1,114 @@
import cv2
import time
import numpy as np
class IndexFingerHandler:
def __init__(self, hand_state, kalman_handler):
self.hand_state = hand_state
self.kalman_handler = kalman_handler
self.wait_time = 1.5
self.kalman_wait_time = 0.5
self.wait_box = 2
def handle_index_finger(self, image, hand_21, temp_handness, width, height):
cz0 = hand_21.landmark[0].z
self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
return image
# Handle the index finger's state and gesture effect, and update the image
def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
for i in range(21) if i not in [7, 8]) and \
self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
self.hand_state.is_index_finger_up[temp_handness] = True
# If both the index fingertip and first joint are above other keypoints,
# consider the index finger as raised.
if self.hand_state.is_index_finger_up[temp_handness]:
if not self.hand_state.gesture_locked[temp_handness]:
if self.hand_state.gesture_start_time[temp_handness] == 0:
self.hand_state.gesture_start_time[temp_handness] = time.time()
elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
self.hand_state.dragging[temp_handness] = True
self.hand_state.gesture_locked[temp_handness] = True
self.hand_state.drag_point[temp_handness] = (index_x, index_y)
# If the pointing gesture has lasted longer than the wait time, confirm the pointing action.
self.hand_state.buffer_start_time[temp_handness] = 0
# Buffer time to prevent immediate interruption due to recognition errors.
else:
if self.hand_state.buffer_start_time[temp_handness] == 0:
self.hand_state.buffer_start_time[temp_handness] = time.time()
elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
self.hand_state.gesture_start_time[temp_handness] = 0
self.hand_state.gesture_locked[temp_handness] = False
self.hand_state.dragging[temp_handness] = False
# If the interruption time of the pointing gesture exceeds the set buffer duration, formally terminate.
def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
if self.hand_state.dragging[temp_handness]:
if self.hand_state.start_drag_time[temp_handness] == 0:
self.hand_state.start_drag_time[temp_handness] = time.time()
self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
# If it's the first operation, record the time and reset the Kalman filter.
smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
# Use the Kalman filter to smooth the generated trajectory, reducing noise and jitter.
self.hand_state.drag_point[temp_handness] = (index_x, index_y)
index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
# Adjust the circle size based on the distance from the wrist root, slightly larger than FingerDrawer for visibility during gesture lock.
drag_point_smooth = (smooth_x, smooth_y)
if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
# Wait for the Kalman filter to stabilize data before adding coordinates to the trajectory.
else:
if len(self.hand_state.trajectory[temp_handness]) > 4:
contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
rect = cv2.minAreaRect(contour)
box = cv2.boxPoints(rect)
box = np.int0(box)
# Calculate the minimum enclosing rectangle when the drag points exceed 4.
self.hand_state.rect_draw_time[temp_handness] = time.time()
self.hand_state.last_drawn_box[temp_handness] = box
self.hand_state.start_drag_time[temp_handness] = 0
self.hand_state.trajectory[temp_handness].clear()
# Reset and clear
for i in range(1, len(self.hand_state.trajectory[temp_handness])):
pt1 = (int(self.hand_state.trajectory[temp_handness][i-1][0]), int(self.hand_state.trajectory[temp_handness][i-1][1]))
pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
cv2.line(image, pt1, pt2, (0, 0, 255), 2)
# Draw the drag path
if self.hand_state.last_drawn_box[temp_handness] is not None:
elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
if elapsed_time < self.wait_box:
cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
# Keep the bounding box visible for a set period for easier observation.
elif elapsed_time >= self.wait_box - 0.1:
box = self.hand_state.last_drawn_box[temp_handness]
x_min = max(0, min(box[:, 0]))
y_min = max(0, min(box[:, 1]))
x_max = min(image.shape[1], max(box[:, 0]))
y_max = min(image.shape[0], max(box[:, 1]))
cropped_image = image[y_min:y_max, x_min:x_max]
filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
cv2.imwrite(filename, cropped_image)
self.hand_state.last_drawn_box[temp_handness] = None
# To avoid accidentally cropping the hand into the bounding box,
# perform the crop in the last 0.1 seconds before the box disappears,
# giving enough time for the hand to move away.
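Note: the crop above relies on cv2.minAreaRect turning the free-hand trajectory into a rotated rectangle and cv2.boxPoints giving its four corners; the axis-aligned crop then comes from the corner extremes. A standalone sketch with made-up trajectory points:

import cv2
import numpy as np

trajectory = [(120, 80), (200, 90), (210, 180), (125, 170), (118, 120)]   # made-up drag points
contour = np.array(trajectory, dtype=np.int32)

rect = cv2.minAreaRect(contour)               # ((cx, cy), (w, h), angle)
box = cv2.boxPoints(rect).astype(np.intp)     # four corners of the rotated rectangle

x_min, y_min = box[:, 0].min(), box[:, 1].min()
x_max, y_max = box[:, 0].max(), box[:, 1].max()
print(box, (x_min, y_min, x_max, y_max))
# image[y_min:y_max, x_min:x_max] is the region that would be written to disk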

+ 36
- 0
utils_en/kalman_filter.py View File

@@ -0,0 +1,36 @@
import numpy as np
from filterpy.kalman import KalmanFilter
class KalmanHandler:
def __init__(self):
self.kalman_filters = {
'Left': KalmanFilter(dim_x=4, dim_z=2),
'Right': KalmanFilter(dim_x=4, dim_z=2)
}
for key in self.kalman_filters:
self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
[0, 1, 0, 1],
[0, 0, 1, 0],
[0, 0, 0, 1]])
self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
[0, 1, 0, 0]])
self.kalman_filters[key].P *= 1000.
self.kalman_filters[key].R = 3
self.kalman_filters[key].Q = np.eye(4) * 0.01
# These parameters were obtained through multiple tests and have shown stable performance.
def kalman_filter_point(self, hand_label, x, y):
kf = self.kalman_filters[hand_label]
kf.predict()
kf.update([x, y])
# Update state
return (kf.x[0], kf.x[1])
def reset_kalman_filter(self, hand_label, x, y):
kf = self.kalman_filters[hand_label]
kf.x = np.array([x, y, 0., 0.])
kf.P *= 1000.
# Reset
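Note: a quick way to see what this constant-velocity filter does is to feed it a jittery point stream and compare the smoothed output; a minimal sketch (the noise values are illustrative):

import numpy as np
from kalman_filter import KalmanHandler   # assumes this module is importable (run from utils_en)

handler = KalmanHandler()
rng = np.random.default_rng(0)

handler.reset_kalman_filter('Right', 100, 100)        # start the track at (100, 100)
for t in range(1, 20):
    noisy_x = 100 + 5 * t + rng.normal(0, 3)          # fingertip drifting right, plus jitter
    noisy_y = 100 + rng.normal(0, 3)
    sx, sy = handler.kalman_filter_point('Right', noisy_x, noisy_y)
    print(f"raw=({noisy_x:6.1f}, {noisy_y:6.1f})  smoothed=({sx:6.1f}, {sy:6.1f})")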

+ 17
- 0
utils_en/model.py View File

@@ -0,0 +1,17 @@
import mediapipe as mp
class HandTracker:
def __init__(self):
self.mp_hands = mp.solutions.hands
self.hands = self.mp_hands.Hands(
static_image_mode=False,
max_num_hands=1,
# Setting it to one would be more stable
min_detection_confidence=0.5,
min_tracking_confidence=0.5
)
self.mp_drawing = mp.solutions.drawing_utils
def process(self, image):
results = self.hands.process(image)
return results
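Note: HandTracker is a thin wrapper around MediaPipe Hands and returns the raw results object, so the normalized landmarks still have to be scaled by the frame size. A hedged sketch of reading the index fingertip from one image (the path is hypothetical):

import cv2
from model import HandTracker

tracker = HandTracker()
frame = cv2.imread("hand.jpg")                    # hypothetical BGR test image
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)      # MediaPipe expects RGB input
results = tracker.process(rgb)

if results.multi_hand_landmarks:
    hand = results.multi_hand_landmarks[0]
    tip = hand.landmark[8]                        # landmark 8 = index fingertip
    h, w = frame.shape[:2]
    print("index fingertip at", int(tip.x * w), int(tip.y * h))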

+ 24
- 0
utils_en/process_images.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler
class HandGestureProcessor:
def __init__(self):
self.hand_handler = HandGestureHandler()
def process_image(self, image, is_video):
start_time = time.time()
height, width = image.shape[:2]
image = cv2.flip(image, 1)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Preprocess the incoming video frame
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
spend_time = time.time() - start_time
FPS = 1.0 / spend_time if spend_time > 0 else 0
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
# Calculate and display the frame rate
return image
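Note: the processor can also be exercised without the Tkinter GUI through a plain OpenCV loop; a sketch assuming a webcam at index 0 (is_video=False keeps the camera-style mirroring):

import cv2
from process_images import HandGestureProcessor

processor = HandGestureProcessor()
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    annotated = processor.process_image(frame, False)          # returns an RGB image
    cv2.imshow("gesture", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()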

+ 65
- 0
utils_en/video_recognition.py View File

@@ -0,0 +1,65 @@
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk
def start_camera(canvas):
cap = cv2.VideoCapture(0)
if not cap.isOpened():
return "Cannot open camera"
gesture_processor = HandGestureProcessor()
show_frame(canvas, cap, gesture_processor)
def show_frame(canvas, cap, gesture_processor):
success, frame = cap.read()
if success:
processed_frame = gesture_processor.process_image(frame,False)
img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
imgtk = ImageTk.PhotoImage(image=img)
canvas.imgtk = imgtk
canvas.create_image(0, 0, anchor="nw", image=imgtk)
# Process the frame, convert it to RGB, and display it on the canvas
canvas.after(10, show_frame, canvas, cap, gesture_processor)
# Schedule the next call so every subsequent frame keeps being processed and displayed
else:
cap.release()
cv2.destroyAllWindows()
def upload_and_process_video(canvas, video_path):
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
return "Cannot open video file"
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
# Get the video's parameters
output_filename = "../video/processed_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
# Set the output video file path and an 'mp4v' codec that matches the .mp4 container
gesture_processor = HandGestureProcessor()
process_video_frame(canvas, cap, gesture_processor, out)
def process_video_frame(canvas, cap, gesture_processor, out):
success, frame = cap.read()
if success:
processed_frame = gesture_processor.process_image(frame,True)
out.write(processed_frame)
img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
imgtk = ImageTk.PhotoImage(image=img)
canvas.imgtk = imgtk
canvas.create_image(0, 0, anchor="nw", image=imgtk)
canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
else:
cap.release()
out.release()
cv2.destroyAllWindows()
messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
print("Processed video saved as processed_output.mp4")

+ 112
- 0
utils_zh/GUI.py View File

@@ -0,0 +1,112 @@
import cv2
import tkinter as tk
from tkinter import filedialog, messagebox
from video_recognition import start_camera, upload_and_process_video, show_frame
from process_images import HandGestureProcessor
current_mode = None
current_cap = None
# To track the current mode and camera resources
def create_gui():
root = tk.Tk()
root.title("Gesture Recognition")
root.geometry("800x600")
canvas = tk.Canvas(root, width=640, height=480)
canvas.pack()
# Create a canvas to display video content
camera_button = tk.Button(
root,
text="Use Camera for Real-time Recognition",
command=lambda: switch_to_camera(canvas)
)
camera_button.pack(pady=10)
# Button to start real-time recognition using the camera
video_button = tk.Button(
root,
text="Upload Video File for Processing",
command=lambda: select_and_process_video(canvas, root)
)
video_button.pack(pady=10)
# Button to upload and process video files
root.mainloop()
def switch_to_camera(canvas):
global current_mode, current_cap
stop_current_operation()
# Stop the current operation and release the camera
current_mode = "camera"
canvas.delete("all")
# Set the current mode to camera and clear the Canvas
current_cap = cv2.VideoCapture(0)
if not current_cap.isOpened():
messagebox.showerror("Error", "Cannot open camera")
current_mode = None
return
# Start the camera
start_camera(canvas, current_cap)
# Pass in the canvas and current_cap
def select_and_process_video(canvas, root):
global current_mode, current_cap
stop_current_operation()
current_mode = "video"
canvas.delete("all")
video_path = filedialog.askopenfilename(
title="Select a Video File",
filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*"))
)
# Select a video file
if video_path:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
messagebox.showerror("Error", "Cannot open video file")
return
# Get the video's width and height and adjust the Canvas size
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
canvas.config(width=frame_width, height=frame_height)
root.geometry(f"{frame_width + 160}x{frame_height + 200}")  # Adjust window size
# Get the video dimensions and dynamically resize the canvas
error_message = upload_and_process_video(canvas, video_path)
if error_message:
messagebox.showerror("Error", error_message)
# Upload and process the video file
def stop_current_operation():
global current_cap
if current_cap and current_cap.isOpened():
current_cap.release()
cv2.destroyAllWindows()
current_cap = None
# Stop the current operation, release camera resources, and close all windows
def start_camera(canvas, cap):
if not cap.isOpened():
return "Cannot open camera"
gesture_processor = HandGestureProcessor()
show_frame(canvas, cap, gesture_processor)
# Start the camera for real-time gesture recognition
if __name__ == "__main__":
create_gui()

+ 0
- 0
utils_zh/__init__.py View File


BIN
utils_zh/__pycache__/__init__.cpython-39.pyc View File


BIN
utils_zh/__pycache__/finger_drawer.cpython-39.pyc View File


BIN
utils_zh/__pycache__/gesture_data.cpython-39.pyc View File


BIN
utils_zh/__pycache__/hand_gesture.cpython-39.pyc View File


BIN
utils_zh/__pycache__/index_finger.cpython-39.pyc View File


BIN
utils_zh/__pycache__/kalman_filter.cpython-39.pyc View File


BIN
utils_zh/__pycache__/model.cpython-39.pyc View File


BIN
utils_zh/__pycache__/process_images.cpython-39.pyc View File


BIN
utils_zh/__pycache__/video_recognition.cpython-39.pyc View File


+ 34
- 0
utils_zh/finger_drawer.py View File

@@ -0,0 +1,34 @@
import cv2
class FingerDrawer:
@staticmethod
def draw_finger_points(image, hand_21, temp_handness, width, height):
cz0 = hand_21.landmark[0].z
index_finger_tip_str = ''
for i in range(21):
cx = int(hand_21.landmark[i].x * width)
cy = int(hand_21.landmark[i].y * height)
cz = hand_21.landmark[i].z
depth_z = cz0 - cz
radius = max(int(6 * (1 + depth_z * 5)), 0)
# Adjust the radius of the dot based on depth
if i == 0:
image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
elif i == 8:
image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, '
elif i in [1, 5, 9, 13, 17]:
image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
elif i in [2, 6, 10, 14, 18]:
image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
elif i in [3, 7, 11, 15, 19]:
image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
elif i in [4, 12, 16, 20]:
image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
# Draw different-colored dots for each joint group and scale them by depth relative to the wrist root
return image, index_finger_tip_str

+ 43
- 0
utils_zh/gesture_data.py View File

@@ -0,0 +1,43 @@
from collections import deque
class HandState:
def __init__(self):
self.gesture_locked = {'Left': False, 'Right': False}
self.gesture_start_time = {'Left': 0, 'Right': 0}
self.buffer_start_time = {'Left': 0, 'Right': 0}
self.start_drag_time = {'Left': 0, 'Right': 0}
self.dragging = {'Left': False, 'Right': False}
self.drag_point = {'Left': (0, 0), 'Right': (0, 0)}
self.buffer_duration = {'Left': 0.25, 'Right': 0.25}
self.is_index_finger_up = {'Left': False, 'Right': False}
self.index_finger_second = {'Left': 0, 'Right': 0}
self.index_finger_tip = {'Left': 0, 'Right': 0}
self.trajectory = {'Left': [], 'Right': []}
self.square_queue = deque()
self.wait_time = 1.5
self.kalman_wait_time = 0.5
self.wait_box = 2
self.rect_draw_time = {'Left': 0, 'Right': 0}
self.last_drawn_box = {'Left': None, 'Right': None}
def clear_hand_states(self, detected_hand='Both'):
hands_to_clear = {'Left', 'Right'}
if detected_hand == 'Both':
hands_to_clear = hands_to_clear
else:
hands_to_clear -= {detected_hand}
for h in hands_to_clear:
self.gesture_locked[h] = False
self.gesture_start_time[h] = 0
self.buffer_start_time[h] = 0
self.dragging[h] = False
self.drag_point[h] = (0, 0)
self.buffer_duration[h] = 0.25
self.is_index_finger_up[h] = False
self.trajectory[h].clear()
self.start_drag_time[h] = 0
self.rect_draw_time[h] = 0
self.last_drawn_box[h] = None
# Records information for the left and right hands separately; mixing them could cause data conflicts

+ 24
- 0
utils_zh/gesture_process.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler
class HandGestureProcessor:
def __init__(self):
self.hand_handler = HandGestureHandler()
def process_image(self, image):
start_time = time.time()
height, width = image.shape[:2]
image = cv2.flip(image, 1)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Get the image dimensions, flip, and convert the color space
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video=False)  # handle_hand_gestures also expects the is_video flag
spend_time = time.time() - start_time
FPS = 1.0 / spend_time if spend_time > 0 else 0
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
# Calculate and display the frame rate
return image

+ 403
- 0
utils_zh/gesture_recognition.ipynb View File

@@ -0,0 +1,403 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.761076Z",
"start_time": "2024-09-07T05:11:22.404354Z"
}
},
"source": [
"import cv2\n",
"import time\n",
"import mediapipe\n",
"import numpy as np\n",
"from collections import deque\n",
"from filterpy.kalman import KalmanFilter"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.777139Z",
"start_time": "2024-09-07T05:11:28.761076Z"
}
},
"cell_type": "code",
"source": [
"gesture_locked = {'Left':False,'Right':False}\n",
"gesture_start_time = {'Left':0,'Right':0}\n",
"buffer_start_time = {'Left':0,'Right':0}\n",
"start_drag_time = {'Left':0,'Right':0}\n",
"dragging = {'Left':False,'Right':False}\n",
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n",
"buffer_duration = {'Left':0.25,'Right':0.25}\n",
"is_index_finger_up = {'Left':False,'Right':False}\n",
"index_finger_second = {'Left':0,'Right':0}\n",
"index_finger_tip = {'Left':0,'Right':0}\n",
"trajectory = {'Left':[],'Right':[]}\n",
"square_queue = deque()\n",
"wait_time = 1.5\n",
"kalman_wait_time = 0.5\n",
"wait_box = 2\n",
"rect_draw_time = {'Left':0,'Right':0}\n",
"last_drawn_box = {'Left':None,'Right':None}\n",
"elapsed_time = {'Left':0,'Right':0}"
],
"id": "40aada17ccd31fe",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.824573Z",
"start_time": "2024-09-07T05:11:28.777139Z"
}
},
"cell_type": "code",
"source": [
"def clear_hand_states(detected_hand ='Both'):\n",
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
" \n",
" hands_to_clear = {'Left', 'Right'}\n",
" if detected_hand == 'Both':\n",
" hands_to_clear = hands_to_clear\n",
" else:\n",
" hands_to_clear -= {detected_hand}\n",
" # 反向判断左右手\n",
"\n",
" for h in hands_to_clear:\n",
" gesture_locked[h] = False\n",
" gesture_start_time[h] = 0\n",
" buffer_start_time[h] = 0\n",
" dragging[h] = False\n",
" drag_point[h] = (0, 0)\n",
" buffer_duration[h] = 0.25\n",
" is_index_finger_up[h] = False\n",
" trajectory[h].clear()\n",
" start_drag_time[h] = 0\n",
" rect_draw_time[h] = 0\n",
" last_drawn_box[h] = None\n",
" elapsed_time[h] = 0\n",
" # 清空没被检测的手"
],
"id": "2ee9323bb1c25cc0",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.855831Z",
"start_time": "2024-09-07T05:11:28.824573Z"
}
},
"cell_type": "code",
"source": [
"kalman_filters = {\n",
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n",
"}\n",
"\n",
"for key in kalman_filters:\n",
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n",
" kalman_filters[key].F = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])\n",
" # 状态转移矩阵\n",
" kalman_filters[key].H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])\n",
" # 观测矩阵\n",
" kalman_filters[key].P *= 1000.\n",
" kalman_filters[key].R = 3\n",
" kalman_filters[key].Q = np.eye(4) * 0.01\n",
"\n",
"def kalman_filter_point(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.predict()\n",
" kf.update([x, y])\n",
" # 更新状态\n",
" return (kf.x[0], kf.x[1])\n",
"\n",
"def reset_kalman_filter(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.x = np.array([x, y, 0., 0.])\n",
" kf.P *= 1000.\n",
" # 重置"
],
"id": "96cf431d2562e7d",
"outputs": [],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.887346Z",
"start_time": "2024-09-07T05:11:28.855831Z"
}
},
"cell_type": "code",
"source": [
"\n",
"mp_hands = mediapipe.solutions.hands\n",
"\n",
"hands = mp_hands.Hands(\n",
" static_image_mode=False,\n",
" max_num_hands=2,\n",
" # 一只更稳定\n",
" min_detection_confidence=0.5,\n",
" min_tracking_confidence=0.5\n",
")\n",
"\n",
"mp_drawing = mediapipe.solutions.drawing_utils\n",
"clear_hand_states()"
],
"id": "edc274b7ed495122",
"outputs": [],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.934274Z",
"start_time": "2024-09-07T05:11:28.887346Z"
}
},
"cell_type": "code",
"source": [
"def process_image(image):\n",
"\n",
" start_time = time.time()\n",
" height, width = image.shape[:2]\n",
" image = cv2.flip(image, 1)\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" # 预处理帧\n",
" \n",
" results = hands.process(image)\n",
" \n",
" if results.multi_hand_landmarks:\n",
" # 如果检测到手\n",
" \n",
" handness_str = ''\n",
" index_finger_tip_str = ''\n",
" \n",
" if len(results.multi_hand_landmarks) == 1:\n",
" clear_hand_states(detected_hand = results.multi_handedness[0].classification[0].label)\n",
" # 如果只有一只手 则清空另一只手的数据 避免后续冲突导致不稳定\n",
" \n",
" for hand_idx in range(len(results.multi_hand_landmarks)):\n",
" \n",
" hand_21 = results.multi_hand_landmarks[hand_idx]\n",
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n",
" \n",
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n",
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n",
" is_index_finger_up[temp_handness] = False\n",
" # 先设置为false 防止放下被错误更新为竖起\n",
" \n",
" cz0 = hand_21.landmark[0].z\n",
" index_finger_second[temp_handness] = hand_21.landmark[7]\n",
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n",
" # 食指指尖和第一个关节\n",
" \n",
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n",
"\n",
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n",
" is_index_finger_up[temp_handness] = True\n",
" # 如果指尖和第二个关节高度大于整只手所有关节点 则视为执行“指向”操作 \n",
"\n",
" if is_index_finger_up[temp_handness]:\n",
" if not gesture_locked[temp_handness]:\n",
" if gesture_start_time[temp_handness] == 0:\n",
" gesture_start_time[temp_handness] = time.time()\n",
" # 记录食指抬起的时间\n",
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n",
" dragging[temp_handness] = True\n",
" gesture_locked[temp_handness] = True\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" # 如果食指抬起的时间大于预设的等待时间则视为执行“指向”操作\n",
" buffer_start_time[temp_handness] = 0\n",
" # 检测到食指竖起就刷新缓冲时间\n",
" else:\n",
" if buffer_start_time[temp_handness] == 0:\n",
" buffer_start_time[temp_handness] = time.time()\n",
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n",
" gesture_start_time[temp_handness] = 0\n",
" gesture_locked[temp_handness] = False\n",
" dragging[temp_handness] = False\n",
" # 如果缓冲时间大于设定 就证明已经结束指向操作\n",
" # 这样可以防止某一帧识别有误导致指向操作被错误清除\n",
" \n",
" if dragging[temp_handness]:\n",
"\n",
" if start_drag_time[temp_handness] == 0:\n",
" start_drag_time[temp_handness] = time.time()\n",
" reset_kalman_filter(temp_handness, index_x, index_y)\n",
" # 每次画线的时候初始化滤波器\n",
" \n",
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n",
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n",
" # 根据离掌根的深度距离来构建一个圆\n",
" # 用来显示已经开始指向操作\n",
" # 和下方构建的深度点位对应 直接用倍数\n",
" drag_point_smooth = (smooth_x, smooth_y)\n",
" \n",
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n",
" trajectory[temp_handness].append(drag_point_smooth)\n",
" # 因为kalman滤波器初始化的时候会很不稳定 前几帧通常会有较为严重的噪声\n",
" # 所以直接等待前几帧运行完成之后再将点位加到轨迹列表中\n",
" else:\n",
" if len(trajectory[temp_handness]) > 4:\n",
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
" rect = cv2.minAreaRect(contour)\n",
" box = cv2.boxPoints(rect)\n",
" box = np.int0(box)\n",
" rect_draw_time[temp_handness] = time.time()\n",
" last_drawn_box[temp_handness] = box\n",
" # 如果指向操作结束 轨迹列表有至少四个点的时候\n",
" # 使用最小包围图形将画的不规则图案调整为一个矩形\n",
"\n",
" start_drag_time[temp_handness] = 0\n",
" trajectory[temp_handness].clear()\n",
"\n",
" for i in range(1, len(trajectory[temp_handness])):\n",
"\n",
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n",
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n",
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n",
" # 绘制连接轨迹点的线\n",
"\n",
" if last_drawn_box[temp_handness] is not None:\n",
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n",
" \n",
" if elapsed_time[temp_handness] < wait_box:\n",
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n",
" # 将矩形框保留一段时间 否则一帧太快 无法看清效果\n",
" \n",
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n",
" \n",
" box = last_drawn_box[temp_handness]\n",
" x_min = max(0, min(box[:, 0]))\n",
" y_min = max(0, min(box[:, 1]))\n",
" x_max = min(image.shape[1], max(box[:, 0]))\n",
" y_max = min(image.shape[0], max(box[:, 1]))\n",
" cropped_image = image[y_min:y_max, x_min:x_max]\n",
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n",
" cv2.imwrite(filename, cropped_image)\n",
" last_drawn_box[temp_handness] = None\n",
" # 不能直接剪裁画完的图像 可能会错误的将手剪裁进去\n",
" # 等待一段时间 有一个给手缓冲移动走的时间再将这一帧里的矩形提取出来\n",
" \n",
" for i in range(21):\n",
" \n",
" cx = int(hand_21.landmark[i].x * width)\n",
" cy = int(hand_21.landmark[i].y * height)\n",
" cz = hand_21.landmark[i].z\n",
" depth_z = cz0 - cz\n",
" radius = max(int(6 * (1 + depth_z*5)), 0)\n",
" \n",
" if i == 0:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n",
" if i == 8:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n",
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n",
" if i in [1,5,9,13,17]: \n",
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n",
" if i in [2,6,10,14,18]:\n",
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n",
" if i in [3,7,11,15,19]:\n",
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n",
" if i in [4,12,16,20]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n",
" # 提取出每一个关节点 赋予对应的颜色和根据掌根的深度\n",
" \n",
" scaler= 1\n",
" image = cv2.putText(image,handness_str, (25*scaler, 100*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
" image = cv2.putText(image,index_finger_tip_str, (25*scaler, 150*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
"\n",
" spend_time = time.time() - start_time\n",
" if spend_time > 0:\n",
" FPS = 1.0 / spend_time\n",
" else:\n",
" FPS = 0\n",
" \n",
" image = cv2.putText(image,'FPS '+str(int(FPS)),(25*scaler,50*scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25*scaler,(0,0,255),2,)\n",
" # 显示FPS 检测到的手和食指指尖对于掌根的深度值\n",
" \n",
" else:\n",
" clear_hand_states()\n",
" # 如果没检测到手就清空全部信息\n",
" \n",
" return image"
],
"id": "51ff809ecaf1f899",
"outputs": [],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:19:32.248575Z",
"start_time": "2024-09-07T05:11:28.934663Z"
}
},
"cell_type": "code",
"source": [
"cap = cv2.VideoCapture(1)\n",
"cap.open(0)\n",
"\n",
"while cap.isOpened():\n",
" success, frame = cap.read()\n",
" if not success:\n",
" print(\"Camera Error\")\n",
" break\n",
" \n",
" frame = process_image(frame)\n",
" cv2.imshow('Video', frame)\n",
" \n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
" \n",
"cap.release()\n",
"cv2.destroyAllWindows() "
],
"id": "b7ce23e80ed36041",
"outputs": [],
"execution_count": 7
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "10fca4bc34a944ea"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 56
- 0
utils_zh/hand_gesture.py View File

@@ -0,0 +1,56 @@
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from finger_drawer import FingerDrawer
class HandGestureHandler:
def __init__(self):
self.hand_state = HandState()
self.kalman_handler = KalmanHandler()
self.hand_tracker = HandTracker()
self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)
def handle_hand_gestures(self, image, width, height, is_video):
results = self.hand_tracker.process(image)
if results.multi_hand_landmarks:
handness_str = ''
index_finger_tip_str = ''
if len(results.multi_hand_landmarks) == 1:
detected_hand = results.multi_handedness[0].classification[0].label
self.hand_state.clear_hand_states(detected_hand)
# If only one hand is detected, clear the other hand's information to avoid data conflicts when the second hand appears
for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
self.hand_tracker.mp_drawing.draw_landmarks(
image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
)
# Draw the hand keypoint connections
temp_handness = results.multi_handedness[hand_idx].classification[0].label
handness_str += f'{hand_idx}:{temp_handness}, '
self.hand_state.is_index_finger_up[temp_handness] = False
image = self.index_handler.handle_index_finger(
image, hand_21, temp_handness, width, height
)
# Handle the index finger
image, index_finger_tip_str = FingerDrawer.draw_finger_points(image, hand_21, temp_handness, width, height)
if is_video:
image = cv2.flip(image, 1)
image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
else:
if is_video:
image = cv2.flip(image, 1)
# 如果是后置摄像头的输入视频,则需要在处理前翻转图像,确保手势检测的左右手正确;
# 处理完毕后再翻转回来,以防止最终输出的图像出现镜像错误。
self.hand_state.clear_hand_states()
# 如果未检测到手 则清空手部状态
return image
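
For orientation, a minimal driver sketch (not part of this commit; it assumes execution from inside utils_zh with a webcam on index 0, and the preview.jpg path is purely illustrative) showing how a single mirrored RGB frame would be fed through HandGestureHandler:

import cv2
from hand_gesture import HandGestureHandler

handler = HandGestureHandler()
cap = cv2.VideoCapture(0)
ok, frame = cap.read()
if ok:
    frame = cv2.flip(frame, 1)                      # mirror the frame, as the processor does
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)    # MediaPipe expects RGB input
    h, w = rgb.shape[:2]
    annotated = handler.handle_hand_gestures(rgb, w, h, is_video=False)
    cv2.imwrite("preview.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))  # back to BGR for saving
cap.release()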

+ 112
- 0
utils_zh/index_finger.py View File

@ -0,0 +1,112 @@
import cv2
import time
import numpy as np


class IndexFingerHandler:
    def __init__(self, hand_state, kalman_handler):
        self.hand_state = hand_state
        self.kalman_handler = kalman_handler
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2

    def handle_index_finger(self, image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
        self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
        index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
        index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
        self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
        self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
        return image
        # Update the index-finger state, apply the gesture effects, and return the updated image

    def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
        if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
               for i in range(21) if i not in [7, 8]) and \
                self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
            self.hand_state.is_index_finger_up[temp_handness] = True
            # If both the index fingertip and the joint below it are above every other keypoint,
            # the index finger is considered raised

        if self.hand_state.is_index_finger_up[temp_handness]:
            if not self.hand_state.gesture_locked[temp_handness]:
                if self.hand_state.gesture_start_time[temp_handness] == 0:
                    self.hand_state.gesture_start_time[temp_handness] = time.time()
                elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
                    self.hand_state.dragging[temp_handness] = True
                    self.hand_state.gesture_locked[temp_handness] = True
                    self.hand_state.drag_point[temp_handness] = (index_x, index_y)
                    # Once the pointing gesture has been held longer than the wait time,
                    # the pointing (drag) operation formally starts
            self.hand_state.buffer_start_time[temp_handness] = 0
            # Reset the buffer timer that keeps brief recognition errors from
            # instantly interrupting the pointing operation
        else:
            if self.hand_state.buffer_start_time[temp_handness] == 0:
                self.hand_state.buffer_start_time[temp_handness] = time.time()
            elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
                self.hand_state.gesture_start_time[temp_handness] = 0
                self.hand_state.gesture_locked[temp_handness] = False
                self.hand_state.dragging[temp_handness] = False
                # If the interruption has lasted longer than the configured buffer time,
                # the pointing operation formally ends

    def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
        if self.hand_state.dragging[temp_handness]:
            if self.hand_state.start_drag_time[temp_handness] == 0:
                self.hand_state.start_drag_time[temp_handness] = time.time()
                self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
                # On the first frame of a drag, record the start time and reset the Kalman filter
            smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
            # Smooth the trajectory with the Kalman filter to reduce noise and jitter
            self.hand_state.drag_point[temp_handness] = (index_x, index_y)
            index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
            cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
            # Scale the circle with the depth relative to the wrist, slightly larger than in
            # FingerDrawer, so it is easy to see whether the pointing operation is locked
            drag_point_smooth = (smooth_x, smooth_y)
            if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
                self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
                # The filter needs a moment to stabilise after a reset, so points are only
                # appended to the trajectory once it has settled
        else:
            if len(self.hand_state.trajectory[temp_handness]) > 4:
                contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
                rect = cv2.minAreaRect(contour)
                box = cv2.boxPoints(rect)
                box = np.intp(box)
                # Once the drag contains more than four points, compute its minimum-area bounding rectangle
                self.hand_state.rect_draw_time[temp_handness] = time.time()
                self.hand_state.last_drawn_box[temp_handness] = box
            self.hand_state.start_drag_time[temp_handness] = 0
            self.hand_state.trajectory[temp_handness].clear()
            # Reset the drag timer and clear the trajectory

        for i in range(1, len(self.hand_state.trajectory[temp_handness])):
            pt1 = (int(self.hand_state.trajectory[temp_handness][i - 1][0]), int(self.hand_state.trajectory[temp_handness][i - 1][1]))
            pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
            cv2.line(image, pt1, pt2, (0, 0, 255), 2)
            # Draw the drag path

        if self.hand_state.last_drawn_box[temp_handness] is not None:
            elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
            if elapsed_time < self.wait_box:
                cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
                # Keep the bounding box on screen for a while so it is easy to see
            elif elapsed_time >= self.wait_box - 0.1:
                box = self.hand_state.last_drawn_box[temp_handness]
                x_min = max(0, min(box[:, 0]))
                y_min = max(0, min(box[:, 1]))
                x_max = min(image.shape[1], max(box[:, 0]))
                y_max = min(image.shape[0], max(box[:, 1]))
                cropped_image = image[y_min:y_max, x_min:x_max]
                filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
                cv2.imwrite(filename, cropped_image)
                self.hand_state.last_drawn_box[temp_handness] = None
                # Cropping the instant the box is drawn would very likely capture the hand
                # as well, so the crop is taken only when the box is about to disappear,
                # giving the hand enough time to move out of the frame
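
As a standalone illustration of the min-area-rectangle step used above (the trajectory points are synthetic, not from the commit), this shows what cv2.minAreaRect and cv2.boxPoints return for a rough fingertip path and how an axis-aligned crop region is derived from the rotated box:

import numpy as np
import cv2

trajectory = [(120, 80), (300, 90), (310, 240), (130, 230), (125, 150)]  # synthetic fingertip points
contour = np.array(trajectory, dtype=np.int32)
rect = cv2.minAreaRect(contour)      # ((cx, cy), (w, h), angle)
box = np.intp(cv2.boxPoints(rect))   # four corner points of the rotated rectangle

x_min, y_min = box[:, 0].min(), box[:, 1].min()
x_max, y_max = box[:, 0].max(), box[:, 1].max()
print(rect, (x_min, y_min, x_max, y_max))  # crop region that would be cut from the frame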

+ 36
- 0
utils_zh/kalman_filter.py View File

@ -0,0 +1,36 @@
import numpy as np
from filterpy.kalman import KalmanFilter


class KalmanHandler:
    def __init__(self):
        self.kalman_filters = {
            'Left': KalmanFilter(dim_x=4, dim_z=2),
            'Right': KalmanFilter(dim_x=4, dim_z=2)
        }
        for key in self.kalman_filters:
            self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
            self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
                                                   [0, 1, 0, 1],
                                                   [0, 0, 1, 0],
                                                   [0, 0, 0, 1]])
            self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
                                                   [0, 1, 0, 0]])
            self.kalman_filters[key].P *= 1000.
            self.kalman_filters[key].R = 3
            self.kalman_filters[key].Q = np.eye(4) * 0.01
            # These parameters were found through repeated testing and behave fairly stably

    def kalman_filter_point(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.predict()
        kf.update([x, y])
        # Predict, then correct with the measured fingertip position
        return (kf.x[0], kf.x[1])

    def reset_kalman_filter(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.x = np.array([x, y, 0., 0.])
        kf.P *= 1000.
        # Reset the state to the new starting point and inflate the covariance
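
A small usage sketch (illustrative only; it assumes filterpy is installed and that the script runs from inside utils_zh so the flat import resolves) showing the constant-velocity filter above smoothing a simulated noisy fingertip track:

import numpy as np
from kalman_filter import KalmanHandler

handler = KalmanHandler()
rng = np.random.default_rng(0)
true_x = np.linspace(100, 300, 30)   # fingertip moving right at roughly constant speed
true_y = np.linspace(200, 220, 30)

handler.reset_kalman_filter('Right', true_x[0], true_y[0])
for x, y in zip(true_x, true_y):
    noisy_x = x + rng.normal(0, 4)   # simulated detection jitter
    noisy_y = y + rng.normal(0, 4)
    smooth_x, smooth_y = handler.kalman_filter_point('Right', noisy_x, noisy_y)

print(round(float(smooth_x), 1), round(float(smooth_y), 1))  # settles near the true endpoint (300, 220)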

+ 17
- 0
utils_zh/model.py View File

@ -0,0 +1,17 @@
import mediapipe as mp


class HandTracker:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            # Tracking a single hand at a time is more stable
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        self.mp_drawing = mp.solutions.drawing_utils

    def process(self, image):
        results = self.hands.process(image)
        return results
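
A quick, illustrative check of the wrapper (the hand.jpg path is a hypothetical placeholder, and this assumes execution from inside utils_zh): HandTracker expects an RGB image, and MediaPipe returns landmarks normalised to [0, 1].

import cv2
from model import HandTracker

tracker = HandTracker()
frame = cv2.imread("hand.jpg")  # hypothetical test image path
if frame is not None:
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB input
    results = tracker.process(rgb)
    if results.multi_hand_landmarks:
        tip = results.multi_hand_landmarks[0].landmark[8]  # index fingertip
        label = results.multi_handedness[0].classification[0].label
        print(label, round(tip.x, 3), round(tip.y, 3), round(tip.z, 3))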

+ 24
- 0
utils_zh/process_images.py View File

@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler


class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Pre-process the incoming frame: mirror it and convert BGR to RGB for MediaPipe

        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)

        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and display the frame rate
        return image
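
A minimal loop sketch (not part of the commit; it assumes a default webcam at index 0 and execution from inside utils_zh). Since process_image returns an RGB image, it is converted back to BGR before display:

import cv2
from process_images import HandGestureProcessor

processor = HandGestureProcessor()
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    annotated = processor.process_image(frame, is_video=False)
    cv2.imshow('HandGesture', cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))  # back to BGR for imshow
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
        break
cap.release()
cv2.destroyAllWindows()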

+ 65
- 0
utils_zh/video_recognition.py View File

@ -0,0 +1,65 @@
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk


def start_camera(canvas):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)


def show_frame(canvas, cap, gesture_processor):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, False)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        # Process the frame, convert it for Tk, and draw it on the canvas
        canvas.after(10, show_frame, canvas, cap, gesture_processor)
        # Schedule the next call so every following frame keeps being processed and displayed
    else:
        cap.release()
        cv2.destroyAllWindows()


def upload_and_process_video(canvas, video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Cannot open video file"

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Read the input video's properties

    output_filename = "../video/processed_output.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    # Set the output file path and a codec that matches the .mp4 container

    gesture_processor = HandGestureProcessor()
    process_video_frame(canvas, cap, gesture_processor, out)


def process_video_frame(canvas, cap, gesture_processor, out):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, True)
        out.write(processed_frame)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
    else:
        cap.release()
        out.release()
        cv2.destroyAllWindows()
        messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
        print("Processed video saved as processed_output.mp4")
