Browse Source

init_screenshot_project

screenshot
ydw 2 months ago
parent
commit
9fd5385071
43 changed files with 1880 additions and 0 deletions
  1. +21
    -0
      LICENSE
  2. BIN
      image/cropped_Left_1725688066.jpg
  3. +0
    -0
      main.py
  4. +108
    -0
      utils_en/GUI.py
  5. +0
    -0
      utils_en/__init__.py
  6. BIN
      utils_en/__pycache__/gesture_data.cpython-39.pyc
  7. BIN
      utils_en/__pycache__/hand_gesture.cpython-39.pyc
  8. BIN
      utils_en/__pycache__/index_finger.cpython-39.pyc
  9. BIN
      utils_en/__pycache__/kalman_filter.cpython-39.pyc
  10. BIN
      utils_en/__pycache__/model.cpython-39.pyc
  11. BIN
      utils_en/__pycache__/process_images.cpython-39.pyc
  12. BIN
      utils_en/__pycache__/video_recognition.cpython-39.pyc
  13. +35
    -0
      utils_en/finger_drawer.py
  14. +43
    -0
      utils_en/gesture_data.py
  15. +24
    -0
      utils_en/gesture_process.py
  16. +406
    -0
      utils_en/gesture_recognition.ipynb
  17. +61
    -0
      utils_en/hand_gesture.py
  18. +114
    -0
      utils_en/index_finger.py
  19. +36
    -0
      utils_en/kalman_filter.py
  20. +17
    -0
      utils_en/model.py
  21. +24
    -0
      utils_en/process_images.py
  22. +65
    -0
      utils_en/video_recognition.py
  23. +112
    -0
      utils_zh/GUI.py
  24. +0
    -0
      utils_zh/__init__.py
  25. BIN
      utils_zh/__pycache__/__init__.cpython-39.pyc
  26. BIN
      utils_zh/__pycache__/finger_drawer.cpython-39.pyc
  27. BIN
      utils_zh/__pycache__/gesture_data.cpython-39.pyc
  28. BIN
      utils_zh/__pycache__/hand_gesture.cpython-39.pyc
  29. BIN
      utils_zh/__pycache__/index_finger.cpython-39.pyc
  30. BIN
      utils_zh/__pycache__/kalman_filter.cpython-39.pyc
  31. BIN
      utils_zh/__pycache__/model.cpython-39.pyc
  32. BIN
      utils_zh/__pycache__/process_images.cpython-39.pyc
  33. BIN
      utils_zh/__pycache__/video_recognition.cpython-39.pyc
  34. +34
    -0
      utils_zh/finger_drawer.py
  35. +43
    -0
      utils_zh/gesture_data.py
  36. +24
    -0
      utils_zh/gesture_process.py
  37. +403
    -0
      utils_zh/gesture_recognition.ipynb
  38. +56
    -0
      utils_zh/hand_gesture.py
  39. +112
    -0
      utils_zh/index_finger.py
  40. +36
    -0
      utils_zh/kalman_filter.py
  41. +17
    -0
      utils_zh/model.py
  42. +24
    -0
      utils_zh/process_images.py
  43. +65
    -0
      utils_zh/video_recognition.py

+ 21
- 0
LICENSE View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 EzraZephyr
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

BIN
image/cropped_Left_1725688066.jpg View File

Width: 214  |  Height: 203  |  Size: 9.1 KiB

+ 0
- 0
main.py View File


+ 108
- 0
utils_en/GUI.py View File

@@ -0,0 +1,108 @@
import cv2
import tkinter as tk
from tkinter import filedialog, messagebox
from video_recognition import start_camera, upload_and_process_video, show_frame
from process_images import HandGestureProcessor
current_mode = None
current_cap = None
# To track the current mode and camera resources
def create_gui():
root = tk.Tk()
root.title("Gesture Recognition")
root.geometry("800x600")
canvas = tk.Canvas(root, width=640, height=480)
canvas.pack()
# Create a canvas to display video content
camera_button = tk.Button(
root,
text="Use Camera for Real-time Recognition",
command=lambda: switch_to_camera(canvas)
)
camera_button.pack(pady=10)
# Button to start real-time recognition using the camera
video_button = tk.Button(
root,
text="Upload Video File for Processing",
command=lambda: select_and_process_video(canvas, root)
)
video_button.pack(pady=10)
# Button to upload and process video files
root.mainloop()
def switch_to_camera(canvas):
global current_mode, current_cap
stop_current_operation()
# Stop the current operation and release the camera
current_mode = "camera"
canvas.delete("all")
# Set the current mode to camera and clear the Canvas
current_cap = cv2.VideoCapture(0)
if not current_cap.isOpened():
messagebox.showerror("Error", "Cannot open camera")
current_mode = None
return
# Start the camera
start_camera(canvas, current_cap)
# Pass the canvas and current_cap to start the camera
def select_and_process_video(canvas, root):
global current_mode, current_cap
stop_current_operation()
current_mode = "video"
canvas.delete("all")
video_path = filedialog.askopenfilename(
title="Select a Video File",
filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*"))
)
# Select a video file
if video_path:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
messagebox.showerror("Error", "Cannot open video file")
return
# Get video width and height, and adjust Canvas size
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
canvas.config(width=frame_width, height=frame_height)
root.geometry(f"{frame_width + 160}x{frame_height + 200}") # Adjust window size
# Get video dimensions and dynamically adjust the canvas size
error_message = upload_and_process_video(canvas, video_path)
if error_message:
messagebox.showerror("Error", error_message)
# Upload and process the video file
def stop_current_operation():
global current_cap
if current_cap and current_cap.isOpened():
current_cap.release()
cv2.destroyAllWindows()
current_cap = None
# Stop the current operation, release camera resources, and close all windows
def start_camera(canvas, cap):
if not cap.isOpened():
return "Cannot open camera"
gesture_processor = HandGestureProcessor()
show_frame(canvas, cap, gesture_processor)
# Start the camera for real-time gesture recognition
if __name__ == "__main__":
create_gui()

+ 0
- 0
utils_en/__init__.py View File


BIN
utils_en/__pycache__/gesture_data.cpython-39.pyc View File


BIN
utils_en/__pycache__/hand_gesture.cpython-39.pyc View File


BIN
utils_en/__pycache__/index_finger.cpython-39.pyc View File


BIN
utils_en/__pycache__/kalman_filter.cpython-39.pyc View File


BIN
utils_en/__pycache__/model.cpython-39.pyc View File


BIN
utils_en/__pycache__/process_images.cpython-39.pyc View File


BIN
utils_en/__pycache__/video_recognition.cpython-39.pyc View File


+ 35
- 0
utils_en/finger_drawer.py View File

@@ -0,0 +1,35 @@
import cv2
class FingerDrawer:
@staticmethod
def draw_finger_points(image, hand_21, temp_handness, width, height):
cz0 = hand_21.landmark[0].z
index_finger_tip_str = ''
for i in range(21):
cx = int(hand_21.landmark[i].x * width)
cy = int(hand_21.landmark[i].y * height)
cz = hand_21.landmark[i].z
depth_z = cz0 - cz
radius = max(int(6 * (1 + depth_z * 5)), 0)
# Adjust the radius of the circle based on depth
if i == 0:
image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
elif i == 8:
image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, '
elif i in [1, 5, 9, 13, 17]:
image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
elif i in [2, 6, 10, 14, 18]:
image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
elif i in [3, 7, 11, 15, 19]:
image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
elif i in [4, 12, 16, 20]:
image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
# Draw circles of different colors based on each group of joints and adjust according to the
# depth relative to the wrist
return image, index_finger_tip_str
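Note: the radius formula above scales each landmark's dot by its depth relative to the wrist (landmark 0); MediaPipe's z grows more negative toward the camera, so depth_z = cz0 - cz is positive for points in front of the wrist. A small illustrative sketch of that scaling (the sample values are made up, not from the project):

def landmark_radius(depth_z, base=6, gain=5):
    # Same expression as in draw_finger_points: larger when closer to the camera than the wrist
    return max(int(base * (1 + depth_z * gain)), 0)

for depth_z in (-0.20, -0.05, 0.0, 0.05, 0.20):
    print(f"depth_z={depth_z:+.2f} -> radius={landmark_radius(depth_z)}")
# prints radii 0, 4, 6, 7, 12 respectively: points behind the wrist shrink to nothing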

+ 43
- 0
utils_en/gesture_data.py View File

@@ -0,0 +1,43 @@
from collections import deque
class HandState:
def __init__(self):
self.gesture_locked = {'Left': False, 'Right': False}
self.gesture_start_time = {'Left': 0, 'Right': 0}
self.buffer_start_time = {'Left': 0, 'Right': 0}
self.start_drag_time = {'Left': 0, 'Right': 0}
self.dragging = {'Left': False, 'Right': False}
self.drag_point = {'Left': (0, 0), 'Right': (0, 0)}
self.buffer_duration = {'Left': 0.25, 'Right': 0.25}
self.is_index_finger_up = {'Left': False, 'Right': False}
self.index_finger_second = {'Left': 0, 'Right': 0}
self.index_finger_tip = {'Left': 0, 'Right': 0}
self.trajectory = {'Left': [], 'Right': []}
self.square_queue = deque()
self.wait_time = 1.5
self.kalman_wait_time = 0.5
self.wait_box = 2
self.rect_draw_time = {'Left': 0, 'Right': 0}
self.last_drawn_box = {'Left': None, 'Right': None}
def clear_hand_states(self, detected_hand='Both'):
hands_to_clear = {'Left', 'Right'}
if detected_hand == 'Both':
hands_to_clear = hands_to_clear
else:
hands_to_clear -= {detected_hand}
for h in hands_to_clear:
self.gesture_locked[h] = False
self.gesture_start_time[h] = 0
self.buffer_start_time[h] = 0
self.dragging[h] = False
self.drag_point[h] = (0, 0)
self.buffer_duration[h] = 0.25
self.is_index_finger_up[h] = False
self.trajectory[h].clear()
self.start_drag_time[h] = 0
self.rect_draw_time[h] = 0
self.last_drawn_box[h] = None
# Used to record information for the left and right hands separately to avoid data conflicts
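Note: despite its name, the detected_hand argument is the hand to keep; the method clears every other hand, and the default 'Both' clears both. A minimal usage sketch (assumes gesture_data.py is on the import path):

from gesture_data import HandState

state = HandState()
state.dragging['Right'] = True               # pretend the right hand was mid-gesture
state.trajectory['Right'].append((10, 20))

state.clear_hand_states(detected_hand='Left')    # only the left hand is visible now
print(state.dragging['Right'], state.trajectory['Right'])   # False []

state.clear_hand_states()                    # default 'Both' resets both hands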

+ 24
- 0
utils_en/gesture_process.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler
class HandGestureProcessor:
def __init__(self):
self.hand_handler = HandGestureHandler()
def process_image(self, image):
start_time = time.time()
height, width = image.shape[:2]
image = cv2.flip(image, 1)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Get image dimensions, flip, and convert color space
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video=False)  # handle_hand_gestures also expects the is_video flag
spend_time = time.time() - start_time
FPS = 1.0 / spend_time if spend_time > 0 else 0
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
# Calculate and display the frame rate
return image

+ 406
- 0
utils_en/gesture_recognition.ipynb View File

@@ -0,0 +1,406 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-09-07T05:10:50.912839Z",
"start_time": "2024-09-07T05:10:44.776680Z"
}
},
"source": [
"import cv2\n",
"import time\n",
"import mediapipe\n",
"import numpy as np\n",
"from collections import deque\n",
"from filterpy.kalman import KalmanFilter"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:50.928940Z",
"start_time": "2024-09-07T05:10:50.913896Z"
}
},
"cell_type": "code",
"source": [
"gesture_locked = {'Left':False,'Right':False}\n",
"gesture_start_time = {'Left':0,'Right':0}\n",
"buffer_start_time = {'Left':0,'Right':0}\n",
"start_drag_time = {'Left':0,'Right':0}\n",
"dragging = {'Left':False,'Right':False}\n",
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n",
"buffer_duration = {'Left':0.25,'Right':0.25}\n",
"is_index_finger_up = {'Left':False,'Right':False}\n",
"index_finger_second = {'Left':0,'Right':0}\n",
"index_finger_tip = {'Left':0,'Right':0}\n",
"trajectory = {'Left':[],'Right':[]}\n",
"square_queue = deque()\n",
"wait_time = 1.5\n",
"kalman_wait_time = 0.5\n",
"wait_box = 2\n",
"rect_draw_time = {'Left':0,'Right':0}\n",
"last_drawn_box = {'Left':None,'Right':None}\n",
"elapsed_time = {'Left':0,'Right':0}"
],
"id": "40aada17ccd31fe",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:55.708038Z",
"start_time": "2024-09-07T05:10:55.691926Z"
}
},
"cell_type": "code",
"source": [
"def clear_hand_states(detected_hand='Both'):\n",
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration, is_index_finger_up, trajectory, wait_time, kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
"\n",
" hands_to_clear = {'Left', 'Right'}\n",
" if detected_hand == 'Both':\n",
" hands_to_clear = hands_to_clear\n",
" else:\n",
" hands_to_clear -= {detected_hand}\n",
" # Reverse check for left and right hands\n",
"\n",
" for h in hands_to_clear:\n",
" gesture_locked[h] = False\n",
" gesture_start_time[h] = 0\n",
" buffer_start_time[h] = 0\n",
" dragging[h] = False\n",
" drag_point[h] = (0, 0)\n",
" buffer_duration[h] = 0.25\n",
" is_index_finger_up[h] = False\n",
" trajectory[h].clear()\n",
" start_drag_time[h] = 0\n",
" rect_draw_time[h] = 0\n",
" last_drawn_box[h] = None\n",
" elapsed_time[h] = 0\n",
" # Clear states for hands that are not detected"
],
"id": "2ee9323bb1c25cc0",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:56.547939Z",
"start_time": "2024-09-07T05:10:56.532265Z"
}
},
"cell_type": "code",
"source": [
"kalman_filters = {\n",
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n",
"}\n",
"\n",
"for key in kalman_filters:\n",
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n",
" kalman_filters[key].F = np.array([[1, 0, 1, 0],\n",
" [0, 1, 0, 1],\n",
" [0, 0, 1, 0],\n",
" [0, 0, 0, 1]])\n",
" # State transition matrix\n",
" kalman_filters[key].H = np.array([[1, 0, 0, 0],\n",
" [0, 1, 0, 0]])\n",
" # Observation matrix\n",
" kalman_filters[key].P *= 1000.\n",
" kalman_filters[key].R = 3\n",
" kalman_filters[key].Q = np.eye(4) * 0.01\n",
"\n",
"def kalman_filter_point(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.predict()\n",
" kf.update([x, y])\n",
" # Update state\n",
" return (kf.x[0], kf.x[1])\n",
"\n",
"def reset_kalman_filter(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.x = np.array([x, y, 0., 0.])\n",
" kf.P *= 1000.\n",
" # Reset"
],
"id": "96cf431d2562e7d",
"outputs": [],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:57.253008Z",
"start_time": "2024-09-07T05:10:57.231898Z"
}
},
"cell_type": "code",
"source": [
"mp_hands = mediapipe.solutions.hands\n",
"\n",
"hands = mp_hands.Hands(\n",
" static_image_mode=False,\n",
" max_num_hands=2,\n",
" # One hand is more stable\n",
" min_detection_confidence=0.5,\n",
" min_tracking_confidence=0.5\n",
")\n",
"\n",
"mp_drawing = mediapipe.solutions.drawing_utils\n",
"clear_hand_states()"
],
"id": "edc274b7ed495122",
"outputs": [],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:10:58.920644Z",
"start_time": "2024-09-07T05:10:58.881367Z"
}
},
"cell_type": "code",
"source": [
"def process_image(image):\n",
"\n",
" start_time = time.time()\n",
" height, width = image.shape[:2]\n",
" image = cv2.flip(image, 1)\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" # Preprocess the input frame\n",
"\n",
" results = hands.process(image)\n",
"\n",
" if results.multi_hand_landmarks:\n",
" # If hands are detected\n",
"\n",
" handness_str = ''\n",
" index_finger_tip_str = ''\n",
"\n",
" if len(results.multi_hand_landmarks) == 1:\n",
" clear_hand_states(detected_hand=results.multi_handedness[0].classification[0].label)\n",
" # If only one hand is detected, clear the data of the other hand to avoid conflicts that could cause instability.\n",
"\n",
" for hand_idx in range(len(results.multi_hand_landmarks)):\n",
"\n",
" hand_21 = results.multi_hand_landmarks[hand_idx]\n",
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n",
"\n",
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n",
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n",
" is_index_finger_up[temp_handness] = False\n",
" # Set to False first to prevent incorrect updates to raised when lowered\n",
"\n",
" cz0 = hand_21.landmark[0].z\n",
" index_finger_second[temp_handness] = hand_21.landmark[7]\n",
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n",
" # Index fingertip and first joint\n",
"\n",
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n",
"\n",
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n",
" is_index_finger_up[temp_handness] = True\n",
" # If the fingertip and second joint are higher than all other keypoints on the hand, consider it as a \"pointing\" gesture. \n",
"\n",
" if is_index_finger_up[temp_handness]:\n",
" if not gesture_locked[temp_handness]:\n",
" if gesture_start_time[temp_handness] == 0:\n",
" gesture_start_time[temp_handness] = time.time()\n",
" # Record the time when the index finger is raised\n",
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n",
" dragging[temp_handness] = True\n",
" gesture_locked[temp_handness] = True\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" # If the index finger is raised for longer than the set wait time, it is considered a \"pointing\" gesture.\n",
" buffer_start_time[temp_handness] = 0\n",
" # Refresh the buffer time whenever the index finger is raised\n",
" else:\n",
" if buffer_start_time[temp_handness] == 0:\n",
" buffer_start_time[temp_handness] = time.time()\n",
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n",
" gesture_start_time[temp_handness] = 0\n",
" gesture_locked[temp_handness] = False\n",
" dragging[temp_handness] = False\n",
" # If the buffer time exceeds the set limit, it indicates the end of the pointing gesture.\n",
" # This prevents incorrect clearing of the pointing gesture due to recognition errors in a single frame.\n",
"\n",
" if dragging[temp_handness]:\n",
"\n",
" if start_drag_time[temp_handness] == 0:\n",
" start_drag_time[temp_handness] = time.time()\n",
" reset_kalman_filter(temp_handness, index_x, index_y)\n",
" # Initialize the filter whenever a line is drawn\n",
"\n",
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n",
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n",
" # Create a circle based on the depth distance from the wrist root\n",
" # This is used to show that the pointing gesture has started\n",
" # The corresponding depth points below are scaled directly\n",
" drag_point_smooth = (smooth_x, smooth_y)\n",
"\n",
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n",
" trajectory[temp_handness].append(drag_point_smooth)\n",
" # The Kalman filter can be very unstable when initialized, with significant noise in the first few frames\n",
" # Wait until the first few frames have run before adding the coordinates to the trajectory list.\n",
" else:\n",
" if len(trajectory[temp_handness]) > 4:\n",
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
" rect = cv2.minAreaRect(contour)\n",
" box = cv2.boxPoints(rect)\n",
" box = np.int0(box)\n",
" rect_draw_time[temp_handness] = time.time()\n",
" last_drawn_box[temp_handness] = box\n",
" # If the pointing gesture ends and there are at least four points in the trajectory list,\n",
" # Use the minimum bounding box to adjust the irregular drawing to a rectangle.\n",
"\n",
" start_drag_time[temp_handness] = 0\n",
" trajectory[temp_handness].clear()\n",
"\n",
" for i in range(1, len(trajectory[temp_handness])):\n",
"\n",
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n",
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n",
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n",
" # Draw lines connecting trajectory points\n",
"\n",
" if last_drawn_box[temp_handness] is not None:\n",
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n",
"\n",
" if elapsed_time[temp_handness] < wait_box:\n",
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n",
" # Keep the rectangle visible for a while, otherwise, it's too fast to observe.\n",
"\n",
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n",
"\n",
" box = last_drawn_box[temp_handness]\n",
" x_min = max(0, min(box[:, 0]))\n",
" y_min = max(0, min(box[:, 1]))\n",
" x_max = min(image.shape[1], max(box[:, 0]))\n",
" y_max = min(image.shape[0], max(box[:, 1]))\n",
" cropped_image = image[y_min:y_max, x_min:x_max]\n",
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n",
" cv2.imwrite(filename, cropped_image)\n",
" last_drawn_box[temp_handness] = None\n",
" # The drawn image cannot be cropped immediately, as it might wrongly crop the hand into it.\n",
" # Wait a while to give the hand time to move away before extracting the rectangle from this frame.\n",
"\n",
" for i in range(21):\n",
"\n",
" cx = int(hand_21.landmark[i].x * width)\n",
" cy = int(hand_21.landmark[i].y * height)\n",
" cz = hand_21.landmark[i].z\n",
" depth_z = cz0 - cz\n",
" radius = max(int(6 * (1 + depth_z * 5)), 0)\n",
"\n",
" if i == 0:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n",
" if i == 8:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n",
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n",
" if i in [1, 5, 9, 13, 17]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n",
" if i in [2, 6, 10, 14, 18]:\n",
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n",
" if i in [3, 7, 11, 15, 19]:\n",
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n",
" if i in [4, 12, 16, 20]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n",
" # Extract each keypoint, assign corresponding colors, and set depth based on the wrist root.\n",
"\n",
" scaler = 1\n",
" image = cv2.putText(image, handness_str, (25 * scaler, 100 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2)\n",
" image = cv2.putText(image, index_finger_tip_str, (25 * scaler, 150 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2,)\n",
"\n",
" spend_time = time.time() - start_time\n",
" if spend_time > 0:\n",
" FPS = 1.0 / spend_time\n",
" else:\n",
" FPS = 0\n",
"\n",
" image = cv2.putText(image, 'FPS ' + str(int(FPS)), (25 * scaler, 50 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2,)\n",
" # Display FPS, detected hands, and the depth value of the index fingertip relative to the wrist root.\n",
"\n",
" else:\n",
" clear_hand_states()\n",
" # If no hands are detected, clear all information.\n",
"\n",
" return image"
],
"id": "51ff809ecaf1f899",
"outputs": [],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:15.392765Z",
"start_time": "2024-09-07T05:10:59.535594Z"
}
},
"cell_type": "code",
"source": [
"cap = cv2.VideoCapture(1)\n",
"cap.open(0)\n",
"\n",
"while cap.isOpened():\n",
" success, frame = cap.read()\n",
" if not success:\n",
" print(\"Camera Error\")\n",
" break\n",
"\n",
" frame = process_image(frame)\n",
" cv2.imshow('Video', frame)\n",
"\n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
"\n",
"cap.release()\n",
"cv2.destroyAllWindows() "
],
"id": "b7ce23e80ed36041",
"outputs": [],
"execution_count": 7
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "1102d2fc75310c6e"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 61
- 0
utils_en/hand_gesture.py View File

@@ -0,0 +1,61 @@
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from finger_drawer import FingerDrawer
class HandGestureHandler:
def __init__(self):
self.hand_state = HandState()
self.kalman_handler = KalmanHandler()
self.hand_tracker = HandTracker()
self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)
def handle_hand_gestures(self, image, width, height, is_video):
results = self.hand_tracker.process(image)
if results.multi_hand_landmarks:
handness_str = ''
index_finger_tip_str = ''
if len(results.multi_hand_landmarks) == 1:
detected_hand = results.multi_handedness[0].classification[0].label
self.hand_state.clear_hand_states(detected_hand)
# If only one hand is detected, clear the information of the other hand
# to prevent data conflicts when the second hand appears.
for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
self.hand_tracker.mp_drawing.draw_landmarks(
image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
)
# Draw the connections of hand keypoints
temp_handness = results.multi_handedness[hand_idx].classification[0].label
handness_str += f'{hand_idx}:{temp_handness}, '
self.hand_state.is_index_finger_up[temp_handness] = False
image = self.index_handler.handle_index_finger(
image, hand_21, temp_handness, width, height
)
# Handle the index finger
image, index_finger_tip_str = FingerDrawer.draw_finger_points(
image, hand_21, temp_handness, width, height
)
if is_video:
image = cv2.flip(image, 1)
image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
else:
if is_video:
image = cv2.flip(image, 1)
# If it's input video from a rear-facing camera, flip the image before processing
# to ensure correct left and right hand detection, and flip it back afterward
# to prevent mirrored output errors.
self.hand_state.clear_hand_states()
# Clear hand states if no hands are detected
return image
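Note: a minimal sketch of driving handle_hand_gestures on a single BGR frame outside the Tkinter GUI; the preprocessing mirrors HandGestureProcessor.process_image, and the image paths are hypothetical:

import cv2
from hand_gesture import HandGestureHandler   # assumes this module's imports resolve (run from utils_en)

handler = HandGestureHandler()
frame = cv2.imread("sample_frame.jpg")         # hypothetical BGR test image
height, width = frame.shape[:2]
frame = cv2.flip(frame, 1)                     # mirror, as the camera path does
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)   # MediaPipe expects RGB
annotated = handler.handle_hand_gestures(rgb, width, height, is_video=False)
cv2.imwrite("annotated_frame.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))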

+ 114
- 0
utils_en/index_finger.py View File

@@ -0,0 +1,114 @@
import cv2
import time
import numpy as np
class IndexFingerHandler:
def __init__(self, hand_state, kalman_handler):
self.hand_state = hand_state
self.kalman_handler = kalman_handler
self.wait_time = 1.5
self.kalman_wait_time = 0.5
self.wait_box = 2
def handle_index_finger(self, image, hand_21, temp_handness, width, height):
cz0 = hand_21.landmark[0].z
self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
return image
# Handle the index finger's state and gesture effect, and update the image
def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
for i in range(21) if i not in [7, 8]) and \
self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
self.hand_state.is_index_finger_up[temp_handness] = True
# If both the index fingertip and first joint are above other keypoints,
# consider the index finger as raised.
if self.hand_state.is_index_finger_up[temp_handness]:
if not self.hand_state.gesture_locked[temp_handness]:
if self.hand_state.gesture_start_time[temp_handness] == 0:
self.hand_state.gesture_start_time[temp_handness] = time.time()
elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
self.hand_state.dragging[temp_handness] = True
self.hand_state.gesture_locked[temp_handness] = True
self.hand_state.drag_point[temp_handness] = (index_x, index_y)
# If the pointing gesture has lasted longer than the wait time, confirm the pointing action.
self.hand_state.buffer_start_time[temp_handness] = 0
# Buffer time to prevent immediate interruption due to recognition errors.
else:
if self.hand_state.buffer_start_time[temp_handness] == 0:
self.hand_state.buffer_start_time[temp_handness] = time.time()
elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
self.hand_state.gesture_start_time[temp_handness] = 0
self.hand_state.gesture_locked[temp_handness] = False
self.hand_state.dragging[temp_handness] = False
# If the interruption time of the pointing gesture exceeds the set buffer duration, formally terminate.
def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
if self.hand_state.dragging[temp_handness]:
if self.hand_state.start_drag_time[temp_handness] == 0:
self.hand_state.start_drag_time[temp_handness] = time.time()
self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
# If it's the first operation, record the time and reset the Kalman filter.
smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
# Use the Kalman filter to smooth the generated trajectory, reducing noise and jitter.
self.hand_state.drag_point[temp_handness] = (index_x, index_y)
index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
# Adjust the circle size based on the distance from the wrist root, slightly larger than FingerDrawer for visibility during gesture lock.
drag_point_smooth = (smooth_x, smooth_y)
if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
# Wait for the Kalman filter to stabilize data before adding coordinates to the trajectory.
else:
if len(self.hand_state.trajectory[temp_handness]) > 4:
contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
rect = cv2.minAreaRect(contour)
box = cv2.boxPoints(rect)
box = np.int0(box)
# Calculate the minimum enclosing rectangle when the drag points exceed 4.
self.hand_state.rect_draw_time[temp_handness] = time.time()
self.hand_state.last_drawn_box[temp_handness] = box
self.hand_state.start_drag_time[temp_handness] = 0
self.hand_state.trajectory[temp_handness].clear()
# Reset and clear
for i in range(1, len(self.hand_state.trajectory[temp_handness])):
pt1 = (int(self.hand_state.trajectory[temp_handness][i-1][0]), int(self.hand_state.trajectory[temp_handness][i-1][1]))
pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
cv2.line(image, pt1, pt2, (0, 0, 255), 2)
# Draw the drag path
if self.hand_state.last_drawn_box[temp_handness] is not None:
elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
if elapsed_time < self.wait_box:
cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
# Keep the bounding box visible for a set period for easier observation.
elif elapsed_time >= self.wait_box - 0.1:
box = self.hand_state.last_drawn_box[temp_handness]
x_min = max(0, min(box[:, 0]))
y_min = max(0, min(box[:, 1]))
x_max = min(image.shape[1], max(box[:, 0]))
y_max = min(image.shape[0], max(box[:, 1]))
cropped_image = image[y_min:y_max, x_min:x_max]
filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
cv2.imwrite(filename, cropped_image)
self.hand_state.last_drawn_box[temp_handness] = None
# To avoid accidentally cropping the hand into the bounding box,
# perform the crop in the last 0.1 seconds before the box disappears,
# giving enough time for the hand to move away.
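Note: the crop above relies on cv2.minAreaRect turning the free-hand trajectory into a rotated rectangle and cv2.boxPoints giving its four corners; the axis-aligned crop then comes from the corner extremes. A standalone sketch with made-up trajectory points:

import cv2
import numpy as np

trajectory = [(120, 80), (200, 90), (210, 180), (125, 170), (118, 120)]   # made-up drag points
contour = np.array(trajectory, dtype=np.int32)

rect = cv2.minAreaRect(contour)               # ((cx, cy), (w, h), angle)
box = cv2.boxPoints(rect).astype(np.intp)     # four corners of the rotated rectangle

x_min, y_min = box[:, 0].min(), box[:, 1].min()
x_max, y_max = box[:, 0].max(), box[:, 1].max()
print(box, (x_min, y_min, x_max, y_max))
# image[y_min:y_max, x_min:x_max] is the region that would be written to disk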

+ 36
- 0
utils_en/kalman_filter.py View File

@@ -0,0 +1,36 @@
import numpy as np
from filterpy.kalman import KalmanFilter
class KalmanHandler:
def __init__(self):
self.kalman_filters = {
'Left': KalmanFilter(dim_x=4, dim_z=2),
'Right': KalmanFilter(dim_x=4, dim_z=2)
}
for key in self.kalman_filters:
self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
[0, 1, 0, 1],
[0, 0, 1, 0],
[0, 0, 0, 1]])
self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
[0, 1, 0, 0]])
self.kalman_filters[key].P *= 1000.
self.kalman_filters[key].R = 3
self.kalman_filters[key].Q = np.eye(4) * 0.01
# These parameters were obtained through multiple tests and have shown stable performance.
def kalman_filter_point(self, hand_label, x, y):
kf = self.kalman_filters[hand_label]
kf.predict()
kf.update([x, y])
# Update state
return (kf.x[0], kf.x[1])
def reset_kalman_filter(self, hand_label, x, y):
kf = self.kalman_filters[hand_label]
kf.x = np.array([x, y, 0., 0.])
kf.P *= 1000.
# Reset
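Note: a quick way to see what this constant-velocity filter does is to feed it a jittery point stream and compare the smoothed output; a minimal sketch (the noise values are illustrative):

import numpy as np
from kalman_filter import KalmanHandler   # assumes this module is importable (run from utils_en)

handler = KalmanHandler()
rng = np.random.default_rng(0)

handler.reset_kalman_filter('Right', 100, 100)        # start the track at (100, 100)
for t in range(1, 20):
    noisy_x = 100 + 5 * t + rng.normal(0, 3)          # fingertip drifting right, plus jitter
    noisy_y = 100 + rng.normal(0, 3)
    sx, sy = handler.kalman_filter_point('Right', noisy_x, noisy_y)
    print(f"raw=({noisy_x:6.1f}, {noisy_y:6.1f})  smoothed=({sx:6.1f}, {sy:6.1f})")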

+ 17
- 0
utils_en/model.py View File

@@ -0,0 +1,17 @@
import mediapipe as mp
class HandTracker:
def __init__(self):
self.mp_hands = mp.solutions.hands
self.hands = self.mp_hands.Hands(
static_image_mode=False,
max_num_hands=1,
# Setting it to one would be more stable
min_detection_confidence=0.5,
min_tracking_confidence=0.5
)
self.mp_drawing = mp.solutions.drawing_utils
def process(self, image):
results = self.hands.process(image)
return results
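Note: HandTracker is a thin wrapper around MediaPipe Hands and returns the raw results object, so the normalized landmarks still have to be scaled by the frame size. A hedged sketch of reading the index fingertip from one image (the path is hypothetical):

import cv2
from model import HandTracker

tracker = HandTracker()
frame = cv2.imread("hand.jpg")                    # hypothetical BGR test image
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)      # MediaPipe expects RGB input
results = tracker.process(rgb)

if results.multi_hand_landmarks:
    hand = results.multi_hand_landmarks[0]
    tip = hand.landmark[8]                        # landmark 8 = index fingertip
    h, w = frame.shape[:2]
    print("index fingertip at", int(tip.x * w), int(tip.y * h))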

+ 24
- 0
utils_en/process_images.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler
class HandGestureProcessor:
def __init__(self):
self.hand_handler = HandGestureHandler()
def process_image(self, image, is_video):
start_time = time.time()
height, width = image.shape[:2]
image = cv2.flip(image, 1)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Preprocess the incoming video frame
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
spend_time = time.time() - start_time
FPS = 1.0 / spend_time if spend_time > 0 else 0
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
# Calculate and display the frame rate
return image
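Note: the processor can also be exercised without the Tkinter GUI through a plain OpenCV loop; a sketch assuming a webcam at index 0 (is_video=False keeps the camera-style mirroring):

import cv2
from process_images import HandGestureProcessor

processor = HandGestureProcessor()
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    annotated = processor.process_image(frame, False)          # returns an RGB image
    cv2.imshow("gesture", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()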

+ 65
- 0
utils_en/video_recognition.py View File

@@ -0,0 +1,65 @@
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk
def start_camera(canvas):
cap = cv2.VideoCapture(0)
if not cap.isOpened():
return "Cannot open camera"
gesture_processor = HandGestureProcessor()
show_frame(canvas, cap, gesture_processor)
def show_frame(canvas, cap, gesture_processor):
success, frame = cap.read()
if success:
processed_frame = gesture_processor.process_image(frame,False)
img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
imgtk = ImageTk.PhotoImage(image=img)
canvas.imgtk = imgtk
canvas.create_image(0, 0, anchor="nw", image=imgtk)
# Process the frame, convert it to RGB, and display it on the canvas
canvas.after(10, show_frame, canvas, cap, gesture_processor)
# Schedule the next call so every subsequent frame keeps being processed and displayed
else:
cap.release()
cv2.destroyAllWindows()
def upload_and_process_video(canvas, video_path):
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
return "Cannot open video file"
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
# Get the video's parameters
output_filename = "../video/processed_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
# Set the output video file path and an 'mp4v' codec that matches the .mp4 container
gesture_processor = HandGestureProcessor()
process_video_frame(canvas, cap, gesture_processor, out)
def process_video_frame(canvas, cap, gesture_processor, out):
success, frame = cap.read()
if success:
processed_frame = gesture_processor.process_image(frame,True)
out.write(processed_frame)
img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
imgtk = ImageTk.PhotoImage(image=img)
canvas.imgtk = imgtk
canvas.create_image(0, 0, anchor="nw", image=imgtk)
canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
else:
cap.release()
out.release()
cv2.destroyAllWindows()
messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
print("Processed video saved as processed_output.mp4")

+ 112
- 0
utils_zh/GUI.py View File

@@ -0,0 +1,112 @@
import cv2
import tkinter as tk
from tkinter import filedialog, messagebox
from video_recognition import start_camera, upload_and_process_video, show_frame
from process_images import HandGestureProcessor
current_mode = None
current_cap = None
# To track the current mode and camera resources
def create_gui():
root = tk.Tk()
root.title("Gesture Recognition")
root.geometry("800x600")
canvas = tk.Canvas(root, width=640, height=480)
canvas.pack()
# Create a canvas to display video content
camera_button = tk.Button(
root,
text="Use Camera for Real-time Recognition",
command=lambda: switch_to_camera(canvas)
)
camera_button.pack(pady=10)
# Button to start real-time recognition using the camera
video_button = tk.Button(
root,
text="Upload Video File for Processing",
command=lambda: select_and_process_video(canvas, root)
)
video_button.pack(pady=10)
# Button to upload and process video files
root.mainloop()
def switch_to_camera(canvas):
global current_mode, current_cap
stop_current_operation()
# Stop the current operation and release the camera
current_mode = "camera"
canvas.delete("all")
# Set the current mode to camera and clear the Canvas
current_cap = cv2.VideoCapture(0)
if not current_cap.isOpened():
messagebox.showerror("Error", "Cannot open camera")
current_mode = None
return
# Start the camera
start_camera(canvas, current_cap)
# Pass in the canvas and current_cap
def select_and_process_video(canvas, root):
global current_mode, current_cap
stop_current_operation()
current_mode = "video"
canvas.delete("all")
video_path = filedialog.askopenfilename(
title="Select a Video File",
filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*"))
)
# Select a video file
if video_path:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
messagebox.showerror("Error", "Cannot open video file")
return
# Get the video's width and height and adjust the Canvas size
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
canvas.config(width=frame_width, height=frame_height)
root.geometry(f"{frame_width + 160}x{frame_height + 200}")  # Adjust window size
# Get the video dimensions and dynamically resize the canvas
error_message = upload_and_process_video(canvas, video_path)
if error_message:
messagebox.showerror("Error", error_message)
# Upload and process the video file
def stop_current_operation():
global current_cap
if current_cap and current_cap.isOpened():
current_cap.release()
cv2.destroyAllWindows()
current_cap = None
# Stop the current operation, release camera resources, and close all windows
def start_camera(canvas, cap):
if not cap.isOpened():
return "Cannot open camera"
gesture_processor = HandGestureProcessor()
show_frame(canvas, cap, gesture_processor)
# Start the camera for real-time gesture recognition
if __name__ == "__main__":
create_gui()

+ 0
- 0
utils_zh/__init__.py View File


BIN
utils_zh/__pycache__/__init__.cpython-39.pyc View File


BIN
utils_zh/__pycache__/finger_drawer.cpython-39.pyc View File


BIN
utils_zh/__pycache__/gesture_data.cpython-39.pyc View File


BIN
utils_zh/__pycache__/hand_gesture.cpython-39.pyc View File


BIN
utils_zh/__pycache__/index_finger.cpython-39.pyc View File


BIN
utils_zh/__pycache__/kalman_filter.cpython-39.pyc View File


BIN
utils_zh/__pycache__/model.cpython-39.pyc View File


BIN
utils_zh/__pycache__/process_images.cpython-39.pyc View File


BIN
utils_zh/__pycache__/video_recognition.cpython-39.pyc View File


+ 34
- 0
utils_zh/finger_drawer.py View File

@@ -0,0 +1,34 @@
import cv2
class FingerDrawer:
@staticmethod
def draw_finger_points(image, hand_21, temp_handness, width, height):
cz0 = hand_21.landmark[0].z
index_finger_tip_str = ''
for i in range(21):
cx = int(hand_21.landmark[i].x * width)
cy = int(hand_21.landmark[i].y * height)
cz = hand_21.landmark[i].z
depth_z = cz0 - cz
radius = max(int(6 * (1 + depth_z * 5)), 0)
# Adjust the radius of the dot based on depth
if i == 0:
image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
elif i == 8:
image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, '
elif i in [1, 5, 9, 13, 17]:
image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
elif i in [2, 6, 10, 14, 18]:
image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
elif i in [3, 7, 11, 15, 19]:
image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
elif i in [4, 12, 16, 20]:
image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
# Draw different-colored dots for each joint group and scale them by depth relative to the wrist root
return image, index_finger_tip_str

+ 43
- 0
utils_zh/gesture_data.py View File

@@ -0,0 +1,43 @@
from collections import deque
class HandState:
def __init__(self):
self.gesture_locked = {'Left': False, 'Right': False}
self.gesture_start_time = {'Left': 0, 'Right': 0}
self.buffer_start_time = {'Left': 0, 'Right': 0}
self.start_drag_time = {'Left': 0, 'Right': 0}
self.dragging = {'Left': False, 'Right': False}
self.drag_point = {'Left': (0, 0), 'Right': (0, 0)}
self.buffer_duration = {'Left': 0.25, 'Right': 0.25}
self.is_index_finger_up = {'Left': False, 'Right': False}
self.index_finger_second = {'Left': 0, 'Right': 0}
self.index_finger_tip = {'Left': 0, 'Right': 0}
self.trajectory = {'Left': [], 'Right': []}
self.square_queue = deque()
self.wait_time = 1.5
self.kalman_wait_time = 0.5
self.wait_box = 2
self.rect_draw_time = {'Left': 0, 'Right': 0}
self.last_drawn_box = {'Left': None, 'Right': None}
def clear_hand_states(self, detected_hand='Both'):
hands_to_clear = {'Left', 'Right'}
if detected_hand == 'Both':
hands_to_clear = hands_to_clear
else:
hands_to_clear -= {detected_hand}
for h in hands_to_clear:
self.gesture_locked[h] = False
self.gesture_start_time[h] = 0
self.buffer_start_time[h] = 0
self.dragging[h] = False
self.drag_point[h] = (0, 0)
self.buffer_duration[h] = 0.25
self.is_index_finger_up[h] = False
self.trajectory[h].clear()
self.start_drag_time[h] = 0
self.rect_draw_time[h] = 0
self.last_drawn_box[h] = None
# Records information for the left and right hands separately; mixing them could cause data conflicts

+ 24
- 0
utils_zh/gesture_process.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler
class HandGestureProcessor:
def __init__(self):
self.hand_handler = HandGestureHandler()
def process_image(self, image):
start_time = time.time()
height, width = image.shape[:2]
image = cv2.flip(image, 1)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Get the image dimensions, flip, and convert the color space
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video=False)  # handle_hand_gestures also expects the is_video flag
spend_time = time.time() - start_time
FPS = 1.0 / spend_time if spend_time > 0 else 0
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
# Calculate and display the frame rate
return image

+ 403
- 0
utils_zh/gesture_recognition.ipynb View File

@@ -0,0 +1,403 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.761076Z",
"start_time": "2024-09-07T05:11:22.404354Z"
}
},
"source": [
"import cv2\n",
"import time\n",
"import mediapipe\n",
"import numpy as np\n",
"from collections import deque\n",
"from filterpy.kalman import KalmanFilter"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.777139Z",
"start_time": "2024-09-07T05:11:28.761076Z"
}
},
"cell_type": "code",
"source": [
"gesture_locked = {'Left':False,'Right':False}\n",
"gesture_start_time = {'Left':0,'Right':0}\n",
"buffer_start_time = {'Left':0,'Right':0}\n",
"start_drag_time = {'Left':0,'Right':0}\n",
"dragging = {'Left':False,'Right':False}\n",
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n",
"buffer_duration = {'Left':0.25,'Right':0.25}\n",
"is_index_finger_up = {'Left':False,'Right':False}\n",
"index_finger_second = {'Left':0,'Right':0}\n",
"index_finger_tip = {'Left':0,'Right':0}\n",
"trajectory = {'Left':[],'Right':[]}\n",
"square_queue = deque()\n",
"wait_time = 1.5\n",
"kalman_wait_time = 0.5\n",
"wait_box = 2\n",
"rect_draw_time = {'Left':0,'Right':0}\n",
"last_drawn_box = {'Left':None,'Right':None}\n",
"elapsed_time = {'Left':0,'Right':0}"
],
"id": "40aada17ccd31fe",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.824573Z",
"start_time": "2024-09-07T05:11:28.777139Z"
}
},
"cell_type": "code",
"source": [
"def clear_hand_states(detected_hand ='Both'):\n",
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
" \n",
" hands_to_clear = {'Left', 'Right'}\n",
" if detected_hand == 'Both':\n",
" hands_to_clear = hands_to_clear\n",
" else:\n",
" hands_to_clear -= {detected_hand}\n",
" # 反向判断左右手\n",
"\n",
" for h in hands_to_clear:\n",
" gesture_locked[h] = False\n",
" gesture_start_time[h] = 0\n",
" buffer_start_time[h] = 0\n",
" dragging[h] = False\n",
" drag_point[h] = (0, 0)\n",
" buffer_duration[h] = 0.25\n",
" is_index_finger_up[h] = False\n",
" trajectory[h].clear()\n",
" start_drag_time[h] = 0\n",
" rect_draw_time[h] = 0\n",
" last_drawn_box[h] = None\n",
" elapsed_time[h] = 0\n",
" # 清空没被检测的手"
],
"id": "2ee9323bb1c25cc0",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.855831Z",
"start_time": "2024-09-07T05:11:28.824573Z"
}
},
"cell_type": "code",
"source": [
"kalman_filters = {\n",
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n",
"}\n",
"\n",
"for key in kalman_filters:\n",
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n",
" kalman_filters[key].F = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])\n",
" # 状态转移矩阵\n",
" kalman_filters[key].H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])\n",
" # 观测矩阵\n",
" kalman_filters[key].P *= 1000.\n",
" kalman_filters[key].R = 3\n",
" kalman_filters[key].Q = np.eye(4) * 0.01\n",
"\n",
"def kalman_filter_point(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.predict()\n",
" kf.update([x, y])\n",
" # 更新状态\n",
" return (kf.x[0], kf.x[1])\n",
"\n",
"def reset_kalman_filter(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.x = np.array([x, y, 0., 0.])\n",
" kf.P *= 1000.\n",
" # 重置"
],
"id": "96cf431d2562e7d",
"outputs": [],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.887346Z",
"start_time": "2024-09-07T05:11:28.855831Z"
}
},
"cell_type": "code",
"source": [
"\n",
"mp_hands = mediapipe.solutions.hands\n",
"\n",
"hands = mp_hands.Hands(\n",
" static_image_mode=False,\n",
" max_num_hands=2,\n",
" # 一只更稳定\n",
" min_detection_confidence=0.5,\n",
" min_tracking_confidence=0.5\n",
")\n",
"\n",
"mp_drawing = mediapipe.solutions.drawing_utils\n",
"clear_hand_states()"
],
"id": "edc274b7ed495122",
"outputs": [],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.934274Z",
"start_time": "2024-09-07T05:11:28.887346Z"
}
},
"cell_type": "code",
"source": [
"def process_image(image):\n",
"\n",
" start_time = time.time()\n",
" height, width = image.shape[:2]\n",
" image = cv2.flip(image, 1)\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" # 预处理帧\n",
" \n",
" results = hands.process(image)\n",
" \n",
" if results.multi_hand_landmarks:\n",
" # 如果检测到手\n",
" \n",
" handness_str = ''\n",
" index_finger_tip_str = ''\n",
" \n",
" if len(results.multi_hand_landmarks) == 1:\n",
" clear_hand_states(detected_hand = results.multi_handedness[0].classification[0].label)\n",
" # 如果只有一只手 则清空另一只手的数据 避免后续冲突导致不稳定\n",
" \n",
" for hand_idx in range(len(results.multi_hand_landmarks)):\n",
" \n",
" hand_21 = results.multi_hand_landmarks[hand_idx]\n",
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n",
" \n",
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n",
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n",
" is_index_finger_up[temp_handness] = False\n",
" # 先设置为false 防止放下被错误更新为竖起\n",
" \n",
" cz0 = hand_21.landmark[0].z\n",
" index_finger_second[temp_handness] = hand_21.landmark[7]\n",
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n",
" # 食指指尖和第一个关节\n",
" \n",
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n",
"\n",
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n",
" is_index_finger_up[temp_handness] = True\n",
" # 如果指尖和第二个关节高度大于整只手所有关节点 则视为执行“指向”操作 \n",
"\n",
" if is_index_finger_up[temp_handness]:\n",
" if not gesture_locked[temp_handness]:\n",
" if gesture_start_time[temp_handness] == 0:\n",
" gesture_start_time[temp_handness] = time.time()\n",
" # 记录食指抬起的时间\n",
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n",
" dragging[temp_handness] = True\n",
" gesture_locked[temp_handness] = True\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" # 如果食指抬起的时间大于预设的等待时间则视为执行“指向”操作\n",
" buffer_start_time[temp_handness] = 0\n",
" # 检测到食指竖起就刷新缓冲时间\n",
" else:\n",
" if buffer_start_time[temp_handness] == 0:\n",
" buffer_start_time[temp_handness] = time.time()\n",
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n",
" gesture_start_time[temp_handness] = 0\n",
" gesture_locked[temp_handness] = False\n",
" dragging[temp_handness] = False\n",
" # 如果缓冲时间大于设定 就证明已经结束指向操作\n",
" # 这样可以防止某一帧识别有误导致指向操作被错误清除\n",
" \n",
" if dragging[temp_handness]:\n",
"\n",
" if start_drag_time[temp_handness] == 0:\n",
" start_drag_time[temp_handness] = time.time()\n",
" reset_kalman_filter(temp_handness, index_x, index_y)\n",
" # 每次画线的时候初始化滤波器\n",
" \n",
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n",
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n",
" # 根据离掌根的深度距离来构建一个圆\n",
" # 用来显示已经开始指向操作\n",
" # 和下方构建的深度点位对应 直接用倍数\n",
" drag_point_smooth = (smooth_x, smooth_y)\n",
" \n",
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n",
" trajectory[temp_handness].append(drag_point_smooth)\n",
" # 因为kalman滤波器初始化的时候会很不稳定 前几帧通常会有较为严重的噪声\n",
" # 所以直接等待前几帧运行完成之后再将点位加到轨迹列表中\n",
" else:\n",
" if len(trajectory[temp_handness]) > 4:\n",
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
" rect = cv2.minAreaRect(contour)\n",
" box = cv2.boxPoints(rect)\n",
" box = np.int0(box)\n",
" rect_draw_time[temp_handness] = time.time()\n",
" last_drawn_box[temp_handness] = box\n",
" # 如果指向操作结束 轨迹列表有至少四个点的时候\n",
" # 使用最小包围图形将画的不规则图案调整为一个矩形\n",
"\n",
" start_drag_time[temp_handness] = 0\n",
" trajectory[temp_handness].clear()\n",
"\n",
" for i in range(1, len(trajectory[temp_handness])):\n",
"\n",
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n",
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n",
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n",
" # 绘制连接轨迹点的线\n",
"\n",
" if last_drawn_box[temp_handness] is not None:\n",
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n",
" \n",
" if elapsed_time[temp_handness] < wait_box:\n",
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n",
" # 将矩形框保留一段时间 否则一帧太快 无法看清效果\n",
" \n",
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n",
" \n",
" box = last_drawn_box[temp_handness]\n",
" x_min = max(0, min(box[:, 0]))\n",
" y_min = max(0, min(box[:, 1]))\n",
" x_max = min(image.shape[1], max(box[:, 0]))\n",
" y_max = min(image.shape[0], max(box[:, 1]))\n",
" cropped_image = image[y_min:y_max, x_min:x_max]\n",
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n",
" cv2.imwrite(filename, cropped_image)\n",
" last_drawn_box[temp_handness] = None\n",
" # 不能直接剪裁画完的图像 可能会错误的将手剪裁进去\n",
" # 等待一段时间 有一个给手缓冲移动走的时间再将这一帧里的矩形提取出来\n",
" \n",
" for i in range(21):\n",
" \n",
" cx = int(hand_21.landmark[i].x * width)\n",
" cy = int(hand_21.landmark[i].y * height)\n",
" cz = hand_21.landmark[i].z\n",
" depth_z = cz0 - cz\n",
" radius = max(int(6 * (1 + depth_z*5)), 0)\n",
" \n",
" if i == 0:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n",
" if i == 8:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n",
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n",
" if i in [1,5,9,13,17]: \n",
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n",
" if i in [2,6,10,14,18]:\n",
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n",
" if i in [3,7,11,15,19]:\n",
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n",
" if i in [4,12,16,20]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n",
" # 提取出每一个关节点 赋予对应的颜色和根据掌根的深度\n",
" \n",
" scaler= 1\n",
" image = cv2.putText(image,handness_str, (25*scaler, 100*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
" image = cv2.putText(image,index_finger_tip_str, (25*scaler, 150*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
"\n",
" spend_time = time.time() - start_time\n",
" if spend_time > 0:\n",
" FPS = 1.0 / spend_time\n",
" else:\n",
" FPS = 0\n",
" \n",
" image = cv2.putText(image,'FPS '+str(int(FPS)),(25*scaler,50*scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25*scaler,(0,0,255),2,)\n",
" # 显示FPS 检测到的手和食指指尖对于掌根的深度值\n",
" \n",
" else:\n",
" clear_hand_states()\n",
" # 如果没检测到手就清空全部信息\n",
" \n",
" return image"
],
"id": "51ff809ecaf1f899",
"outputs": [],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:19:32.248575Z",
"start_time": "2024-09-07T05:11:28.934663Z"
}
},
"cell_type": "code",
"source": [
"cap = cv2.VideoCapture(1)\n",
"cap.open(0)\n",
"\n",
"while cap.isOpened():\n",
" success, frame = cap.read()\n",
" if not success:\n",
" print(\"Camera Error\")\n",
" break\n",
" \n",
" frame = process_image(frame)\n",
" cv2.imshow('Video', frame)\n",
" \n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
" \n",
"cap.release()\n",
"cv2.destroyAllWindows() "
],
"id": "b7ce23e80ed36041",
"outputs": [],
"execution_count": 7
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "10fca4bc34a944ea"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 56
- 0
utils_zh/hand_gesture.py View File

@@ -0,0 +1,56 @@
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from finger_drawer import FingerDrawer
class HandGestureHandler:
def __init__(self):
self.hand_state = HandState()
self.kalman_handler = KalmanHandler()
self.hand_tracker = HandTracker()
self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)
def handle_hand_gestures(self, image, width, height, is_video):
results = self.hand_tracker.process(image)
if results.multi_hand_landmarks:
handness_str = ''
index_finger_tip_str = ''
if len(results.multi_hand_landmarks) == 1:
detected_hand = results.multi_handedness[0].classification[0].label
self.hand_state.clear_hand_states(detected_hand)
# If only one hand is detected, clear the other hand's information to avoid data conflicts when the second hand appears
for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
self.hand_tracker.mp_drawing.draw_landmarks(
image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
)
# Draw the hand keypoint connections
temp_handness = results.multi_handedness[hand_idx].classification[0].label
handness_str += f'{hand_idx}:{temp_handness}, '
self.hand_state.is_index_finger_up[temp_handness] = False
image = self.index_handler.handle_index_finger(
image, hand_21, temp_handness, width, height
)
# Handle the index finger
image, index_finger_tip_str = FingerDrawer.draw_finger_points(image, hand_21, temp_handness, width, height)
if is_video:
image = cv2.flip(image, 1)
image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
else:
if is_video:
image = cv2.flip(image, 1)
# 如果是后置摄像头的输入视频,则需要在处理前翻转图像,确保手势检测的左右手正确;
# 处理完毕后再翻转回来,以防止最终输出的图像出现镜像错误。
self.hand_state.clear_hand_states()
# 如果未检测到手 则清空手部状态
return image
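
For orientation, a minimal driver sketch (not part of this commit; it assumes execution from inside utils_zh with a webcam on index 0, and the preview.jpg path is purely illustrative) showing how a single mirrored RGB frame would be fed through HandGestureHandler:

import cv2
from hand_gesture import HandGestureHandler

handler = HandGestureHandler()
cap = cv2.VideoCapture(0)
ok, frame = cap.read()
if ok:
    frame = cv2.flip(frame, 1)                      # mirror the frame, as the processor does
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)    # MediaPipe expects RGB input
    h, w = rgb.shape[:2]
    annotated = handler.handle_hand_gestures(rgb, w, h, is_video=False)
    cv2.imwrite("preview.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))  # back to BGR for saving
cap.release()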

+ 112
- 0
utils_zh/index_finger.py View File

@ -0,0 +1,112 @@
import cv2
import time
import numpy as np


class IndexFingerHandler:
    def __init__(self, hand_state, kalman_handler):
        self.hand_state = hand_state
        self.kalman_handler = kalman_handler
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2

    def handle_index_finger(self, image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
        self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
        index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
        index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
        self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
        self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
        return image
        # Update the index-finger state, apply the gesture effects, and return the updated image

    def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
        if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
               for i in range(21) if i not in [7, 8]) and \
                self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
            self.hand_state.is_index_finger_up[temp_handness] = True
            # If both the index fingertip and the joint below it are above every other keypoint,
            # the index finger is considered raised

        if self.hand_state.is_index_finger_up[temp_handness]:
            if not self.hand_state.gesture_locked[temp_handness]:
                if self.hand_state.gesture_start_time[temp_handness] == 0:
                    self.hand_state.gesture_start_time[temp_handness] = time.time()
                elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
                    self.hand_state.dragging[temp_handness] = True
                    self.hand_state.gesture_locked[temp_handness] = True
                    self.hand_state.drag_point[temp_handness] = (index_x, index_y)
                    # Once the pointing gesture has been held longer than the wait time,
                    # the pointing (drag) operation formally starts
            self.hand_state.buffer_start_time[temp_handness] = 0
            # Reset the buffer timer that keeps brief recognition errors from
            # instantly interrupting the pointing operation
        else:
            if self.hand_state.buffer_start_time[temp_handness] == 0:
                self.hand_state.buffer_start_time[temp_handness] = time.time()
            elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
                self.hand_state.gesture_start_time[temp_handness] = 0
                self.hand_state.gesture_locked[temp_handness] = False
                self.hand_state.dragging[temp_handness] = False
                # If the interruption has lasted longer than the configured buffer time,
                # the pointing operation formally ends

    def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
        if self.hand_state.dragging[temp_handness]:
            if self.hand_state.start_drag_time[temp_handness] == 0:
                self.hand_state.start_drag_time[temp_handness] = time.time()
                self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
                # On the first frame of a drag, record the start time and reset the Kalman filter
            smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
            # Smooth the trajectory with the Kalman filter to reduce noise and jitter
            self.hand_state.drag_point[temp_handness] = (index_x, index_y)
            index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
            cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
            # Scale the circle with the depth relative to the wrist, slightly larger than in
            # FingerDrawer, so it is easy to see whether the pointing operation is locked
            drag_point_smooth = (smooth_x, smooth_y)
            if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
                self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
                # The filter needs a moment to stabilise after a reset, so points are only
                # appended to the trajectory once it has settled
        else:
            if len(self.hand_state.trajectory[temp_handness]) > 4:
                contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
                rect = cv2.minAreaRect(contour)
                box = cv2.boxPoints(rect)
                box = np.intp(box)
                # Once the drag contains more than four points, compute its minimum-area bounding rectangle
                self.hand_state.rect_draw_time[temp_handness] = time.time()
                self.hand_state.last_drawn_box[temp_handness] = box
            self.hand_state.start_drag_time[temp_handness] = 0
            self.hand_state.trajectory[temp_handness].clear()
            # Reset the drag timer and clear the trajectory

        for i in range(1, len(self.hand_state.trajectory[temp_handness])):
            pt1 = (int(self.hand_state.trajectory[temp_handness][i - 1][0]), int(self.hand_state.trajectory[temp_handness][i - 1][1]))
            pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
            cv2.line(image, pt1, pt2, (0, 0, 255), 2)
            # Draw the drag path

        if self.hand_state.last_drawn_box[temp_handness] is not None:
            elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
            if elapsed_time < self.wait_box:
                cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
                # Keep the bounding box on screen for a while so it is easy to see
            elif elapsed_time >= self.wait_box - 0.1:
                box = self.hand_state.last_drawn_box[temp_handness]
                x_min = max(0, min(box[:, 0]))
                y_min = max(0, min(box[:, 1]))
                x_max = min(image.shape[1], max(box[:, 0]))
                y_max = min(image.shape[0], max(box[:, 1]))
                cropped_image = image[y_min:y_max, x_min:x_max]
                filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
                cv2.imwrite(filename, cropped_image)
                self.hand_state.last_drawn_box[temp_handness] = None
                # Cropping the instant the box is drawn would very likely capture the hand
                # as well, so the crop is taken only when the box is about to disappear,
                # giving the hand enough time to move out of the frame
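
As a standalone illustration of the min-area-rectangle step used above (the trajectory points are synthetic, not from the commit), this shows what cv2.minAreaRect and cv2.boxPoints return for a rough fingertip path and how an axis-aligned crop region is derived from the rotated box:

import numpy as np
import cv2

trajectory = [(120, 80), (300, 90), (310, 240), (130, 230), (125, 150)]  # synthetic fingertip points
contour = np.array(trajectory, dtype=np.int32)
rect = cv2.minAreaRect(contour)      # ((cx, cy), (w, h), angle)
box = np.intp(cv2.boxPoints(rect))   # four corner points of the rotated rectangle

x_min, y_min = box[:, 0].min(), box[:, 1].min()
x_max, y_max = box[:, 0].max(), box[:, 1].max()
print(rect, (x_min, y_min, x_max, y_max))  # crop region that would be cut from the frame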

+ 36
- 0
utils_zh/kalman_filter.py View File

@ -0,0 +1,36 @@
import numpy as np
from filterpy.kalman import KalmanFilter


class KalmanHandler:
    def __init__(self):
        self.kalman_filters = {
            'Left': KalmanFilter(dim_x=4, dim_z=2),
            'Right': KalmanFilter(dim_x=4, dim_z=2)
        }
        for key in self.kalman_filters:
            self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
            self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
                                                   [0, 1, 0, 1],
                                                   [0, 0, 1, 0],
                                                   [0, 0, 0, 1]])
            self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
                                                   [0, 1, 0, 0]])
            self.kalman_filters[key].P *= 1000.
            self.kalman_filters[key].R = 3
            self.kalman_filters[key].Q = np.eye(4) * 0.01
            # These parameters were found through repeated testing and behave fairly stably

    def kalman_filter_point(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.predict()
        kf.update([x, y])
        # Predict, then correct with the measured fingertip position
        return (kf.x[0], kf.x[1])

    def reset_kalman_filter(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.x = np.array([x, y, 0., 0.])
        kf.P *= 1000.
        # Reset the state to the new starting point and inflate the covariance
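
A small usage sketch (illustrative only; it assumes filterpy is installed and that the script runs from inside utils_zh so the flat import resolves) showing the constant-velocity filter above smoothing a simulated noisy fingertip track:

import numpy as np
from kalman_filter import KalmanHandler

handler = KalmanHandler()
rng = np.random.default_rng(0)
true_x = np.linspace(100, 300, 30)   # fingertip moving right at roughly constant speed
true_y = np.linspace(200, 220, 30)

handler.reset_kalman_filter('Right', true_x[0], true_y[0])
for x, y in zip(true_x, true_y):
    noisy_x = x + rng.normal(0, 4)   # simulated detection jitter
    noisy_y = y + rng.normal(0, 4)
    smooth_x, smooth_y = handler.kalman_filter_point('Right', noisy_x, noisy_y)

print(round(float(smooth_x), 1), round(float(smooth_y), 1))  # settles near the true endpoint (300, 220)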

+ 17
- 0
utils_zh/model.py View File

@ -0,0 +1,17 @@
import mediapipe as mp


class HandTracker:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            # Tracking a single hand at a time is more stable
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        self.mp_drawing = mp.solutions.drawing_utils

    def process(self, image):
        results = self.hands.process(image)
        return results
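
A quick, illustrative check of the wrapper (the hand.jpg path is a hypothetical placeholder, and this assumes execution from inside utils_zh): HandTracker expects an RGB image, and MediaPipe returns landmarks normalised to [0, 1].

import cv2
from model import HandTracker

tracker = HandTracker()
frame = cv2.imread("hand.jpg")  # hypothetical test image path
if frame is not None:
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB input
    results = tracker.process(rgb)
    if results.multi_hand_landmarks:
        tip = results.multi_hand_landmarks[0].landmark[8]  # index fingertip
        label = results.multi_handedness[0].classification[0].label
        print(label, round(tip.x, 3), round(tip.y, 3), round(tip.z, 3))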

+ 24
- 0
utils_zh/process_images.py View File

@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler


class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Pre-process the incoming frame: mirror it and convert BGR to RGB for MediaPipe

        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)

        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and display the frame rate
        return image
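
A minimal loop sketch (not part of the commit; it assumes a default webcam at index 0 and execution from inside utils_zh). Since process_image returns an RGB image, it is converted back to BGR before display:

import cv2
from process_images import HandGestureProcessor

processor = HandGestureProcessor()
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    annotated = processor.process_image(frame, is_video=False)
    cv2.imshow('HandGesture', cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))  # back to BGR for imshow
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
        break
cap.release()
cv2.destroyAllWindows()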

+ 65
- 0
utils_zh/video_recognition.py View File

@ -0,0 +1,65 @@
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk


def start_camera(canvas):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)


def show_frame(canvas, cap, gesture_processor):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, False)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        # Process the frame, convert it for Tk, and draw it on the canvas
        canvas.after(10, show_frame, canvas, cap, gesture_processor)
        # Schedule the next call so every following frame keeps being processed and displayed
    else:
        cap.release()
        cv2.destroyAllWindows()


def upload_and_process_video(canvas, video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Cannot open video file"

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Read the input video's properties

    output_filename = "../video/processed_output.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    # Set the output file path and a codec that matches the .mp4 container

    gesture_processor = HandGestureProcessor()
    process_video_frame(canvas, cap, gesture_processor, out)


def process_video_frame(canvas, cap, gesture_processor, out):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, True)
        out.write(processed_frame)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
    else:
        cap.release()
        out.release()
        cv2.destroyAllWindows()
        messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
        print("Processed video saved as processed_output.mp4")
